Skip to content

Commit

Permalink
cmd/compile: memcombine if values being stored are from consecutive l…
Browse files Browse the repository at this point in the history
…oads

If we load 2 values and then store those 2 loaded values, we can likely
perform that operation with a single wider load and store.

Fixes #60709

Change-Id: Ifc5f92c2f1b174c6ed82a69070f16cec6853c770
Reviewed-on: https://go-review.googlesource.com/c/go/+/502295
Reviewed-by: David Chase <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Run-TryBot: Keith Randall <[email protected]>
  • Loading branch information
randall77 authored and dr2chase committed Jul 21, 2023
1 parent ffd9bd7 commit e713d6f
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 159 deletions.
30 changes: 0 additions & 30 deletions src/cmd/compile/internal/ssa/_gen/AMD64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -1487,36 +1487,6 @@
&& clobber(x)
=> (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)

// Combine two adjacent 1-byte load/store pairs into one 2-byte pair.
// Matches a MOVBstore at offset i whose value is a MOVBload from offset j,
// layered on a MOVBstore at offset i-1 whose value is a MOVBload from
// offset j-1. Both stores share base p and symbol s; both loads share base
// p2, symbol s2 and memory state mem, which is also the memory argument of
// the inner store. Each load and the inner store must have exactly one use;
// all three are clobbered and replaced by a single MOVWload at j-1 feeding
// a single MOVWstore at i-1.
(MOVBstore [i] {s} p
x1:(MOVBload [j] {s2} p2 mem)
mem2:(MOVBstore [i-1] {s} p
x2:(MOVBload [j-1] {s2} p2 mem) mem))
&& x1.Uses == 1
&& x2.Uses == 1
&& mem2.Uses == 1
&& clobber(x1, x2, mem2)
=> (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)

// Combine two adjacent 2-byte load/store pairs into one 4-byte pair.
// Matches a MOVWstore at offset i whose value is a MOVWload from offset j,
// layered on a MOVWstore at offset i-2 whose value is a MOVWload from
// offset j-2. Both stores share base p and symbol s; both loads share base
// p2, symbol s2 and memory state mem, which is also the memory argument of
// the inner store. Each load and the inner store must have exactly one use;
// all three are clobbered and replaced by a single MOVLload at j-2 feeding
// a single MOVLstore at i-2.
(MOVWstore [i] {s} p
x1:(MOVWload [j] {s2} p2 mem)
mem2:(MOVWstore [i-2] {s} p
x2:(MOVWload [j-2] {s2} p2 mem) mem))
&& x1.Uses == 1
&& x2.Uses == 1
&& mem2.Uses == 1
&& clobber(x1, x2, mem2)
=> (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)

// Combine two adjacent 4-byte load/store pairs into one 8-byte pair.
// Matches a MOVLstore at offset i whose value is a MOVLload from offset j,
// layered on a MOVLstore at offset i-4 whose value is a MOVLload from
// offset j-4. Both stores share base p and symbol s; both loads share base
// p2, symbol s2 and memory state mem, which is also the memory argument of
// the inner store. Each load and the inner store must have exactly one use;
// all three are clobbered and replaced by a single MOVQload at j-4 feeding
// a single MOVQstore at i-4.
(MOVLstore [i] {s} p
x1:(MOVLload [j] {s2} p2 mem)
mem2:(MOVLstore [i-4] {s} p
x2:(MOVLload [j-4] {s2} p2 mem) mem))
&& x1.Uses == 1
&& x2.Uses == 1
&& mem2.Uses == 1
&& clobber(x1, x2, mem2)
=> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)

// Merge load and op
// TODO: add indexed variants?
// For each of ADDQ/SUBQ/ANDQ/ORQ/XORQ: when the second operand l is a
// MOVQload and canMergeLoadClobber(v, l, x) holds, clobber l and rewrite to
// the matching ...Qload op, which takes x plus the load's offset, symbol,
// pointer and memory directly.
((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem)
Expand Down
70 changes: 70 additions & 0 deletions src/cmd/compile/internal/ssa/memcombine.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,8 @@ func combineStores(root *Value, n int64) bool {
return false
}
if x.Aux.(*types.Type).Size() != size {
// TODO: the constant source and consecutive load source cases
// do not need all the stores to be the same size.
return false
}
base, off := splitPtr(x.Args[0])
Expand Down Expand Up @@ -572,6 +574,74 @@ func combineStores(root *Value, n int64) bool {
return true
}

// Check for consecutive loads as the source of the stores.
var loadMem *Value
var loadBase BaseAddress
var loadIdx int64
for i := int64(0); i < n; i++ {
load := a[i].store.Args[1]
if load.Op != OpLoad {
loadMem = nil
break
}
if load.Uses != 1 {
loadMem = nil
break
}
if load.Type.IsPtr() {
// Don't combine stores containing a pointer, as we need
// a write barrier for those. This can't currently happen,
// but might in the future if we ever have another
// 8-byte-reg/4-byte-ptr architecture like amd64p32.
loadMem = nil
break
}
mem := load.Args[1]
base, idx := splitPtr(load.Args[0])
if loadMem == nil {
// First one we found
loadMem = mem
loadBase = base
loadIdx = idx
continue
}
if base != loadBase || mem != loadMem {
loadMem = nil
break
}
if idx != loadIdx+(a[i].offset-a[0].offset) {
loadMem = nil
break
}
}
if loadMem != nil {
// Modify the first load to do a larger load instead.
load := a[0].store.Args[1]
switch size * n {
case 2:
load.Type = types.Types[types.TUINT16]
case 4:
load.Type = types.Types[types.TUINT32]
case 8:
load.Type = types.Types[types.TUINT64]
}

// Modify root to do the store.
for i := int64(0); i < n; i++ {
v := a[i].store
if v == root {
v.Aux = load.Type // widen store type
v.SetArg(0, ptr)
v.SetArg(1, load)
v.SetArg(2, mem)
} else {
clobber(v)
v.Type = types.Types[types.TBOOL] // erase memory type
}
}
return true
}

// Check that all the shift/trunc are of the same base value.
shiftBase := getShiftBase(a)
if shiftBase == nil {
Expand Down
129 changes: 0 additions & 129 deletions src/cmd/compile/internal/ssa/rewriteAMD64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions test/codegen/memcombine.go
Original file line number Diff line number Diff line change
Expand Up @@ -836,3 +836,25 @@ func zero_uint64_2(d1, d2 []uint64) {
d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
}

// loadstore copies the four bytes of *q into *p, reading all four elements
// into locals before writing any of them back.
//
// It is a codegen test: the directive comments inside the body are attached
// to the statement that follows them. They require a MOVL (amd64) or
// MOVWU/MOVW (arm64) to appear for the load and store statements, and
// forbid the byte-sized MOVB/MOVBU forms.
func loadstore(p, q *[4]uint8) {
// amd64:"MOVL",-"MOVB"
// arm64:"MOVWU",-"MOVBU"
x0, x1, x2, x3 := q[0], q[1], q[2], q[3]
// amd64:"MOVL",-"MOVB"
// arm64:"MOVW",-"MOVB"
p[0], p[1], p[2], p[3] = x0, x1, x2, x3
}

// S1 is a struct of two int16 fields. loadstore2 uses it as both the source
// and the destination of a two-field copy.
type S1 struct {
a, b int16
}

// loadstore2 copies fields a and b from *p into *q, reading both fields
// into locals before writing either of them.
//
// It is a codegen test: the directive comments inside the body are attached
// to the statement that follows them. They require a MOVL (amd64) or
// MOVWU/MOVW (arm64) to appear for the load and store statements, and
// forbid the 16-bit MOVWLZX/MOVW (amd64) and MOVH (arm64) forms.
func loadstore2(p, q *S1) {
// amd64:"MOVL",-"MOVWLZX"
// arm64:"MOVWU",-"MOVH"
a, b := p.a, p.b
// amd64:"MOVL",-"MOVW"
// arm64:"MOVW",-"MOVH"
q.a, q.b = a, b
}

0 comments on commit e713d6f

Please sign in to comment.