Skip to content

Commit

Permalink
cmd/compile, runtime: make atomic loads/stores sequentially consistent on s390x
Browse files Browse the repository at this point in the history

The z/Architecture does not guarantee that a load following a store
will not be reordered with that store, unless they access the same
address. Therefore if we want to ensure the sequential consistency
of atomic loads and stores we need to perform serialization
operations after atomic stores.

We do not need to serialize in the runtime when using StoreRel[ease]
and LoadAcq[uire]. The z/Architecture already provides sufficient
ordering guarantees for these operations.

name              old time/op  new time/op  delta
AtomicLoad64-16   0.51ns ± 0%  0.51ns ± 0%     ~     (all equal)
AtomicStore64-16  0.51ns ± 0%  0.60ns ± 9%  +16.47%  (p=0.000 n=17+20)
AtomicLoad-16     0.51ns ± 0%  0.51ns ± 0%     ~     (all equal)
AtomicStore-16    0.51ns ± 0%  0.60ns ± 9%  +16.50%  (p=0.000 n=18+20)

Fixes #32428.

Change-Id: I88d19a4010c46070e4fff4b41587efe4c628d4d9
Reviewed-on: https://go-review.googlesource.com/c/go/+/180439
Run-TryBot: Michael Munday <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
  • Loading branch information
mundaym committed Jun 6, 2019
1 parent 53deb81 commit ac8dbe7
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 43 deletions.
4 changes: 2 additions & 2 deletions src/cmd/compile/internal/gc/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -3093,7 +3093,7 @@ func init() {
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
},
sys.PPC64)
sys.PPC64, sys.S390X)
addF("runtime/internal/atomic", "Loadp",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
Expand Down Expand Up @@ -3125,7 +3125,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
sys.PPC64)
sys.PPC64, sys.S390X)

addF("runtime/internal/atomic", "Xchg",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
Expand Down
2 changes: 2 additions & 0 deletions src/cmd/compile/internal/s390x/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
bne := s.Prog(s390x.ABNE)
bne.To.Type = obj.TYPE_BRANCH
gc.Patch(bne, cs)
case ssa.OpS390XSYNC:
s.Prog(s390x.ASYNC)
case ssa.OpClobber:
// TODO: implement for clobberdead experiment. Nop is ok for now.
default:
Expand Down
19 changes: 9 additions & 10 deletions src/cmd/compile/internal/ssa/gen/S390X.rules
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,15 @@
(RoundToEven x) -> (FIDBR [4] x)
(Round x) -> (FIDBR [1] x)

// Atomic loads.
(AtomicLoad8 ptr mem) -> (MOVBZatomicload ptr mem)
(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
(AtomicLoad64 ptr mem) -> (MOVDatomicload ptr mem)
(AtomicLoadPtr ptr mem) -> (MOVDatomicload ptr mem)

// Atomic stores.
(AtomicStore32 ptr val mem) -> (MOVWatomicstore ptr val mem)
(AtomicStore64 ptr val mem) -> (MOVDatomicstore ptr val mem)
(AtomicStorePtrNoWB ptr val mem) -> (MOVDatomicstore ptr val mem)
// Atomic loads and stores.
// The SYNC instruction (fast-BCR-serialization) prevents store-load
// reordering. Other sequences of memory operations (load-load,
// store-store and load-store) are already guaranteed not to be reordered.
(AtomicLoad(8|32|Acq32|64|Ptr) ptr mem) -> (MOV(BZ|WZ|WZ|D|D)atomicload ptr mem)
(AtomicStore(32|64|PtrNoWB) ptr val mem) -> (SYNC (MOV(W|D|D)atomicstore ptr val mem))

// Store-release doesn't require store-load ordering.
(AtomicStoreRel32 ptr val mem) -> (MOVWatomicstore ptr val mem)

// Atomic adds.
(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (LAA ptr val mem))
Expand Down
5 changes: 5 additions & 0 deletions src/cmd/compile/internal/ssa/gen/S390XOps.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ func init() {
fpstore = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}

sync = regInfo{inputs: []regMask{0}}

// LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}

Expand Down Expand Up @@ -493,6 +495,9 @@ func init() {
{name: "FlagGT"}, // CC=2 (greater than)
{name: "FlagOV"}, // CC=3 (overflow)

// Fast-BCR-serialization to ensure store-load ordering.
{name: "SYNC", argLength: 1, reg: sync, asm: "SYNC", typ: "Mem"},

// Atomic loads. These are just normal loads but return <value,memory> tuples
// so they can be properly ordered with other loads.
// load from arg0+auxint+aux. arg1=mem.
Expand Down
7 changes: 7 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 53 additions & 12 deletions src/cmd/compile/internal/ssa/rewriteS390X.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions src/runtime/internal/atomic/asm_s390x.s
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,30 @@

#include "textflag.h"

// func Store(ptr *uint32, val uint32)
// Store writes the 32-bit val to *ptr and then issues a serialization
// instruction. The store itself is an ordinary MOVW; the trailing SYNC is
// what keeps a later load from being reordered ahead of this store, which
// the architecture would otherwise permit for loads of a different address.
TEXT ·Store(SB), NOSPLIT, $0
MOVD ptr+0(FP), R2 // R2 = ptr
MOVWZ val+8(FP), R3 // R3 = val
MOVW R3, 0(R2) // *ptr = val (32-bit store)
SYNC // serialize: prevent store-load reordering past this point
RET

// func Store64(ptr *uint64, val uint64)
// Store64 writes the 64-bit val to *ptr and then issues a serialization
// instruction. The store itself is an ordinary MOVD; the trailing SYNC is
// what keeps a later load from being reordered ahead of this store.
TEXT ·Store64(SB), NOSPLIT, $0
MOVD ptr+0(FP), R2 // R2 = ptr
MOVD val+8(FP), R3 // R3 = val
MOVD R3, 0(R2) // *ptr = val (64-bit store)
SYNC // serialize: prevent store-load reordering past this point
RET

// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
// StorepNoWB writes the pointer-sized val to the location ptr points at and
// then issues a serialization instruction, so that a later load cannot be
// reordered ahead of this store.
// NOTE(review): the "NoWB" suffix presumably means no GC write barrier is
// performed here; confirm against atomic_pointer.go.
TEXT ·StorepNoWB(SB), NOSPLIT, $0
MOVD ptr+0(FP), R2 // R2 = ptr
MOVD val+8(FP), R3 // R3 = val
MOVD R3, 0(R2) // *ptr = val (64-bit pointer store)
SYNC // serialize: prevent store-load reordering past this point
RET

// func Cas(ptr *uint32, old, new uint32) bool
// Atomically:
// if *ptr == old {
Expand Down
25 changes: 6 additions & 19 deletions src/runtime/internal/atomic/atomic_s390x.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,30 +36,17 @@ func LoadAcq(ptr *uint32) uint32 {
return *ptr
}

//go:noinline
//go:nosplit
func Store(ptr *uint32, val uint32) {
*ptr = val
}

//go:noinline
//go:nosplit
func Store64(ptr *uint64, val uint64) {
*ptr = val
}
//go:noescape
func Store(ptr *uint32, val uint32)

//go:notinheap
type noWB struct{}
//go:noescape
func Store64(ptr *uint64, val uint64)

// NO go:noescape annotation; see atomic_pointer.go.
//go:noinline
//go:nosplit
func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) {
*(**noWB)(ptr) = (*noWB)(val)
}
func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)

//go:noinline
//go:nosplit
//go:noinline
func StoreRel(ptr *uint32, val uint32) {
// A plain store with no serialization afterwards: store-release does not
// require store-load ordering, and the z/Architecture already guarantees
// that load-load, store-store and load-store sequences are not reordered.
*ptr = val
}
Expand Down

0 comments on commit ac8dbe7

Please sign in to comment.