Skip to content

Commit

Permalink
[dev.link] cmd/internal/obj: handle content-addressable symbols with …
Browse files Browse the repository at this point in the history
…relocations

For content-addressable symbols with relocations, we build a
content hash based on each symbol's content and relocations.
Depending on the category of each referenced symbol, we choose a
different hash algorithm so that the hash is globally consistent.

For now, we only support content-addressable symbols with
relocations when the current package's import path is known, so
that the symbol names are fully expanded. Otherwise, if the
referenced symbol is a named symbol whose name is not fully
expanded, the hash won't be globally consistent, and can cause
erroneous collisions. This is fine for now, as the deduplication
is just an optimization, not a requirement for correctness (until
we get to type descriptors).

Change-Id: I639e4e03dd749b5d71f0a55c2525926575b1ac30
Reviewed-on: https://go-review.googlesource.com/c/go/+/243142
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Jeremy Faller <[email protected]>
  • Loading branch information
cherrymui committed Jul 20, 2020
1 parent 289c238 commit 526d99a
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 11 deletions.
3 changes: 2 additions & 1 deletion src/cmd/asm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func main() {
ctxt.Flag_dynlink = *flags.Dynlink
ctxt.Flag_shared = *flags.Shared || *flags.Dynlink
ctxt.IsAsm = true
ctxt.Pkgpath = *flags.Importpath
switch *flags.Spectre {
default:
log.Printf("unknown setting -spectre=%s", *flags.Spectre)
Expand Down Expand Up @@ -97,7 +98,7 @@ func main() {
}
if ok && !*flags.SymABIs {
ctxt.NumberSyms()
obj.WriteObjFile(ctxt, buf, *flags.Importpath)
obj.WriteObjFile(ctxt, buf)
}
if !ok || diag {
if failedFile != "" {
Expand Down
1 change: 1 addition & 0 deletions src/cmd/compile/internal/gc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,7 @@ func Main(archInit func(*Arch)) {
// Write object data to disk.
timings.Start("be", "dumpobj")
dumpdata()
Ctxt.Pkgpath = myimportpath
Ctxt.NumberSyms()
dumpobj()
if asmhdr != "" {
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/compile/internal/gc/obj.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func dumpLinkerObj(bout *bio.Writer) {

fmt.Fprintf(bout, "\n!\n")

obj.WriteObjFile(Ctxt, bout, myimportpath)
obj.WriteObjFile(Ctxt, bout)
}

func addptabs() {
Expand Down
1 change: 1 addition & 0 deletions src/cmd/internal/obj/link.go
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,7 @@ type Link struct {
Retpoline bool // emit use of retpoline stubs for indirect jmp/call
Bso *bufio.Writer
Pathname string
Pkgpath string // the current package's import path, "" if unknown
hashmu sync.Mutex // protects hash, funchash
hash map[string]*LSym // name -> sym mapping
funchash map[string]*LSym // name -> sym mapping for ABIInternal syms
Expand Down
77 changes: 70 additions & 7 deletions src/cmd/internal/obj/objfile2.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ import (
"cmd/internal/goobj2"
"cmd/internal/objabi"
"crypto/sha1"
"encoding/binary"
"fmt"
"io"
"path/filepath"
"strings"
)

// Entry point of writing new object file.
func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) {
func WriteObjFile(ctxt *Link, b *bio.Writer) {

debugAsmEmit(ctxt)

Expand All @@ -27,7 +29,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) {
w := writer{
Writer: goobj2.NewWriter(b),
ctxt: ctxt,
pkgpath: objabi.PathToPrefix(pkgpath),
pkgpath: objabi.PathToPrefix(ctxt.Pkgpath),
}

start := b.Offset()
Expand All @@ -39,7 +41,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) {
if ctxt.Flag_shared {
flags |= goobj2.ObjFlagShared
}
if pkgpath == "" {
if w.pkgpath == "" {
flags |= goobj2.ObjFlagNeedNameExpansion
}
if ctxt.IsAsm {
Expand Down Expand Up @@ -336,19 +338,80 @@ func (w *writer) Hash64(s *LSym) {
if !s.ContentAddressable() || len(s.R) != 0 {
panic("Hash of non-content-addresable symbol")
}
var b goobj2.Hash64Type
copy(b[:], s.P)
b := contentHash64(s)
w.Bytes(b[:])
}

func (w *writer) Hash(s *LSym) {
if !s.ContentAddressable() || len(s.R) != 0 { // TODO: currently we don't support content-addressable symbols with relocations
if !s.ContentAddressable() {
panic("Hash of non-content-addresable symbol")
}
b := goobj2.HashType(sha1.Sum(s.P))
b := w.contentHash(s)
w.Bytes(b[:])
}

// contentHash64 returns the short content hash of s, which is simply
// the symbol's data copied into a zero-initialized Hash64Type. Data
// shorter than the hash is therefore implicitly padded with zero
// bytes, and copy stops at the end of the hash if the data is longer.
func contentHash64(s *LSym) goobj2.Hash64Type {
	var hash goobj2.Hash64Type
	data := s.P
	copy(hash[:], data)
	return hash
}

// contentHash computes the content hash for a content-addressable
// symbol s. The hash covers the symbol's data (s.P) and, for each of
// its relocations (s.R), the relocation's offset, size, type and
// addend followed by an identification of the referenced symbol.
//
// Depending on the category of the referenced symbol, we choose
// different ways to identify it such that the hash is globally
// consistent.
//   - For a referenced content-addressable symbol, its content hash
//     is globally consistent.
//   - For a package symbol, its package path and local index are
//     globally consistent.
//   - For a non-package symbol, its fully-expanded name is globally
//     consistent. For now, we require that we know the current
//     package path so we can always expand symbol names. (Otherwise,
//     symbols with relocations are not considered hashable.)
//
// For now, we assume there are no circular dependencies among hashed
// symbols; nothing here detects a cycle, so one would recurse without
// bound.
//
// contentHash panics if a relocation has no target symbol or if it
// refers to a builtin symbol.
func (w *writer) contentHash(s *LSym) goobj2.HashType {
	h := sha1.New()
	h.Write(s.P)
	var tmp [14]byte
	for i := range s.R {
		r := &s.R[i]
		// Fixed-size relocation record:
		// Off (4 bytes), Siz (1), Type (1), Add (8), little endian.
		// NOTE(review): Type is narrowed to 8 bits here; confirm that
		// every relocation type value fits.
		binary.LittleEndian.PutUint32(tmp[:4], uint32(r.Off))
		tmp[4] = r.Siz
		tmp[5] = uint8(r.Type)
		binary.LittleEndian.PutUint64(tmp[6:14], uint64(r.Add))
		h.Write(tmp[:])

		rs := r.Sym
		if rs == nil {
			// Fail with a diagnosable message instead of a bare
			// nil-pointer dereference on rs.PkgIdx below.
			panic("contentHash: nil symbol target in relocation of " + s.Name)
		}
		switch rs.PkgIdx {
		case goobj2.PkgIdxHashed64:
			h.Write([]byte{0})
			t := contentHash64(rs)
			h.Write(t[:])
		case goobj2.PkgIdxHashed:
			h.Write([]byte{1})
			t := w.contentHash(rs)
			h.Write(t[:])
		case goobj2.PkgIdxBuiltin:
			panic("unsupported")
		case goobj2.PkgIdxNone:
			h.Write([]byte{2})
			io.WriteString(h, rs.Name) // name is already expanded at this point
		default:
			// Package symbol: hash the package path and the symbol's
			// index. A symbol of the package being written uses the
			// writer's own package path; any other uses rs.Pkg.
			pkg := rs.Pkg
			if rs.PkgIdx == goobj2.PkgIdxSelf {
				pkg = w.pkgpath
			}
			io.WriteString(h, pkg)
			binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx))
			h.Write(tmp[:4])
		}
	}
	var b goobj2.HashType
	copy(b[:], h.Sum(nil))
	return b
}

func makeSymRef(s *LSym) goobj2.SymRef {
if s == nil {
return goobj2.SymRef{}
Expand Down
87 changes: 87 additions & 0 deletions src/cmd/internal/obj/objfile_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package obj

import (
"cmd/internal/goobj2"
"cmd/internal/sys"
"testing"
)

// dummyArch is a minimal AMD64 architecture description that the
// tests in this file pass to Linknew to obtain a Link context.
var dummyArch = LinkArch{Arch: sys.ArchAMD64}

// TestContentHash64 checks that the short content hash of a symbol is
// unaffected by trailing zero bytes in its data, and that it matches
// the hash of the symbol Int64Sym creates for the same value.
func TestContentHash64(t *testing.T) {
	bare := &LSym{P: []byte("A")}
	padded := &LSym{P: []byte("A\x00\x00\x00")}
	for _, s := range []*LSym{bare, padded} {
		s.Set(AttrContentAddressable, true)
	}

	h1, h2 := contentHash64(bare), contentHash64(padded)
	if h1 != h2 {
		t.Errorf("contentHash64(s1)=%x, contentHash64(s2)=%x, expect equal", h1, h2)
	}

	// dummyArch is little endian.
	ctxt := Linknew(&dummyArch)
	h3 := contentHash64(ctxt.Int64Sym(int64('A')))
	if h1 != h3 {
		t.Errorf("contentHash64(s1)=%x, contentHash64(s3)=%x, expect equal", h1, h3)
	}
}

// TestContentHash checks that contentHash distinguishes symbols by
// their data and by the symbols their relocations refer to, and that
// references to distinct symbols with equal content hash equally.
func TestContentHash(t *testing.T) {
	// Data of each symbol, by index.
	contents := []string{
		"TestSymbol",  // 0
		"TestSymbol",  // 1
		"TestSymbol2", // 2
		"",            // 3
		"",            // 4
		"",            // 5
		"",            // 6
	}
	syms := make([]*LSym, len(contents))
	for i, c := range contents {
		s := &LSym{P: []byte(c)}
		s.Set(AttrContentAddressable, true)
		s.PkgIdx = goobj2.PkgIdxHashed
		syms[i] = s
	}

	// Each entry adds one relocation to syms[from] targeting syms[to].
	refs := []struct{ from, to int }{
		{3, 0}, // s3 references s0
		{4, 0}, // s4 references s0
		{5, 1}, // s5 references s1
		{6, 2}, // s6 references s2
	}
	for _, ref := range refs {
		Addrel(syms[ref.from]).Sym = syms[ref.to]
	}

	// Compute all hashes up front.
	var w writer
	hashes := make([]goobj2.HashType, 0, len(syms))
	for _, s := range syms {
		hashes = append(hashes, w.contentHash(s))
	}

	cases := []struct {
		a, b  int
		equal bool
	}{
		{a: 0, b: 1, equal: true},  // same contents, no relocs
		{a: 0, b: 2, equal: false}, // different contents
		{a: 3, b: 4, equal: true},  // same contents, same relocs
		{a: 3, b: 5, equal: true},  // recursively same contents
		{a: 3, b: 6, equal: false}, // same contents, different relocs
	}
	for _, tc := range cases {
		ha, hb := hashes[tc.a], hashes[tc.b]
		if (ha == hb) == tc.equal {
			continue
		}
		want := "not equal"
		if tc.equal {
			want = "equal"
		}
		t.Errorf("h%d=%x, h%d=%x, expect %s", tc.a, ha, tc.b, hb, want)
	}
}
6 changes: 4 additions & 2 deletions src/cmd/internal/obj/sym.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,10 @@ func (ctxt *Link) NumberSyms() {

var idx, hashedidx, hashed64idx, nonpkgidx int32
ctxt.traverseSyms(traverseDefs, func(s *LSym) {
if s.ContentAddressable() && len(s.R) == 0 { // TODO: currently we don't support content-addressable symbols with relocations
if len(s.P) <= 8 {
// if Pkgpath is unknown, cannot hash symbols with relocations, as it
// may reference named symbols whose names are not fully expanded.
if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) {
if len(s.P) <= 8 && len(s.R) == 0 { // we can use short hash only for symbols without relocations
s.PkgIdx = goobj2.PkgIdxHashed64
s.SymIdx = hashed64idx
if hashed64idx != int32(len(ctxt.hashed64defs)) {
Expand Down

0 comments on commit 526d99a

Please sign in to comment.