Skip to content

Commit

Permalink
cmd/internal/obj/riscv,cmd/link: add support for internal cgo linking…
Browse files Browse the repository at this point in the history
… on riscv64

Make it possible to internally link cgo on riscv64, which also adds
support for SDYNIMPORT calls without external linking being required.

This reduces the time of an ./all.bash run on a SiFive HiFive Unleashed by
approximately 20% (~140 minutes down to ~110 minutes).

Change-Id: I43f1348de31672718ae8676cc82f6fdc1dfee054
Reviewed-on: https://go-review.googlesource.com/c/go/+/431104
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Run-TryBot: Joel Sing <[email protected]>
Reviewed-by: Than McIntosh <[email protected]>
  • Loading branch information
4a6f656c committed Aug 23, 2023
1 parent 70a8a41 commit e68c027
Show file tree
Hide file tree
Showing 22 changed files with 572 additions and 82 deletions.
4 changes: 1 addition & 3 deletions src/cmd/dist/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -576,9 +576,7 @@ func setup() {
func mustLinkExternal(goos, goarch string, cgoEnabled bool) bool {
if cgoEnabled {
switch goarch {
case "loong64",
"mips", "mipsle", "mips64", "mips64le",
"riscv64":
case "loong64", "mips", "mipsle", "mips64", "mips64le":
// Internally linking cgo is incomplete on some architectures.
// https://golang.org/issue/14449
return true
Expand Down
18 changes: 15 additions & 3 deletions src/cmd/internal/obj/riscv/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -619,14 +619,26 @@ var unaryDst = map[obj.As]bool{

// Instruction encoding masks.
const (
// JTypeImmMask is a mask including only the immediate portion of
// J-type instructions.
JTypeImmMask = 0xfffff000
// BTypeImmMask is a mask including only the immediate portion of
// B-type instructions.
BTypeImmMask = 0xfe000f80

// CBTypeImmMask is a mask including only the immediate portion of
// CB-type instructions.
CBTypeImmMask = 0x1c7c

// CJTypeImmMask is a mask including only the immediate portion of
// CJ-type instructions. The CJ format carries its 11-bit jump target in
// instruction bits 12:2, so every one of those bits (including bit 7)
// must be covered by the mask.
CJTypeImmMask = 0x1ffc

// ITypeImmMask is a mask including only the immediate portion of
// I-type instructions.
ITypeImmMask = 0xfff00000

// JTypeImmMask is a mask including only the immediate portion of
// J-type instructions.
JTypeImmMask = 0xfffff000

// STypeImmMask is a mask including only the immediate portion of
// S-type instructions.
STypeImmMask = 0xfe000f80
Expand Down
82 changes: 77 additions & 5 deletions src/cmd/internal/obj/riscv/obj.go
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,12 @@ func validateRaw(ctxt *obj.Link, ins *instruction) {
}
}

// extractBitAndShift isolates one bit of imm and relocates it: the value of
// bit number bit (counting from zero at the least significant end) is
// returned at bit position pos, with every other bit of the result clear.
func extractBitAndShift(imm uint32, bit, pos int) uint32 {
	selected := (imm >> bit) & 1
	return selected << pos
}

// encodeR encodes an R-type RISC-V instruction.
func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 {
enc := encode(as)
Expand Down Expand Up @@ -1272,6 +1278,11 @@ func encodeSF(ins *instruction) uint32 {
return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm))
}

// encodeBImmediate scatters a branch offset across the immediate fields of a
// B-type RISC-V instruction word. Offset bit 12 lands in instruction bit 31,
// offset bits 10:5 in bits 30:25, offset bits 4:1 in bits 11:8 and offset
// bit 11 in bit 7. Offset bit 0 is not encoded.
func encodeBImmediate(imm uint32) uint32 {
	// Shifting a uint32 left by 31 discards everything except offset bit 12.
	top := (imm >> 12) << 31
	upper := ((imm >> 5) & 0x3f) << 25
	lower := ((imm >> 1) & 0xf) << 8
	bit11 := ((imm >> 11) & 0x1) << 7
	return top | upper | lower | bit11
}

// encodeB encodes a B-type RISC-V instruction.
func encodeB(ins *instruction) uint32 {
imm := immI(ins.as, ins.imm, 13)
Expand All @@ -1281,7 +1292,7 @@ func encodeB(ins *instruction) uint32 {
if enc == nil {
panic("encodeB: could not encode instruction")
}
return (imm>>12)<<31 | ((imm>>5)&0x3f)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | ((imm>>1)&0xf)<<8 | ((imm>>11)&0x1)<<7 | enc.opcode
return encodeBImmediate(imm) | rs2<<20 | rs1<<15 | enc.funct3<<12 | enc.opcode
}

// encodeU encodes a U-type RISC-V instruction.
Expand Down Expand Up @@ -1315,6 +1326,37 @@ func encodeJ(ins *instruction) uint32 {
return encodeJImmediate(imm) | rd<<7 | enc.opcode
}

// encodeCBImmediate lays out a branch offset in the immediate fields of a
// CB-type RISC-V instruction.
func encodeCBImmediate(imm uint32) uint32 {
	// Offset bits in the order they are packed, most significant first -
	// [8|4:3|7:6|2:1|5].
	order := [...]int{8, 4, 3, 7, 6, 2, 1, 5}

	var packed uint32
	for i, src := range order {
		packed |= extractBitAndShift(imm, src, len(order)-1-i)
	}

	// The upper three packed bits are placed at instruction bits 12:10 and
	// the lower five at instruction bits 6:2.
	hi, lo := packed>>5, packed&0x1f
	return hi<<10 | lo<<2
}

// encodeCJImmediate lays out a jump offset in the immediate field of a
// CJ-type RISC-V instruction.
func encodeCJImmediate(imm uint32) uint32 {
	// Offset bits in the order they are packed, most significant first -
	// [11|4|9:8|10|6|7|3:1|5].
	order := [...]int{11, 4, 9, 8, 10, 6, 7, 3, 2, 1, 5}

	var packed uint32
	for i, src := range order {
		packed |= extractBitAndShift(imm, src, len(order)-1-i)
	}

	// The packed field starts at instruction bit 2.
	return packed << 2
}

func encodeRawIns(ins *instruction) uint32 {
// Treat the raw value specially as a 32-bit unsigned integer.
// Nobody wants to enter negative machine code.
Expand All @@ -1324,14 +1366,34 @@ func encodeRawIns(ins *instruction) uint32 {
return uint32(ins.imm)
}

func EncodeJImmediate(imm int64) (int64, error) {
if !immIFits(imm, 21) {
return 0, fmt.Errorf("immediate %#x does not fit in 21 bits", imm)
func EncodeBImmediate(imm int64) (int64, error) {
if !immIFits(imm, 13) {
return 0, fmt.Errorf("immediate %#x does not fit in 13 bits", imm)
}
if imm&1 != 0 {
return 0, fmt.Errorf("immediate %#x is not a multiple of two", imm)
}
return int64(encodeJImmediate(uint32(imm))), nil
return int64(encodeBImmediate(uint32(imm))), nil
}

// EncodeCBImmediate validates imm as a CB-type offset and returns it encoded
// into the instruction's immediate bit positions. An error is returned when
// immIFits rejects imm for a 9-bit field or when imm is odd.
func EncodeCBImmediate(imm int64) (int64, error) {
	switch {
	case !immIFits(imm, 9):
		return 0, fmt.Errorf("immediate %#x does not fit in 9 bits", imm)
	case imm&1 != 0:
		return 0, fmt.Errorf("immediate %#x is not a multiple of two", imm)
	}
	encoded := encodeCBImmediate(uint32(imm))
	return int64(encoded), nil
}

// EncodeCJImmediate validates imm as a CJ-type offset and returns it encoded
// into the instruction's immediate bit positions. An error is returned when
// immIFits rejects imm for a 12-bit field or when imm is odd.
func EncodeCJImmediate(imm int64) (int64, error) {
	switch {
	case !immIFits(imm, 12):
		return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm)
	case imm&1 != 0:
		return 0, fmt.Errorf("immediate %#x is not a multiple of two", imm)
	}
	encoded := encodeCJImmediate(uint32(imm))
	return int64(encoded), nil
}

func EncodeIImmediate(imm int64) (int64, error) {
Expand All @@ -1341,6 +1403,16 @@ func EncodeIImmediate(imm int64) (int64, error) {
return imm << 20, nil
}

// EncodeJImmediate validates imm as a J-type offset and returns it encoded
// into the instruction's immediate bit positions. An error is returned when
// immIFits rejects imm for a 21-bit field or when imm is odd.
func EncodeJImmediate(imm int64) (int64, error) {
	switch {
	case !immIFits(imm, 21):
		return 0, fmt.Errorf("immediate %#x does not fit in 21 bits", imm)
	case imm&1 != 0:
		return 0, fmt.Errorf("immediate %#x is not a multiple of two", imm)
	}
	encoded := encodeJImmediate(uint32(imm))
	return int64(encoded), nil
}

func EncodeSImmediate(imm int64) (int64, error) {
if !immIFits(imm, 12) {
return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm)
Expand Down
27 changes: 27 additions & 0 deletions src/cmd/internal/objabi/reloctype.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,33 @@ const (
// LUI + I-type instruction sequence.
R_RISCV_TLS_LE

// R_RISCV_GOT_HI20 resolves the high 20 bits of a 32-bit PC-relative GOT
// address.
R_RISCV_GOT_HI20

// R_RISCV_PCREL_HI20 resolves the high 20 bits of a 32-bit PC-relative
// address.
R_RISCV_PCREL_HI20

// R_RISCV_PCREL_LO12_I resolves the low 12 bits of a 32-bit PC-relative
// address using an I-type instruction.
R_RISCV_PCREL_LO12_I

// R_RISCV_PCREL_LO12_S resolves the low 12 bits of a 32-bit PC-relative
// address using an S-type instruction.
R_RISCV_PCREL_LO12_S

// R_RISCV_BRANCH resolves a 12-bit PC-relative branch offset.
R_RISCV_BRANCH

// R_RISCV_RVC_BRANCH resolves an 8-bit PC-relative offset for a CB-type
// instruction.
R_RISCV_RVC_BRANCH

// R_RISCV_RVC_JUMP resolves an 11-bit PC-relative offset for a CJ-type
// instruction.
R_RISCV_RVC_JUMP

// R_PCRELDBL relocates s390x 2-byte aligned PC-relative addresses.
// TODO(mundaym): remove once variants can be serialized - see issue 14218.
R_PCRELDBL
Expand Down
43 changes: 25 additions & 18 deletions src/cmd/internal/objabi/reloctype_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/cmd/link/internal/amd64/asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant
return -1
}

func elfsetupplt(ctxt *ld.Link, plt, got *loader.SymbolBuilder, dynamic loader.Sym) {
func elfsetupplt(ctxt *ld.Link, ldr *loader.Loader, plt, got *loader.SymbolBuilder, dynamic loader.Sym) {
if plt.Size() == 0 {
// pushq got+8(IP)
plt.AddUint8(0xff)
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/link/internal/arm/asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
return true
}

func elfsetupplt(ctxt *ld.Link, plt, got *loader.SymbolBuilder, dynamic loader.Sym) {
func elfsetupplt(ctxt *ld.Link, ldr *loader.Loader, plt, got *loader.SymbolBuilder, dynamic loader.Sym) {
if plt.Size() == 0 {
// str lr, [sp, #-4]!
plt.AddUint32(ctxt.Arch, 0xe52de004)
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/link/internal/arm64/asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -1091,7 +1091,7 @@ func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sy
return loader.ExtReloc{}, false
}

func elfsetupplt(ctxt *ld.Link, plt, gotplt *loader.SymbolBuilder, dynamic loader.Sym) {
func elfsetupplt(ctxt *ld.Link, ldr *loader.Loader, plt, gotplt *loader.SymbolBuilder, dynamic loader.Sym) {
if plt.Size() == 0 {
// stp x16, x30, [sp, #-16]!
// identifying information
Expand Down
18 changes: 8 additions & 10 deletions src/cmd/link/internal/ld/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,19 +582,17 @@ func (st *relocSymState) relocsym(s loader.Sym, P []byte) {
case 1:
P[off] = byte(int8(o))
case 2:
if o != int64(int16(o)) {
st.err.Errorf(s, "relocation address for %s is too big: %#x", ldr.SymName(rs), o)
if (rt == objabi.R_PCREL || rt == objabi.R_CALL) && o != int64(int16(o)) {
st.err.Errorf(s, "pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), o)
} else if o != int64(int16(o)) && o != int64(uint16(o)) {
st.err.Errorf(s, "non-pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), uint64(o))
}
target.Arch.ByteOrder.PutUint16(P[off:], uint16(o))
case 4:
if rt == objabi.R_PCREL || rt == objabi.R_CALL {
if o != int64(int32(o)) {
st.err.Errorf(s, "pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), o)
}
} else {
if o != int64(int32(o)) && o != int64(uint32(o)) {
st.err.Errorf(s, "non-pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), uint64(o))
}
if (rt == objabi.R_PCREL || rt == objabi.R_CALL) && o != int64(int32(o)) {
st.err.Errorf(s, "pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), o)
} else if o != int64(int32(o)) && o != int64(uint32(o)) {
st.err.Errorf(s, "non-pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), uint64(o))
}
target.Arch.ByteOrder.PutUint32(P[off:], uint32(o))
case 8:
Expand Down
4 changes: 2 additions & 2 deletions src/cmd/link/internal/ld/elf.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ type ELFArch struct {

Reloc1 func(*Link, *OutBuf, *loader.Loader, loader.Sym, loader.ExtReloc, int, int64) bool
RelocSize uint32 // size of an ELF relocation record, must match Reloc1.
SetupPLT func(ctxt *Link, plt, gotplt *loader.SymbolBuilder, dynamic loader.Sym)
SetupPLT func(ctxt *Link, ldr *loader.Loader, plt, gotplt *loader.SymbolBuilder, dynamic loader.Sym)

// DynamicReadOnly can be set to true to make the .dynamic
// section read-only. By default it is writable.
Expand Down Expand Up @@ -1585,7 +1585,7 @@ func (ctxt *Link) doelf() {
// S390X uses .got instead of .got.plt
gotplt = got
}
thearch.ELF.SetupPLT(ctxt, plt, gotplt, dynamic.Sym())
thearch.ELF.SetupPLT(ctxt, ctxt.loader, plt, gotplt, dynamic.Sym())

/*
* .dynamic table
Expand Down
14 changes: 14 additions & 0 deletions src/cmd/link/internal/ld/pcln.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"internal/buildcfg"
"os"
"path/filepath"
"strings"
)

const funcSize = 11 * 4 // funcSize is the size of the _func object in runtime/runtime2.go
Expand Down Expand Up @@ -99,6 +100,19 @@ func makePclntab(ctxt *Link, container loader.Bitmap) (*pclntab, []*sym.Compilat
}

func emitPcln(ctxt *Link, s loader.Sym, container loader.Bitmap) bool {
if ctxt.Target.IsRISCV64() {
// Avoid adding local symbols to the pcln table - RISC-V
// linking generates a very large number of these, particularly
// for HI20 symbols (which we need to load in order to be able
// to resolve relocations). Unnecessarily including all of
// these symbols quickly blows out the size of the pcln table
// and overflows hash buckets.
symName := ctxt.loader.SymName(s)
if symName == "" || strings.HasPrefix(symName, ".L") {
return false
}
}

// We want to generate func table entries only for the "lowest
// level" symbols, not containers of subsymbols.
return !container.Has(s)
Expand Down
Loading

0 comments on commit e68c027

Please sign in to comment.