Skip to content

Commit

Permalink
Merge pull request #37702 from JuliaLang/yyc/cpu
Browse files Browse the repository at this point in the history
Collection of a few CPU detection improvements
  • Loading branch information
yuyichao committed Sep 26, 2020
2 parents d7b391d + e9ad329 commit 9fe272c
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 31 deletions.
2 changes: 1 addition & 1 deletion src/features_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ JL_FEATURE_DEF(bf16, 32 + 14, 110000) // HWCAP2_BF16, ARMv8.2-BF16. Required in
// JL_FEATURE_DEF(dgh, 32 + 15, UINT32_MAX) // HWCAP2_DGH, ARMv8.0-DGH. Not implemented in LLVM yet
JL_FEATURE_DEF(rand, 32 + 16, 0) // HWCAP2_RNG, ARMv8.5-RNG
JL_FEATURE_DEF(bti, 32 + 17, 0) // HWCAP2_BTI
JL_FEATURE_DEF(mte, 32 + 18, 0) // HWCAP2_MTE, ARMv8.5-MemTag (reserved as of kernel 5.9-rc1)

// custom bits to match llvm model
JL_FEATURE_DEF(v8_1a, 32 * 2 + 0, 0)
Expand All @@ -74,7 +75,6 @@ JL_FEATURE_DEF(v8_6a, 32 * 2 + 5, 110000)
// am: ID_AA64PFR0_EL1.AMU (0b1, 0b10)
// specrestrict: ID_AA64PFR0_EL1.CSV2 (0b10)
// predres: ID_AA64PFR0_EL1.CSV3 (0b1)
// mte: ID_AA64PFR1_EL1.MTE (0b1, 0b10)
// ecv: ID_AA64MMFR0_EL1.ECV (0b1, 0b10) (LLVM 11)
// lor: ID_AA64MMFR1_EL1.LO (0b1)
// perfmon: ID_AA64DFR0_EL1.PMUVer (0b1, 0b100, 0b101, 0b110)
Expand Down
90 changes: 60 additions & 30 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ enum class CPU : uint32_t {
arm_cortex_x1,
arm_neoverse_e1,
arm_neoverse_n1,
arm_zeus,
arm_neoverse_v1,
arm_neoverse_n2,

// Cavium
// aarch64
Expand Down Expand Up @@ -277,7 +278,9 @@ constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
// .DIT: dit
// .BT: bti

// ID_AA64PFR1_EL1.SSBS: ssbs
// ID_AA64PFR1_EL1
// .SSBS: ssbs
// .MTE: mte

// ID_AA64MMFR2_EL1.AT: uscat

Expand Down Expand Up @@ -305,7 +308,9 @@ constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, full
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
constexpr auto arm_neoverse_e1 = armv8_2a | get_feature_masks(rcpc, fullfp16, ssbs);
constexpr auto arm_neoverse_n1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
constexpr auto arm_zeus = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
constexpr auto arm_neoverse_v1 = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
constexpr auto arm_neoverse_n2 = armv8_5a | get_feature_masks(sve, i8mm, bf16, fullfp16, sve2,
sve2_bitperm, rand, mte);
constexpr auto cavium_thunderx = armv8a_crc_crypto;
constexpr auto cavium_thunderx88 = armv8a_crc_crypto;
constexpr auto cavium_thunderx88p1 = armv8a_crc_crypto;
Expand Down Expand Up @@ -367,7 +372,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
{"neoverse-e1", CPU::arm_neoverse_e1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_e1},
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
{"zeus", CPU::arm_zeus, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_zeus},
{"neoverse-v1", CPU::arm_neoverse_v1, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_v1},
{"neoverse-n2", CPU::arm_neoverse_n2, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_n2},
{"thunderx", CPU::cavium_thunderx, CPU::generic, 0, Feature::cavium_thunderx},
{"thunderxt88", CPU::cavium_thunderx88, CPU::generic, 0, Feature::cavium_thunderx88},
{"thunderxt88p1", CPU::cavium_thunderx88p1, CPU::cavium_thunderx88, UINT32_MAX,
Expand Down Expand Up @@ -558,6 +564,8 @@ constexpr auto arm_cortex_a77 = armv8_2a;
constexpr auto arm_cortex_a78 = armv8_2a;
constexpr auto arm_cortex_x1 = armv8_2a;
constexpr auto arm_neoverse_n1 = armv8_2a;
constexpr auto arm_neoverse_v1 = armv8_4a;
constexpr auto arm_neoverse_n2 = armv8_5a;
constexpr auto nvidia_denver1 = armv8a; // TODO? (crc, crypto)
constexpr auto nvidia_denver2 = armv8a_crc_crypto;
constexpr auto apm_xgene1 = armv8a;
Expand Down Expand Up @@ -640,6 +648,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
{"neoverse-v1", CPU::arm_neoverse_v1, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_v1},
{"neoverse-n2", CPU::arm_neoverse_n2, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_n2},
{"denver1", CPU::nvidia_denver1, CPU::arm_cortex_a53, UINT32_MAX, Feature::nvidia_denver1},
{"denver2", CPU::nvidia_denver2, CPU::arm_cortex_a57, UINT32_MAX, Feature::nvidia_denver2},
{"xgene1", CPU::apm_xgene1, CPU::armv8_a, UINT32_MAX, Feature::apm_xgene1},
Expand Down Expand Up @@ -809,7 +819,7 @@ static std::set<CPUID> get_cpuinfo(void)
static CPU get_cpu_name(CPUID cpuid)
{
switch (cpuid.implementer) {
case 0x41: // ARM
case 0x41: // 'A': ARM
switch (cpuid.part) {
case 0xb02: return CPU::arm_mpcore;
case 0xb36: return CPU::arm_1136jf_s;
Expand Down Expand Up @@ -849,20 +859,22 @@ static CPU get_cpu_name(CPUID cpuid)
case 0xd20: return CPU::arm_cortex_m23;
case 0xd21: return CPU::arm_cortex_m33;
// case 0xd22: return CPU::arm_cortex_m55;
case 0xd40: return CPU::arm_zeus;
case 0xd40: return CPU::arm_neoverse_v1;
case 0xd41: return CPU::arm_cortex_a78;
case 0xd43: return CPU::arm_cortex_a65ae;
case 0xd44: return CPU::arm_cortex_x1;
case 0xd49: return CPU::arm_neoverse_n2;
case 0xd4a: return CPU::arm_neoverse_e1;
default: return CPU::generic;
}
case 0x42: // Broadcom (Cavium)
case 0x42: // 'B': Broadcom (Cavium)
switch (cpuid.part) {
// case 0x00f: return CPU::broadcom_brahma_b15;
// case 0x100: return CPU::broadcom_brahma_b53;
case 0x516: return CPU::cavium_thunderx2t99p1;
default: return CPU::generic;
}
case 0x43: // Cavium
case 0x43: // 'C': Cavium
switch (cpuid.part) {
case 0xa0: return CPU::cavium_thunderx;
case 0xa1:
Expand All @@ -881,73 +893,87 @@ static CPU get_cpu_name(CPUID cpuid)
case 0xb8: return CPU::marvell_thunderx3t110;
default: return CPU::generic;
}
case 0x46: // Fujitsu
case 0x46: // 'F': Fujitsu
switch (cpuid.part) {
case 0x1: return CPU::fujitsu_a64fx;
default: return CPU::generic;
}
case 0x48: // HiSilicon
case 0x48: // 'H': HiSilicon
switch (cpuid.part) {
case 0xd01: return CPU::hisilicon_tsv110;
case 0xd40: return CPU::arm_cortex_a76; // Kirin 980
default: return CPU::generic;
}
case 0x4e: // NVIDIA
case 0x4e: // 'N': NVIDIA
switch (cpuid.part) {
case 0x000: return CPU::nvidia_denver1;
case 0x003: return CPU::nvidia_denver2;
case 0x004: return CPU::nvidia_carmel;
default: return CPU::generic;
}
case 0x50: // AppliedMicro
case 0x50: // 'P': AppliedMicro
// x-gene 2
// x-gene 3
switch (cpuid.part) {
case 0x000: return CPU::apm_xgene1;
default: return CPU::generic;
}
case 0x51: // Qualcomm
case 0x51: // 'Q': Qualcomm
switch (cpuid.part) {
case 0x00f:
case 0x02d:
return CPU::qualcomm_scorpion;
case 0x04d:
case 0x06f:
return CPU::qualcomm_krait;
case 0x201:
case 0x205:
case 0x211:
case 0x201: // silver
case 0x205: // gold
case 0x211: // silver
return CPU::qualcomm_kyro;
case 0x800:
case 0x801:
case 0x802:
case 0x803:
case 0x804:
case 0x805:
return CPU::arm_cortex_a73; // second-generation Kryo
// kryo 2xx
case 0x800: // gold
return CPU::arm_cortex_a73;
case 0x801: // silver
return CPU::arm_cortex_a53;
// kryo 3xx
case 0x802: // gold
return CPU::arm_cortex_a75;
case 0x803: // silver
return CPU::arm_cortex_a55;
// kryo 4xx
case 0x804: // gold
return CPU::arm_cortex_a76;
case 0x805: // silver
return CPU::arm_cortex_a55;
// kryo 5xx seems to use the part ID of cortex-a77 directly
case 0xc00:
return CPU::qualcomm_falkor;
case 0xc01:
return CPU::qualcomm_saphira;
default: return CPU::generic;
}
case 0x53: // Samsung
if (cpuid.part == 1)
case 0x53: // 'S': Samsung
if (cpuid.part == 1) {
if (cpuid.variant == 4)
return CPU::samsung_exynos_m2;
return CPU::samsung_exynos_m1;
}
if (cpuid.variant != 1)
return CPU::generic;
switch (cpuid.part) {
case 0x2: return CPU::samsung_exynos_m3;
case 0x3: return CPU::samsung_exynos_m4;
case 0x4: return CPU::samsung_exynos_m5;
default: return CPU::generic;
}
case 0x56: // Marvell
case 0x56: // 'V': Marvell
switch (cpuid.part) {
case 0x581:
case 0x584:
return CPU::marvell_pj4;
default: return CPU::generic;
}
case 0x61: // Apple
case 0x61: // 'a': Apple
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
switch (cpuid.part) {
case 0x0: // Swift
Expand Down Expand Up @@ -978,12 +1004,12 @@ static CPU get_cpu_name(CPUID cpuid)
return CPU::apple_a13;
default: return CPU::generic;
}
case 0x68: // Huaxintong Semiconductor
case 0x68: // 'h': Huaxintong Semiconductor
switch (cpuid.part) {
case 0x0: return CPU::hxt_phecda;
default: return CPU::generic;
}
case 0x69: // Intel
case 0x69: // 'i': Intel
switch (cpuid.part) {
case 0x001: return CPU::intel_3735d;
default: return CPU::generic;
Expand Down Expand Up @@ -1241,6 +1267,8 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
CPU::arm_cortex_a75,
CPU::arm_cortex_a76,
CPU::arm_neoverse_n1,
CPU::arm_neoverse_n2,
CPU::arm_neoverse_v1,
CPU::nvidia_denver2,
CPU::nvidia_carmel,
CPU::samsung_exynos_m1,
Expand Down Expand Up @@ -1327,6 +1355,8 @@ static inline const char *normalize_cpu_name(llvm::StringRef name)
{
if (name == "ares")
return "neoverse-n1";
if (name == "zeus")
return "neoverse-v1";
if (name == "cyclone")
return "apple-a7";
if (name == "typhoon")
Expand Down Expand Up @@ -1747,7 +1777,7 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
# if JL_LLVM_VERSION > 100000
"+tme,"
# endif
"+am,+specrestrict,+predres,+mte,+lor,+perfmon,+spe,+tracev8.4",
"+am,+specrestrict,+predres,+lor,+perfmon,+spe,+tracev8.4",
#else
"+dotprod",
#endif
Expand Down
10 changes: 10 additions & 0 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ enum class CPU : uint32_t {
intel_corei7_icelake_client,
intel_corei7_icelake_server,
intel_corei7_tigerlake,
intel_corei7_sapphirerapids,
intel_knights_landing,
intel_knights_mill,

Expand Down Expand Up @@ -209,6 +210,9 @@ constexpr auto icelake = cannonlake | get_feature_masks(avx512bitalg, vaes, avx5
constexpr auto icelake_server = icelake | get_feature_masks(pconfig, wbnoinvd);
constexpr auto tigerlake = icelake | get_feature_masks(avx512vp2intersect, movdiri,
movdir64b, shstk);
constexpr auto sapphirerapids = icelake_server |
get_feature_masks(amx_tile, amx_int8, amx_bf16, avx512bf16, serialize, cldemote, waitpkg,
ptwrite, tsxldtrk, enqcmd, shstk, avx512vp2intersect, movdiri, movdir64b);

constexpr auto k8_sse3 = get_feature_masks(sse3, cx16);
constexpr auto amdfam10 = k8_sse3 | get_feature_masks(sse4a, lzcnt, popcnt, sahf);
Expand Down Expand Up @@ -260,6 +264,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
Feature::icelake_server},
{"tigerlake", CPU::intel_corei7_tigerlake, CPU::intel_corei7_icelake_client, 100000,
Feature::tigerlake},
{"sapphirerapids", CPU::intel_corei7_sapphirerapids, CPU::intel_corei7_icelake_server, 120000,
Feature::sapphirerapids},

{"athlon64", CPU::amd_athlon_64, CPU::generic, 0, Feature::generic},
{"athlon-fx", CPU::amd_athlon_fx, CPU::generic, 0, Feature::generic},
Expand Down Expand Up @@ -419,6 +425,10 @@ static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t br
case 0x8d:
return CPU::intel_corei7_tigerlake;

// Sapphire Rapids
case 0x8f:
return CPU::intel_corei7_sapphirerapids;

case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
Expand Down

0 comments on commit 9fe272c

Please sign in to comment.