From 3b0148c2641d71b7af8d7b3d2738e8c92dc6d109 Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Tue, 24 Sep 2019 19:51:38 -0400 Subject: [PATCH 1/7] Initial perf jitdump implementation --- src/inc/perfjitdump.h | 29 ++++ src/pal/src/CMakeLists.txt | 1 + src/pal/src/misc/perfjitdump.cpp | 271 +++++++++++++++++++++++++++++++ src/vm/perfmap.cpp | 34 ++-- 4 files changed, 324 insertions(+), 11 deletions(-) create mode 100644 src/inc/perfjitdump.h create mode 100644 src/pal/src/misc/perfjitdump.cpp diff --git a/src/inc/perfjitdump.h b/src/inc/perfjitdump.h new file mode 100644 index 000000000000..859b6b47ee97 --- /dev/null +++ b/src/inc/perfjitdump.h @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// =========================================================================== + +#ifndef PERF_JITDUMP_H +#define PERF_JITDUMP_H + +struct PerfJitDumpState; + +// Generates a perf jitdump file. +class PerfJitDump +{ +private: + static PerfJitDumpState& GetState(); + +public: + // Start the jitdump file + static void Start(); + + // Log a method to the jitdump file. + static void LogMethod(void* pCode, size_t codeSize, const char* symbol); + + // Finish the jitdump file + static void Finish(); +}; + +#endif // PERF_JITDUMP_H + diff --git a/src/pal/src/CMakeLists.txt b/src/pal/src/CMakeLists.txt index 1a5b3b33e4a4..542de8cde2aa 100644 --- a/src/pal/src/CMakeLists.txt +++ b/src/pal/src/CMakeLists.txt @@ -226,6 +226,7 @@ set(SOURCES misc/jitsupport.cpp misc/miscpalapi.cpp misc/msgbox.cpp + misc/perfjitdump.cpp misc/strutil.cpp misc/sysinfo.cpp misc/time.cpp diff --git a/src/pal/src/misc/perfjitdump.cpp b/src/pal/src/misc/perfjitdump.cpp new file mode 100644 index 000000000000..c599e8cb0bd5 --- /dev/null +++ b/src/pal/src/misc/perfjitdump.cpp @@ -0,0 +1,271 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// =========================================================================== + +#if defined(__linux__) +#define JITDUMP_SUPPORTED +#endif + +#include "pal/palinternal.h" +#include "pal/dbgmsg.h" + +#include +#include "perfjitdump.h" + +#ifdef JITDUMP_SUPPORTED + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../inc/llvm/ELF.h" + +SET_DEFAULT_DEBUG_CHANNEL(MISC); + +namespace +{ + enum + { +#ifdef BIGENDIAN + JIT_DUMP_MAGIC = 0x4454694A, +#else + JIT_DUMP_MAGIC = 0x4A695444, +#endif + JIT_DUMP_VERSION = 1, + +#if defined(_X86_) + ELF_MACHINE = EM_386, +#elif defined(_ARM_) + ELF_MACHINE = EM_ARM, +#elif defined(_AMD64_) + ELF_MACHINE = EM_X86_64, +#elif defined(_ARM64_) + ELF_MACHINE = EM_AARCH64, +#else +#error ELF_MACHINE unsupported for target +#endif + + JIT_CODE_LOAD = 0, + }; + + uint64_t GetTimeStampNS() + { +#if HAVE_CLOCK_MONOTONIC + struct timespec ts; + int result = clock_gettime(CLOCK_MONOTONIC, &ts); + + if (result != 0) + { + ASSERT("clock_gettime(CLOCK_MONOTONIC) failed: %d\n", result); + return 0; + } + else + { + return ts.tv_sec * 1000000000ULL + ts.tv_nsec; + } +#else + #error "The PAL jitdump requires clock_gettime(CLOCK_MONOTONIC) to be supported." +#endif + } + + struct FileHeader + { + FileHeader() : + magic(JIT_DUMP_MAGIC), + version(JIT_DUMP_VERSION), + total_size(sizeof(FileHeader)), + elf_mach(ELF_MACHINE), + pad1(0), + pid(getpid()), + timestamp(GetTimeStampNS()), + flags(0) + {} + + uint32_t magic; + uint32_t version; + uint32_t total_size; + uint32_t elf_mach; + uint32_t pad1; + uint32_t pid; + uint64_t timestamp; + uint64_t flags; + }; + + struct RecordHeader + { + uint32_t id; + uint32_t total_size; + uint64_t timestamp; + }; + + struct JitCoreLoadRecord + { + JitCoreLoadRecord() : + pid(getpid()), + tid(syscall(SYS_gettid)) + { + header.id = JIT_CODE_LOAD; + header.timestamp = GetTimeStampNS(); + } + + RecordHeader header; + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t code_addr; + uint64_t code_size; + uint64_t code_index; + // Null terminated name + // Optional native code + }; +}; + +struct PerfJitDumpState +{ + PerfJitDumpState() : + enabled(false), + fd(-1), + mmapAddr(nullptr), + mutex(PTHREAD_MUTEX_INITIALIZER), + codeIndex(0) + {} + + bool enabled; + int fd; + void *mmapAddr; + pthread_mutex_t mutex; + uint64_t codeIndex; + + void Start() + { + pthread_mutex_lock(&mutex); + + char jitdumpPath[1024]; + + snprintf(jitdumpPath, sizeof(jitdumpPath), "/tmp/jit-%i.dump", getpid()); + + fd = open(jitdumpPath, O_CREAT|O_TRUNC|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR ); + + // Write file header + FileHeader header; + + write(fd, &header, sizeof(FileHeader)); + + fsync(fd); + + // mmap jitdump file + // this is a marker for perf inject to find the jitdumpfile + mmapAddr = mmap(nullptr, sizeof(FileHeader), PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); + + enabled = true; + + pthread_mutex_unlock(&mutex); + } + + void LogMethod(void* pCode, size_t codeSize, const char* symbol) + { + if (enabled) + { + pthread_mutex_lock(&mutex); + + size_t symbolLen = strlen(symbol); + + JitCoreLoadRecord record; + + record.vma = (uint64_t) pCode; + record.code_addr = (uint64_t) pCode; + record.code_size = codeSize; + record.code_index = ++codeIndex; + record.header.total_size = sizeof(JitCoreLoadRecord) + symbolLen + 1 + codeSize; + record.header.timestamp = GetTimeStampNS(); + + write(fd, &record, sizeof(JitCoreLoadRecord)); + + write(fd, symbol, symbolLen + 1); + + write(fd, pCode, codeSize); + + fsync(fd); + + pthread_mutex_unlock(&mutex); + } + } + + void Finish() + { + if (enabled) + { + // Lock the mutex + pthread_mutex_lock(&mutex); + + enabled = false; + + munmap(mmapAddr, sizeof(FileHeader)); + + fsync(fd); + + close(fd); + + pthread_mutex_unlock(&mutex); + } + } +}; + + +PerfJitDumpState& PerfJitDump::GetState() +{ + static PerfJitDumpState s; + + return s; +} + +void PerfJitDump::Start() +{ + GetState().Start(); +} + +void PerfJitDump::LogMethod(void* pCode, size_t codeSize, const char* symbol) +{ + GetState().LogMethod(pCode, codeSize, symbol); +} + +void PerfJitDump::Finish() +{ + GetState().Finish(); +} + +#else // JITDUMP_SUPPORTED + +struct PerfJitDumpState +{ +}; + +PerfJitDumpState& PerfJitDump::GetState() +{ + static PerfJitDumpState s; + + return s; +} + +void PerfJitDump::Start() +{ +} + +void PerfJitDump::LogMethod(void* pCode, size_t codeSize, const char* symbol) +{ +} + +void PerfJitDump::Finish() +{ +} + +#endif // JITDUMP_SUPPORTED diff --git a/src/vm/perfmap.cpp b/src/vm/perfmap.cpp index bc49bfef69cb..fd4a8b3c6362 100644 --- a/src/vm/perfmap.cpp +++ b/src/vm/perfmap.cpp @@ -11,6 +11,7 @@ #include "perfmap.h" #include "perfinfo.h" #include "pal.h" +#include "perfjitdump.h" // The code addresses are actually native image offsets during crossgen. Print // them as 32-bit numbers for consistent output when cross-targeting and to @@ -50,6 +51,9 @@ void PerfMap::Initialize() { s_ShowOptimizationTiers = true; } +#ifndef CROSSGEN_COMPILE + PerfJitDump::Start(); +#endif } } @@ -60,6 +64,9 @@ void PerfMap::Destroy() if (s_Current != nullptr) { +#ifndef CROSSGEN_COMPILE + PerfJitDump::Finish(); +#endif delete s_Current; s_Current = nullptr; } @@ -183,24 +190,23 @@ void PerfMap::LogMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, cons EX_TRY { // Get the full method signature. - SString fullMethodSignature; - pMethod->GetFullMethodInfo(fullMethodSignature); + SString name; + pMethod->GetFullMethodInfo(name); // Build the map file line. StackScratchBuffer scratch; - SString line; - line.Printf(FMT_CODE_ADDR " %x %s", pCode, codeSize, fullMethodSignature.GetANSI(scratch)); if (optimizationTier != nullptr && s_ShowOptimizationTiers) { - line.AppendPrintf("[%s]\n", optimizationTier); - } - else - { - line.Append(W('\n')); + name.AppendPrintf("[%s]", optimizationTier); } + SString line; + line.Printf(FMT_CODE_ADDR " %x %s\n", pCode, codeSize, name.GetANSI(scratch)); // Write the line. WriteLine(line); +#ifndef CROSSGEN_COMPILE + PerfJitDump::LogMethod((void*)pCode, codeSize, name.GetANSI(scratch)); +#endif } EX_CATCH{} EX_END_CATCH(SwallowAllExceptions); } @@ -282,15 +288,21 @@ void PerfMap::LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, } if(!stubType) { - stubOwner = "?"; + stubType = "?"; } // Build the map file line. + StackScratchBuffer scratch; + SString name; + name.Printf("stub<%d> %s<%s>", ++(s_Current->m_StubsMapped), stubType, stubOwner); SString line; - line.Printf(FMT_CODE_ADDR " %x stub<%d> %s<%s>\n", pCode, codeSize, ++(s_Current->m_StubsMapped), stubType, stubOwner); + line.Printf(FMT_CODE_ADDR " %x %s\n", pCode, codeSize, name.GetANSI(scratch)); // Write the line. s_Current->WriteLine(line); +#ifndef CROSSGEN_COMPILE + PerfJitDump::LogMethod((void*)pCode, codeSize, name.GetANSI(scratch)); +#endif } EX_CATCH{} EX_END_CATCH(SwallowAllExceptions); } From 1793bad4fcb4c42e1bbeb027c604363005d82d3e Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Wed, 25 Sep 2019 19:56:40 -0400 Subject: [PATCH 2/7] Fix spelling --- src/pal/src/misc/perfjitdump.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pal/src/misc/perfjitdump.cpp b/src/pal/src/misc/perfjitdump.cpp index c599e8cb0bd5..0299fe935b23 100644 --- a/src/pal/src/misc/perfjitdump.cpp +++ b/src/pal/src/misc/perfjitdump.cpp @@ -107,9 +107,9 @@ namespace uint64_t timestamp; }; - struct JitCoreLoadRecord + struct JitCodeLoadRecord { - JitCoreLoadRecord() : + JitCodeLoadRecord() : pid(getpid()), tid(syscall(SYS_gettid)) { @@ -179,16 +179,16 @@ struct PerfJitDumpState size_t symbolLen = strlen(symbol); - JitCoreLoadRecord record; + JitCodeLoadRecord record; record.vma = (uint64_t) pCode; record.code_addr = (uint64_t) pCode; record.code_size = codeSize; record.code_index = ++codeIndex; - record.header.total_size = sizeof(JitCoreLoadRecord) + symbolLen + 1 + codeSize; + record.header.total_size = sizeof(JitCodeLoadRecord) + symbolLen + 1 + codeSize; record.header.timestamp = GetTimeStampNS(); - write(fd, &record, sizeof(JitCoreLoadRecord)); + write(fd, &record, sizeof(JitCodeLoadRecord)); write(fd, symbol, symbolLen + 1); From 9bd19d0390517f8636d33d25dbf3925e38b8cbea Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Fri, 27 Sep 2019 11:28:25 -0400 Subject: [PATCH 3/7] Correct JIT_DUMP_MAGIC Writer should always write the same value, independent of endianness. --- src/pal/src/misc/perfjitdump.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/pal/src/misc/perfjitdump.cpp b/src/pal/src/misc/perfjitdump.cpp index 0299fe935b23..88415f376c4a 100644 --- a/src/pal/src/misc/perfjitdump.cpp +++ b/src/pal/src/misc/perfjitdump.cpp @@ -35,11 +35,7 @@ namespace { enum { -#ifdef BIGENDIAN - JIT_DUMP_MAGIC = 0x4454694A, -#else JIT_DUMP_MAGIC = 0x4A695444, -#endif JIT_DUMP_VERSION = 1, #if defined(_X86_) From 2c25dffe140c7ef3a6b4a5cb04f567ebae262a01 Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Thu, 3 Oct 2019 13:43:18 -0400 Subject: [PATCH 4/7] Add PreJIT to jitdump --- src/vm/perfmap.cpp | 36 ++++++++++++++++++++++++++++++++++++ src/vm/perfmap.h | 3 +++ src/vm/prestub.cpp | 5 +++++ 3 files changed, 44 insertions(+) diff --git a/src/vm/perfmap.cpp b/src/vm/perfmap.cpp index fd4a8b3c6362..0938c8876369 100644 --- a/src/vm/perfmap.cpp +++ b/src/vm/perfmap.cpp @@ -269,6 +269,42 @@ void PerfMap::LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t cod s_Current->LogMethod(pMethod, pCode, codeSize, optimizationTier); } +// Log a pre-compiled method to the perfmap. +void PerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode) +{ + LIMITED_METHOD_CONTRACT; + + // Get information about the NGEN'd method code. + EECodeInfo codeInfo(pCode); + _ASSERTE(codeInfo.IsValid()); + + IJitManager::MethodRegionInfo methodRegionInfo; + codeInfo.GetMethodRegionInfo(&methodRegionInfo); + + // Logging failures should not cause any exceptions to flow upstream. + EX_TRY + { + // Get the full method signature. + SString name; + pMethod->GetFullMethodInfo(name); + + StackScratchBuffer scratch; + + // NGEN can split code between hot and cold sections which are separate in memory. + // Emit an entry for each section if it is used. + if (methodRegionInfo.hotSize > 0) + { + PerfJitDump::LogMethod((void*)methodRegionInfo.hotStartAddress, methodRegionInfo.hotSize, name.GetANSI(scratch)); + } + + if (methodRegionInfo.coldSize > 0) + { + PerfJitDump::LogMethod((void*)methodRegionInfo.coldStartAddress, methodRegionInfo.coldSize, name.GetANSI(scratch)); + } + } + EX_CATCH{} EX_END_CATCH(SwallowAllExceptions); +} + // Log a set of stub to the map. void PerfMap::LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, size_t codeSize) { diff --git a/src/vm/perfmap.h b/src/vm/perfmap.h index 5788dcc60a06..ffba3332a266 100644 --- a/src/vm/perfmap.h +++ b/src/vm/perfmap.h @@ -70,6 +70,9 @@ class PerfMap // Log a JIT compiled method to the map. static void LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, PrepareCodeConfig *pConfig); + // Log a pre-compiled method to the map. + static void LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode); + // Log a set of stub to the map. static void LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, size_t codeSize); diff --git a/src/vm/prestub.cpp b/src/vm/prestub.cpp index 2858c515be48..f0e44c940c67 100644 --- a/src/vm/prestub.cpp +++ b/src/vm/prestub.cpp @@ -384,6 +384,11 @@ PCODE MethodDesc::PrepareILBasedCode(PrepareCodeConfig* pConfig) if (pCode == NULL) pCode = GetPrecompiledCode(pConfig); + +#ifdef FEATURE_PERFMAP + if (pCode != NULL) + PerfMap::LogPreCompiledMethod(this, pCode); +#endif } if (pCode == NULL) From 4c69ef853b6ff888f2a89349ea21b08dd2725974 Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Thu, 3 Oct 2019 17:54:35 -0400 Subject: [PATCH 5/7] Feedback Revise PAL API Add error handling Clean up #ifdef CROSSGEN Add COMPlus_PerfMapJitDumpPath Add placeholder debuginfo and unwindinfo arguments --- .../project-docs/clr-configuration-knobs.md | 1 + src/inc/clrconfigvalues.h | 1 + src/inc/perfjitdump.h | 32 ++-- src/pal/src/misc/perfjitdump.cpp | 171 +++++++++++++----- src/vm/perfmap.cpp | 48 +++-- 5 files changed, 174 insertions(+), 79 deletions(-) diff --git a/Documentation/project-docs/clr-configuration-knobs.md b/Documentation/project-docs/clr-configuration-knobs.md index 9786bc4b8d30..62f1aba509fc 100644 --- a/Documentation/project-docs/clr-configuration-knobs.md +++ b/Documentation/project-docs/clr-configuration-knobs.md @@ -709,6 +709,7 @@ Name | Description | Type | Class | Default Value | Flags `ETWEnabled` | This flag is used on OSes < Vista to enable/disable ETW. It is disabled by default | `DWORD` | `EXTERNAL` | `0` | REGUTIL_default `MsBetweenAttachCheck` | | `DWORD` | `EXTERNAL` | `500` | `PerfMapEnabled` | This flag is used on Linux to enable writing /tmp/perf-$pid.map. It is disabled by default | `DWORD` | `EXTERNAL` | `0` | REGUTIL_default +`PerfMapJitDumpPath` | Specifies a path to write the perf jitdump file. Defaults to GetTempPathA() | `STRING` | `EXTERNAL` | | REGUTIL_default `PerfMapIgnoreSignal` | When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs. It is disabled by default | `DWORD` | `EXTERNAL` | `0` | REGUTIL_default `ProfAPI_AttachProfilerMinTimeoutInMs` | Timeout in ms for the minimum time out value of AttachProfiler | `DWORD` | `EXTERNAL` | `10*1000` | `ProfAPI_DetachMaxSleepMs` | The maximum time, in milliseconds, the CLR will wait before checking whether a profiler that is in the process of detaching is ready to be unloaded. | `DWORD` | `EXTERNAL` | `0` | diff --git a/src/inc/clrconfigvalues.h b/src/inc/clrconfigvalues.h index a8788f2f6999..d781f24e3be5 100644 --- a/src/inc/clrconfigvalues.h +++ b/src/inc/clrconfigvalues.h @@ -562,6 +562,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_ProfAPI_ValidateNGENInstrumentation, W("Pro #ifdef FEATURE_PERFMAP RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PerfMapEnabled, W("PerfMapEnabled"), 0, "This flag is used on Linux to enable writing /tmp/perf-$pid.map. It is disabled by default", CLRConfig::REGUTIL_default) +CONFIG_STRING_INFO_EX(EXTERNAL_PerfMapJitDumpPath, W("PerfMapJitDumpPath"), "Specifies a path to write the perf jitdump file. Defaults to GetTempPathA()", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PerfMapIgnoreSignal, W("PerfMapIgnoreSignal"), 0, "When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs. It is disabled by default", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapShowOptimizationTiers, W("PerfMapShowOptimizationTiers"), 1, "Shows optimization tiers in the perf map for methods, as part of the symbol name. Useful for seeing separate stack frames for different optimization tiers of each method.") RETAIL_CONFIG_STRING_INFO(EXTERNAL_NativeImagePerfMapFormat, W("NativeImagePerfMapFormat"), "Specifies the format of native image perfmap files generated by crossgen. Valid options are RVA or OFFSET.") diff --git a/src/inc/perfjitdump.h b/src/inc/perfjitdump.h index 859b6b47ee97..593f5b1f5c9b 100644 --- a/src/inc/perfjitdump.h +++ b/src/inc/perfjitdump.h @@ -6,24 +6,20 @@ #ifndef PERF_JITDUMP_H #define PERF_JITDUMP_H -struct PerfJitDumpState; - -// Generates a perf jitdump file. -class PerfJitDump -{ -private: - static PerfJitDumpState& GetState(); - -public: - // Start the jitdump file - static void Start(); - - // Log a method to the jitdump file. - static void LogMethod(void* pCode, size_t codeSize, const char* symbol); - - // Finish the jitdump file - static void Finish(); -}; +int +PALAPI +// Start the jitdump file +PAL_PerfJitDump_Start(const char* path); + +int +PALAPI +// Log a method to the jitdump file. +PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo); + +int +PALAPI +// Finish the jitdump file +PAL_PerfJitDump_Finish(); #endif // PERF_JITDUMP_H diff --git a/src/pal/src/misc/perfjitdump.cpp b/src/pal/src/misc/perfjitdump.cpp index 88415f376c4a..20478a22a53a 100644 --- a/src/pal/src/misc/perfjitdump.cpp +++ b/src/pal/src/misc/perfjitdump.cpp @@ -3,7 +3,7 @@ // See the LICENSE file in the project root for more information. // =========================================================================== -#if defined(__linux__) +#if defined(__linux__) && !defined(CROSSGEN_COMPILE) #define JITDUMP_SUPPORTED #endif @@ -26,6 +26,7 @@ #include #include #include +#include #include "../inc/llvm/ELF.h" @@ -141,38 +142,66 @@ struct PerfJitDumpState pthread_mutex_t mutex; uint64_t codeIndex; - void Start() + int Start(const char* path) { - pthread_mutex_lock(&mutex); + int result = 0; - char jitdumpPath[1024]; + // Write file header + FileHeader header; - snprintf(jitdumpPath, sizeof(jitdumpPath), "/tmp/jit-%i.dump", getpid()); + result = pthread_mutex_lock(&mutex); - fd = open(jitdumpPath, O_CREAT|O_TRUNC|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR ); + if (enabled) + goto exit; - // Write file header - FileHeader header; + if (result != 0) + goto exit; + + char jitdumpPath[PATH_MAX]; + + result = snprintf(jitdumpPath, sizeof(jitdumpPath), "%s/jit-%i.dump", path, getpid()); + + if (result >= PATH_MAX) + goto exit; + + result = open(jitdumpPath, O_CREAT|O_TRUNC|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR ); + + if (result == -1) + goto exit; + + fd = result; - write(fd, &header, sizeof(FileHeader)); + result = write(fd, &header, sizeof(FileHeader)); - fsync(fd); + if (result == -1) + goto exit; + + result = fsync(fd); + + if (result == -1) + goto exit; // mmap jitdump file // this is a marker for perf inject to find the jitdumpfile mmapAddr = mmap(nullptr, sizeof(FileHeader), PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); + if (mmapAddr == MAP_FAILED) + goto exit; + enabled = true; - pthread_mutex_unlock(&mutex); +exit: + result = pthread_mutex_unlock(&mutex); + + return result; } - void LogMethod(void* pCode, size_t codeSize, const char* symbol) + int LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo) { + int result = 0; + if (enabled) { - pthread_mutex_lock(&mutex); - size_t symbolLen = strlen(symbol); JitCodeLoadRecord record; @@ -182,86 +211,134 @@ struct PerfJitDumpState record.code_size = codeSize; record.code_index = ++codeIndex; record.header.total_size = sizeof(JitCodeLoadRecord) + symbolLen + 1 + codeSize; + + result = pthread_mutex_lock(&mutex); + + if (result != 0) + goto exit; + + if (!enabled) + goto exit; + + // ToDo write debugInfo and unwindInfo immediately before the JitCodeLoadRecord (while lock is held). + record.header.timestamp = GetTimeStampNS(); - write(fd, &record, sizeof(JitCodeLoadRecord)); + result = write(fd, &record, sizeof(JitCodeLoadRecord)); - write(fd, symbol, symbolLen + 1); + if (result == -1) + goto exit; - write(fd, pCode, codeSize); + result = write(fd, symbol, symbolLen + 1); - fsync(fd); + if (result == -1) + goto exit; - pthread_mutex_unlock(&mutex); + result = write(fd, pCode, codeSize); + + if (result == -1) + goto exit; + + result = fsync(fd); + + if (result == -1) + goto exit; + +exit: + if (result != 0) + enabled = false; + + result = pthread_mutex_unlock(&mutex); } + return result; } - void Finish() + int Finish() { + int result = 0; + if (enabled) { + enabled = false; + // Lock the mutex - pthread_mutex_lock(&mutex); + result = pthread_mutex_lock(&mutex); - enabled = false; + if (result != 0) + goto exit; + + result = munmap(mmapAddr, sizeof(FileHeader)); + + if (result == -1) + goto exit; - munmap(mmapAddr, sizeof(FileHeader)); + result = fsync(fd); - fsync(fd); + if (result == -1) + goto exit; - close(fd); + result = close(fd); - pthread_mutex_unlock(&mutex); + if (result == -1) + goto exit; + +exit: + result = pthread_mutex_unlock(&mutex); } + return result; } }; -PerfJitDumpState& PerfJitDump::GetState() +PerfJitDumpState& GetState() { static PerfJitDumpState s; return s; } -void PerfJitDump::Start() +int +PALAPI +PAL_PerfJitDump_Start(const char* path) { - GetState().Start(); + return GetState().Start(path); } -void PerfJitDump::LogMethod(void* pCode, size_t codeSize, const char* symbol) +int +PALAPI +PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo) { - GetState().LogMethod(pCode, codeSize, symbol); + return GetState().LogMethod(pCode, codeSize, symbol, debugInfo, unwindInfo); } -void PerfJitDump::Finish() +int +PALAPI +PAL_PerfJitDump_Finish() { - GetState().Finish(); + return GetState().Finish(); } #else // JITDUMP_SUPPORTED -struct PerfJitDumpState -{ -}; - -PerfJitDumpState& PerfJitDump::GetState() -{ - static PerfJitDumpState s; - - return s; -} - -void PerfJitDump::Start() +int +PALAPI +PAL_PerfJitDump_Start(const char* path) { + return 0; } -void PerfJitDump::LogMethod(void* pCode, size_t codeSize, const char* symbol) +int +PALAPI +PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo) { + return 0; } -void PerfJitDump::Finish() +int +PALAPI +PAL_PerfJitDump_Finish() { + return 0; } #endif // JITDUMP_SUPPORTED diff --git a/src/vm/perfmap.cpp b/src/vm/perfmap.cpp index 0938c8876369..dee353f40143 100644 --- a/src/vm/perfmap.cpp +++ b/src/vm/perfmap.cpp @@ -51,9 +51,20 @@ void PerfMap::Initialize() { s_ShowOptimizationTiers = true; } -#ifndef CROSSGEN_COMPILE - PerfJitDump::Start(); -#endif + + char jitdumpPath[4096]; + + // CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PerfMapJitDumpPath) returns a LPWSTR + // Use GetEnvironmentVariableA because it is simpler. + // Keep comment here to make it searchable. + auto len = GetEnvironmentVariableA("COMPlus_PerfMapJitDumpPath", jitdumpPath, sizeof(jitdumpPath) - 1); + + if (len == 0) + { + GetTempPathA(sizeof(jitdumpPath) - 1, jitdumpPath); + } + + PAL_PerfJitDump_Start(jitdumpPath); } } @@ -64,9 +75,7 @@ void PerfMap::Destroy() if (s_Current != nullptr) { -#ifndef CROSSGEN_COMPILE - PerfJitDump::Finish(); -#endif + PAL_PerfJitDump_Finish(); delete s_Current; s_Current = nullptr; } @@ -204,9 +213,7 @@ void PerfMap::LogMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, cons // Write the line. WriteLine(line); -#ifndef CROSSGEN_COMPILE - PerfJitDump::LogMethod((void*)pCode, codeSize, name.GetANSI(scratch)); -#endif + PAL_PerfJitDump_LogMethod((void*)pCode, codeSize, name.GetANSI(scratch), nullptr, nullptr); } EX_CATCH{} EX_END_CATCH(SwallowAllExceptions); } @@ -274,6 +281,11 @@ void PerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode) { LIMITED_METHOD_CONTRACT; + if (s_Current == nullptr) + { + return; + } + // Get information about the NGEN'd method code. EECodeInfo codeInfo(pCode); _ASSERTE(codeInfo.IsValid()); @@ -290,16 +302,26 @@ void PerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode) StackScratchBuffer scratch; + if (s_ShowOptimizationTiers) + { + name.AppendPrintf(W("[PreJIT]")); + } + // NGEN can split code between hot and cold sections which are separate in memory. // Emit an entry for each section if it is used. if (methodRegionInfo.hotSize > 0) { - PerfJitDump::LogMethod((void*)methodRegionInfo.hotStartAddress, methodRegionInfo.hotSize, name.GetANSI(scratch)); + PAL_PerfJitDump_LogMethod((void*)methodRegionInfo.hotStartAddress, methodRegionInfo.hotSize, name.GetANSI(scratch), nullptr, nullptr); } if (methodRegionInfo.coldSize > 0) { - PerfJitDump::LogMethod((void*)methodRegionInfo.coldStartAddress, methodRegionInfo.coldSize, name.GetANSI(scratch)); + if (s_ShowOptimizationTiers) + { + pMethod->GetFullMethodInfo(name); + name.AppendPrintf(W("[PreJit-cold]")); + } + PAL_PerfJitDump_LogMethod((void*)methodRegionInfo.coldStartAddress, methodRegionInfo.coldSize, name.GetANSI(scratch), nullptr, nullptr); } } EX_CATCH{} EX_END_CATCH(SwallowAllExceptions); @@ -336,9 +358,7 @@ void PerfMap::LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, // Write the line. s_Current->WriteLine(line); -#ifndef CROSSGEN_COMPILE - PerfJitDump::LogMethod((void*)pCode, codeSize, name.GetANSI(scratch)); -#endif + PAL_PerfJitDump_LogMethod((void*)pCode, codeSize, name.GetANSI(scratch), nullptr, nullptr); } EX_CATCH{} EX_END_CATCH(SwallowAllExceptions); } From 70c97241cf261d59b3a8cc2613343e3a1719161b Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Fri, 4 Oct 2019 16:34:14 -0400 Subject: [PATCH 6/7] RETAIL ... PerfMapJitDumpPath --- src/inc/clrconfigvalues.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inc/clrconfigvalues.h b/src/inc/clrconfigvalues.h index d781f24e3be5..a2a903b080ee 100644 --- a/src/inc/clrconfigvalues.h +++ b/src/inc/clrconfigvalues.h @@ -562,7 +562,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_ProfAPI_ValidateNGENInstrumentation, W("Pro #ifdef FEATURE_PERFMAP RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PerfMapEnabled, W("PerfMapEnabled"), 0, "This flag is used on Linux to enable writing /tmp/perf-$pid.map. It is disabled by default", CLRConfig::REGUTIL_default) -CONFIG_STRING_INFO_EX(EXTERNAL_PerfMapJitDumpPath, W("PerfMapJitDumpPath"), "Specifies a path to write the perf jitdump file. Defaults to GetTempPathA()", CLRConfig::REGUTIL_default) +RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_PerfMapJitDumpPath, W("PerfMapJitDumpPath"), "Specifies a path to write the perf jitdump file. Defaults to GetTempPathA()", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PerfMapIgnoreSignal, W("PerfMapIgnoreSignal"), 0, "When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs. It is disabled by default", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapShowOptimizationTiers, W("PerfMapShowOptimizationTiers"), 1, "Shows optimization tiers in the perf map for methods, as part of the symbol name. Useful for seeing separate stack frames for different optimization tiers of each method.") RETAIL_CONFIG_STRING_INFO(EXTERNAL_NativeImagePerfMapFormat, W("NativeImagePerfMapFormat"), "Specifies the format of native image perfmap files generated by crossgen. Valid options are RVA or OFFSET.") From 6feed069d1e31c6a825762406c200168b7f8e101 Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Fri, 4 Oct 2019 19:52:47 -0400 Subject: [PATCH 7/7] Feedback Revise error handling Remove perfjitdump.h Fix #if CROSSGEN_COMPILE --- src/inc/perfjitdump.h | 25 ---------- src/pal/inc/pal.h | 18 +++++++ src/pal/src/misc/perfjitdump.cpp | 84 +++++++++++++++++++++++--------- src/vm/perfmap.cpp | 5 +- 4 files changed, 81 insertions(+), 51 deletions(-) delete mode 100644 src/inc/perfjitdump.h diff --git a/src/inc/perfjitdump.h b/src/inc/perfjitdump.h deleted file mode 100644 index 593f5b1f5c9b..000000000000 --- a/src/inc/perfjitdump.h +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. -// =========================================================================== - -#ifndef PERF_JITDUMP_H -#define PERF_JITDUMP_H - -int -PALAPI -// Start the jitdump file -PAL_PerfJitDump_Start(const char* path); - -int -PALAPI -// Log a method to the jitdump file. -PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo); - -int -PALAPI -// Finish the jitdump file -PAL_PerfJitDump_Finish(); - -#endif // PERF_JITDUMP_H - diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h index 86319ec5ebfd..374f81b4dd4d 100644 --- a/src/pal/inc/pal.h +++ b/src/pal/inc/pal.h @@ -529,6 +529,24 @@ PAL_ProbeMemory( DWORD cbBuffer, BOOL fWriteAccess); +PALIMPORT +int +PALAPI +// Start the jitdump file +PAL_PerfJitDump_Start(const char* path); + +PALIMPORT +int +PALAPI +// Log a method to the jitdump file. +PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo); + +PALIMPORT +int +PALAPI +// Finish the jitdump file +PAL_PerfJitDump_Finish(); + /******************* winuser.h Entrypoints *******************************/ PALIMPORT LPSTR diff --git a/src/pal/src/misc/perfjitdump.cpp b/src/pal/src/misc/perfjitdump.cpp index 20478a22a53a..77c2382c2e98 100644 --- a/src/pal/src/misc/perfjitdump.cpp +++ b/src/pal/src/misc/perfjitdump.cpp @@ -3,7 +3,7 @@ // See the LICENSE file in the project root for more information. // =========================================================================== -#if defined(__linux__) && !defined(CROSSGEN_COMPILE) +#if defined(__linux__) #define JITDUMP_SUPPORTED #endif @@ -11,7 +11,6 @@ #include "pal/dbgmsg.h" #include -#include "perfjitdump.h" #ifdef JITDUMP_SUPPORTED @@ -131,7 +130,7 @@ struct PerfJitDumpState PerfJitDumpState() : enabled(false), fd(-1), - mmapAddr(nullptr), + mmapAddr(MAP_FAILED), mutex(PTHREAD_MUTEX_INITIALIZER), codeIndex(0) {} @@ -142,6 +141,30 @@ struct PerfJitDumpState pthread_mutex_t mutex; uint64_t codeIndex; + int FatalError(bool locked) + { + enabled = false; + + if (mmapAddr != MAP_FAILED) + { + munmap(mmapAddr, sizeof(FileHeader)); + mmapAddr = MAP_FAILED; + } + + if (fd != -1) + { + close(fd); + fd = -1; + } + + if (locked) + { + pthread_mutex_unlock(&mutex); + } + + return -1; + } + int Start(const char* path) { int result = 0; @@ -151,10 +174,10 @@ struct PerfJitDumpState result = pthread_mutex_lock(&mutex); - if (enabled) - goto exit; - if (result != 0) + return FatalError(false); + + if (enabled) goto exit; char jitdumpPath[PATH_MAX]; @@ -162,38 +185,41 @@ struct PerfJitDumpState result = snprintf(jitdumpPath, sizeof(jitdumpPath), "%s/jit-%i.dump", path, getpid()); if (result >= PATH_MAX) - goto exit; + return FatalError(true); result = open(jitdumpPath, O_CREAT|O_TRUNC|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR ); if (result == -1) - goto exit; + return FatalError(true); fd = result; result = write(fd, &header, sizeof(FileHeader)); if (result == -1) - goto exit; + return FatalError(true); result = fsync(fd); if (result == -1) - goto exit; + return FatalError(true); // mmap jitdump file // this is a marker for perf inject to find the jitdumpfile mmapAddr = mmap(nullptr, sizeof(FileHeader), PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); if (mmapAddr == MAP_FAILED) - goto exit; + return FatalError(true); enabled = true; exit: result = pthread_mutex_unlock(&mutex); - return result; + if (result != 0) + return FatalError(false); + + return 0; } int LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo) @@ -215,7 +241,7 @@ struct PerfJitDumpState result = pthread_mutex_lock(&mutex); if (result != 0) - goto exit; + return FatalError(false); if (!enabled) goto exit; @@ -227,30 +253,31 @@ struct PerfJitDumpState result = write(fd, &record, sizeof(JitCodeLoadRecord)); if (result == -1) - goto exit; + return FatalError(true); result = write(fd, symbol, symbolLen + 1); if (result == -1) - goto exit; + return FatalError(true); result = write(fd, pCode, codeSize); if (result == -1) - goto exit; + return FatalError(true); result = fsync(fd); if (result == -1) - goto exit; + return FatalError(true); exit: + result = pthread_mutex_unlock(&mutex); + if (result != 0) - enabled = false; + return FatalError(false); - result = pthread_mutex_unlock(&mutex); } - return result; + return 0; } int Finish() @@ -265,27 +292,36 @@ struct PerfJitDumpState result = pthread_mutex_lock(&mutex); if (result != 0) + return FatalError(false); + + if (!enabled) goto exit; result = munmap(mmapAddr, sizeof(FileHeader)); if (result == -1) - goto exit; + return FatalError(true); + + mmapAddr = MAP_FAILED; result = fsync(fd); if (result == -1) - goto exit; + return FatalError(true); result = close(fd); if (result == -1) - goto exit; + return FatalError(true); + fd = -1; exit: result = pthread_mutex_unlock(&mutex); + + if (result != 0) + return -1; } - return result; + return 0; } }; diff --git a/src/vm/perfmap.cpp b/src/vm/perfmap.cpp index dee353f40143..6a749c768998 100644 --- a/src/vm/perfmap.cpp +++ b/src/vm/perfmap.cpp @@ -11,7 +11,6 @@ #include "perfmap.h" #include "perfinfo.h" #include "pal.h" -#include "perfjitdump.h" // The code addresses are actually native image offsets during crossgen. Print // them as 32-bit numbers for consistent output when cross-targeting and to @@ -52,12 +51,13 @@ void PerfMap::Initialize() s_ShowOptimizationTiers = true; } +#ifndef CROSSGEN_COMPILE char jitdumpPath[4096]; // CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PerfMapJitDumpPath) returns a LPWSTR // Use GetEnvironmentVariableA because it is simpler. // Keep comment here to make it searchable. - auto len = GetEnvironmentVariableA("COMPlus_PerfMapJitDumpPath", jitdumpPath, sizeof(jitdumpPath) - 1); + DWORD len = GetEnvironmentVariableA("COMPlus_PerfMapJitDumpPath", jitdumpPath, sizeof(jitdumpPath) - 1); if (len == 0) { @@ -65,6 +65,7 @@ void PerfMap::Initialize() } PAL_PerfJitDump_Start(jitdumpPath); +#endif // CROSSGEN_COMPILE } }