diff --git a/Documentation/project-docs/clr-configuration-knobs.md b/Documentation/project-docs/clr-configuration-knobs.md index 9786bc4b8d30..62f1aba509fc 100644 --- a/Documentation/project-docs/clr-configuration-knobs.md +++ b/Documentation/project-docs/clr-configuration-knobs.md @@ -709,6 +709,7 @@ Name | Description | Type | Class | Default Value | Flags `ETWEnabled` | This flag is used on OSes < Vista to enable/disable ETW. It is disabled by default | `DWORD` | `EXTERNAL` | `0` | REGUTIL_default `MsBetweenAttachCheck` | | `DWORD` | `EXTERNAL` | `500` | `PerfMapEnabled` | This flag is used on Linux to enable writing /tmp/perf-$pid.map. It is disabled by default | `DWORD` | `EXTERNAL` | `0` | REGUTIL_default +`PerfMapJitDumpPath` | Specifies a path to write the perf jitdump file. Defaults to GetTempPathA() | `STRING` | `EXTERNAL` | | REGUTIL_default `PerfMapIgnoreSignal` | When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs. It is disabled by default | `DWORD` | `EXTERNAL` | `0` | REGUTIL_default `ProfAPI_AttachProfilerMinTimeoutInMs` | Timeout in ms for the minimum time out value of AttachProfiler | `DWORD` | `EXTERNAL` | `10*1000` | `ProfAPI_DetachMaxSleepMs` | The maximum time, in milliseconds, the CLR will wait before checking whether a profiler that is in the process of detaching is ready to be unloaded. | `DWORD` | `EXTERNAL` | `0` | diff --git a/src/inc/clrconfigvalues.h b/src/inc/clrconfigvalues.h index a8788f2f6999..a2a903b080ee 100644 --- a/src/inc/clrconfigvalues.h +++ b/src/inc/clrconfigvalues.h @@ -562,6 +562,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_ProfAPI_ValidateNGENInstrumentation, W("Pro #ifdef FEATURE_PERFMAP RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PerfMapEnabled, W("PerfMapEnabled"), 0, "This flag is used on Linux to enable writing /tmp/perf-$pid.map. 
It is disabled by default", CLRConfig::REGUTIL_default) +RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_PerfMapJitDumpPath, W("PerfMapJitDumpPath"), "Specifies a path to write the perf jitdump file. Defaults to GetTempPathA()", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PerfMapIgnoreSignal, W("PerfMapIgnoreSignal"), 0, "When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs. It is disabled by default", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapShowOptimizationTiers, W("PerfMapShowOptimizationTiers"), 1, "Shows optimization tiers in the perf map for methods, as part of the symbol name. Useful for seeing separate stack frames for different optimization tiers of each method.") RETAIL_CONFIG_STRING_INFO(EXTERNAL_NativeImagePerfMapFormat, W("NativeImagePerfMapFormat"), "Specifies the format of native image perfmap files generated by crossgen. Valid options are RVA or OFFSET.") diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h index 86319ec5ebfd..374f81b4dd4d 100644 --- a/src/pal/inc/pal.h +++ b/src/pal/inc/pal.h @@ -529,6 +529,24 @@ PAL_ProbeMemory( DWORD cbBuffer, BOOL fWriteAccess); +PALIMPORT +int +PALAPI +// Start the jitdump file +PAL_PerfJitDump_Start(const char* path); + +PALIMPORT +int +PALAPI +// Log a method to the jitdump file. 
+PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo);
+
+PALIMPORT
+int
+PALAPI
+// Finish the jitdump file
+PAL_PerfJitDump_Finish();
+
 /******************* winuser.h Entrypoints *******************************/
 PALIMPORT
 LPSTR
diff --git a/src/pal/src/CMakeLists.txt b/src/pal/src/CMakeLists.txt
index 1a5b3b33e4a4..542de8cde2aa 100644
--- a/src/pal/src/CMakeLists.txt
+++ b/src/pal/src/CMakeLists.txt
@@ -226,6 +226,7 @@ set(SOURCES
   misc/jitsupport.cpp
   misc/miscpalapi.cpp
   misc/msgbox.cpp
+  misc/perfjitdump.cpp
   misc/strutil.cpp
   misc/sysinfo.cpp
   misc/time.cpp
diff --git a/src/pal/src/misc/perfjitdump.cpp b/src/pal/src/misc/perfjitdump.cpp
new file mode 100644
index 000000000000..77c2382c2e98
--- /dev/null
+++ b/src/pal/src/misc/perfjitdump.cpp
@@ -0,0 +1,433 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// ===========================================================================
+
+#if defined(__linux__)
+#define JITDUMP_SUPPORTED
+#endif
+
+#include "pal/palinternal.h"
+#include "pal/dbgmsg.h"
+
+#include <cstddef>
+
+#ifdef JITDUMP_SUPPORTED
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/limits.h>
+
+#include "../inc/llvm/ELF.h"
+
+SET_DEFAULT_DEBUG_CHANNEL(MISC);
+
+namespace
+{
+    // Constants written into the jitdump file header and record headers.
+    enum
+    {
+        JIT_DUMP_MAGIC = 0x4A695444,
+        JIT_DUMP_VERSION = 1,
+
+#if defined(_X86_)
+        ELF_MACHINE = EM_386,
+#elif defined(_ARM_)
+        ELF_MACHINE = EM_ARM,
+#elif defined(_AMD64_)
+        ELF_MACHINE = EM_X86_64,
+#elif defined(_ARM64_)
+        ELF_MACHINE = EM_AARCH64,
+#else
+#error ELF_MACHINE unsupported for target
+#endif
+
+        JIT_CODE_LOAD = 0,
+    };
+
+    // Returns the CLOCK_MONOTONIC time in nanoseconds, or 0 if the clock cannot be read.
+    uint64_t GetTimeStampNS()
+    {
+#if HAVE_CLOCK_MONOTONIC
+        struct timespec ts;
+        int result = clock_gettime(CLOCK_MONOTONIC, &ts);
+
+        if (result != 0)
+        {
+            ASSERT("clock_gettime(CLOCK_MONOTONIC) failed: %d\n", result);
+            return 0;
+        }
+        else
+        {
+            return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+        }
+#else
+    #error "The PAL jitdump requires clock_gettime(CLOCK_MONOTONIC) to be supported."
+#endif
+    }
+
+    // Writes all `size` bytes of `buffer` to `fd`, retrying after EINTR and after short writes.
+    // Returns false if the data could not be written completely.
+    bool WriteFully(int fd, const void* buffer, size_t size)
+    {
+        const char* cursor = static_cast<const char*>(buffer);
+
+        while (size > 0)
+        {
+            ssize_t written = write(fd, cursor, size);
+
+            if (written > 0)
+            {
+                cursor += written;
+                size -= written;
+            }
+            else if (written != -1 || errno != EINTR)
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    // Header that Start writes once at the beginning of the jitdump file.
+    struct FileHeader
+    {
+        FileHeader() :
+            magic(JIT_DUMP_MAGIC),
+            version(JIT_DUMP_VERSION),
+            total_size(sizeof(FileHeader)),
+            elf_mach(ELF_MACHINE),
+            pad1(0),
+            pid(getpid()),
+            timestamp(GetTimeStampNS()),
+            flags(0)
+        {}
+
+        uint32_t magic;
+        uint32_t version;
+        uint32_t total_size;
+        uint32_t elf_mach;
+        uint32_t pad1;
+        uint32_t pid;
+        uint64_t timestamp;
+        uint64_t flags;
+    };
+
+    // Leading part of a record: record id, total record size and timestamp.
+    struct RecordHeader
+    {
+        uint32_t id;
+        uint32_t total_size;
+        uint64_t timestamp;
+    };
+
+    // Fixed-size part of a JIT_CODE_LOAD record; LogMethod writes the symbol name
+    // and the code bytes directly after it.
+    struct JitCodeLoadRecord
+    {
+        JitCodeLoadRecord() :
+            pid(getpid()),
+            tid(syscall(SYS_gettid))
+        {
+            header.id = JIT_CODE_LOAD;
+            header.timestamp = GetTimeStampNS();
+        }
+
+        RecordHeader header;
+        uint32_t pid;
+        uint32_t tid;
+        uint64_t vma;
+        uint64_t code_addr;
+        uint64_t code_size;
+        uint64_t code_index;
+        // Null terminated name
+        // Optional native code
+    };
+}
+
+// State of the jitdump writer. Start, LogMethod and Finish take `mutex` before touching the file;
+// LogMethod and Finish also read `enabled` without the lock as an early-out and re-check it after.
+struct PerfJitDumpState
+{
+    PerfJitDumpState() :
+        enabled(false),
+        fd(-1),
+        mmapAddr(MAP_FAILED),
+        mutex(PTHREAD_MUTEX_INITIALIZER),
+        codeIndex(0)
+    {}
+
+    bool enabled;
+    int fd;
+    void *mmapAddr;
+    pthread_mutex_t mutex;
+    uint64_t codeIndex;
+
+    // Disables the writer and releases the mapping and the file descriptor.
+    // `locked` tells whether the caller holds `mutex`; if so it is released here.
+    // Always returns -1 so that callers can write `return FatalError(...)`.
+    int FatalError(bool locked)
+    {
+        enabled = false;
+
+        if (mmapAddr != MAP_FAILED)
+        {
+            munmap(mmapAddr, sizeof(FileHeader));
+            mmapAddr = MAP_FAILED;
+        }
+
+        if (fd != -1)
+        {
+            close(fd);
+            fd = -1;
+        }
+
+        if (locked)
+        {
+            pthread_mutex_unlock(&mutex);
+        }
+
+        return -1;
+    }
+
+    // Creates <path>/jit-<pid>.dump, writes the file header and maps the file.
+    // Does nothing if the writer is already enabled. Returns 0 on success, -1 on failure.
+    int Start(const char* path)
+    {
+        int result = 0;
+
+        // Nothing has been opened yet, so there is nothing to tear down.
+        if (path == nullptr)
+            return -1;
+
+        // Write file header
+        FileHeader header;
+
+        result = pthread_mutex_lock(&mutex);
+
+        if (result != 0)
+            return FatalError(false);
+
+        if (enabled)
+            goto exit;
+
+        char jitdumpPath[PATH_MAX];
+
+        result = snprintf(jitdumpPath, sizeof(jitdumpPath), "%s/jit-%i.dump", path, getpid());
+
+        // A negative result is an output error; a result >= PATH_MAX means the path was truncated.
+        if (result < 0 || result >= PATH_MAX)
+            return FatalError(true);
+
+        result = open(jitdumpPath, O_CREAT|O_TRUNC|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR );
+
+        if (result == -1)
+            return FatalError(true);
+
+        fd = result;
+
+        if (!WriteFully(fd, &header, sizeof(FileHeader)))
+            return FatalError(true);
+
+        result = fsync(fd);
+
+        if (result == -1)
+            return FatalError(true);
+
+        // mmap jitdump file
+        // this is a marker for perf inject to find the jitdumpfile
+        mmapAddr = mmap(nullptr, sizeof(FileHeader), PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0);
+
+        if (mmapAddr == MAP_FAILED)
+            return FatalError(true);
+
+        enabled = true;
+
+exit:
+        result = pthread_mutex_unlock(&mutex);
+
+        if (result != 0)
+            return FatalError(false);
+
+        return 0;
+    }
+
+    // Appends a JIT_CODE_LOAD record for the codeSize bytes at pCode, named `symbol`.
+    // debugInfo and unwindInfo are currently not written. Returns 0 on success or when the
+    // writer is disabled, -1 on failure (an I/O or locking failure also disables the writer).
+    int LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo)
+    {
+        int result = 0;
+
+        if (enabled)
+        {
+            // Reject a missing name without touching the dump state.
+            if (symbol == nullptr)
+                return -1;
+
+            size_t symbolLen = strlen(symbol);
+
+            JitCodeLoadRecord record;
+
+            record.vma = (uint64_t) pCode;
+            record.code_addr = (uint64_t) pCode;
+            record.code_size = codeSize;
+            record.header.total_size = sizeof(JitCodeLoadRecord) + symbolLen + 1 + codeSize;
+
+            result = pthread_mutex_lock(&mutex);
+
+            if (result != 0)
+                return FatalError(false);
+
+            if (!enabled)
+                goto exit;
+
+            // ToDo write debugInfo and unwindInfo immediately before the JitCodeLoadRecord (while lock is held).
+
+            // Take the index under the lock so that concurrent callers cannot race on codeIndex
+            // and records reach the file in index order.
+            record.code_index = ++codeIndex;
+            record.header.timestamp = GetTimeStampNS();
+
+            if (!WriteFully(fd, &record, sizeof(JitCodeLoadRecord)))
+                return FatalError(true);
+
+            if (!WriteFully(fd, symbol, symbolLen + 1))
+                return FatalError(true);
+
+            if (!WriteFully(fd, pCode, codeSize))
+                return FatalError(true);
+
+            result = fsync(fd);
+
+            if (result == -1)
+                return FatalError(true);
+
+exit:
+            result = pthread_mutex_unlock(&mutex);
+
+            if (result != 0)
+                return FatalError(false);
+
+        }
+        return 0;
+    }
+
+    // Unmaps, syncs and closes the jitdump file and disables the writer.
+    // Returns 0 on success or when the writer is not enabled, -1 on failure.
+    int Finish()
+    {
+        int result = 0;
+
+        if (enabled)
+        {
+            // Lock the mutex
+            result = pthread_mutex_lock(&mutex);
+
+            if (result != 0)
+                return FatalError(false);
+
+            // Another thread may have finished or failed while this one waited for the lock.
+            if (!enabled)
+                goto exit;
+
+            // Clear the flag only after the check above: clearing it earlier would make that
+            // check always take the exit path and leave the mapping and the descriptor open.
+            enabled = false;
+
+            result = munmap(mmapAddr, sizeof(FileHeader));
+
+            if (result == -1)
+                return FatalError(true);
+
+            mmapAddr = MAP_FAILED;
+
+            result = fsync(fd);
+
+            if (result == -1)
+                return FatalError(true);
+
+            result = close(fd);
+
+            // On Linux the descriptor is released even when close() reports an error, so forget
+            // it before the check; FatalError must not close the same number a second time.
+            fd = -1;
+
+            if (result == -1)
+                return FatalError(true);
+
+exit:
+            result = pthread_mutex_unlock(&mutex);
+
+            if (result != 0)
+                return -1;
+        }
+        return 0;
+    }
+};
+
+
+// Returns the single function-local static PerfJitDumpState used by the PAL entry points below.
+PerfJitDumpState& GetState()
+{
+    static PerfJitDumpState s;
+
+    return s;
+}
+
+int
+PALAPI
+PAL_PerfJitDump_Start(const char* path)
+{
+    return GetState().Start(path);
+}
+
+int
+PALAPI
+PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo)
+{
+    return GetState().LogMethod(pCode, codeSize, symbol, debugInfo, unwindInfo);
+}
+
+int
+PALAPI
+PAL_PerfJitDump_Finish()
+{
+    return GetState().Finish();
+}
+
+#else // JITDUMP_SUPPORTED
+
+int
+PALAPI
+PAL_PerfJitDump_Start(const char* path)
+{
+    return 0;
+}
+
+int
+PALAPI
+PAL_PerfJitDump_LogMethod(void* pCode, size_t codeSize, const char* symbol, void* debugInfo, void* unwindInfo)
+{
+    return 0;
+}
+
+int
+PALAPI
+PAL_PerfJitDump_Finish()
+{
+    return 0;
+}
+
+#endif // JITDUMP_SUPPORTED
diff --git a/src/vm/perfmap.cpp
b/src/vm/perfmap.cpp
index bc49bfef69cb..6a749c768998 100644
--- a/src/vm/perfmap.cpp
+++ b/src/vm/perfmap.cpp
@@ -50,6 +50,28 @@ void PerfMap::Initialize()
         {
             s_ShowOptimizationTiers = true;
         }
+
+#ifndef CROSSGEN_COMPILE
+        char jitdumpPath[4096];
+
+        // CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PerfMapJitDumpPath) returns a LPWSTR
+        // Use GetEnvironmentVariableA because it is simpler.
+        // Keep comment here to make it searchable.
+        DWORD len = GetEnvironmentVariableA("COMPlus_PerfMapJitDumpPath", jitdumpPath, sizeof(jitdumpPath) - 1);
+
+        // 0 means the variable is not set; a length that does not fit means the value was too long,
+        // and the Win32 contract leaves the buffer undefined then. Use the temp path in both cases.
+        if (len == 0 || len >= sizeof(jitdumpPath) - 1)
+        {
+            len = GetTempPathA(sizeof(jitdumpPath) - 1, jitdumpPath);
+        }
+
+        // Start the jitdump only when one of the calls above produced a usable path.
+        if (len != 0 && len < sizeof(jitdumpPath) - 1)
+        {
+            PAL_PerfJitDump_Start(jitdumpPath);
+        }
+#endif // CROSSGEN_COMPILE
     }
 }
 
@@ -60,6 +82,7 @@ void PerfMap::Destroy()
 
     if (s_Current != nullptr)
     {
+        PAL_PerfJitDump_Finish();
         delete s_Current;
         s_Current = nullptr;
     }
@@ -183,24 +206,21 @@ void PerfMap::LogMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, cons
     EX_TRY
     {
         // Get the full method signature.
-        SString fullMethodSignature;
-        pMethod->GetFullMethodInfo(fullMethodSignature);
+        SString name;
+        pMethod->GetFullMethodInfo(name);
 
         // Build the map file line.
         StackScratchBuffer scratch;
-        SString line;
-        line.Printf(FMT_CODE_ADDR " %x %s", pCode, codeSize, fullMethodSignature.GetANSI(scratch));
         if (optimizationTier != nullptr && s_ShowOptimizationTiers)
         {
-            line.AppendPrintf("[%s]\n", optimizationTier);
-        }
-        else
-        {
-            line.Append(W('\n'));
+            name.AppendPrintf("[%s]", optimizationTier);
         }
+        SString line;
+        line.Printf(FMT_CODE_ADDR " %x %s\n", pCode, codeSize, name.GetANSI(scratch));
 
         // Write the line.
         WriteLine(line);
+        PAL_PerfJitDump_LogMethod((void*)pCode, codeSize, name.GetANSI(scratch), nullptr, nullptr);
     }
     EX_CATCH{} EX_END_CATCH(SwallowAllExceptions);
 }
@@ -263,6 +283,60 @@ void PerfMap::LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t cod
     s_Current->LogMethod(pMethod, pCode, codeSize, optimizationTier);
 }
 
+// Log a pre-compiled method to the jitdump file.
+// Emits one entry per used code region (hot, cold); this function does not write a perf map line.
+void PerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    if (s_Current == nullptr)
+    {
+        return;
+    }
+
+    // Get information about the NGEN'd method code.
+    EECodeInfo codeInfo(pCode);
+    _ASSERTE(codeInfo.IsValid());
+
+    IJitManager::MethodRegionInfo methodRegionInfo;
+    codeInfo.GetMethodRegionInfo(&methodRegionInfo);
+
+    // Logging failures should not cause any exceptions to flow upstream.
+    EX_TRY
+    {
+        // NGEN can split code between hot and cold sections which are separate in memory.
+        // Emit an entry for each section if it is used. Each entry builds its name in a fresh
+        // string, so the result does not depend on whether GetFullMethodInfo replaces or
+        // appends to the string it is given.
+        if (methodRegionInfo.hotSize > 0)
+        {
+            SString name;
+            StackScratchBuffer scratch;
+            pMethod->GetFullMethodInfo(name);
+
+            if (s_ShowOptimizationTiers)
+            {
+                name.AppendPrintf(W("[PreJIT]"));
+            }
+            PAL_PerfJitDump_LogMethod((void*)methodRegionInfo.hotStartAddress, methodRegionInfo.hotSize, name.GetANSI(scratch), nullptr, nullptr);
+        }
+
+        if (methodRegionInfo.coldSize > 0)
+        {
+            SString name;
+            StackScratchBuffer scratch;
+            pMethod->GetFullMethodInfo(name);
+
+            if (s_ShowOptimizationTiers)
+            {
+                name.AppendPrintf(W("[PreJit-cold]"));
+            }
+            PAL_PerfJitDump_LogMethod((void*)methodRegionInfo.coldStartAddress, methodRegionInfo.coldSize, name.GetANSI(scratch), nullptr, nullptr);
+        }
+    }
+    EX_CATCH{} EX_END_CATCH(SwallowAllExceptions);
+}
+
 // Log a set of stub to the map.
 void PerfMap::LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, size_t codeSize)
 {
@@ -282,15 +356,19 @@ void PerfMap::LogStubs(const char* stubType, const char* stubOwner, PCODE pCode,
         }
         if(!stubType)
         {
-            stubOwner = "?";
+            stubType = "?";
         }
 
         // Build the map file line.
+        StackScratchBuffer scratch;
+        SString name;
+        name.Printf("stub<%d> %s<%s>", ++(s_Current->m_StubsMapped), stubType, stubOwner);
         SString line;
-        line.Printf(FMT_CODE_ADDR " %x stub<%d> %s<%s>\n", pCode, codeSize, ++(s_Current->m_StubsMapped), stubType, stubOwner);
+        line.Printf(FMT_CODE_ADDR " %x %s\n", pCode, codeSize, name.GetANSI(scratch));
 
         // Write the line.
         s_Current->WriteLine(line);
+        PAL_PerfJitDump_LogMethod((void*)pCode, codeSize, name.GetANSI(scratch), nullptr, nullptr);
     }
     EX_CATCH{} EX_END_CATCH(SwallowAllExceptions);
 }
diff --git a/src/vm/perfmap.h b/src/vm/perfmap.h
index 5788dcc60a06..ffba3332a266 100644
--- a/src/vm/perfmap.h
+++ b/src/vm/perfmap.h
@@ -70,6 +70,9 @@ class PerfMap
     // Log a JIT compiled method to the map.
     static void LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, PrepareCodeConfig *pConfig);
 
+    // Log a pre-compiled method to the map.
+    static void LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode);
+
     // Log a set of stub to the map.
     static void LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, size_t codeSize);
 
diff --git a/src/vm/prestub.cpp b/src/vm/prestub.cpp
index 2858c515be48..f0e44c940c67 100644
--- a/src/vm/prestub.cpp
+++ b/src/vm/prestub.cpp
@@ -384,6 +384,11 @@ PCODE MethodDesc::PrepareILBasedCode(PrepareCodeConfig* pConfig)
 
         if (pCode == NULL)
             pCode = GetPrecompiledCode(pConfig);
+
+#ifdef FEATURE_PERFMAP
+        if (pCode != NULL)
+            PerfMap::LogPreCompiledMethod(this, pCode);
+#endif
     }
 
     if (pCode == NULL)