From 5d40bc2247162ba05cfc54a66be89be7ce54f44b Mon Sep 17 00:00:00 2001 From: mdh1418 Date: Sat, 16 May 2026 00:59:36 -0400 Subject: [PATCH 1/2] Add POSIX in-proc crash report watchdog Add a pipe-backed watchdog for in-proc crash reporting, using an async-signal-safe write from the crash path and a detached watchdog thread initialized during startup. Expose best-effort initialization through TryInitialize, document process-lifetime watchdog state, and use a conservative 30-second default timeout. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/debug/crashreport/CMakeLists.txt | 1 + .../debug/crashreport/inproccrashreporter.cpp | 5 + .../debug/crashreport/inproccrashreporter.h | 1 + .../crashreport/inproccrashreportwatchdog.cpp | 361 ++++++++++++++++++ .../crashreport/inproccrashreportwatchdog.h | 24 ++ src/coreclr/vm/crashreportstackwalker.cpp | 38 ++ 6 files changed, 430 insertions(+) create mode 100644 src/coreclr/debug/crashreport/inproccrashreportwatchdog.cpp create mode 100644 src/coreclr/debug/crashreport/inproccrashreportwatchdog.h diff --git a/src/coreclr/debug/crashreport/CMakeLists.txt b/src/coreclr/debug/crashreport/CMakeLists.txt index f88699a4c6a464..eedc3715be4536 100644 --- a/src/coreclr/debug/crashreport/CMakeLists.txt +++ b/src/coreclr/debug/crashreport/CMakeLists.txt @@ -3,6 +3,7 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) set(CRASHREPORT_SOURCES signalsafejsonwriter.cpp inproccrashreporter.cpp + inproccrashreportwatchdog.cpp ) add_library(inproccrashreport OBJECT ${CRASHREPORT_SOURCES}) diff --git a/src/coreclr/debug/crashreport/inproccrashreporter.cpp b/src/coreclr/debug/crashreport/inproccrashreporter.cpp index fe771432eee5f4..50095554ae5176 100644 --- a/src/coreclr/debug/crashreport/inproccrashreporter.cpp +++ b/src/coreclr/debug/crashreport/inproccrashreporter.cpp @@ -6,12 +6,14 @@ // Streams a createdump-shaped JSON skeleton to a crashreport.json file. #include "inproccrashreporter.h" +#include "inproccrashreportwatchdog.h" #include "signalsafejsonwriter.h" #include "pal.h" #include #include +#include #include #include #include @@ -233,6 +235,7 @@ InProcCrashReporter::CreateReport( { return; } + CrashReportWatchdogScope watchdogScope; char reportPath[CRASHREPORT_PATH_BUFFER_SIZE]; reportPath[0] = '\0'; @@ -345,6 +348,8 @@ InProcCrashReporter::Initialize( m_enumerateThreadsCallback = settings.enumerateThreadsCallback; CrashReportHelpers::CopyString(m_reportPath, sizeof(m_reportPath), settings.reportPath); + (void)CrashReportWatchdogTryInitialize(settings.timeoutSeconds); + m_processName[0] = '\0'; #if defined(__ANDROID__) // On Android every app forks from the Zygote, so /proc/self/exe always diff --git a/src/coreclr/debug/crashreport/inproccrashreporter.h b/src/coreclr/debug/crashreport/inproccrashreporter.h index 5018f3b0d10793..4b3eb3c52ed9a9 100644 --- a/src/coreclr/debug/crashreport/inproccrashreporter.h +++ b/src/coreclr/debug/crashreport/inproccrashreporter.h @@ -60,6 +60,7 @@ using InProcCrashReportEnumerateThreadsCallback = void (*)( struct InProcCrashReporterSettings { const char* reportPath; + uint32_t timeoutSeconds; InProcCrashReportIsManagedThreadCallback isManagedThreadCallback; InProcCrashReportWalkStackCallback walkStackCallback; InProcCrashReportEnumerateThreadsCallback enumerateThreadsCallback; diff --git a/src/coreclr/debug/crashreport/inproccrashreportwatchdog.cpp b/src/coreclr/debug/crashreport/inproccrashreportwatchdog.cpp new file mode 100644 index 00000000000000..9f0a6ea2fe29bd --- /dev/null +++ b/src/coreclr/debug/crashreport/inproccrashreportwatchdog.cpp @@ -0,0 +1,361 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// POSIX watchdog for in-proc crash reporting. + +#include "inproccrashreportwatchdog.h" + +#include "pal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Single-byte pipe protocol: the crash-reporting thread writes Started when +// CreateReport begins and Finished when it leaves. +static constexpr char CrashReportWatchdogStartedCommand = 'S'; +static constexpr char CrashReportWatchdogFinishedCommand = 'F'; +static constexpr long CrashReportWatchdogNanosecondsPerMillisecond = 1000000; +static constexpr long CrashReportWatchdogNanosecondsPerSecond = 1000000000; +static constexpr int CrashReportWatchdogMillisecondsPerSecond = 1000; +static constexpr int CrashReportWatchdogSignalExitCodeOffset = 128; +static constexpr clockid_t CrashReportWatchdogClock = CLOCK_MONOTONIC; + +// Process-lifetime watchdog state. Successful initialization intentionally keeps +// the detached thread and pipe open until process exit; failed initialization +// paths close the pipe before returning. +static pthread_t s_crashReportWatchdogThread; +static uint32_t s_crashReportTimeoutSeconds; +static LONG s_crashReportWatchdogInitializationStarted; +static int s_crashReportWatchdogPipe[2] = { -1, -1 }; +// Crash-path publication point. The signal handler reads only this fd, so it +// cannot observe an enabled flag while still seeing a stale pipe descriptor. +static volatile sig_atomic_t s_crashReportWatchdogWriteFd = -1; + +// Signal and timeout helpers. + +static void +CrashReportWatchdogBuildFatalSignalSet(sigset_t* signalSet) +{ + sigemptyset(signalSet); + sigaddset(signalSet, SIGABRT); + sigaddset(signalSet, SIGBUS); + sigaddset(signalSet, SIGFPE); + sigaddset(signalSet, SIGILL); + sigaddset(signalSet, SIGSEGV); + sigaddset(signalSet, SIGTRAP); +} + +static void +CrashReportWatchdogAbort() +{ + struct sigaction action; + memset(&action, 0, sizeof(action)); + action.sa_handler = SIG_DFL; + sigemptyset(&action.sa_mask); + (void)sigaction(SIGABRT, &action, nullptr); + + sigset_t signalSet; + sigemptyset(&signalSet); + sigaddset(&signalSet, SIGABRT); + (void)pthread_sigmask(SIG_UNBLOCK, &signalSet, nullptr); + + abort(); + _exit(CrashReportWatchdogSignalExitCodeOffset + SIGABRT); +} + +static bool +CrashReportWatchdogBuildDeadline(struct timespec* deadline) +{ + if (clock_gettime(CrashReportWatchdogClock, deadline) != 0) + { + return false; + } + + const time_t maxTime = std::numeric_limits::max(); + if (static_cast(s_crashReportTimeoutSeconds) > static_cast(maxTime)) + { + return false; + } + + time_t timeoutSeconds = static_cast(s_crashReportTimeoutSeconds); + if (deadline->tv_sec > maxTime - timeoutSeconds) + { + return false; + } + + deadline->tv_sec += timeoutSeconds; + return true; +} + +// Pipe channel helpers. + +static void +CrashReportWatchdogClosePipe() +{ + if (s_crashReportWatchdogPipe[0] != -1) + { + close(s_crashReportWatchdogPipe[0]); + s_crashReportWatchdogPipe[0] = -1; + } + + if (s_crashReportWatchdogPipe[1] != -1) + { + close(s_crashReportWatchdogPipe[1]); + s_crashReportWatchdogPipe[1] = -1; + } +} + +static bool +CrashReportWatchdogConfigurePipeFd(int fd) +{ + int descriptorFlags = fcntl(fd, F_GETFD); + if (descriptorFlags == -1 || fcntl(fd, F_SETFD, descriptorFlags | FD_CLOEXEC) != 0) + { + return false; + } + + int statusFlags = fcntl(fd, F_GETFL); + return statusFlags != -1 && fcntl(fd, F_SETFL, statusFlags | O_NONBLOCK) == 0; +} + +static bool +CrashReportWatchdogInitializePipe() +{ + if (pipe(s_crashReportWatchdogPipe) != 0) + { + return false; + } + + if (!CrashReportWatchdogConfigurePipeFd(s_crashReportWatchdogPipe[0]) || + !CrashReportWatchdogConfigurePipeFd(s_crashReportWatchdogPipe[1])) + { + CrashReportWatchdogClosePipe(); + return false; + } + + return true; +} + +// Watchdog thread wait loop. + +static int +CrashReportWatchdogGetRemainingMilliseconds(const struct timespec* deadline) +{ + struct timespec now; + if (clock_gettime(CrashReportWatchdogClock, &now) != 0) + { + return 0; + } + + if (now.tv_sec > deadline->tv_sec || + (now.tv_sec == deadline->tv_sec && now.tv_nsec >= deadline->tv_nsec)) + { + return 0; + } + + time_t remainingSeconds = deadline->tv_sec - now.tv_sec; + long remainingNanoseconds = deadline->tv_nsec - now.tv_nsec; + if (remainingNanoseconds < 0) + { + remainingSeconds--; + remainingNanoseconds += CrashReportWatchdogNanosecondsPerSecond; + } + + const int maxMilliseconds = std::numeric_limits::max(); + if (static_cast(remainingSeconds) > + static_cast(maxMilliseconds / CrashReportWatchdogMillisecondsPerSecond)) + { + return maxMilliseconds; + } + + unsigned long long remainingMilliseconds = + static_cast(remainingSeconds) * CrashReportWatchdogMillisecondsPerSecond + + static_cast( + (remainingNanoseconds + CrashReportWatchdogNanosecondsPerMillisecond - 1) / + CrashReportWatchdogNanosecondsPerMillisecond); + + return remainingMilliseconds > static_cast(maxMilliseconds) + ? maxMilliseconds + : static_cast(remainingMilliseconds); +} + +static bool +CrashReportWatchdogWaitForCommand(char expectedCommand, const struct timespec* deadline) +{ + while (true) + { + int timeoutMilliseconds = -1; + if (deadline != nullptr) + { + timeoutMilliseconds = CrashReportWatchdogGetRemainingMilliseconds(deadline); + if (timeoutMilliseconds == 0) + { + return false; + } + } + + struct pollfd pollFd; + pollFd.fd = s_crashReportWatchdogPipe[0]; + pollFd.events = POLLIN; + pollFd.revents = 0; + + int pollResult = poll(&pollFd, 1, timeoutMilliseconds); + if (pollResult == -1) + { + if (errno == EINTR) + { + continue; + } + + return false; + } + + if (pollResult == 0 || (pollFd.revents & POLLIN) == 0) + { + return false; + } + + char command; + ssize_t readResult = read(s_crashReportWatchdogPipe[0], &command, sizeof(command)); + if (readResult == sizeof(command)) + { + if (command == expectedCommand) + { + return true; + } + + continue; + } + + if (readResult == -1 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)) + { + continue; + } + + return false; + } +} + +static void* +CrashReportWatchdogThread(void*) +{ + sigset_t signalSet; + CrashReportWatchdogBuildFatalSignalSet(&signalSet); + (void)pthread_sigmask(SIG_BLOCK, &signalSet, nullptr); + + // Keep within minipal's portable 15-character limit to avoid truncation. + (void)minipal_set_thread_name(pthread_self(), ".NET CrashWdg"); + + while (true) + { + if (!CrashReportWatchdogWaitForCommand(CrashReportWatchdogStartedCommand, nullptr)) + { + continue; + } + + struct timespec deadline; + if (!CrashReportWatchdogBuildDeadline(&deadline)) + { + CrashReportWatchdogAbort(); + } + + if (!CrashReportWatchdogWaitForCommand(CrashReportWatchdogFinishedCommand, &deadline)) + { + CrashReportWatchdogAbort(); + } + } + + return nullptr; +} + +// Watchdog entry points. + +bool +CrashReportWatchdogTryInitialize(uint32_t timeoutSeconds) +{ + if (timeoutSeconds == 0) + { + return false; + } + + if (static_cast(timeoutSeconds) > static_cast(std::numeric_limits::max())) + { + return false; + } + + if (InterlockedCompareExchange(&s_crashReportWatchdogInitializationStarted, 1, 0) != 0) + { + return s_crashReportWatchdogWriteFd != -1; + } + + // The watchdog is best-effort. The one-time flag prevents duplicate + // watchdog threads, but setup failures reset it so a later init can retry. + if (!CrashReportWatchdogInitializePipe()) + { + InterlockedExchange(&s_crashReportWatchdogInitializationStarted, 0); + return false; + } + + s_crashReportTimeoutSeconds = timeoutSeconds; + + // Block fatal signals before pthread_create so the watchdog inherits the + // mask; restore this thread's mask immediately after creation. This keeps + // process-directed fatal signals from landing on the watchdog thread. + sigset_t signalSet; + sigset_t previousSignalSet; + CrashReportWatchdogBuildFatalSignalSet(&signalSet); + int maskResult = pthread_sigmask(SIG_BLOCK, &signalSet, &previousSignalSet); + if (maskResult != 0) + { + CrashReportWatchdogClosePipe(); + InterlockedExchange(&s_crashReportWatchdogInitializationStarted, 0); + return false; + } + + if (pthread_create(&s_crashReportWatchdogThread, nullptr, CrashReportWatchdogThread, nullptr) != 0) + { + (void)pthread_sigmask(SIG_SETMASK, &previousSignalSet, nullptr); + CrashReportWatchdogClosePipe(); + InterlockedExchange(&s_crashReportWatchdogInitializationStarted, 0); + return false; + } + + (void)pthread_sigmask(SIG_SETMASK, &previousSignalSet, nullptr); + + (void)pthread_detach(s_crashReportWatchdogThread); + s_crashReportWatchdogWriteFd = s_crashReportWatchdogPipe[1]; + return true; +} + +CrashReportWatchdogScope::CrashReportWatchdogScope() +{ + // This runs from the crash-reporting path. Keep this and any future callees + // async-signal-safe. + sig_atomic_t writeFd = s_crashReportWatchdogWriteFd; + if (writeFd != -1) + { + char command = CrashReportWatchdogStartedCommand; + (void)write(static_cast(writeFd), &command, sizeof(command)); + } +} + +CrashReportWatchdogScope::~CrashReportWatchdogScope() +{ + // This runs from the crash-reporting path. Keep this and any future callees + // async-signal-safe. + sig_atomic_t writeFd = s_crashReportWatchdogWriteFd; + if (writeFd != -1) + { + char command = CrashReportWatchdogFinishedCommand; + (void)write(static_cast(writeFd), &command, sizeof(command)); + } +} diff --git a/src/coreclr/debug/crashreport/inproccrashreportwatchdog.h b/src/coreclr/debug/crashreport/inproccrashreportwatchdog.h new file mode 100644 index 00000000000000..c1f5697e008e41 --- /dev/null +++ b/src/coreclr/debug/crashreport/inproccrashreportwatchdog.h @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// POSIX watchdog for in-proc crash reporting. + +#pragma once + +#include + +// Attempts to initialize the watchdog during normal runtime startup. This is +// not async-signal-safe and must not be called from the crash-reporting path. +bool CrashReportWatchdogTryInitialize(uint32_t timeoutSeconds); + +class CrashReportWatchdogScope +{ +public: + // The constructor and destructor run in the crash-reporting path. Keep them + // async-signal-safe: they may only notify the pre-created watchdog channel. + CrashReportWatchdogScope(); + ~CrashReportWatchdogScope(); + + CrashReportWatchdogScope(const CrashReportWatchdogScope&) = delete; + CrashReportWatchdogScope& operator=(const CrashReportWatchdogScope&) = delete; +}; diff --git a/src/coreclr/vm/crashreportstackwalker.cpp b/src/coreclr/vm/crashreportstackwalker.cpp index 1670ec970d91ff..8967e107718661 100644 --- a/src/coreclr/vm/crashreportstackwalker.cpp +++ b/src/coreclr/vm/crashreportstackwalker.cpp @@ -10,6 +10,8 @@ #include "peassembly.h" #include #include +#include +#include #ifdef FEATURE_INPROC_CRASHREPORT @@ -24,6 +26,9 @@ struct WalkContext }; static void BuildTypeName(LPUTF8 buffer, size_t bufferSize, LPCUTF8 namespaceName, LPCUTF8 className); +// Parses configuration during CrashReportConfigure initialization. This is not +// async-signal-safe and must not be called from the crash-reporting path. +static DWORD GetCrashReportTimeoutSeconds(); static StackWalkAction @@ -434,6 +439,7 @@ CrashReportConfigure() InProcCrashReporterSettings settings = {}; settings.reportPath = dumpName; + settings.timeoutSeconds = GetCrashReportTimeoutSeconds(); settings.isManagedThreadCallback = CrashReportIsCurrentThreadManaged; settings.walkStackCallback = CrashReportWalkStack; settings.enumerateThreadsCallback = CrashReportEnumerateThreads; @@ -443,4 +449,36 @@ CrashReportConfigure() InProcCrashReportInitialize(settings); } +// Parses configuration during CrashReportConfigure initialization. This is not +// async-signal-safe and must not be called from the crash-reporting path. +static DWORD +GetCrashReportTimeoutSeconds() +{ + // Keep the default conservative: successful reports can be large, while 0 + // remains available to disable the watchdog for diagnostics. + static constexpr DWORD DefaultTimeoutSeconds = 30; + + CLRConfigNoCache timeoutCfg = CLRConfigNoCache::Get("CrashReportTimeoutSeconds", /*noprefix*/ false, &getenv); + if (!timeoutCfg.IsSet()) + { + return DefaultTimeoutSeconds; + } + + const char* timeoutString = timeoutCfg.AsString(); + if (timeoutString == nullptr || timeoutString[0] == '\0') + { + return DefaultTimeoutSeconds; + } + + errno = 0; + char* end = nullptr; + unsigned long timeoutSeconds = strtoul(timeoutString, &end, 10); + if (errno != 0 || end == timeoutString || *end != '\0' || timeoutSeconds > UINT32_MAX) + { + return DefaultTimeoutSeconds; + } + + return static_cast(timeoutSeconds); +} + #endif // FEATURE_INPROC_CRASHREPORT From d6d2a6c0ba16777f56d460358dc2cc17ae9e6ff5 Mon Sep 17 00:00:00 2001 From: Mitchell Hwang Date: Wed, 20 May 2026 11:50:20 -0400 Subject: [PATCH 2/2] Address in-proc watchdog review feedback Consolidate watchdog state into an owner object and keep the crash-path notification narrow and async-signal-safe. Add descriptor validation, timeout handling cleanup, and diagnostic logging for watchdog monitoring and timeout aborts. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../debug/crashreport/inproccrashreporter.cpp | 2 +- .../crashreport/inproccrashreportwatchdog.cpp | 414 +++++++++--------- .../crashreport/inproccrashreportwatchdog.h | 55 ++- src/coreclr/vm/crashreportstackwalker.cpp | 35 +- 4 files changed, 287 insertions(+), 219 deletions(-) diff --git a/src/coreclr/debug/crashreport/inproccrashreporter.cpp b/src/coreclr/debug/crashreport/inproccrashreporter.cpp index 50095554ae5176..d5940d94e757d9 100644 --- a/src/coreclr/debug/crashreport/inproccrashreporter.cpp +++ b/src/coreclr/debug/crashreport/inproccrashreporter.cpp @@ -348,7 +348,7 @@ InProcCrashReporter::Initialize( m_enumerateThreadsCallback = settings.enumerateThreadsCallback; CrashReportHelpers::CopyString(m_reportPath, sizeof(m_reportPath), settings.reportPath); - (void)CrashReportWatchdogTryInitialize(settings.timeoutSeconds); + (void)CrashReportWatchdog::TryInitialize(settings.timeoutSeconds); m_processName[0] = '\0'; #if defined(__ANDROID__) diff --git a/src/coreclr/debug/crashreport/inproccrashreportwatchdog.cpp b/src/coreclr/debug/crashreport/inproccrashreportwatchdog.cpp index 9f0a6ea2fe29bd..a58226cc3a8e4f 100644 --- a/src/coreclr/debug/crashreport/inproccrashreportwatchdog.cpp +++ b/src/coreclr/debug/crashreport/inproccrashreportwatchdog.cpp @@ -5,210 +5,255 @@ #include "inproccrashreportwatchdog.h" -#include "pal.h" - #include #include #include +#include #include #include #include #include #include -#include #include +#include +#include #include -// Single-byte pipe protocol: the crash-reporting thread writes Started when -// CreateReport begins and Finished when it leaves. -static constexpr char CrashReportWatchdogStartedCommand = 'S'; -static constexpr char CrashReportWatchdogFinishedCommand = 'F'; -static constexpr long CrashReportWatchdogNanosecondsPerMillisecond = 1000000; -static constexpr long CrashReportWatchdogNanosecondsPerSecond = 1000000000; -static constexpr int CrashReportWatchdogMillisecondsPerSecond = 1000; -static constexpr int CrashReportWatchdogSignalExitCodeOffset = 128; -static constexpr clockid_t CrashReportWatchdogClock = CLOCK_MONOTONIC; - -// Process-lifetime watchdog state. Successful initialization intentionally keeps -// the detached thread and pipe open until process exit; failed initialization -// paths close the pipe before returning. -static pthread_t s_crashReportWatchdogThread; -static uint32_t s_crashReportTimeoutSeconds; -static LONG s_crashReportWatchdogInitializationStarted; -static int s_crashReportWatchdogPipe[2] = { -1, -1 }; -// Crash-path publication point. The signal handler reads only this fd, so it -// cannot observe an enabled flag while still seeing a stale pipe descriptor. -static volatile sig_atomic_t s_crashReportWatchdogWriteFd = -1; - -// Signal and timeout helpers. - -static void -CrashReportWatchdogBuildFatalSignalSet(sigset_t* signalSet) +#ifndef FD_CLOEXEC +#error "Crash report watchdog requires FD_CLOEXEC support." +#endif + +static constexpr uint32_t CRASH_REPORT_WATCHDOG_SECONDS_TO_MILLISECONDS = 1000; +static constexpr int CRASH_REPORT_WATCHDOG_SIGNAL_EXIT_CODE_OFFSET = 128; + +LONG CrashReportWatchdog::s_initializationStarted; +CrashReportWatchdog* CrashReportWatchdog::s_instance; +volatile sig_atomic_t CrashReportWatchdog::s_writeFd = -1; + +uint32_t +CrashReportWatchdog::ClampTimeoutSeconds(uint32_t timeoutSeconds) { - sigemptyset(signalSet); - sigaddset(signalSet, SIGABRT); - sigaddset(signalSet, SIGBUS); - sigaddset(signalSet, SIGFPE); - sigaddset(signalSet, SIGILL); - sigaddset(signalSet, SIGSEGV); - sigaddset(signalSet, SIGTRAP); + uint32_t maxTimeoutSeconds = static_cast(std::numeric_limits::max() / CRASH_REPORT_WATCHDOG_SECONDS_TO_MILLISECONDS); + if (timeoutSeconds > maxTimeoutSeconds) + { + return maxTimeoutSeconds; + } + + return timeoutSeconds; } -static void -CrashReportWatchdogAbort() +CrashReportWatchdog::CrashReportWatchdog(uint32_t timeoutSeconds) + : m_timeoutSeconds(ClampTimeoutSeconds(timeoutSeconds)), + m_timeoutMs(static_cast(m_timeoutSeconds * CRASH_REPORT_WATCHDOG_SECONDS_TO_MILLISECONDS)), + m_thread() { - struct sigaction action; - memset(&action, 0, sizeof(action)); - action.sa_handler = SIG_DFL; - sigemptyset(&action.sa_mask); - (void)sigaction(SIGABRT, &action, nullptr); - - sigset_t signalSet; - sigemptyset(&signalSet); - sigaddset(&signalSet, SIGABRT); - (void)pthread_sigmask(SIG_UNBLOCK, &signalSet, nullptr); + m_pipe[0] = -1; + m_pipe[1] = -1; +} - abort(); - _exit(CrashReportWatchdogSignalExitCodeOffset + SIGABRT); +CrashReportWatchdog::~CrashReportWatchdog() +{ + ClosePipe(); } -static bool -CrashReportWatchdogBuildDeadline(struct timespec* deadline) +bool +CrashReportWatchdog::TryInitialize(uint32_t timeoutSeconds) { - if (clock_gettime(CrashReportWatchdogClock, deadline) != 0) + if (timeoutSeconds == 0) { return false; } - const time_t maxTime = std::numeric_limits::max(); - if (static_cast(s_crashReportTimeoutSeconds) > static_cast(maxTime)) + if (InterlockedCompareExchange(&s_initializationStarted, 1, 0) != 0) { + return s_writeFd != -1; + } + + // The watchdog is best-effort. The one-time flag prevents duplicate + // watchdog threads, but setup failures reset it so a later init can retry. + CrashReportWatchdog* watchdog = new (std::nothrow) CrashReportWatchdog(timeoutSeconds); + if (watchdog == nullptr) + { + InterlockedExchange(&s_initializationStarted, 0); return false; } - time_t timeoutSeconds = static_cast(s_crashReportTimeoutSeconds); - if (deadline->tv_sec > maxTime - timeoutSeconds) + if (!watchdog->Initialize()) { + delete watchdog; + InterlockedExchange(&s_initializationStarted, 0); return false; } - deadline->tv_sec += timeoutSeconds; + // Keep the watchdog alive for process lifetime; the detached thread and + // pipe remain available after initialization succeeds. + s_instance = watchdog; + s_writeFd = watchdog->m_pipe[1]; return true; } -// Pipe channel helpers. - -static void -CrashReportWatchdogClosePipe() +bool +CrashReportWatchdog::Initialize() { - if (s_crashReportWatchdogPipe[0] != -1) + if (!InitializePipe()) { - close(s_crashReportWatchdogPipe[0]); - s_crashReportWatchdogPipe[0] = -1; + return false; } - if (s_crashReportWatchdogPipe[1] != -1) + // Block fatal signals before pthread_create so the watchdog inherits the + // mask; restore this thread's mask immediately after creation. This keeps + // process-directed fatal signals from landing on the watchdog thread. + sigset_t signalSet; + sigset_t previousSignalSet; + BuildFatalSignalSet(&signalSet); + int maskResult = pthread_sigmask(SIG_BLOCK, &signalSet, &previousSignalSet); + if (maskResult != 0) { - close(s_crashReportWatchdogPipe[1]); - s_crashReportWatchdogPipe[1] = -1; + ClosePipe(); + return false; } -} -static bool -CrashReportWatchdogConfigurePipeFd(int fd) -{ - int descriptorFlags = fcntl(fd, F_GETFD); - if (descriptorFlags == -1 || fcntl(fd, F_SETFD, descriptorFlags | FD_CLOEXEC) != 0) + if (pthread_create(&m_thread, nullptr, ThreadEntry, this) != 0) { + (void)pthread_sigmask(SIG_SETMASK, &previousSignalSet, nullptr); + ClosePipe(); return false; } - int statusFlags = fcntl(fd, F_GETFL); - return statusFlags != -1 && fcntl(fd, F_SETFL, statusFlags | O_NONBLOCK) == 0; + (void)pthread_sigmask(SIG_SETMASK, &previousSignalSet, nullptr); + + (void)pthread_detach(m_thread); + return true; } -static bool -CrashReportWatchdogInitializePipe() +bool +CrashReportWatchdog::InitializePipe() { - if (pipe(s_crashReportWatchdogPipe) != 0) + if (pipe(m_pipe) != 0) { return false; } - if (!CrashReportWatchdogConfigurePipeFd(s_crashReportWatchdogPipe[0]) || - !CrashReportWatchdogConfigurePipeFd(s_crashReportWatchdogPipe[1])) + if (!ConfigurePipeFd(m_pipe[0]) || + !ConfigurePipeFd(m_pipe[1])) { - CrashReportWatchdogClosePipe(); + ClosePipe(); return false; } return true; } -// Watchdog thread wait loop. - -static int -CrashReportWatchdogGetRemainingMilliseconds(const struct timespec* deadline) +bool +CrashReportWatchdog::ConfigurePipeFd(int fd) { - struct timespec now; - if (clock_gettime(CrashReportWatchdogClock, &now) != 0) + int descriptorFlags = fcntl(fd, F_GETFD); + if (descriptorFlags == -1 || fcntl(fd, F_SETFD, descriptorFlags | FD_CLOEXEC) != 0) { - return 0; + return false; } - if (now.tv_sec > deadline->tv_sec || - (now.tv_sec == deadline->tv_sec && now.tv_nsec >= deadline->tv_nsec)) + int statusFlags = fcntl(fd, F_GETFL); + return statusFlags != -1 && fcntl(fd, F_SETFL, statusFlags | O_NONBLOCK) == 0; +} + +void +CrashReportWatchdog::ClosePipe() +{ + if (m_pipe[0] != -1) { - return 0; + close(m_pipe[0]); + m_pipe[0] = -1; } - time_t remainingSeconds = deadline->tv_sec - now.tv_sec; - long remainingNanoseconds = deadline->tv_nsec - now.tv_nsec; - if (remainingNanoseconds < 0) + if (m_pipe[1] != -1) { - remainingSeconds--; - remainingNanoseconds += CrashReportWatchdogNanosecondsPerSecond; + close(m_pipe[1]); + m_pipe[1] = -1; } +} - const int maxMilliseconds = std::numeric_limits::max(); - if (static_cast(remainingSeconds) > - static_cast(maxMilliseconds / CrashReportWatchdogMillisecondsPerSecond)) +void +CrashReportWatchdog::BuildFatalSignalSet(sigset_t* signalSet) +{ + sigemptyset(signalSet); + sigaddset(signalSet, SIGABRT); + sigaddset(signalSet, SIGBUS); + sigaddset(signalSet, SIGFPE); + sigaddset(signalSet, SIGILL); + sigaddset(signalSet, SIGSEGV); + sigaddset(signalSet, SIGTRAP); +} + +void* +CrashReportWatchdog::ThreadEntry(void* context) +{ + CrashReportWatchdog* watchdog = static_cast(context); + if (watchdog != nullptr) { - return maxMilliseconds; + watchdog->ThreadLoop(); } - unsigned long long remainingMilliseconds = - static_cast(remainingSeconds) * CrashReportWatchdogMillisecondsPerSecond + - static_cast( - (remainingNanoseconds + CrashReportWatchdogNanosecondsPerMillisecond - 1) / - CrashReportWatchdogNanosecondsPerMillisecond); + return nullptr; +} + +void +CrashReportWatchdog::ThreadLoop() +{ + sigset_t signalSet; + BuildFatalSignalSet(&signalSet); + (void)pthread_sigmask(SIG_BLOCK, &signalSet, nullptr); + + // Keep within minipal's portable 15-character limit to avoid truncation. + (void)minipal_set_thread_name(pthread_self(), ".NET CrashWdg"); + + while (!WaitForCommand(Command::Started)) {} - return remainingMilliseconds > static_cast(maxMilliseconds) - ? maxMilliseconds - : static_cast(remainingMilliseconds); + minipal_log_print_info( + "In-proc crash report watchdog started monitoring with a %lu second timeout.\n", + static_cast(m_timeoutSeconds)); + + if (!WaitForCommand(Command::Finished, m_timeoutMs)) + { + minipal_log_write_error( + "In-proc crash report watchdog did not receive a finish notification before the timeout; aborting process.\n"); + Abort(); + } } -static bool -CrashReportWatchdogWaitForCommand(char expectedCommand, const struct timespec* deadline) +bool +CrashReportWatchdog::WaitForCommand(Command expectedCommand, int timeoutMs) { + int readFd = m_pipe[0]; + if (readFd == -1) + { + return false; + } + + int64_t deadlineMs = 0; + if (timeoutMs != CRASH_REPORT_WATCHDOG_INFINITE_TIMEOUT_MS) + { + deadlineMs = minipal_lowres_ticks() + timeoutMs; + } + while (true) { - int timeoutMilliseconds = -1; - if (deadline != nullptr) + int currentTimeoutMs = timeoutMs; + if (timeoutMs != CRASH_REPORT_WATCHDOG_INFINITE_TIMEOUT_MS) { - timeoutMilliseconds = CrashReportWatchdogGetRemainingMilliseconds(deadline); - if (timeoutMilliseconds == 0) + currentTimeoutMs = GetRemainingTimeoutMs(deadlineMs); + if (currentTimeoutMs == 0) { return false; } } struct pollfd pollFd; - pollFd.fd = s_crashReportWatchdogPipe[0]; + pollFd.fd = readFd; pollFd.events = POLLIN; pollFd.revents = 0; - int pollResult = poll(&pollFd, 1, timeoutMilliseconds); + int pollResult = poll(&pollFd, 1, currentTimeoutMs); if (pollResult == -1) { if (errno == EINTR) @@ -225,10 +270,10 @@ CrashReportWatchdogWaitForCommand(char expectedCommand, const struct timespec* d } char command; - ssize_t readResult = read(s_crashReportWatchdogPipe[0], &command, sizeof(command)); + ssize_t readResult = read(readFd, &command, sizeof(command)); if (readResult == sizeof(command)) { - if (command == expectedCommand) + if (command == static_cast(expectedCommand)) { return true; } @@ -245,117 +290,80 @@ CrashReportWatchdogWaitForCommand(char expectedCommand, const struct timespec* d } } -static void* -CrashReportWatchdogThread(void*) +// Called from CrashReportWatchdogScope on the crash-reporting path. Keep this +// async-signal-safe and preserve errno so watchdog notification does not +// perturb the failing thread's crash-reporting state. +void +CrashReportWatchdog::WriteCommand(Command command) { - sigset_t signalSet; - CrashReportWatchdogBuildFatalSignalSet(&signalSet); - (void)pthread_sigmask(SIG_BLOCK, &signalSet, nullptr); - - // Keep within minipal's portable 15-character limit to avoid truncation. - (void)minipal_set_thread_name(pthread_self(), ".NET CrashWdg"); - - while (true) + int savedErrno = errno; + sig_atomic_t writeFd = s_writeFd; + if (writeFd != -1) { - if (!CrashReportWatchdogWaitForCommand(CrashReportWatchdogStartedCommand, nullptr)) + char commandValue = static_cast(command); + while (true) { - continue; - } - - struct timespec deadline; - if (!CrashReportWatchdogBuildDeadline(&deadline)) - { - CrashReportWatchdogAbort(); - } + ssize_t writeResult = write(static_cast(writeFd), &commandValue, sizeof(commandValue)); + if (writeResult == sizeof(commandValue)) + { + break; + } - if (!CrashReportWatchdogWaitForCommand(CrashReportWatchdogFinishedCommand, &deadline)) - { - CrashReportWatchdogAbort(); + if (writeResult != -1 || errno != EINTR) + { + break; + } } } - return nullptr; + errno = savedErrno; } -// Watchdog entry points. - -bool -CrashReportWatchdogTryInitialize(uint32_t timeoutSeconds) +int +CrashReportWatchdog::GetRemainingTimeoutMs(int64_t deadlineMs) { - if (timeoutSeconds == 0) - { - return false; - } - - if (static_cast(timeoutSeconds) > static_cast(std::numeric_limits::max())) + int64_t remainingMs = deadlineMs - minipal_lowres_ticks(); + if (remainingMs <= 0) { - return false; + return 0; } - if (InterlockedCompareExchange(&s_crashReportWatchdogInitializationStarted, 1, 0) != 0) + int maxTimeoutMs = std::numeric_limits::max(); + if (remainingMs > maxTimeoutMs) { - return s_crashReportWatchdogWriteFd != -1; + return maxTimeoutMs; } - // The watchdog is best-effort. The one-time flag prevents duplicate - // watchdog threads, but setup failures reset it so a later init can retry. - if (!CrashReportWatchdogInitializePipe()) - { - InterlockedExchange(&s_crashReportWatchdogInitializationStarted, 0); - return false; - } + return static_cast(remainingMs); +} - s_crashReportTimeoutSeconds = timeoutSeconds; +// Terminate from the watchdog thread using the default SIGABRT action. The +// watchdog only gets here after the crash reporter started but did not finish +// before its configured timeout. +void +CrashReportWatchdog::Abort() +{ + struct sigaction action; + memset(&action, 0, sizeof(action)); + action.sa_handler = SIG_DFL; + sigemptyset(&action.sa_mask); + (void)sigaction(SIGABRT, &action, nullptr); - // Block fatal signals before pthread_create so the watchdog inherits the - // mask; restore this thread's mask immediately after creation. This keeps - // process-directed fatal signals from landing on the watchdog thread. sigset_t signalSet; - sigset_t previousSignalSet; - CrashReportWatchdogBuildFatalSignalSet(&signalSet); - int maskResult = pthread_sigmask(SIG_BLOCK, &signalSet, &previousSignalSet); - if (maskResult != 0) - { - CrashReportWatchdogClosePipe(); - InterlockedExchange(&s_crashReportWatchdogInitializationStarted, 0); - return false; - } - - if (pthread_create(&s_crashReportWatchdogThread, nullptr, CrashReportWatchdogThread, nullptr) != 0) - { - (void)pthread_sigmask(SIG_SETMASK, &previousSignalSet, nullptr); - CrashReportWatchdogClosePipe(); - InterlockedExchange(&s_crashReportWatchdogInitializationStarted, 0); - return false; - } - - (void)pthread_sigmask(SIG_SETMASK, &previousSignalSet, nullptr); + sigemptyset(&signalSet); + sigaddset(&signalSet, SIGABRT); + (void)pthread_sigmask(SIG_UNBLOCK, &signalSet, nullptr); - (void)pthread_detach(s_crashReportWatchdogThread); - s_crashReportWatchdogWriteFd = s_crashReportWatchdogPipe[1]; - return true; + abort(); + _exit(CRASH_REPORT_WATCHDOG_SIGNAL_EXIT_CODE_OFFSET + SIGABRT); } CrashReportWatchdogScope::CrashReportWatchdogScope() { - // This runs from the crash-reporting path. Keep this and any future callees - // async-signal-safe. - sig_atomic_t writeFd = s_crashReportWatchdogWriteFd; - if (writeFd != -1) - { - char command = CrashReportWatchdogStartedCommand; - (void)write(static_cast(writeFd), &command, sizeof(command)); - } + CrashReportWatchdog::WriteCommand(CrashReportWatchdog::Command::Started); } CrashReportWatchdogScope::~CrashReportWatchdogScope() { - // This runs from the crash-reporting path. Keep this and any future callees - // async-signal-safe. - sig_atomic_t writeFd = s_crashReportWatchdogWriteFd; - if (writeFd != -1) - { - char command = CrashReportWatchdogFinishedCommand; - (void)write(static_cast(writeFd), &command, sizeof(command)); - } + CrashReportWatchdog::WriteCommand(CrashReportWatchdog::Command::Finished); } diff --git a/src/coreclr/debug/crashreport/inproccrashreportwatchdog.h b/src/coreclr/debug/crashreport/inproccrashreportwatchdog.h index c1f5697e008e41..628d522d10b393 100644 --- a/src/coreclr/debug/crashreport/inproccrashreportwatchdog.h +++ b/src/coreclr/debug/crashreport/inproccrashreportwatchdog.h @@ -5,11 +5,60 @@ #pragma once +#include "pal.h" + +#include #include +#include + +class CrashReportWatchdogScope; + +class CrashReportWatchdog +{ +public: + // Attempts to initialize the watchdog during normal runtime startup. This is + // not async-signal-safe and must not be called from the crash-reporting path. + static bool TryInitialize(uint32_t timeoutSeconds); + +private: + friend class CrashReportWatchdogScope; + + static constexpr int CRASH_REPORT_WATCHDOG_INFINITE_TIMEOUT_MS = -1; + + enum class Command : char + { + Started = 'S', + Finished = 'F', + }; -// Attempts to initialize the watchdog during normal runtime startup. This is -// not async-signal-safe and must not be called from the crash-reporting path. -bool CrashReportWatchdogTryInitialize(uint32_t timeoutSeconds); + static uint32_t ClampTimeoutSeconds(uint32_t timeoutSeconds); + + explicit CrashReportWatchdog(uint32_t timeoutSeconds); + ~CrashReportWatchdog(); + + bool Initialize(); + bool InitializePipe(); + bool ConfigurePipeFd(int fd); + void ClosePipe(); + + static void BuildFatalSignalSet(sigset_t* signalSet); + static void* ThreadEntry(void* context); + + void ThreadLoop(); + bool WaitForCommand(Command expectedCommand, int timeoutMs = CRASH_REPORT_WATCHDOG_INFINITE_TIMEOUT_MS); + static void WriteCommand(Command command); + static int GetRemainingTimeoutMs(int64_t deadlineMs); + static void Abort(); + + uint32_t m_timeoutSeconds; + int m_timeoutMs; + pthread_t m_thread; + int m_pipe[2]; + + static LONG s_initializationStarted; + static CrashReportWatchdog* s_instance; + static volatile sig_atomic_t s_writeFd; +}; class CrashReportWatchdogScope { diff --git a/src/coreclr/vm/crashreportstackwalker.cpp b/src/coreclr/vm/crashreportstackwalker.cpp index 8967e107718661..ae984ce4804ba8 100644 --- a/src/coreclr/vm/crashreportstackwalker.cpp +++ b/src/coreclr/vm/crashreportstackwalker.cpp @@ -10,8 +10,6 @@ #include "peassembly.h" #include #include -#include -#include #ifdef FEATURE_INPROC_CRASHREPORT @@ -26,6 +24,7 @@ struct WalkContext }; static void BuildTypeName(LPUTF8 buffer, size_t bufferSize, LPCUTF8 namespaceName, LPCUTF8 className); +static bool IsDecimalDigits(const char* value); // Parses configuration during CrashReportConfigure initialization. This is not // async-signal-safe and must not be called from the crash-reporting path. static DWORD GetCrashReportTimeoutSeconds(); @@ -449,6 +448,25 @@ CrashReportConfigure() InProcCrashReportInitialize(settings); } +static bool +IsDecimalDigits(const char* value) +{ + if (value == nullptr || value[0] == '\0') + { + return false; + } + + for (const char* current = value; *current != '\0'; current++) + { + if (*current < '0' || *current > '9') + { + return false; + } + } + + return true; +} + // Parses configuration during CrashReportConfigure initialization. This is not // async-signal-safe and must not be called from the crash-reporting path. static DWORD @@ -465,20 +483,13 @@ GetCrashReportTimeoutSeconds() } const char* timeoutString = timeoutCfg.AsString(); - if (timeoutString == nullptr || timeoutString[0] == '\0') - { - return DefaultTimeoutSeconds; - } - - errno = 0; - char* end = nullptr; - unsigned long timeoutSeconds = strtoul(timeoutString, &end, 10); - if (errno != 0 || end == timeoutString || *end != '\0' || timeoutSeconds > UINT32_MAX) + DWORD timeoutSeconds; + if (!IsDecimalDigits(timeoutString) || !timeoutCfg.TryAsInteger(10, timeoutSeconds)) { return DefaultTimeoutSeconds; } - return static_cast(timeoutSeconds); + return timeoutSeconds; } #endif // FEATURE_INPROC_CRASHREPORT