Skip to content

Commit

Permalink
Aftermath crash dump support
Browse files Browse the repository at this point in the history
  • Loading branch information
jukim-nv authored and apanteleev committed Sep 24, 2024
1 parent a4c6f97 commit dc8a779
Show file tree
Hide file tree
Showing 23 changed files with 609 additions and 25 deletions.
46 changes: 38 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ cmake_dependent_option(NVRHI_INSTALL_EXPORTS "Install CMake exports" OFF "NVRHI_
# Feature and backend switches that are offered on every platform.
option(NVRHI_WITH_VALIDATION "Build NVRHI the validation layer" ON)
option(NVRHI_WITH_VULKAN "Build the NVRHI Vulkan backend" ON)
option(NVRHI_WITH_RTXMU "Use RTXMU for acceleration structure management" OFF)
# Off by default because it needs the externally provided NSight Aftermath SDK.
option(NVRHI_WITH_AFTERMATH "Include Aftermath support (requires NSight Aftermath SDK)" OFF)

# Windows-only switches: cmake_dependent_option() exposes each one with the given default
# while the "WIN32" condition holds, and forces it to OFF otherwise.
cmake_dependent_option(NVRHI_WITH_NVAPI "Include NVAPI support (requires NVAPI SDK)" OFF "WIN32" OFF)
cmake_dependent_option(NVRHI_WITH_DX11 "Build the NVRHI D3D11 backend" ON "WIN32" OFF)
Expand Down Expand Up @@ -80,19 +81,36 @@ if (NVRHI_WITH_NVAPI AND NOT TARGET nvapi)
endif()
endif()

# Import the NSight Aftermath SDK as the global `aftermath` target, unless a target with
# that name already exists (e.g. one supplied by an enclosing project).
if (NVRHI_WITH_AFTERMATH AND NOT TARGET aftermath)
    # REQUIRED already stops the configure step when the SDK is missing; the explicit
    # check below is kept so the failure message names AFTERMATH_SEARCH_PATHS.
    find_package(Aftermath REQUIRED)

    if (AFTERMATH_FOUND)
        add_library(aftermath SHARED IMPORTED GLOBAL)
        target_include_directories(aftermath INTERFACE "${AFTERMATH_INCLUDE_DIR}")
        if (WIN32)
            # On Windows consumers link against the import library, not the DLL itself.
            set_property(TARGET aftermath PROPERTY IMPORTED_IMPLIB "${AFTERMATH_LIBRARY}")
        endif()
        set_property(TARGET aftermath PROPERTY IMPORTED_LOCATION "${AFTERMATH_RUNTIME_LIBRARY}")

        # Place the runtime library in the runtime output directory at configure time.
        # The paths are quoted so that spaces or semicolons do not split them, and the
        # copy is skipped when no output directory is configured: an empty DESTINATION
        # would otherwise abort the configure step.
        if (NOT "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" STREQUAL "")
            file(COPY "${AFTERMATH_RUNTIME_LIBRARY}" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
        else()
            message(WARNING "CMAKE_RUNTIME_OUTPUT_DIRECTORY is not set; the Aftermath runtime library "
                "'${AFTERMATH_RUNTIME_LIBRARY}' was not copied and must be deployed next to the application manually")
        endif()
    else()
        message(FATAL_ERROR "NVRHI_WITH_AFTERMATH is enabled but cmake cannot find the Aftermath SDK in AFTERMATH_SEARCH_PATHS")
    endif()
endif()

set(include_common
include/nvrhi/nvrhi.h
include/nvrhi/utils.h
include/nvrhi/common/containers.h
include/nvrhi/common/misc.h
include/nvrhi/common/resource.h)
include/nvrhi/common/resource.h
include/nvrhi/common/aftermath.h)
set(src_common
src/common/format-info.cpp
src/common/misc.cpp
src/common/state-tracking.cpp
src/common/state-tracking.h
src/common/utils.cpp)
src/common/utils.cpp
src/common/aftermath.cpp)

if(MSVC)
set(misc_common
Expand Down Expand Up @@ -208,6 +226,8 @@ target_include_directories(nvrhi PUBLIC

set_target_properties(nvrhi PROPERTIES FOLDER "NVRHI")

# $<BOOL:...> evaluates to 1 or 0, so NVRHI_WITH_AFTERMATH is always defined (as 1 or 0)
# when compiling the nvrhi target's own sources, whether or not the option is enabled.
target_compile_definitions(nvrhi PRIVATE NVRHI_WITH_AFTERMATH=$<BOOL:${NVRHI_WITH_AFTERMATH}>)

# implementations

if (NVRHI_WITH_DX11)
Expand All @@ -232,10 +252,13 @@ if (NVRHI_WITH_DX11)

if (NVRHI_WITH_NVAPI)
target_link_libraries(${nvrhi_d3d11_target} PUBLIC nvapi)
target_compile_definitions(${nvrhi_d3d11_target} PRIVATE NVRHI_D3D11_WITH_NVAPI=1)
else()
target_compile_definitions(${nvrhi_d3d11_target} PRIVATE NVRHI_D3D11_WITH_NVAPI=0)
endif()
target_compile_definitions(${nvrhi_d3d11_target} PRIVATE NVRHI_D3D11_WITH_NVAPI=$<BOOL:${NVRHI_WITH_NVAPI}>)

if (NVRHI_WITH_AFTERMATH)
target_link_libraries(${nvrhi_d3d11_target} PUBLIC aftermath)
endif()
target_compile_definitions(${nvrhi_d3d11_target} PRIVATE NVRHI_WITH_AFTERMATH=$<BOOL:${NVRHI_WITH_AFTERMATH}>)
endif()

if (NVRHI_WITH_DX12)
Expand Down Expand Up @@ -265,10 +288,13 @@ if (NVRHI_WITH_DX12)

if (NVRHI_WITH_NVAPI)
target_link_libraries(${nvrhi_d3d12_target} PUBLIC nvapi)
target_compile_definitions(${nvrhi_d3d12_target} PRIVATE NVRHI_D3D12_WITH_NVAPI=1)
else()
target_compile_definitions(${nvrhi_d3d12_target} PRIVATE NVRHI_D3D12_WITH_NVAPI=0)
endif()
target_compile_definitions(${nvrhi_d3d12_target} PRIVATE NVRHI_D3D12_WITH_NVAPI=$<BOOL:${NVRHI_WITH_NVAPI}>)

if (NVRHI_WITH_AFTERMATH)
target_link_libraries(${nvrhi_d3d12_target} PUBLIC aftermath)
endif()
target_compile_definitions(${nvrhi_d3d12_target} PRIVATE NVRHI_WITH_AFTERMATH=$<BOOL:${NVRHI_WITH_AFTERMATH}>)
endif()

if (NVRHI_WITH_VULKAN)
Expand Down Expand Up @@ -305,6 +331,10 @@ if (NVRHI_WITH_VULKAN)
target_link_libraries(${nvrhi_vulkan_target} PRIVATE Vulkan::Headers)
endif()

if (NVRHI_WITH_AFTERMATH)
target_link_libraries(${nvrhi_vulkan_target} PUBLIC aftermath)
endif()
target_compile_definitions(${nvrhi_vulkan_target} PRIVATE NVRHI_WITH_AFTERMATH=$<BOOL:${NVRHI_WITH_AFTERMATH}>)
endif()


Expand Down
80 changes: 80 additions & 0 deletions cmake/FindAftermath.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.


# Default SDK locations, used only when the caller has not provided AFTERMATH_SEARCH_PATHS:
# an "aftermath" folder at the top of the whole build tree's sources, or at the root of
# the project that invoked find_package(Aftermath).
# (FindPackageHandleStandardArgs is a helper module, not a package; it is loaded with
# include() right before find_package_handle_standard_args() is called further down.)
if (NOT AFTERMATH_SEARCH_PATHS)
    set(AFTERMATH_SEARCH_PATHS
        "${CMAKE_SOURCE_DIR}/aftermath"
        "${PROJECT_SOURCE_DIR}/aftermath")
endif()

# The SDK binaries are looked up by architecture: files carry an ".x64" or ".x86" tag and
# are searched for under the matching "lib/<arch>" sub-folder of each search path.
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(aftermath_arch "x64")
else()
    set(aftermath_arch "x86")
endif()

if (WIN32)
    # Import library that consumers link against.
    find_library(AFTERMATH_LIBRARY GFSDK_Aftermath_Lib.${aftermath_arch}
        PATHS ${AFTERMATH_SEARCH_PATHS}
        PATH_SUFFIXES "lib/${aftermath_arch}")
    # The DLL is located with find_file() for both architectures: find_library() only
    # considers the toolchain's link-library suffixes and would not match a ".dll" name.
    find_file(AFTERMATH_RUNTIME_LIBRARY GFSDK_Aftermath_Lib.${aftermath_arch}.dll
        PATHS ${AFTERMATH_SEARCH_PATHS}
        PATH_SUFFIXES "lib/${aftermath_arch}")
else()
    # Elsewhere the shared object serves as both the link-time and the runtime library.
    find_library(AFTERMATH_RUNTIME_LIBRARY libGFSDK_Aftermath_Lib.${aftermath_arch}.so
        PATHS ${AFTERMATH_SEARCH_PATHS}
        PATH_SUFFIXES "lib/${aftermath_arch}")
endif()

# Find modules run in the caller's scope; do not leak the helper variable.
unset(aftermath_arch)

# Locate the SDK headers: the directory that contains GFSDK_Aftermath.h.
find_path(AFTERMATH_INCLUDE_DIR
    NAMES GFSDK_Aftermath.h
    PATHS ${AFTERMATH_SEARCH_PATHS}
    PATH_SUFFIXES "include")

include(FindPackageHandleStandardArgs)

# Variables that must be set for the package to count as found. The import library
# (AFTERMATH_LIBRARY) is only looked up, and therefore only required, on Windows.
set(aftermath_required_vars AFTERMATH_INCLUDE_DIR)
if (WIN32)
    list(APPEND aftermath_required_vars AFTERMATH_LIBRARY)
endif()
list(APPEND aftermath_required_vars AFTERMATH_RUNTIME_LIBRARY)

find_package_handle_standard_args(Aftermath
    REQUIRED_VARS ${aftermath_required_vars})

# Find modules run in the caller's scope; do not leak the helper variable.
unset(aftermath_required_vars)

92 changes: 92 additions & 0 deletions include/nvrhi/common/aftermath.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

#pragma once

#include <nvrhi/nvrhi.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <filesystem>
#include <functional>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>

namespace nvrhi
{
typedef std::pair<bool, std::reference_wrapper<const std::string>> ResolvedMarker;
typedef std::pair<const void*, size_t> BinaryBlob;
typedef std::function<uint64_t(BinaryBlob, nvrhi::GraphicsAPI)> ShaderHashGeneratorFunction;
typedef std::function<BinaryBlob(uint64_t, ShaderHashGeneratorFunction)> ShaderBinaryLookupCallback;

// Aftermath will return the payload of the last marker the GPU executed, so in cases of nested regimes,
// we want the marker payloads to represent the whole "stack" of regimes, not just the last one.
// AftermathMarkerTracker pushes/pops regimes to this stack.
// The payload itself is a 64-bit value, so AftermathMarkerTracker stores the mappings of strings<->hashes.
// There should be one AftermathMarkerTracker per graphics API-level command list.
class AftermathMarkerTracker
{
public:
AftermathMarkerTracker();

// Pushes a named regime onto the event stack.
// NOTE(review): the returned value is presumably the hash that stands for the resulting stack string
// and is used as the marker payload - confirm in aftermath.cpp.
size_t pushEvent(const char* name);
// Pops the most recently pushed regime from the event stack.
void popEvent();

// Looks up the string stored for a marker hash.
// NOTE(review): the bool in the result presumably reports whether the hash was found - confirm in aftermath.cpp.
ResolvedMarker getEventString(size_t hash);
private:
// using a filesystem path to track the event stack since that automatically inserts "/" separators
// and is easy to push/pop entries
std::filesystem::path m_EventStack;

// Some apps have unique marker text on every frame (for example, by appending the frame number to the marker)
// In these cases, we want to cap the max number of strings stored to prevent memory usage from growing
const static size_t MaxEventStrings = 128;
// Fixed-size storage for up to MaxEventStrings hashes.
// NOTE(review): together with m_OldestHashIndex this looks like a ring buffer that decides which
// stored string to evict next - confirm in aftermath.cpp.
std::array<size_t, MaxEventStrings> m_EventHashes;
size_t m_OldestHashIndex;
// Maps a hash to its string.
std::unordered_map<size_t, std::string> m_EventStrings;
};

// AftermathCrashDumpHelper tracks all nvrhi::IDevice-level constructs that we need when generating a crash dump.
// It provides two services: resolving a marker hash to the original string, and getting the specific shader bytecode
// of a requested shader hash.
// There should be one AftermathCrashDumpHelper per nvrhi::IDevice.
// All command lists will register their AftermathMarkerTrackers with the AftermathCrashDumpHelper.
// Any shader bytecode loading and management code (e.g. donut's ShaderFactory) should register a shader binary lookup callback.
class AftermathCrashDumpHelper
{
public:
AftermathCrashDumpHelper();

// Adds / removes a command list's marker tracker. Trackers are referenced by pointer.
// NOTE(review): unregistering presumably moves a copy of the tracker into m_DestroyedMarkerTrackers - confirm in aftermath.cpp.
void registerAftermathMarkerTracker(AftermathMarkerTracker* tracker);
void unRegisterAftermathMarkerTracker(AftermathMarkerTracker* tracker);
// Adds / removes a shader binary lookup callback.
// NOTE(review): `client` is presumably only an opaque key identifying the callback's owner - confirm in aftermath.cpp.
void registerShaderBinaryLookupCallback(void* client, ShaderBinaryLookupCallback lookupCallback);
void unRegisterShaderBinaryLookupCallback(void* client);

// Resolves a marker hash to the original string.
// NOTE(review): presumably consults both live and recently destroyed trackers - confirm in aftermath.cpp.
ResolvedMarker ResolveMarker(size_t markerHash);
// Returns the shader bytecode for the requested shader hash.
// NOTE(review): presumably done by querying the registered lookup callbacks with `hashGenerator` - confirm in aftermath.cpp.
BinaryBlob findShaderBinary(uint64_t shaderHash, ShaderHashGeneratorFunction hashGenerator);
private:
// Currently registered trackers, held as raw pointers.
std::set<AftermathMarkerTracker*> m_MarkerTrackers;
// Command lists that are deleted on the CPU-side could still be executing (and crashing) GPU side,
// so we keep around a small number of recently destroyed marker trackers just in case
std::deque<AftermathMarkerTracker> m_DestroyedMarkerTrackers;
// Lookup callbacks keyed by a void* pointer.
std::unordered_map<void*, ShaderBinaryLookupCallback> m_ShaderBinaryLookupCallbacks;
};
} // namespace nvrhi
1 change: 1 addition & 0 deletions include/nvrhi/d3d11.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ namespace nvrhi::d3d11
{
IMessageCallback* messageCallback = nullptr;
ID3D11DeviceContext* context = nullptr;
bool aftermathEnabled = false;
};

NVRHI_API DeviceHandle createDevice(const DeviceDesc& desc);
Expand Down
1 change: 1 addition & 0 deletions include/nvrhi/d3d12.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ namespace nvrhi::d3d12
uint32_t shaderResourceViewHeapSize = 16384;
uint32_t samplerHeapSize = 1024;
uint32_t maxTimerQueries = 256;
bool aftermathEnabled = false;
};

NVRHI_API DeviceHandle createDevice(const DeviceDesc& desc);
Expand Down
8 changes: 7 additions & 1 deletion include/nvrhi/nvrhi.h
Original file line number Diff line number Diff line change
Expand Up @@ -2635,6 +2635,8 @@ namespace nvrhi
// IDevice
//////////////////////////////////////////////////////////////////////////

class AftermathCrashDumpHelper;

class IDevice : public IResource
{
public:
Expand Down Expand Up @@ -2711,7 +2713,8 @@ namespace nvrhi
virtual CommandListHandle createCommandList(const CommandListParameters& params = CommandListParameters()) = 0;
virtual uint64_t executeCommandLists(ICommandList* const* pCommandLists, size_t numCommandLists, CommandQueue executionQueue = CommandQueue::Graphics) = 0;
virtual void queueWaitForCommandList(CommandQueue waitQueue, CommandQueue executionQueue, uint64_t instance) = 0;
virtual void waitForIdle() = 0;
// returns true if the wait completes successfully, false if detecting a problem (e.g. device removal)
virtual bool waitForIdle() = 0;

// Releases the resources that were referenced in the command lists that have finished executing.
// IMPORTANT: Call this method at least once per frame.
Expand All @@ -2725,6 +2728,9 @@ namespace nvrhi

virtual IMessageCallback* getMessageCallback() = 0;

virtual bool isAftermathEnabled() = 0;
virtual AftermathCrashDumpHelper& getAftermathCrashDumpHelper() = 0;

// Front-end for executeCommandLists(..., 1) for compatibility and convenience
uint64_t executeCommandList(ICommandList* commandList, CommandQueue executionQueue = CommandQueue::Graphics)
{
Expand Down
1 change: 1 addition & 0 deletions include/nvrhi/vulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ namespace nvrhi::vulkan

// Indicates if VkPhysicalDeviceVulkan12Features::bufferDeviceAddress was set to 'true' at device creation time
bool bufferDeviceAddressSupported = false;
bool aftermathEnabled = false;
};

NVRHI_API DeviceHandle createDevice(const DeviceDesc& desc);
Expand Down
Loading

0 comments on commit dc8a779

Please sign in to comment.