Update llpc from commit e48a029e

[Continuations] Add a test with boolean payload [Continuations] Add assertion on payload register count usage [Continuations] Add getUpperBoundOnTraceRayPayloadRegisters() [Continuations] Add shader jump scheduling workflow [Continuations] Add simple lit test for `_cont_ExitRayGen` inlining [Continuations] Add stable returnaddr argument [Continuations] Cleanup return in `else` block [Continuations] Cleanup use of `EntriesWithContinuationsFunctions` [Continuations] Dialectify continuation.return. [Continuations] Fix a regression caused by strip_whitelist.txt clean-up in GPURT [Continuations] Fix Cps Stack Lowering for KernelEntry [Continuations] Fix lit test for old LLVM [Continuations] Fix Traversal register count metadata [Continuations] Introduce `llvm::replaceCallsToFunction` helper [Continuations] Introduce `PayloadHelper` [Continuations] Make dialect stackstore take no pointer [Continuations] Move payload intrinsic handling to `LowerRaytracingPipeline` [Continuations] Move static helper function [Continuations] PAQ: Represent uniform structs by a single PAQ node [Continuations] Pass csp argument to Raygen, remove `_cont_SetupRayGen` [Continuations] Refactor `LowerAwait` pass [Continuations] Refactor PAQ node creation [Continuations] Remove unused includes [Continuations] Replace `forEachCall` with call to `replaceCallsToFunction` [Continuations] Return `PreservedAnalyses` directly. 
[Continuations] Run `LowerRaytracingPipeline` on Traversal [Continuations] Set preserved register count for IS [Continuations] Support _cont_ShaderStart [Continuations] Support GPURT options [Continuations] Use --lint-abort-on-error in lit tests [Continuations] Use as.continuation.reference consistently [Continuations] Use lgc.rt max payload size metadata [Continuations] Use max hit attributes from module [Continuations] Use updated update_test_checks.py to generate lit files Add SqBufRsrcWord3 initialization Add support for AmdExtD3DShaderIntrinsics_WaveScan Intrinsic Enable scalarization of descriptor loads Fix AmdTraceRayGetStaticId in ray query Fix invalid startswith call Fix static check error: Using 'memset' on struct that contains a 'std::string' Fix the issue where a maxVertOut value of zero leads to a division by maxVertOut, causing a crash Handle upstream llvm changes for metadata lgc: handle indirect load case in stridedIndexAdd lgc: Add BufferAddrToPtrOp and ExtendAddressOp lgc: Add BuilderCommon::CreateBuildVector lgc: Add debugging log for PatchBufferOp lgc: Add offset and ascii chars comment to bytes of data line lgc: emit amdgcn.fdot2.f16.f16 intrinsics lgc: Enable sinking in SimplifyCFG lgc: Extend lgc dialect with subgroupAll, AllEqual, Rotate lgc: Fix 'Add DbgInfo api to ModuleBunch' lgc: Fix crash in HLK Dot2AddHalfTest on GFX10.1 lgc: Fix IO error if pre-rast shader doesn't export layer lgc: Fix the hang when shaderDBG is ON lgc: Move CreateIf() to BuilderCommon so a front-end can use it lgc: Refactor CreateSubgroupClustered* with constant cluster size lgc: Some coding style changes in PatchEntryPointMutate lgc: Use rtz version 16bit interpolation llvmraytracing: Allow setLgcRtShaderStage/getLgcRtShaderStage on GlobalObject Refactor for CooperativeMatrix using LGC_Dialect Remove the macro VKI_COOPERATIVE_MATRIX gating under lgc/llpc test cases Replace struct_buffer_load with s_buffer_load for strided buffer in some cases Revert 'Pack mesh shader 
outputs in LDS space' to fix corruption in The Talos Principle 2 Revert 'lgc: Add LoadBufferAddrOp' Rework SW stream-out Some coding changes for dynamic topology SPIRVReader: Map spirv device scope to agent Support dumping and reading GPURT options Support dynamic primitive topology when xfb is enabled Support uint64 AmdTraceRaySampleGpuTimer() Update tests affected by change for range attr in ctpop Update tests for upstream GEP change
mingshi2333 · Jun 4, 2024 · 6c770c7 · 6c770c7
1 parent 8252cfa
commit 6c770c7
Show file tree

Hide file tree

Showing 249 changed files with 13,709 additions and 9,881 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -127,10 +127,9 @@ endif(LLPC_BUILD_TOOLS)
 if(ICD_BUILD_LLPC)
  # Generate Strings for LLPC standalone tool and vkgc_gpurtshim
  add_subdirectory(util ${PROJECT_BINARY_DIR}/util)
+ add_subdirectory(gfxruntime ${PROJECT_BINARY_DIR}/gfxruntime)
 endif()
 
-add_subdirectory(gfxruntime ${PROJECT_BINARY_DIR}/gfxruntime)
-
 ### VKGC build LLPC ################################################################
 if(ICD_BUILD_LLPC)
  include("cmake/compilerutils.cmake")

diff --git a/compilerutils/include/compilerutils/TypeLowering.h b/compilerutils/include/compilerutils/TypeLowering.h
@@ -144,6 +144,7 @@ class TypeLowering {
 
  llvm::SmallVector<llvm::Value *> getValue(llvm::Value *);
  llvm::SmallVector<llvm::Value *> getValueOptional(llvm::Value *);
+ void replaceValue(llvm::Value *toReplace, llvm::Value *with) { replaceMappingWith(toReplace, with); } // Thin wrapper: forwards both arguments to replaceMappingWith.
  void replaceInstruction(llvm::Instruction *, llvm::ArrayRef<llvm::Value *>);
  void eraseInstruction(llvm::Instruction *);
 

diff --git a/gfxruntime/CMakeLists.txt b/gfxruntime/CMakeLists.txt
@@ -37,18 +37,18 @@ find_package(Python3
 )
 
 # Locate dxc binary.
-if (CMAKE_HOST_SYSTEM_NAME MATCHES "Linux")
- find_program(DXC_PATH dxc)
- if ("${DXC_PATH}" STREQUAL "DXC_PATH-NOTFOUND")
- message(FATAL_ERROR "Could not find shader compiler tool dxc.")
- endif()
 #if _WIN32
-elseif(WIN32)
- set(DXC_PATH "$ENV{DK_ROOT}/DirectXShaderCompiler/8c9d92b/bin")
+if(WIN32)
  if (NOT EXISTS "${DXC_PATH}")
  message(FATAL_ERROR "Unable to find DirectXShaderCompiler directory: ${DXC_PATH}")
  endif()
+endif()
 #endif
+if (NOT DXC_PATH)
+ find_program(DXC_PATH dxc)
+ if ("${DXC_PATH}" STREQUAL "DXC_PATH-NOTFOUND")
+ message(FATAL_ERROR "Could not find shader compiler tool dxc.")
+ endif()
 endif()
 
 set(CMAKE_CURRENT_SOURCE_DIR ${PROJECT_SOURCE_DIR}/gfxruntime)

diff --git a/include/vkgcDefs.h b/include/vkgcDefs.h
@@ -106,6 +106,7 @@ struct optional_bool : private std::optional<bool> {
  using std::optional<bool>::has_value;
  using std::optional<bool>::value;
  using std::optional<bool>::value_or;
+ using std::optional<bool>::operator*;
 };
 
 /// Enumerates result codes of LLPC operations.
@@ -576,7 +577,6 @@ struct ShaderModuleUsage {
  unsigned localSizeX; ///< Compute shader work-group size in the X dimension
  unsigned localSizeY; ///< Compute shader work-group size in the Y dimension
  unsigned localSizeZ; ///< Compute shader work-group size in the Z dimension
- bool useBarycentric; ///< Whether to use gl_BarycentricXX or pervertexEXT decoration
  bool disableDualSource; ///< Whether disable dualSource blend
  uint32_t clipDistanceArraySize; ///< Count of output clip distance
 };
@@ -768,7 +768,7 @@ struct PipelineShaderOptions {
  unsigned ldsSpillLimitDwords;
 
  /// Attempt to scalarize waterfall descriptor loads.
- bool scalarizeWaterfallLoads;
+ optional_bool scalarizeWaterfallLoads;
 
  /// Force rearranges threadId within group into blocks of 8*8 or 8*4
  bool overrideForceThreadIdSwizzling;
@@ -1141,6 +1141,12 @@ struct RtState {
  bool rtIpOverride;
 };
 
+/// A single GPURT option, expressed as a pair of 64-bit values: a name hash and the option's value.
+struct GpurtOption {
+ uint64_t nameHash; ///< Hash value that is used as the option's name.
+ uint64_t value; ///< Value of the setting.
+};
+
 struct UniformConstantMapEntry {
  unsigned location; ///< Starting location of the uniform constant variable
  unsigned offset; ///< Offset of the uniform constant variable in the final buffer
@@ -1264,6 +1270,7 @@ struct GraphicsPipelineBuildInfo {
  /// return extra meta data.
  bool enableEarlyCompile; ///< Whether enable early compile
  bool useSoftwareVertexBufferDescriptors; ///< Use software vertex buffer descriptors to structure SRD.
+ bool dynamicTopology; ///< Whether primitive topology is dynamic.
 #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62
  BinaryData shaderLibrary; ///< SPIR-V library binary data
 #endif
@@ -1290,6 +1297,8 @@ struct GraphicsPipelineBuildInfo {
  uint8_t vbAddressLowBits[MaxVertexBindings]; ///< Lowest two bits of vertex buffer addresses
  float pixelTransferScale[4]; ///< Scale apply to render color target
  float pixelTransferBias[4]; ///< Bias apply to render color target
+ bool enableColorClampVs; ///< Enable clamp vertex output color
+ bool enableColorClampFs; ///< Enable clamp fragment output color
  } glState;
  const auto &getGlState() const { return glState; }
 #endif
@@ -1363,6 +1372,8 @@ struct RayTracingPipelineBuildInfo {
  /// stored inside the ELF
  size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data
  unsigned cpsFlags; ///< Cps feature flags
+ GpurtOption *pGpurtOptions; ///< Array of GPURT options
+ unsigned gpurtOptionCount; ///< Number of GPURT options
 };
 
 /// Ray tracing max shader name length

diff --git a/lgc/builder/ArithBuilder.cpp b/lgc/builder/ArithBuilder.cpp
@@ -1267,9 +1267,20 @@ Value *BuilderImpl::CreateFDot2(Value *a, Value *b, Value *scalar, Value *clamp,
  assert(scalar->getType()->isFloatTy());
  assert(clamp->getType()->isIntegerTy() && clamp->getType()->getIntegerBitWidth() == 1);
 
- Value *result = CreateIntrinsic(scalar->getType(), Intrinsic::amdgcn_fdot2, {a, b, scalar, clamp});
- result->setName(instName);
- return result;
+ // GFX10.1 doesn't support v_dot2_f32_f16.
+ if (m_pipelineState->getTargetInfo().getGfxIpVersion() >= GfxIpVersion({10, 3})) {
+ Value *result = CreateIntrinsic(scalar->getType(), Intrinsic::amdgcn_fdot2, {a, b, scalar, clamp});
+ result->setName(instName);
+ return result;
+ }
+
+ // The half dot product result cannot be +/-inf if it exceeds the range of half. Two v_fma_mix_f32 can do this but
+ // it is currently unavailable.
+ Type *floatVecTy = FixedVectorType::get(scalar->getType(), 2);
+ Value *fa = CreateFPExt(a, floatVecTy);
+ Value *fb = CreateFPExt(b, floatVecTy);
+ Value *dot = CreateDotProduct(fa, fb);
+ return CreateFAdd(dot, scalar, instName);
 }
 
 // =====================================================================================================================

diff --git a/lgc/builder/BuilderBase.cpp b/lgc/builder/BuilderBase.cpp
@@ -293,3 +293,13 @@ Instruction *BuilderBase::CreateWaterfallEnd(Value *nonUniform, Value *waterfall
 
  return resultValue;
 }
+
+// =====================================================================================================================
+// Build a fixed vector whose lanes are the given same-typed scalars, in order. 'elements' must not be empty.
+Value *BuilderCommon::CreateBuildVector(llvm::ArrayRef<llvm::Value *> elements, const llvm::Twine &instName) {
+  const auto lastIdx = elements.size() - 1;
+  Value *result = PoisonValue::get(FixedVectorType::get(elements[0]->getType(), elements.size()));
+  for (unsigned lane = 0; lane != lastIdx; ++lane)
+    result = CreateInsertElement(result, elements[lane], lane);
+  return CreateInsertElement(result, elements[lastIdx], lastIdx, instName); // Only the final insert gets instName.
+}