Skip to content

Commit

Permalink
Merge pull request #15917 from hrydgard/vulkan-bandwidth-opt
Browse files Browse the repository at this point in the history
Vulkan bandwidth optimizations (configure renderpass load/store better)
  • Loading branch information
unknownbrackets committed Aug 28, 2022
2 parents fa1d2cf + b447092 commit 3e865ab
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 77 deletions.
68 changes: 29 additions & 39 deletions Common/GPU/Vulkan/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ void VulkanQueueRunner::CreateDeviceObjects() {
INFO_LOG(G3D, "VulkanQueueRunner::CreateDeviceObjects");
InitBackbufferRenderPass();

framebufferRenderPass_ = GetRenderPass(VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR);
RPKey key{ VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,
VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE };
framebufferRenderPass_ = GetRenderPass(key);

#if 0
// Just to check whether it makes sense to split some of these. drawidx is way bigger than the others...
Expand Down Expand Up @@ -189,6 +191,21 @@ void VulkanQueueRunner::InitBackbufferRenderPass() {
_assert_(res == VK_SUCCESS);
}

static VkAttachmentLoadOp ConvertLoadAction(VKRRenderPassLoadAction action) {
switch (action) {
case VKRRenderPassLoadAction::CLEAR: return VK_ATTACHMENT_LOAD_OP_CLEAR;
case VKRRenderPassLoadAction::KEEP: return VK_ATTACHMENT_LOAD_OP_LOAD;
default: return VK_ATTACHMENT_LOAD_OP_DONT_CARE;
}
}

static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
switch (action) {
case VKRRenderPassStoreAction::STORE: return VK_ATTACHMENT_STORE_OP_STORE;
default: return VK_ATTACHMENT_STORE_OP_DONT_CARE;
}
}

VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
auto pass = renderPasses_.Get(key);
if (pass) {
Expand All @@ -198,19 +215,8 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
VkAttachmentDescription attachments[2] = {};
attachments[0].format = VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
switch (key.colorLoadAction) {
case VKRRenderPassLoadAction::CLEAR:
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case VKRRenderPassLoadAction::KEEP:
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
case VKRRenderPassLoadAction::DONT_CARE:
default:
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[0].loadOp = ConvertLoadAction(key.colorLoadAction);
attachments[0].storeOp = ConvertStoreAction(key.colorStoreAction);
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
Expand All @@ -219,30 +225,10 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {

attachments[1].format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
attachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
switch (key.depthLoadAction) {
case VKRRenderPassLoadAction::CLEAR:
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case VKRRenderPassLoadAction::KEEP:
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
case VKRRenderPassLoadAction::DONT_CARE:
attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
switch (key.stencilLoadAction) {
case VKRRenderPassLoadAction::CLEAR:
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
break;
case VKRRenderPassLoadAction::KEEP:
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
break;
case VKRRenderPassLoadAction::DONT_CARE:
attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
break;
}
attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[1].loadOp = ConvertLoadAction(key.depthLoadAction);
attachments[1].storeOp = ConvertStoreAction(key.depthStoreAction);
attachments[1].stencilLoadOp = ConvertLoadAction(key.stencilLoadAction);
attachments[1].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction);
attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[1].flags = 0;
Expand Down Expand Up @@ -1385,7 +1371,11 @@ void VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step

TransitionToOptimal(cmd, fb->color.image, fb->color.layout, fb->depth.image, fb->depth.layout, &recordBarrier_);

renderPass = GetRenderPass(step.render.colorLoad, step.render.depthLoad, step.render.stencilLoad);
RPKey key{
step.render.colorLoad, step.render.depthLoad, step.render.stencilLoad,
step.render.colorStore, step.render.depthStore, step.render.stencilStore,
};
renderPass = GetRenderPass(key);

// The transition from the optimal format happens after EndRenderPass, now that we don't
// do it as part of the renderpass itself anymore.
Expand Down
23 changes: 14 additions & 9 deletions Common/GPU/Vulkan/VulkanQueueRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,16 @@ enum class VKRStepType : uint8_t {
READBACK_IMAGE,
};

// Must be the same order as Draw::RPAction
enum class VKRRenderPassLoadAction : uint8_t {
DONT_CARE,
KEEP, // default. avoid when possible.
CLEAR,
KEEP,
DONT_CARE,
};

enum class VKRRenderPassStoreAction : uint8_t {
STORE, // default. avoid when possible.
DONT_CARE,
};

struct TransitionRequest {
Expand Down Expand Up @@ -156,6 +162,9 @@ struct VKRStep {
VKRRenderPassLoadAction colorLoad;
VKRRenderPassLoadAction depthLoad;
VKRRenderPassLoadAction stencilLoad;
VKRRenderPassStoreAction colorStore;
VKRRenderPassStoreAction depthStore;
VKRRenderPassStoreAction stencilStore;
u8 clearStencil;
uint32_t clearColor;
float clearDepth;
Expand Down Expand Up @@ -232,15 +241,11 @@ class VulkanQueueRunner {
VKRRenderPassLoadAction colorLoadAction;
VKRRenderPassLoadAction depthLoadAction;
VKRRenderPassLoadAction stencilLoadAction;
VKRRenderPassStoreAction colorStoreAction;
VKRRenderPassStoreAction depthStoreAction;
VKRRenderPassStoreAction stencilStoreAction;
};

// Only call this from the render thread! Also ok during initialization (LoadCache).
VkRenderPass GetRenderPass(
VKRRenderPassLoadAction colorLoadAction, VKRRenderPassLoadAction depthLoadAction, VKRRenderPassLoadAction stencilLoadAction) {
RPKey key{ colorLoadAction, depthLoadAction, stencilLoadAction };
return GetRenderPass(key);
}

VkRenderPass GetRenderPass(const RPKey &key);

bool GetRenderPassKey(VkRenderPass passToFind, RPKey *outKey) const {
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,9 @@ void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRR
step->render.colorLoad = color;
step->render.depthLoad = depth;
step->render.stencilLoad = stencil;
step->render.colorStore = VKRRenderPassStoreAction::STORE;
step->render.depthStore = VKRRenderPassStoreAction::STORE;
step->render.stencilStore = VKRRenderPassStoreAction::STORE;
step->render.clearColor = clearColor;
step->render.clearDepth = clearDepth;
step->render.clearStencil = clearStencil;
Expand Down
25 changes: 25 additions & 0 deletions Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,31 @@ class VulkanRenderManager {

void Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask);

// Cheaply set that we don't care about the contents of a surface at the start of the current render pass.
// This set the corresponding load-op of the current render pass to DONT_CARE.
// Useful when we don't know at bind-time whether we will overwrite the surface or not.
void SetLoadDontCare(VkImageAspectFlags aspects) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
if (aspects & VK_IMAGE_ASPECT_COLOR_BIT)
curRenderStep_->render.colorLoad = VKRRenderPassLoadAction::DONT_CARE;
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
curRenderStep_->render.depthLoad = VKRRenderPassLoadAction::DONT_CARE;
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
curRenderStep_->render.stencilLoad = VKRRenderPassLoadAction::DONT_CARE;
}

// Cheaply set that we don't care about the contents of a surface at the end of the current render pass.
// This set the corresponding store-op of the current render pass to DONT_CARE.
void SetStoreDontCare(VkImageAspectFlags aspects) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
if (aspects & VK_IMAGE_ASPECT_COLOR_BIT)
curRenderStep_->render.colorStore = VKRRenderPassStoreAction::DONT_CARE;
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
curRenderStep_->render.depthStore = VKRRenderPassStoreAction::DONT_CARE;
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
curRenderStep_->render.stencilStore = VKRRenderPassStoreAction::DONT_CARE;
}

void Draw(VkPipelineLayout layout, VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_);
VkRenderData data{ VKRRenderCommand::DRAW };
Expand Down
17 changes: 17 additions & 0 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,8 @@ class VKContext : public DrawContext {

void InvalidateCachedState() override;

void InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) override;

private:
VulkanTexture *GetNullTexture();
VulkanContext *vulkan_ = nullptr;
Expand Down Expand Up @@ -1604,4 +1606,19 @@ void VKContext::HandleEvent(Event ev, int width, int height, void *param1, void
}
}

void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) {
VkImageAspectFlags flags = 0;
if (channels & FBChannel::FB_COLOR_BIT)
flags |= VK_IMAGE_ASPECT_COLOR_BIT;
if (channels & FBChannel::FB_DEPTH_BIT)
flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (channels & FBChannel::FB_STENCIL_BIT)
flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
if (stage == FB_INVALIDATION_LOAD) {
renderManager_.SetLoadDontCare(flags);
} else if (stage == FB_INVALIDATION_STORE) {
renderManager_.SetStoreDontCare(flags);
}
}

} // namespace Draw
18 changes: 13 additions & 5 deletions Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ enum FBChannel {
FB_FORMAT_BIT = 128, // Actually retrieves the native format instead. D3D11 only.
};

enum FBInvalidationStage {
FB_INVALIDATION_LOAD = 1,
FB_INVALIDATION_STORE = 2,
};

enum FBBlitFilter {
FB_BLIT_NEAREST = 0,
FB_BLIT_LINEAR = 1,
Expand Down Expand Up @@ -568,9 +573,9 @@ struct TextureDesc {
};

enum class RPAction {
DONT_CARE,
CLEAR,
KEEP,
KEEP = 0,
CLEAR = 1,
DONT_CARE = 2,
};

struct RenderPassInfo {
Expand Down Expand Up @@ -655,8 +660,11 @@ class DrawContext {

virtual void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) = 0;

// Useful in OpenGL ES to give hints about framebuffers on tiler GPUs.
virtual void InvalidateFramebuffer(Framebuffer *fbo) {}
// Could be useful in OpenGL ES to give hints about framebuffers on tiler GPUs
// using glInvalidateFramebuffer, although drivers are known to botch that so we currently don't use it.
// In Vulkan, this sets the LOAD_OP or the STORE_OP (depending on stage) of the current render pass instance to DONT_CARE.
// channels is a bitwise combination of FBChannel::COLOR, DEPTH and STENCIL.
virtual void InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) {}

// Dynamic state
virtual void SetScissorRect(int left, int top, int width, int height) = 0;
Expand Down
7 changes: 5 additions & 2 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1307,8 +1307,6 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
else
DEBUG_LOG(FRAMEBUF, "Displaying FBO %08x", vfb->fb_address);

// TODO ES3: Use glInvalidateFramebuffer to discard depth/stencil data at the end of frame.

float u0 = offsetX / (float)vfb->bufferWidth;
float v0 = offsetY / (float)vfb->bufferHeight;
float u1 = (480.0f + offsetX) / (float)vfb->bufferWidth;
Expand Down Expand Up @@ -2770,6 +2768,11 @@ void FramebufferManagerCommon::BlitUsingRaster(
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster");
draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0);

if (destX1 == 0.0f && destY1 == 0.0f && destX2 >= destW && destY2 >= destH) {
// We overwrite the whole channel of the framebuffer, so we can invalidate the current contents.
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_LOAD, pipeline->info.writeChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT);
}

Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f };
draw_->SetViewports(1, &vp);
draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height());
Expand Down
38 changes: 17 additions & 21 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1956,12 +1956,27 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
}

// If min is not < max, then we don't have values (wasn't set during decode.)
const KnownVertexBounds &bounds = gstate_c.vertBounds;
float u1 = 0.0f;
float v1 = 0.0f;
float u2 = depalWidth;
float v2 = framebuffer->renderHeight;
if (bounds.minV < bounds.maxV) {
u1 = (bounds.minU + gstate_c.curTextureXOffset) * framebuffer->renderScaleFactor;
v1 = (bounds.minV + gstate_c.curTextureYOffset) * framebuffer->renderScaleFactor;
u2 = (bounds.maxU + gstate_c.curTextureXOffset) * framebuffer->renderScaleFactor;
v2 = (bounds.maxV + gstate_c.curTextureYOffset) * framebuffer->renderScaleFactor;
// We need to reapply the texture next time since we cropped UV.
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
}

Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, depalWidth, framebuffer->renderHeight);
draw_->BindTexture(0, nullptr);
draw_->BindTexture(1, nullptr);
draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal");

draw_->SetScissorRect(0, 0, (int)depalWidth, (int)framebuffer->renderHeight);
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_STORE, Draw::FB_DEPTH_BIT | Draw::FB_STENCIL_BIT);
draw_->SetScissorRect(u1, v1, u2 - u1, v2 - v1);
Draw::Viewport vp{ 0.0f, 0.0f, (float)depalWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f };
draw_->SetViewports(1, &vp);

Expand All @@ -1972,25 +1987,6 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
draw_->BindSamplerStates(0, 1, &nearest);
draw_->BindSamplerStates(1, 1, &clutSampler);

// If min is not < max, then we don't have values (wasn't set during decode.)
const KnownVertexBounds &bounds = gstate_c.vertBounds;
float u1 = 0.0f;
float v1 = 0.0f;
float u2 = depalWidth;
float v2 = framebuffer->renderHeight;
if (bounds.minV < bounds.maxV) {
u1 = bounds.minU + gstate_c.curTextureXOffset;
v1 = bounds.minV + gstate_c.curTextureYOffset;
u2 = bounds.maxU + gstate_c.curTextureXOffset;
v2 = bounds.maxV + gstate_c.curTextureYOffset;
// We need to reapply the texture next time since we cropped UV.
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
}
u1 *= framebuffer->renderScaleFactor;
v1 *= framebuffer->renderScaleFactor;
u2 *= framebuffer->renderScaleFactor;
v2 *= framebuffer->renderScaleFactor;

draw2D_->Blit(textureShader, u1, v1, u2, v2, u1, v1, u2, v2, framebuffer->renderWidth, framebuffer->renderHeight, depalWidth, framebuffer->renderHeight, false, framebuffer->renderScaleFactor);

gstate_c.curTextureWidth = texWidth;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/ShaderManagerVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM
// instantaneous.

#define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 20
#define CACHE_VERSION 21
struct VulkanCacheHeader {
uint32_t magic;
uint32_t version;
Expand Down

0 comments on commit 3e865ab

Please sign in to comment.