File shadPS4-issue1873.patch of Package shadPS4
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 246c8c94..ef8643e4 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -604,21 +604,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                                            true);
                 } else if (dma_data->src_sel == DmaDataSrc::Memory &&
                            dma_data->dst_sel == DmaDataDst::Gds) {
-                    rasterizer->InlineData(dma_data->dst_addr_lo,
-                                           dma_data->SrcAddress<const void*>(),
-                                           dma_data->NumBytes(), true);
+                    rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
+                                           dma_data->NumBytes(), true, false);
                 } else if (dma_data->src_sel == DmaDataSrc::Data &&
                            dma_data->dst_sel == DmaDataDst::Memory) {
                     rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data,
                                            sizeof(u32), false);
                 } else if (dma_data->src_sel == DmaDataSrc::Gds &&
                            dma_data->dst_sel == DmaDataDst::Memory) {
-                    // LOG_WARNING(Render_Vulkan, "GDS memory read");
+                    rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
+                                           dma_data->NumBytes(), false, true);
                 } else if (dma_data->src_sel == DmaDataSrc::Memory &&
                            dma_data->dst_sel == DmaDataDst::Memory) {
-                    rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
-                                           dma_data->SrcAddress<const void*>(),
-                                           dma_data->NumBytes(), false);
+                    rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(),
+                                           dma_data->SrcAddress<VAddr>(), dma_data->NumBytes(),
+                                           false, false);
                 } else {
                     UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
                                     u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
@@ -768,20 +768,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
                 rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true);
             } else if (dma_data->src_sel == DmaDataSrc::Memory &&
                        dma_data->dst_sel == DmaDataDst::Gds) {
-                rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress<const void*>(),
-                                       dma_data->NumBytes(), true);
+                rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
+                                       dma_data->NumBytes(), true, false);
             } else if (dma_data->src_sel == DmaDataSrc::Data &&
                        dma_data->dst_sel == DmaDataDst::Memory) {
                 rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data, sizeof(u32),
                                        false);
             } else if (dma_data->src_sel == DmaDataSrc::Gds &&
                        dma_data->dst_sel == DmaDataDst::Memory) {
-                // LOG_WARNING(Render_Vulkan, "GDS memory read");
+                rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
+                                       dma_data->NumBytes(), false, true);
             } else if (dma_data->src_sel == DmaDataSrc::Memory &&
                        dma_data->dst_sel == DmaDataDst::Memory) {
-                rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
-                                       dma_data->SrcAddress<const void*>(), dma_data->NumBytes(),
-                                       false);
+                rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->SrcAddress<VAddr>(),
+                                       dma_data->NumBytes(), false, false);
             } else {
                 UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
                                 u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index cdf736a8..568a2924 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -246,6 +246,106 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
     });
 }
 
+/// Copies num_bytes bytes from src to dst.
+/// When dst_gds / src_gds is set, that side of the copy is gds_buffer instead of a buffer looked
+/// up through FindBuffer.
+void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
+    if (num_bytes == 0) {
+        // Nothing to copy; Vulkan forbids zero-sized copy regions and buffer barriers.
+        return;
+    }
+    if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) {
+        if (!src_gds && !IsRegionRegistered(src, num_bytes)) {
+            // Both buffers were not transferred to GPU yet. Can safely copy in host memory.
+            memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
+            return;
+        }
+        // Without a readback there's nothing we can do with this
+        // Fallback to creating dst buffer on GPU to at least have this data there
+    }
+    if (!src_gds && !IsRegionRegistered(src, num_bytes)) {
+        // Source is not a registered region: pass its host pointer to InlineData instead.
+        InlineData(dst, std::bit_cast<void*>(src), num_bytes, dst_gds);
+        return;
+    }
+    // NOTE(review): src_buffer is bound by reference before FindBuffer(dst, ...) runs; confirm
+    // that lookup cannot invalidate entries of slot_buffers.
+    auto& src_buffer = [&] -> const Buffer& {
+        if (src_gds) {
+            return gds_buffer;
+        }
+        const BufferId buffer_id = FindBuffer(src, num_bytes);
+        return slot_buffers[buffer_id];
+    }();
+    auto& dst_buffer = [&] -> const Buffer& {
+        if (dst_gds) {
+            return gds_buffer;
+        }
+        const BufferId buffer_id = FindBuffer(dst, num_bytes);
+        return slot_buffers[buffer_id];
+    }();
+    vk::BufferCopy region{
+        .srcOffset = src_buffer.Offset(src),
+        .dstOffset = dst_buffer.Offset(dst),
+        .size = num_bytes,
+    };
+    // Barriers recorded ahead of the copy: dst range -> transfer write, src range -> transfer read.
+    const vk::BufferMemoryBarrier2 buf_barriers_before[2] = {
+        {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+            .buffer = dst_buffer.Handle(),
+            .offset = dst_buffer.Offset(dst),
+            .size = num_bytes,
+        },
+        {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
+            .buffer = src_buffer.Handle(),
+            .offset = src_buffer.Offset(src),
+            .size = num_bytes,
+        },
+    };
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 2,
+        .pBufferMemoryBarriers = buf_barriers_before,
+    });
+    cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region);
+    // Barriers recorded after the copy: the same two ranges, with the access masks mirrored.
+    const vk::BufferMemoryBarrier2 buf_barriers_after[2] = {
+        {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
+            .buffer = dst_buffer.Handle(),
+            .offset = dst_buffer.Offset(dst),
+            .size = num_bytes,
+        },
+        {
+            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .srcAccessMask = vk::AccessFlagBits2::eTransferRead,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
+            .buffer = src_buffer.Handle(),
+            .offset = src_buffer.Offset(src),
+            .size = num_bytes,
+        },
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 2,
+        .pBufferMemoryBarriers = buf_barriers_after,
+    });
+}
+
 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
                                                   bool is_texel_buffer, BufferId buffer_id) {
     // For small uniform buffers that have not been modified by gpu
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 71a6bed2..43343c76 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -90,6 +90,9 @@ public:
     /// Writes a value to GPU buffer.
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
 
+    /// Copies a range of bytes from src to dst, where either side may be the GDS buffer.
+    void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
+
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
                                                        bool is_texel_buffer = false,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 87d07a96..f174e56a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -908,6 +908,11 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo
     buffer_cache.InlineData(address, value, num_bytes, is_gds);
 }
 
+// Forwards the copy request unchanged to the buffer cache.
+void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
+    buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds);
+}
+
 u32 Rasterizer::ReadDataFromGds(u32 gds_offset) {
     auto* gds_buf = buffer_cache.GetGdsBuffer();
     u32 value;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 2fac8c8d..ac74772c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -54,6 +54,7 @@ public:
                                  bool from_guest = false);
 
     void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
+    void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
     u32 ReadDataFromGds(u32 gsd_offset);
     bool InvalidateMemory(VAddr addr, u64 size);
     bool IsMapped(VAddr addr, u64 size);