File shadPS4-PR1451.patch of Package shadPS4
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f4c23b7c..0ad08f59 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,8 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
+include(CMakeDependentOption)
+
project(shadPS4 CXX C ASM ${ADDITIONAL_LANGUAGES})
# Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory.
@@ -33,6 +35,7 @@ endif()
option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)
option(ENABLE_DISCORD_RPC "Enable the Discord RPC integration" ON)
option(ENABLE_UPDATER "Enables the options to updater" ON)
+option(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" OFF)
# First, determine whether to use CMAKE_OSX_ARCHITECTURES or CMAKE_SYSTEM_PROCESSOR.
if (APPLE AND CMAKE_OSX_ARCHITECTURES)
@@ -1058,6 +1061,10 @@ if (ENABLE_QT_GUI)
endif()
endif()
+if (ENABLE_USERFAULTFD)
+ add_definitions(-DENABLE_USERFAULTFD)
+endif()
+
if (WIN32)
target_link_libraries(shadps4 PRIVATE mincore winpthreads)
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index 522e6fd5..236a5d14 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -153,6 +153,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
if (info.pixel_format == vk::Format::eUndefined) {
return;
}
+ ASSERT(info.resources.layers * info.resources.levels <= 64);
+ subres_state =
+ std::numeric_limits<u64>::max() >> (64 - info.resources.levels * info.resources.layers);
mip_hashes.resize(info.resources.levels);
// Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case
// the texture cache should re-create the resource with the usage requested
diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h
index 404e25e8..822ec0df 100644
--- a/src/video_core/texture_cache/image.h
+++ b/src/video_core/texture_cache/image.h
@@ -97,9 +97,24 @@ struct Image {
depth_id = image_id;
}
+ void ForEachSubresource(VAddr addr, size_t size, auto&& func) {
+ const u32 num_layers = info.resources.layers;
+ for (u32 m = 0; const auto& mip : info.mips_layout) {
+ for (u32 l = 0; l < num_layers; l++) {
+ const VAddr mip_addr = info.guest_address + mip.offset * num_layers + mip.size * l;
+ const VAddr mip_addr_end = mip_addr + mip.size;
+ if (mip_addr < addr + size && addr < mip_addr_end) {
+ func(m * num_layers + l);
+ }
+ }
+ m++;
+ }
+ }
+
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);
+
void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
void Upload(vk::Buffer buffer, u64 offset);
@@ -121,6 +136,7 @@ struct Image {
VAddr track_addr_end = 0;
std::vector<ImageViewInfo> image_view_infos;
std::vector<ImageViewId> image_view_ids;
+ u64 subres_state{};
ImageId depth_id{};
// Resource state tracking
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index d41ee57c..29b35750 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -69,6 +69,8 @@ void TextureCache::InvalidateMemory(VAddr addr, size_t size) {
// Modified region overlaps image, so the image was definitely accessed by this fault.
// Untrack the image, so that the range is unprotected and the guest can write freely.
image.flags |= ImageFlagBits::CpuDirty;
+ image.ForEachSubresource(pages_start, pages_end - pages_start,
+ [&](u32 index) { image.subres_state |= 1ULL << index; });
UntrackImage(image_id);
} else if (pages_end < image_end) {
// This page access may or may not modify the image.
@@ -93,12 +95,13 @@ void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) {
std::scoped_lock lock{mutex};
ForEachImageInRegion(address, max_size, [&](ImageId image_id, Image& image) {
// Only consider images that match base address.
- // TODO: Maybe also consider subresources
if (image.info.guest_address != address) {
return;
}
- // Ensure image is reuploaded when accessed again.
+// Mark subresources overlapping the modified range as dirty for re-upload.
image.flags |= ImageFlagBits::GpuDirty;
+ image.ForEachSubresource(address, max_size,
+ [&](u32 index) { image.subres_state |= 1ULL << index; });
});
}
@@ -503,8 +506,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
image.hash = hash;
}
- const auto& num_layers = image.info.resources.layers;
- const auto& num_mips = image.info.resources.levels;
+ const u32 num_layers = image.info.resources.layers;
+ const u32 num_mips = image.info.resources.levels;
ASSERT(num_mips == image.info.mips_layout.size());
const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
@@ -512,6 +515,12 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
boost::container::small_vector<vk::BufferImageCopy, 14> image_copy{};
for (u32 m = 0; m < num_mips; m++) {
+ const u32 mask = (1 << num_layers) - 1;
+ const u64 subres_state = (image.subres_state >> (m * num_layers)) & mask;
+ if (subres_state == 0) {
+ continue;
+ }
+
const u32 width = std::max(image.info.size.width >> m, 1u);
const u32 height = std::max(image.info.size.height >> m, 1u);
const u32 depth =
@@ -528,19 +537,40 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
image.mip_hashes[m] = hash;
}
- image_copy.push_back({
- .bufferOffset = mip.offset,
- .bufferRowLength = static_cast<u32>(mip.pitch),
- .bufferImageHeight = static_cast<u32>(mip.height),
- .imageSubresource{
- .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
- .mipLevel = m,
- .baseArrayLayer = 0,
- .layerCount = num_layers,
- },
- .imageOffset = {0, 0, 0},
- .imageExtent = {width, height, depth},
- });
+ if (subres_state == mask) {
+ image_copy.push_back({
+ .bufferOffset = mip.offset * num_layers,
+ .bufferRowLength = static_cast<u32>(mip.pitch),
+ .bufferImageHeight = static_cast<u32>(mip.height),
+ .imageSubresource{
+ .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
+ .mipLevel = m,
+ .baseArrayLayer = 0,
+ .layerCount = num_layers,
+ },
+ .imageOffset = {0, 0, 0},
+ .imageExtent = {width, height, depth},
+ });
+ } else {
+ for (u32 l = 0; l < num_layers; l++) {
+ if (!(subres_state & (1 << l))) {
+ continue;
+ }
+ image_copy.push_back({
+ .bufferOffset = mip.offset * num_layers + mip.size * l,
+ .bufferRowLength = static_cast<u32>(mip.pitch),
+ .bufferImageHeight = static_cast<u32>(mip.height),
+ .imageSubresource{
+ .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
+ .mipLevel = m,
+ .baseArrayLayer = l,
+ .layerCount = 1,
+ },
+ .imageOffset = {0, 0, 0},
+ .imageExtent = {width, height, depth},
+ });
+ }
+ }
}
if (image_copy.empty()) {
@@ -609,6 +639,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
.pBufferMemoryBarriers = &post_barrier,
});
image.flags &= ~ImageFlagBits::Dirty;
+ image.subres_state = 0;
}
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {