diff options
author | Liam <[email protected]> | 2024-01-22 12:40:50 -0500 |
---|---|---|
committer | Liam <[email protected]> | 2024-01-31 11:27:21 -0500 |
commit | a595e9e8a7a6a742481b1cd05455d3c639095413 (patch) | |
tree | 03f8dfaec328171c42b090988e93c51e8d726eda | |
parent | 10cf0585180bcf2eab38ebf65dc593fecc4ddf92 (diff) | |
download | yuzu-mainline-a595e9e8a7a6a742481b1cd05455d3c639095413.tar.gz yuzu-mainline-a595e9e8a7a6a742481b1cd05455d3c639095413.zip |
nvnflinger/gpu: implement layer stack composition
28 files changed, 469 insertions, 252 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 570acb193..eb8f643a2 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -775,6 +775,9 @@ add_library(core STATIC hle/service/nvnflinger/graphic_buffer_producer.h hle/service/nvnflinger/hos_binder_driver_server.cpp hle/service/nvnflinger/hos_binder_driver_server.h + hle/service/nvnflinger/hardware_composer.cpp + hle/service/nvnflinger/hardware_composer.h + hle/service/nvnflinger/hwc_layer.h hle/service/nvnflinger/nvnflinger.cpp hle/service/nvnflinger/nvnflinger.h hle/service/nvnflinger/parcel.h diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index c1ebbd62d..abe95303e 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include <boost/container/small_vector.hpp> + #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -38,19 +40,30 @@ NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvdisp_disp0::OnClose(DeviceFD fd) {} -void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, - u32 height, u32 stride, android::BufferTransformFlags transform, - const Common::Rectangle<int>& crop_rect, - std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { - const DAddr addr = nvmap.GetHandleAddress(buffer_handle); - LOG_TRACE(Service, - "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", - addr, offset, width, height, stride, format); +void nvdisp_disp0::Composite(std::span<const Nvnflinger::HwcLayer> sorted_layers) { + std::vector<Tegra::FramebufferConfig> output_layers; + std::vector<Service::Nvidia::NvFence> output_fences; + output_layers.reserve(sorted_layers.size()); + output_fences.reserve(sorted_layers.size()); + + for (auto& layer : sorted_layers) { + output_layers.emplace_back(Tegra::FramebufferConfig{ + .address = nvmap.GetHandleAddress(layer.buffer_handle), + .offset = layer.offset, + .width = layer.width, + .height = layer.height, + .stride = layer.stride, + .pixel_format = layer.format, + .transform_flags = layer.transform, + .crop_rect = layer.crop_rect, + }); - const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, - stride, format, transform, crop_rect}; + for (size_t i = 0; i < layer.acquire_fence.num_fences; i++) { + output_fences.push_back(layer.acquire_fence.fences[i]); + } + } - system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); + system.GPU().RequestComposite(std::move(output_layers), std::move(output_fences)); system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); system.GetPerfStats().EndSystemFrame(); system.GetPerfStats().BeginSystemFrame(); diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index 5f13a50a2..1082b85c2 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -8,8 +8,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" -#include "core/hle/service/nvnflinger/buffer_transform_flags.h" -#include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/hwc_layer.h" namespace Service::Nvidia::NvCore { class Container; @@ -35,11 +34,8 @@ public: void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; - /// Performs a screen flip, drawing the buffer pointed to by the handle. - void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, - u32 stride, android::BufferTransformFlags transform, - const Common::Rectangle<int>& crop_rect, - std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences); + /// Performs a screen flip, compositing each buffer. + void Composite(std::span<const Nvnflinger::HwcLayer> sorted_layers); Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvnflinger/buffer_item.h b/src/core/hle/service/nvnflinger/buffer_item.h index 7fd808f54..f9f262628 100644 --- a/src/core/hle/service/nvnflinger/buffer_item.h +++ b/src/core/hle/service/nvnflinger/buffer_item.h @@ -40,7 +40,7 @@ public: bool is_droppable{}; bool acquire_called{}; bool transform_to_display_inverse{}; - s32 swap_interval{}; + u32 swap_interval{}; }; } // namespace Service::android diff --git a/src/core/hle/service/nvnflinger/hardware_composer.cpp b/src/core/hle/service/nvnflinger/hardware_composer.cpp new file mode 100644 index 000000000..54889bb4f --- /dev/null +++ b/src/core/hle/service/nvnflinger/hardware_composer.cpp @@ -0,0 +1,190 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <boost/container/small_vector.hpp> + +#include "common/microprofile.h" +#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" +#include "core/hle/service/nvnflinger/buffer_item.h" +#include "core/hle/service/nvnflinger/buffer_item_consumer.h" +#include "core/hle/service/nvnflinger/buffer_queue_producer.h" +#include "core/hle/service/nvnflinger/hardware_composer.h" +#include "core/hle/service/nvnflinger/hwc_layer.h" +#include "core/hle/service/nvnflinger/ui/graphic_buffer.h" +#include "core/hle/service/vi/display/vi_display.h" +#include "core/hle/service/vi/layer/vi_layer.h" + +namespace Service::Nvnflinger { + +HardwareComposer::HardwareComposer() = default; +HardwareComposer::~HardwareComposer() = default; + +u32 HardwareComposer::ComposeLocked(VI::Display& display, Nvidia::Devices::nvdisp_disp0& nvdisp, + u32 frame_advance) { + boost::container::small_vector<HwcLayer, 2> composition_stack; + + m_frame_number += frame_advance; + + // Release any necessary framebuffers. + for (auto& [layer_id, framebuffer] : m_framebuffers) { + if (framebuffer.release_frame_number > m_frame_number) { + // Not yet ready to release this framebuffer. + continue; + } + + if (!framebuffer.is_acquired) { + // Already released. + continue; + } + + if (auto* layer = display.FindLayer(layer_id); layer != nullptr) { + // TODO: support release fence + // This is needed to prevent screen tearing + layer->GetConsumer().ReleaseBuffer(framebuffer.item, android::Fence::NoFence()); + framebuffer.is_acquired = false; + } + } + + // Determine the number of vsync periods to wait before composing again. + std::optional<u32> swap_interval{}; + bool has_acquired_buffer{}; + + // Acquire all necessary framebuffers. + for (size_t i = 0; i < display.GetNumLayers(); i++) { + auto& layer = display.GetLayer(i); + auto layer_id = layer.GetLayerId(); + + // Try to fetch the framebuffer (either new or stale). + const auto result = this->CacheFramebufferLocked(layer, layer_id); + + // If we failed, skip this layer. + if (result == CacheStatus::NoBufferAvailable) { + continue; + } + + // If we acquired a new buffer, we need to present. + if (result == CacheStatus::BufferAcquired) { + has_acquired_buffer = true; + } + + const auto& buffer = m_framebuffers[layer_id]; + const auto& item = buffer.item; + const auto& igbp_buffer = *item.graphic_buffer; + + // TODO: get proper Z-index from layer + composition_stack.emplace_back(HwcLayer{ + .buffer_handle = igbp_buffer.BufferId(), + .offset = igbp_buffer.Offset(), + .format = igbp_buffer.ExternalFormat(), + .width = igbp_buffer.Width(), + .height = igbp_buffer.Height(), + .stride = igbp_buffer.Stride(), + .z_index = 0, + .transform = static_cast<android::BufferTransformFlags>(item.transform), + .crop_rect = item.crop, + .acquire_fence = item.fence, + }); + + // We need to compose again either before this frame is supposed to + // be released, or exactly on the vsync period it should be released. + // + // TODO: handle cases where swap intervals are relatively prime. So far, + // only swap intervals of 0, 1 and 2 have been observed, but if 3 were + // to be introduced, this would cause an issue. + if (swap_interval) { + swap_interval = std::min(*swap_interval, item.swap_interval); + } else { + swap_interval = item.swap_interval; + } + } + + // If any new buffers were acquired, we can present. + if (has_acquired_buffer) { + // Sort by Z-index. + std::stable_sort(composition_stack.begin(), composition_stack.end(), + [&](auto& l, auto& r) { return l.z_index < r.z_index; }); + + // Composite. + nvdisp.Composite(composition_stack); + } + + // Render MicroProfile. + MicroProfileFlip(); + + // If we advanced, then advance by at least 1 frame. + if (swap_interval) { + return std::max(*swap_interval, 1U); + } + + // Otherwise, advance by exactly one frame. + return 1U; +} + +void HardwareComposer::RemoveLayerLocked(VI::Display& display, LayerId layer_id) { + // Check if we are tracking a slot with this layer_id. + const auto it = m_framebuffers.find(layer_id); + if (it == m_framebuffers.end()) { + return; + } + + // Try to release the buffer item. + auto* const layer = display.FindLayer(layer_id); + if (layer && it->second.is_acquired) { + layer->GetConsumer().ReleaseBuffer(it->second.item, android::Fence::NoFence()); + } + + // Erase the slot. + m_framebuffers.erase(it); +} + +bool HardwareComposer::TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer& framebuffer) { + // Attempt the update. + const auto status = layer.GetConsumer().AcquireBuffer(&framebuffer.item, {}, false); + if (status != android::Status::NoError) { + return false; + } + + // We succeeded, so set the new release frame info. + framebuffer.release_frame_number = + m_frame_number + std::max(1U, framebuffer.item.swap_interval); + framebuffer.is_acquired = true; + + return true; +} + +HardwareComposer::CacheStatus HardwareComposer::CacheFramebufferLocked(VI::Layer& layer, + LayerId layer_id) { + // Check if this framebuffer is already present. + const auto it = m_framebuffers.find(layer_id); + if (it != m_framebuffers.end()) { + // If it's currently still acquired, we are done. + if (it->second.is_acquired) { + return CacheStatus::CachedBufferReused; + } + + // Try to acquire a new item. + if (this->TryAcquireFramebufferLocked(layer, it->second)) { + // We got a new item. + return CacheStatus::BufferAcquired; + } else { + // We didn't acquire a new item, but we can reuse the slot. + return CacheStatus::CachedBufferReused; + } + } + + // Framebuffer is not present, so try to create it. + Framebuffer framebuffer{}; + + if (this->TryAcquireFramebufferLocked(layer, framebuffer)) { + // Move the buffer item into a new slot. + m_framebuffers.emplace(layer_id, std::move(framebuffer)); + + // We succeeded. + return CacheStatus::BufferAcquired; + } + + // We couldn't acquire the buffer item, so don't create a slot. + return CacheStatus::NoBufferAvailable; +} + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/hardware_composer.h b/src/core/hle/service/nvnflinger/hardware_composer.h new file mode 100644 index 000000000..611afc169 --- /dev/null +++ b/src/core/hle/service/nvnflinger/hardware_composer.h @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <memory> +#include <boost/container/flat_map.hpp> + +#include "core/hle/service/nvnflinger/buffer_item.h" + +namespace Service::Nvidia::Devices { +class nvdisp_disp0; +} + +namespace Service::VI { +class Display; +class Layer; +} // namespace Service::VI + +namespace Service::Nvnflinger { + +using LayerId = u64; + +class HardwareComposer { +public: + explicit HardwareComposer(); + ~HardwareComposer(); + + u32 ComposeLocked(VI::Display& display, Nvidia::Devices::nvdisp_disp0& nvdisp, + u32 frame_advance); + void RemoveLayerLocked(VI::Display& display, LayerId layer_id); + +private: + // TODO: do we want to track frame number in vi instead? + u64 m_frame_number{0}; + +private: + using ReleaseFrameNumber = u64; + + struct Framebuffer { + android::BufferItem item{}; + ReleaseFrameNumber release_frame_number{}; + bool is_acquired{false}; + }; + + enum class CacheStatus : u32 { + NoBufferAvailable, + BufferAcquired, + CachedBufferReused, + }; + + boost::container::flat_map<LayerId, Framebuffer> m_framebuffers{}; + +private: + bool TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer& framebuffer); + CacheStatus CacheFramebufferLocked(VI::Layer& layer, LayerId layer_id); +}; + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/hwc_layer.h b/src/core/hle/service/nvnflinger/hwc_layer.h new file mode 100644 index 000000000..3af668a25 --- /dev/null +++ b/src/core/hle/service/nvnflinger/hwc_layer.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "core/hle/service/nvdrv/nvdata.h" +#include "core/hle/service/nvnflinger/buffer_transform_flags.h" +#include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/ui/fence.h" + +namespace Service::Nvnflinger { + +struct HwcLayer { + u32 buffer_handle; + u32 offset; + android::PixelFormat format; + u32 width; + u32 height; + u32 stride; + s32 z_index; + android::BufferTransformFlags transform; + Common::Rectangle<int> crop_rect; + android::Fence acquire_fence; +}; + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index 51133853c..e775a2ca8 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -18,6 +18,7 @@ #include "core/hle/service/nvnflinger/buffer_item_consumer.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h" #include "core/hle/service/nvnflinger/fb_share_buffer_manager.h" +#include "core/hle/service/nvnflinger/hardware_composer.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/nvnflinger/nvnflinger.h" #include "core/hle/service/nvnflinger/ui/graphic_buffer.h" @@ -279,45 +280,18 @@ void Nvnflinger::Compose() { SCOPE_EXIT({ display.SignalVSyncEvent(); }); // Don't do anything for displays without layers. - if (!display.HasLayers()) - continue; - - // TODO(Subv): Support more than 1 layer. - VI::Layer& layer = display.GetLayer(0); - - android::BufferItem buffer{}; - const auto status = layer.GetConsumer().AcquireBuffer(&buffer, {}, false); - - if (status != android::Status::NoError) { + if (!display.HasLayers()) { continue; } - const auto& igbp_buffer = *buffer.graphic_buffer; - if (!system.IsPoweredOn()) { return; // We are likely shutting down } - // Now send the buffer to the GPU for drawing. - // TODO(Subv): Support more than just disp0. The display device selection is probably based - // on which display we're drawing (Default, Internal, External, etc) auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd); ASSERT(nvdisp); - Common::Rectangle<int> crop_rect{ - static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()), - static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())}; - - nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(), - igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), - static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect, - buffer.fence.fences, buffer.fence.num_fences); - - MicroProfileFlip(); - - swap_interval = buffer.swap_interval; - - layer.GetConsumer().ReleaseBuffer(buffer, android::Fence::NoFence()); + swap_interval = display.GetComposer().ComposeLocked(display, *nvdisp, swap_interval); } } diff --git a/src/core/hle/service/nvnflinger/nvnflinger.h b/src/core/hle/service/nvnflinger/nvnflinger.h index 369439142..73ff36620 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.h +++ b/src/core/hle/service/nvnflinger/nvnflinger.h @@ -46,6 +46,7 @@ class BufferQueueProducer; namespace Service::Nvnflinger { class FbShareBufferManager; +class HardwareComposer; class HosBinderDriverServer; class Nvnflinger final { diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp index dab1905cc..7f2af9acc 100644 --- a/src/core/hle/service/vi/display/vi_display.cpp +++ b/src/core/hle/service/vi/display/vi_display.cpp @@ -16,6 +16,7 @@ #include "core/hle/service/nvnflinger/buffer_queue_consumer.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h" #include "core/hle/service/nvnflinger/buffer_queue_producer.h" +#include "core/hle/service/nvnflinger/hardware_composer.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/vi/display/vi_display.h" #include "core/hle/service/vi/layer/vi_layer.h" @@ -43,6 +44,7 @@ Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_, Core::System& system_) : display_id{id}, name{std::move(name_)}, hos_binder_driver_server{hos_binder_driver_server_}, service_context{service_context_} { + hardware_composer = std::make_unique<Nvnflinger::HardwareComposer>(); vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id)); } @@ -81,8 +83,6 @@ void Display::SignalVSyncEvent() { void Display::CreateLayer(u64 layer_id, u32 binder_id, Service::Nvidia::NvCore::Container& nv_core) { - ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment"); - auto [core, producer, consumer] = CreateBufferQueue(service_context, nv_core.GetNvMapFile()); auto buffer_item_consumer = std::make_shared<android::BufferItemConsumer>(std::move(consumer)); diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h index 8eb8a5155..220292cff 100644 --- a/src/core/hle/service/vi/display/vi_display.h +++ b/src/core/hle/service/vi/display/vi_display.h @@ -11,9 +11,14 @@ #include "common/common_types.h" #include "core/hle/result.h" +namespace Core { +class System; +} + namespace Kernel { class KEvent; -} +class KReadableEvent; +} // namespace Kernel namespace Service::android { class BufferQueueProducer; @@ -24,8 +29,9 @@ class ServiceContext; } namespace Service::Nvnflinger { +class HardwareComposer; class HosBinderDriverServer; -} +} // namespace Service::Nvnflinger namespace Service::Nvidia::NvCore { class Container; @@ -118,6 +124,10 @@ public: /// const Layer* FindLayer(u64 layer_id) const; + Nvnflinger::HardwareComposer& GetComposer() const { + return *hardware_composer; + } + private: u64 display_id; std::string name; @@ -125,6 +135,7 @@ private: KernelHelpers::ServiceContext& service_context; std::vector<std::unique_ptr<Layer>> layers; + std::unique_ptr<Nvnflinger::HardwareComposer> hardware_composer; Kernel::KEvent* vsync_event{}; bool is_abandoned{}; }; diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 73058db9a..d508ed28c 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -195,8 +195,9 @@ private: void GetSharedBufferMemoryHandleId(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const u64 buffer_id = rp.PopRaw<u64>(); + const u64 aruid = ctx.GetPID(); - LOG_INFO(Service_VI, "called. buffer_id={:#x}", buffer_id); + LOG_INFO(Service_VI, "called. buffer_id={:#x}, aruid={:#x}", buffer_id, aruid); struct OutputParameters { s32 nvmap_handle; @@ -206,7 +207,7 @@ private: OutputParameters out{}; Nvnflinger::SharedMemoryPoolLayout layout{}; const auto result = nvnflinger.GetSystemBufferManager().GetSharedBufferMemoryHandleId( - &out.size, &out.nvmap_handle, &layout, buffer_id, 0); + &out.size, &out.nvmap_handle, &layout, buffer_id, aruid); ctx.WriteBuffer(&layout, sizeof(layout)); diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h index 10ddc75a7..6a18b76fb 100644 --- a/src/video_core/framebuffer_config.h +++ b/src/video_core/framebuffer_config.h @@ -7,6 +7,7 @@ #include "common/math_util.h" #include "core/hle/service/nvnflinger/buffer_transform_flags.h" #include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/ui/fence.h" namespace Tegra { @@ -21,7 +22,7 @@ struct FramebufferConfig { u32 stride{}; Service::android::PixelFormat pixel_format{}; Service::android::BufferTransformFlags transform_flags{}; - Common::Rectangle<int> crop_rect; + Common::Rectangle<int> crop_rect{}; }; Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width, diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 609704b33..f4a5d831c 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -274,11 +274,6 @@ struct GPU::Impl { } } - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - gpu_thread.SwapBuffers(framebuffer); - } - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory void FlushRegion(DAddr addr, u64 size) { gpu_thread.FlushRegion(addr, size); @@ -313,8 +308,9 @@ struct GPU::Impl { gpu_thread.FlushAndInvalidateRegion(addr, size); } - void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { + void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, + std::vector<Service::Nvidia::NvFence>&& fences) { + size_t num_fences{fences.size()}; size_t current_request_counter{}; { std::unique_lock<std::mutex> lk(request_swap_mutex); @@ -328,13 +324,12 @@ struct GPU::Impl { } } const auto wait_fence = - RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] { + RequestSyncOperation([this, current_request_counter, &layers, &fences, num_fences] { auto& syncpoint_manager = host1x.GetSyncpointManager(); if (num_fences == 0) { - renderer->SwapBuffers(framebuffer); + renderer->Composite(layers); } - const auto executer = [this, current_request_counter, - framebuffer_copy = *framebuffer]() { + const auto executer = [this, current_request_counter, layers_copy = layers]() { { std::unique_lock<std::mutex> lk(request_swap_mutex); if (--request_swap_counters[current_request_counter] != 0) { @@ -342,7 +337,7 @@ struct GPU::Impl { } free_swap_counters.push_back(current_request_counter); } - renderer->SwapBuffers(&framebuffer_copy); + renderer->Composite(layers_copy); }; for (size_t i = 0; i < num_fences; i++) { syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); @@ -505,9 +500,9 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { return impl->ShaderNotify(); } -void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { - impl->RequestSwapBuffers(framebuffer, fences, num_fences); +void GPU::RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, + std::vector<Service::Nvidia::NvFence>&& fences) { + impl->RequestComposite(std::move(layers), std::move(fences)); } u64 GPU::GetTicks() const { @@ -554,10 +549,6 @@ void GPU::ClearCdmaInstance(u32 id) { impl->ClearCdmaInstance(id); } -void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - impl->SwapBuffers(framebuffer); -} - VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { return impl->OnCPURead(addr, size); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b3c1d15bd..c4602ca37 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -212,8 +212,8 @@ public: void RendererFrameEndNotify(); - void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences); + void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, + std::vector<Service::Nvidia::NvFence>&& fences); /// Performs any additional setup necessary in order to begin GPU emulation. /// This can be used to launch any necessary threads and register any necessary diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 788d4f61e..58d8110b8 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -40,8 +40,6 @@ static void RunThread(std::stop_token stop_token, Core::System& system, } if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { scheduler.Push(submit_list->channel, std::move(submit_list->entries)); - } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { - renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); } else if (std::holds_alternative<GPUTickCommand>(next.data)) { system.GPU().TickWork(); } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { @@ -78,10 +76,6 @@ void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { PushCommand(SubmitListCommand(channel, std::move(entries))); } -void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); -} - void ThreadManager::FlushRegion(DAddr addr, u64 size) { if (!is_async) { // Always flush with synchronous GPU mode diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 2de25e9ef..dc0fce9f8 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -44,14 +44,6 @@ struct SubmitListCommand final { Tegra::CommandList entries; }; -/// Command to signal to the GPU thread that a swap buffers is pending -struct SwapBuffersCommand final { - explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer_) - : framebuffer{std::move(framebuffer_)} {} - - std::optional<Tegra::FramebufferConfig> framebuffer; -}; - /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} @@ -81,8 +73,8 @@ struct FlushAndInvalidateRegionCommand final { struct GPUTickCommand final {}; using CommandData = - std::variant<std::monostate, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, - InvalidateRegionCommand, FlushAndInvalidateRegionCommand, GPUTickCommand>; + std::variant<std::monostate, SubmitListCommand, FlushRegionCommand, InvalidateRegionCommand, + FlushAndInvalidateRegionCommand, GPUTickCommand>; struct CommandDataContainer { CommandDataContainer() = default; @@ -118,9 +110,6 @@ public: /// Push GPU command entries to be processed void SubmitList(s32 channel, Tegra::CommandList&& entries); - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory void FlushRegion(DAddr addr, u64 size); diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 78ea5208b..3ad180f67 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -38,7 +38,7 @@ public: virtual ~RendererBase(); /// Finalize rendering the guest frame and draw into the presentation texture - virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; + virtual void Composite(std::span<const Tegra::FramebufferConfig> layers) = 0; [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp index 078feb925..c89daff53 100644 --- a/src/video_core/renderer_null/renderer_null.cpp +++ b/src/video_core/renderer_null/renderer_null.cpp @@ -13,8 +13,8 @@ RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gp RendererNull::~RendererNull() = default; -void RendererNull::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererNull::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) { + if (framebuffers.empty()) { return; } diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h index 9531b43f6..063b476bb 100644 --- a/src/video_core/renderer_null/renderer_null.h +++ b/src/video_core/renderer_null/renderer_null.h @@ -17,7 +17,7 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererNull() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span<const Tegra::FramebufferConfig> framebuffer) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &m_rasterizer; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 10a9f973c..e33a32592 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -125,15 +125,15 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, RendererOpenGL::~RendererOpenGL() = default; -void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererOpenGL::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) { + if (framebuffers.empty()) { return; } - RenderScreenshot(framebuffer); + RenderScreenshot(framebuffers); state_tracker.BindFramebuffer(0); - blit_screen->DrawScreen(std::span(framebuffer, 1), emu_window.GetFramebufferLayout()); + blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -159,7 +159,7 @@ void RendererOpenGL::AddTelemetryFields() { telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } -void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig* framebuffer) { +void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) { if (!renderer_settings.screenshot_requested) { return; } @@ -181,7 +181,7 @@ void RendererOpenGL::RenderScreenshot(const Tegra::FramebufferConfig* framebuffe glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); - blit_screen->DrawScreen(std::span(framebuffer, 1), layout); + blit_screen->DrawScreen(framebuffers, layout); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index df76d3d05..c4625c96e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -40,7 +40,7 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> context_); ~RendererOpenGL() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &rasterizer; @@ -52,7 +52,7 @@ public: private: void AddTelemetryFields(); - void RenderScreenshot(const Tegra::FramebufferConfig* framebuffer); + void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers); Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 7bff1c436..6ee16595d 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -7,6 +7,20 @@ namespace Vulkan { +vk::Buffer CreateWrappedBuffer(MemoryAllocator& allocator, VkDeviceSize size, MemoryUsage usage) { + const VkBufferCreateInfo dst_buffer_info{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = size, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + return allocator.CreateBuffer(dst_buffer_info, usage); +} + vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) { const VkImageCreateInfo image_ci{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -96,6 +110,70 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc scheduler.Finish(); } +void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffer, + VkExtent3D extent) { + const VkImageMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const VkImageMemoryBarrier image_write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + static constexpr VkMemoryBarrier memory_write_barrier{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + const VkBufferImageCopy copy{ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset{.x = 0, .y = 0, .z = 0}, + .imageExtent{extent}, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, + memory_write_barrier, nullptr, image_write_barrier); +} + vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h index fb4e4a8e4..1104aaa15 100644 --- a/src/video_core/renderer_vulkan/present/util.h +++ b/src/video_core/renderer_vulkan/present/util.h @@ -11,12 +11,16 @@ namespace Vulkan { #define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) +vk::Buffer CreateWrappedBuffer(MemoryAllocator& allocator, VkDeviceSize size, MemoryUsage usage); + vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format); void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL); void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, vk::Image& image, VkExtent2D dimensions, VkFormat format, std::span<const u8> initial_contents = {}); +void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffer, + VkExtent3D extent); void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image); vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 77837adde..48a105327 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -20,12 +20,14 @@ #include "core/frontend/graphics_context.h" #include "core/telemetry_session.h" #include "video_core/gpu.h" +#include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/textures/decoders.h" #include "video_core/vulkan_common/vulkan_debug_callback.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_instance.h" @@ -116,18 +118,20 @@ RendererVulkan::~RendererVulkan() { void(device.GetLogical().WaitIdle()); } -void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererVulkan::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) { + if (framebuffers.empty()) { return; } + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { return; } - RenderScreenshot(framebuffer); + RenderScreenshot(framebuffers); Frame* frame = present_manager.GetRenderFrame(); - blit_swapchain.DrawToFrame(rasterizer, frame, std::span(framebuffer, 1), + blit_swapchain.DrawToFrame(rasterizer, frame, framebuffers, render_window.GetFramebufferLayout(), swapchain.GetImageCount(), swapchain.GetImageViewFormat()); scheduler.Flush(*frame->render_ready); @@ -163,156 +167,37 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig* framebuffer) { +void Vulkan::RendererVulkan::RenderScreenshot( + std::span<const Tegra::FramebufferConfig> framebuffers) { if (!renderer_settings.screenshot_requested) { return; } + + constexpr VkFormat ScreenshotFormat{VK_FORMAT_B8G8R8A8_UNORM}; const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; - auto frame = [&]() { - vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_B8G8R8A8_UNORM, - .extent = - { - .width = layout.width, - .height = layout.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }); - vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *staging_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_B8G8R8A8_UNORM, - .components{ - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - vk::Framebuffer screenshot_fb = - blit_screenshot.CreateFramebuffer(layout, *dst_view, VK_FORMAT_B8G8R8A8_UNORM); - return Frame{ - .width = layout.width, - .height = layout.height, - .image = std::move(staging_image), - .image_view = std::move(dst_view), - .framebuffer = std::move(screenshot_fb), - .cmdbuf{}, - .render_ready{}, - .present_done{}, - }; + auto frame = [&]() { + Frame f{}; + f.image = CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height}, + ScreenshotFormat); + f.image_view = CreateWrappedImageView(device, f.image, ScreenshotFormat); + f.framebuffer = blit_screenshot.CreateFramebuffer(layout, *f.image_view, ScreenshotFormat); + return f; }(); - blit_screenshot.DrawToFrame(rasterizer, &frame, std::span(framebuffer, 1), layout, 1, + blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1, VK_FORMAT_B8G8R8A8_UNORM); - const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); - const VkBufferCreateInfo dst_buffer_info{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = buffer_size, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - const vk::Buffer dst_buffer = - memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download); + const auto dst_buffer = CreateWrappedBuffer( + memory_allocator, static_cast<VkDeviceSize>(layout.width * layout.height * 4), + MemoryUsage::Download); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([&](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier read_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = *frame.image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - const VkImageMemoryBarrier image_write_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = *frame.image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - static constexpr VkMemoryBarrier memory_write_barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - }; - const VkBufferImageCopy copy{ - .bufferOffset = 0, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset{.x = 0, .y = 0, .z = 0}, - .imageExtent{ - .width = layout.width, - .height = layout.height, - .depth = 1, - }, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyImageToBuffer(*frame.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer, - copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, memory_write_barrier, nullptr, image_write_barrier); + DownloadColorImage(cmdbuf, *frame.image, *dst_buffer, + VkExtent3D{layout.width, layout.height, 1}); }); + // Ensure the copy is fully completed before saving the screenshot scheduler.Finish(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index bdeb43a54..c6d8a0f21 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -46,7 +46,7 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> context_); ~RendererVulkan() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &rasterizer; @@ -59,7 +59,7 @@ public: private: void Report() const; - void RenderScreenshot(const Tegra::FramebufferConfig* framebuffer); + void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers); Core::TelemetrySession& telemetry_session; Tegra::MaxwellDeviceMemoryManager& device_memory; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b2dcbf80b..2275fcc46 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -115,7 +115,7 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, } vk::Framebuffer BlitScreen::CreateFramebuffer(const Layout::FramebufferLayout& layout, - const VkImageView& image_view, + VkImageView image_view, VkFormat current_view_format) { const bool format_updated = std::exchange(swapchain_view_format, current_view_format) != current_view_format; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 9a3476c77..cbdf2d5d0 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -56,7 +56,7 @@ public: VkFormat current_swapchain_view_format); [[nodiscard]] vk::Framebuffer CreateFramebuffer(const Layout::FramebufferLayout& layout, - const VkImageView& image_view, + VkImageView image_view, VkFormat current_view_format); private: |