3133 lines
113 KiB
C++
3133 lines
113 KiB
C++
/****************************************************************************
|
|
**
|
|
** Copyright (C) 2016 The Qt Company Ltd.
|
|
** Contact: https://www.qt.io/licensing/
|
|
**
|
|
** This file is part of the QtQuick module of the Qt Toolkit.
|
|
**
|
|
** $QT_BEGIN_LICENSE:LGPL$
|
|
** Commercial License Usage
|
|
** Licensees holding valid commercial Qt licenses may use this file in
|
|
** accordance with the commercial license agreement provided with the
|
|
** Software or, alternatively, in accordance with the terms contained in
|
|
** a written agreement between you and The Qt Company. For licensing terms
|
|
** and conditions see https://www.qt.io/terms-conditions. For further
|
|
** information use the contact form at https://www.qt.io/contact-us.
|
|
**
|
|
** GNU Lesser General Public License Usage
|
|
** Alternatively, this file may be used under the terms of the GNU Lesser
|
|
** General Public License version 3 as published by the Free Software
|
|
** Foundation and appearing in the file LICENSE.LGPL3 included in the
|
|
** packaging of this file. Please review the following information to
|
|
** ensure the GNU Lesser General Public License version 3 requirements
|
|
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
|
|
**
|
|
** GNU General Public License Usage
|
|
** Alternatively, this file may be used under the terms of the GNU
|
|
** General Public License version 2.0 or (at your option) the GNU General
|
|
** Public license version 3 or any later version approved by the KDE Free
|
|
** Qt Foundation. The licenses are as published by the Free Software
|
|
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
|
|
** included in the packaging of this file. Please review the following
|
|
** information to ensure the GNU General Public License requirements will
|
|
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
|
|
** https://www.gnu.org/licenses/gpl-3.0.html.
|
|
**
|
|
** $QT_END_LICENSE$
|
|
**
|
|
****************************************************************************/
|
|
|
|
#include "qsgd3d12engine_p.h"
|
|
#include "qsgd3d12engine_p_p.h"
|
|
#include "cs_mipmapgen.hlslh"
|
|
#include <QString>
|
|
#include <QColor>
|
|
#include <qmath.h>
|
|
#include <qalgorithms.h>
|
|
|
|
// Comment out to disable DeviceLossTester functionality in order to reduce
|
|
// code size and improve startup perf a tiny bit.
|
|
#define DEVLOSS_TEST
|
|
|
|
#ifdef DEVLOSS_TEST
|
|
#include "cs_tdr.hlslh"
|
|
#endif
|
|
|
|
#ifdef Q_OS_WINRT
|
|
#include <QtCore/private/qeventdispatcher_winrt_p.h>
|
|
#include <functional>
|
|
#include <windows.ui.xaml.h>
|
|
#include <windows.ui.xaml.media.dxinterop.h>
|
|
#endif
|
|
|
|
QT_BEGIN_NAMESPACE
|
|
|
|
// NOTE: Avoid categorized logging. It is slow.
|
|
|
|
#define DECLARE_DEBUG_VAR(variable) \
|
|
static bool debug_ ## variable() \
|
|
{ static bool value = qgetenv("QSG_RENDERER_DEBUG").contains(QT_STRINGIFY(variable)); return value; }
|
|
|
|
DECLARE_DEBUG_VAR(render)
|
|
|
|
static const int DEFAULT_SWAP_CHAIN_BUFFER_COUNT = 3;
|
|
static const int DEFAULT_FRAME_IN_FLIGHT_COUNT = 2;
|
|
static const int DEFAULT_WAITABLE_SWAP_CHAIN_MAX_LATENCY = 0;
|
|
|
|
static const int MAX_DRAW_CALLS_PER_LIST = 128;
|
|
|
|
static const int MAX_CACHED_ROOTSIG = 16;
|
|
static const int MAX_CACHED_PSO = 64;
|
|
|
|
static const int GPU_CBVSRVUAV_DESCRIPTORS = 512;
|
|
|
|
static const int BUCKETS_PER_HEAP = 8; // must match freeMap
|
|
static const int DESCRIPTORS_PER_BUCKET = 32; // the bit map (freeMap) is quint32
|
|
static const int MAX_DESCRIPTORS_PER_HEAP = BUCKETS_PER_HEAP * DESCRIPTORS_PER_BUCKET;
|
|
|
|
D3D12_CPU_DESCRIPTOR_HANDLE QSGD3D12CPUDescriptorHeapManager::allocate(D3D12_DESCRIPTOR_HEAP_TYPE type)
|
|
{
|
|
D3D12_CPU_DESCRIPTOR_HANDLE h = {};
|
|
for (Heap &heap : m_heaps) {
|
|
if (heap.type == type) {
|
|
for (int bucket = 0; bucket < _countof(heap.freeMap); ++bucket)
|
|
if (heap.freeMap[bucket]) {
|
|
uint freePos = qCountTrailingZeroBits(heap.freeMap[bucket]);
|
|
heap.freeMap[bucket] &= ~(1UL << freePos);
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("descriptor handle type %x reserve in bucket %d index %d", type, bucket, freePos);
|
|
freePos += bucket * DESCRIPTORS_PER_BUCKET;
|
|
h = heap.start;
|
|
h.ptr += freePos * heap.handleSize;
|
|
return h;
|
|
}
|
|
}
|
|
}
|
|
|
|
Heap heap;
|
|
heap.type = type;
|
|
heap.handleSize = m_handleSizes[type];
|
|
|
|
D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
|
|
heapDesc.NumDescriptors = MAX_DESCRIPTORS_PER_HEAP;
|
|
heapDesc.Type = type;
|
|
// The heaps created here are _never_ shader-visible.
|
|
|
|
HRESULT hr = m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap.heap));
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create heap with type 0x%x: %x", type, hr);
|
|
return h;
|
|
}
|
|
|
|
heap.start = heap.heap->GetCPUDescriptorHandleForHeapStart();
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("new descriptor heap, type %x, start %llu", type, heap.start.ptr);
|
|
|
|
heap.freeMap[0] = 0xFFFFFFFE;
|
|
for (int i = 1; i < _countof(heap.freeMap); ++i)
|
|
heap.freeMap[i] = 0xFFFFFFFF;
|
|
|
|
h = heap.start;
|
|
|
|
m_heaps.append(heap);
|
|
|
|
return h;
|
|
}
|
|
|
|
void QSGD3D12CPUDescriptorHeapManager::release(D3D12_CPU_DESCRIPTOR_HANDLE handle, D3D12_DESCRIPTOR_HEAP_TYPE type)
|
|
{
|
|
for (Heap &heap : m_heaps) {
|
|
if (heap.type == type
|
|
&& handle.ptr >= heap.start.ptr
|
|
&& handle.ptr < heap.start.ptr + heap.handleSize * MAX_DESCRIPTORS_PER_HEAP) {
|
|
unsigned long pos = (handle.ptr - heap.start.ptr) / heap.handleSize;
|
|
const int bucket = pos / DESCRIPTORS_PER_BUCKET;
|
|
const int indexInBucket = pos - bucket * DESCRIPTORS_PER_BUCKET;
|
|
heap.freeMap[bucket] |= 1UL << indexInBucket;
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("free descriptor handle type %x bucket %d index %d", type, bucket, indexInBucket);
|
|
return;
|
|
}
|
|
}
|
|
qWarning("QSGD3D12CPUDescriptorHeapManager: Attempted to release untracked descriptor handle %llu of type %d", handle.ptr, type);
|
|
}
|
|
|
|
void QSGD3D12CPUDescriptorHeapManager::initialize(ID3D12Device *device)
|
|
{
|
|
m_device = device;
|
|
|
|
for (int i = 0; i < D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES; ++i)
|
|
m_handleSizes[i] = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE(i));
|
|
}
|
|
|
|
void QSGD3D12CPUDescriptorHeapManager::releaseResources()
|
|
{
|
|
for (Heap &heap : m_heaps)
|
|
heap.heap = nullptr;
|
|
|
|
m_heaps.clear();
|
|
|
|
m_device = nullptr;
|
|
}
|
|
|
|
// One device per process, one everything else (engine) per window.
|
|
Q_GLOBAL_STATIC(QSGD3D12DeviceManager, deviceManager)
|
|
|
|
static void getHardwareAdapter(IDXGIFactory1 *factory, IDXGIAdapter1 **outAdapter)
|
|
{
|
|
const D3D_FEATURE_LEVEL fl = D3D_FEATURE_LEVEL_11_0;
|
|
ComPtr<IDXGIAdapter1> adapter;
|
|
DXGI_ADAPTER_DESC1 desc;
|
|
|
|
for (int adapterIndex = 0; factory->EnumAdapters1(adapterIndex, &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
|
|
DXGI_ADAPTER_DESC1 desc;
|
|
adapter->GetDesc1(&desc);
|
|
const QString name = QString::fromUtf16((char16_t *) desc.Description);
|
|
qDebug("Adapter %d: '%s' (flags 0x%x)", adapterIndex, qPrintable(name), desc.Flags);
|
|
}
|
|
|
|
if (qEnvironmentVariableIsSet("QT_D3D_ADAPTER_INDEX")) {
|
|
const int adapterIndex = qEnvironmentVariableIntValue("QT_D3D_ADAPTER_INDEX");
|
|
if (SUCCEEDED(factory->EnumAdapters1(adapterIndex, &adapter))
|
|
&& SUCCEEDED(D3D12CreateDevice(adapter.Get(), fl, _uuidof(ID3D12Device), nullptr))) {
|
|
adapter->GetDesc1(&desc);
|
|
const QString name = QString::fromUtf16((char16_t *) desc.Description);
|
|
qDebug("Using requested adapter '%s'", qPrintable(name));
|
|
*outAdapter = adapter.Detach();
|
|
return;
|
|
}
|
|
}
|
|
|
|
for (int adapterIndex = 0; factory->EnumAdapters1(adapterIndex, &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
|
|
adapter->GetDesc1(&desc);
|
|
if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE)
|
|
continue;
|
|
|
|
if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), fl, _uuidof(ID3D12Device), nullptr))) {
|
|
const QString name = QString::fromUtf16((char16_t *) desc.Description);
|
|
qDebug("Using adapter '%s'", qPrintable(name));
|
|
break;
|
|
}
|
|
}
|
|
|
|
*outAdapter = adapter.Detach();
|
|
}
|
|
|
|
ID3D12Device *QSGD3D12DeviceManager::ref()
|
|
{
|
|
ensureCreated();
|
|
m_ref.ref();
|
|
return m_device.Get();
|
|
}
|
|
|
|
void QSGD3D12DeviceManager::unref()
|
|
{
|
|
if (!m_ref.deref()) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("destroying d3d device");
|
|
m_device = nullptr;
|
|
m_factory = nullptr;
|
|
}
|
|
}
|
|
|
|
void QSGD3D12DeviceManager::deviceLossDetected()
|
|
{
|
|
for (DeviceLossObserver *observer : qAsConst(m_observers))
|
|
observer->deviceLost();
|
|
|
|
// Nothing else to do here. All windows are expected to release their
|
|
// resources and call unref() in response immediately.
|
|
}
|
|
|
|
IDXGIFactory4 *QSGD3D12DeviceManager::dxgi()
|
|
{
|
|
ensureCreated();
|
|
return m_factory.Get();
|
|
}
|
|
|
|
void QSGD3D12DeviceManager::ensureCreated()
|
|
{
|
|
if (m_device)
|
|
return;
|
|
|
|
HRESULT hr = CreateDXGIFactory2(0, IID_PPV_ARGS(&m_factory));
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create DXGI: 0x%x", hr);
|
|
return;
|
|
}
|
|
|
|
ComPtr<IDXGIAdapter1> adapter;
|
|
getHardwareAdapter(m_factory.Get(), &adapter);
|
|
|
|
bool warp = true;
|
|
if (adapter) {
|
|
HRESULT hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device));
|
|
if (SUCCEEDED(hr))
|
|
warp = false;
|
|
else
|
|
qWarning("Failed to create device: 0x%x", hr);
|
|
}
|
|
|
|
if (warp) {
|
|
qDebug("Using WARP");
|
|
m_factory->EnumWarpAdapter(IID_PPV_ARGS(&adapter));
|
|
HRESULT hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device));
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create WARP device: 0x%x", hr);
|
|
return;
|
|
}
|
|
}
|
|
|
|
ComPtr<IDXGIAdapter3> adapter3;
|
|
if (SUCCEEDED(adapter.As(&adapter3))) {
|
|
DXGI_QUERY_VIDEO_MEMORY_INFO vidMemInfo;
|
|
if (SUCCEEDED(adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &vidMemInfo))) {
|
|
qDebug("Video memory info: LOCAL: Budget %llu KB CurrentUsage %llu KB AvailableForReservation %llu KB CurrentReservation %llu KB",
|
|
vidMemInfo.Budget / 1024, vidMemInfo.CurrentUsage / 1024,
|
|
vidMemInfo.AvailableForReservation / 1024, vidMemInfo.CurrentReservation / 1024);
|
|
}
|
|
if (SUCCEEDED(adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &vidMemInfo))) {
|
|
qDebug("Video memory info: NON-LOCAL: Budget %llu KB CurrentUsage %llu KB AvailableForReservation %llu KB CurrentReservation %llu KB",
|
|
vidMemInfo.Budget / 1024, vidMemInfo.CurrentUsage / 1024,
|
|
vidMemInfo.AvailableForReservation / 1024, vidMemInfo.CurrentReservation / 1024);
|
|
}
|
|
}
|
|
}
|
|
|
|
void QSGD3D12DeviceManager::registerDeviceLossObserver(DeviceLossObserver *observer)
|
|
{
|
|
if (!m_observers.contains(observer))
|
|
m_observers.append(observer);
|
|
}
|
|
|
|
QSGD3D12Engine::QSGD3D12Engine()
|
|
{
|
|
d = new QSGD3D12EnginePrivate;
|
|
}
|
|
|
|
QSGD3D12Engine::~QSGD3D12Engine()
|
|
{
|
|
d->waitGPU();
|
|
d->releaseResources();
|
|
delete d;
|
|
}
|
|
|
|
bool QSGD3D12Engine::attachToWindow(WId window, const QSize &size, float dpr, int surfaceFormatSamples)
|
|
{
|
|
if (d->isInitialized()) {
|
|
qWarning("QSGD3D12Engine: Cannot attach active engine to window");
|
|
return false;
|
|
}
|
|
|
|
d->initialize(window, size, dpr, surfaceFormatSamples);
|
|
return d->isInitialized();
|
|
}
|
|
|
|
void QSGD3D12Engine::releaseResources()
|
|
{
|
|
d->releaseResources();
|
|
}
|
|
|
|
bool QSGD3D12Engine::hasResources() const
|
|
{
|
|
// An explicit releaseResources() or a device loss results in initialized == false.
|
|
return d->isInitialized();
|
|
}
|
|
|
|
void QSGD3D12Engine::setWindowSize(const QSize &size, float dpr)
|
|
{
|
|
d->setWindowSize(size, dpr);
|
|
}
|
|
|
|
WId QSGD3D12Engine::window() const
|
|
{
|
|
return d->currentWindow();
|
|
}
|
|
|
|
QSize QSGD3D12Engine::windowSize() const
|
|
{
|
|
return d->currentWindowSize();
|
|
}
|
|
|
|
float QSGD3D12Engine::windowDevicePixelRatio() const
|
|
{
|
|
return d->currentWindowDpr();
|
|
}
|
|
|
|
uint QSGD3D12Engine::windowSamples() const
|
|
{
|
|
return d->currentWindowSamples();
|
|
}
|
|
|
|
void QSGD3D12Engine::beginFrame()
|
|
{
|
|
d->beginFrame();
|
|
}
|
|
|
|
void QSGD3D12Engine::endFrame()
|
|
{
|
|
d->endFrame();
|
|
}
|
|
|
|
void QSGD3D12Engine::beginLayer()
|
|
{
|
|
d->beginLayer();
|
|
}
|
|
|
|
void QSGD3D12Engine::endLayer()
|
|
{
|
|
d->endLayer();
|
|
}
|
|
|
|
void QSGD3D12Engine::invalidateCachedFrameState()
|
|
{
|
|
d->invalidateCachedFrameState();
|
|
}
|
|
|
|
void QSGD3D12Engine::restoreFrameState(bool minimal)
|
|
{
|
|
d->restoreFrameState(minimal);
|
|
}
|
|
|
|
void QSGD3D12Engine::finalizePipeline(const QSGD3D12PipelineState &pipelineState)
|
|
{
|
|
d->finalizePipeline(pipelineState);
|
|
}
|
|
|
|
uint QSGD3D12Engine::genBuffer()
|
|
{
|
|
return d->genBuffer();
|
|
}
|
|
|
|
void QSGD3D12Engine::releaseBuffer(uint id)
|
|
{
|
|
d->releaseBuffer(id);
|
|
}
|
|
|
|
void QSGD3D12Engine::resetBuffer(uint id, const quint8 *data, int size)
|
|
{
|
|
d->resetBuffer(id, data, size);
|
|
}
|
|
|
|
void QSGD3D12Engine::markBufferDirty(uint id, int offset, int size)
|
|
{
|
|
d->markBufferDirty(id, offset, size);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueViewport(const QRect &rect)
|
|
{
|
|
d->queueViewport(rect);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueScissor(const QRect &rect)
|
|
{
|
|
d->queueScissor(rect);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueSetRenderTarget(uint id)
|
|
{
|
|
d->queueSetRenderTarget(id);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueClearRenderTarget(const QColor &color)
|
|
{
|
|
d->queueClearRenderTarget(color);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueClearDepthStencil(float depthValue, quint8 stencilValue, ClearFlags which)
|
|
{
|
|
d->queueClearDepthStencil(depthValue, stencilValue, which);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueSetBlendFactor(const QVector4D &factor)
|
|
{
|
|
d->queueSetBlendFactor(factor);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueSetStencilRef(quint32 ref)
|
|
{
|
|
d->queueSetStencilRef(ref);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueDraw(const DrawParams ¶ms)
|
|
{
|
|
d->queueDraw(params);
|
|
}
|
|
|
|
void QSGD3D12Engine::present()
|
|
{
|
|
d->present();
|
|
}
|
|
|
|
void QSGD3D12Engine::waitGPU()
|
|
{
|
|
d->waitGPU();
|
|
}
|
|
|
|
uint QSGD3D12Engine::genTexture()
|
|
{
|
|
return d->genTexture();
|
|
}
|
|
|
|
void QSGD3D12Engine::createTexture(uint id, const QSize &size, QImage::Format format, TextureCreateFlags flags)
|
|
{
|
|
d->createTexture(id, size, format, flags);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueTextureResize(uint id, const QSize &size)
|
|
{
|
|
d->queueTextureResize(id, size);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueTextureUpload(uint id, const QImage &image, const QPoint &dstPos)
|
|
{
|
|
d->queueTextureUpload(id, QVector<QImage>() << image, QVector<QPoint>() << dstPos);
|
|
}
|
|
|
|
void QSGD3D12Engine::queueTextureUpload(uint id, const QVector<QImage> &images, const QVector<QPoint> &dstPos)
|
|
{
|
|
d->queueTextureUpload(id, images, dstPos);
|
|
}
|
|
|
|
void QSGD3D12Engine::releaseTexture(uint id)
|
|
{
|
|
d->releaseTexture(id);
|
|
}
|
|
|
|
void QSGD3D12Engine::useTexture(uint id)
|
|
{
|
|
d->useTexture(id);
|
|
}
|
|
|
|
uint QSGD3D12Engine::genRenderTarget()
|
|
{
|
|
return d->genRenderTarget();
|
|
}
|
|
|
|
void QSGD3D12Engine::createRenderTarget(uint id, const QSize &size, const QVector4D &clearColor, uint samples)
|
|
{
|
|
d->createRenderTarget(id, size, clearColor, samples);
|
|
}
|
|
|
|
void QSGD3D12Engine::releaseRenderTarget(uint id)
|
|
{
|
|
d->releaseRenderTarget(id);
|
|
}
|
|
|
|
void QSGD3D12Engine::useRenderTargetAsTexture(uint id)
|
|
{
|
|
d->useRenderTargetAsTexture(id);
|
|
}
|
|
|
|
uint QSGD3D12Engine::activeRenderTarget() const
|
|
{
|
|
return d->activeRenderTarget();
|
|
}
|
|
|
|
QImage QSGD3D12Engine::executeAndWaitReadbackRenderTarget(uint id)
|
|
{
|
|
return d->executeAndWaitReadbackRenderTarget(id);
|
|
}
|
|
|
|
void QSGD3D12Engine::simulateDeviceLoss()
|
|
{
|
|
d->simulateDeviceLoss();
|
|
}
|
|
|
|
QSGRendererInterface::GraphicsApi QSGD3D12Engine::graphicsApi() const
|
|
{
|
|
return Direct3D12;
|
|
}
|
|
|
|
void *QSGD3D12Engine::getResource(Resource resource) const
|
|
{
|
|
return d->getResource(resource);
|
|
}
|
|
|
|
QSGRendererInterface::ShaderType QSGD3D12Engine::shaderType() const
|
|
{
|
|
return HLSL;
|
|
}
|
|
|
|
QSGRendererInterface::ShaderCompilationTypes QSGD3D12Engine::shaderCompilationType() const
|
|
{
|
|
return OfflineCompilation;
|
|
}
|
|
|
|
QSGRendererInterface::ShaderSourceTypes QSGD3D12Engine::shaderSourceType() const
|
|
{
|
|
return ShaderByteCode;
|
|
}
|
|
|
|
static inline quint32 alignedSize(quint32 size, quint32 byteAlign)
|
|
{
|
|
return (size + byteAlign - 1) & ~(byteAlign - 1);
|
|
}
|
|
|
|
quint32 QSGD3D12Engine::alignedConstantBufferSize(quint32 size)
|
|
{
|
|
return alignedSize(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
|
|
}
|
|
|
|
QSGD3D12Format QSGD3D12Engine::toDXGIFormat(QSGGeometry::Type sgtype, int tupleSize, int *size)
|
|
{
|
|
QSGD3D12Format format = FmtUnknown;
|
|
|
|
static const QSGD3D12Format formatMap_ub[] = { FmtUnknown,
|
|
FmtUNormByte,
|
|
FmtUNormByte2,
|
|
FmtUnknown,
|
|
FmtUNormByte4 };
|
|
|
|
static const QSGD3D12Format formatMap_f[] = { FmtUnknown,
|
|
FmtFloat,
|
|
FmtFloat2,
|
|
FmtFloat3,
|
|
FmtFloat4 };
|
|
|
|
switch (sgtype) {
|
|
case QSGGeometry::TypeUnsignedByte:
|
|
format = formatMap_ub[tupleSize];
|
|
if (size)
|
|
*size = tupleSize;
|
|
break;
|
|
case QSGGeometry::TypeFloat:
|
|
format = formatMap_f[tupleSize];
|
|
if (size)
|
|
*size = sizeof(float) * tupleSize;
|
|
break;
|
|
|
|
case QSGGeometry::TypeUnsignedShort:
|
|
format = FmtUnsignedShort;
|
|
if (size)
|
|
*size = sizeof(ushort) * tupleSize;
|
|
break;
|
|
case QSGGeometry::TypeUnsignedInt:
|
|
format = FmtUnsignedInt;
|
|
if (size)
|
|
*size = sizeof(uint) * tupleSize;
|
|
break;
|
|
|
|
case QSGGeometry::TypeByte:
|
|
case QSGGeometry::TypeInt:
|
|
case QSGGeometry::TypeShort:
|
|
qWarning("no mapping for GL type 0x%x", sgtype);
|
|
break;
|
|
|
|
default:
|
|
qWarning("unknown GL type 0x%x", sgtype);
|
|
break;
|
|
}
|
|
|
|
return format;
|
|
}
|
|
|
|
int QSGD3D12Engine::mipMapLevels(const QSize &size)
|
|
{
|
|
return ceil(log2(qMax(size.width(), size.height()))) + 1;
|
|
}
|
|
|
|
inline static bool isPowerOfTwo(int x)
|
|
{
|
|
// Assumption: x >= 1
|
|
return x == (x & -x);
|
|
}
|
|
|
|
QSize QSGD3D12Engine::mipMapAdjustedSourceSize(const QSize &size)
|
|
{
|
|
if (size.isEmpty())
|
|
return size;
|
|
|
|
QSize adjustedSize = size;
|
|
|
|
// ### for now only power-of-two sizes are mipmap-capable
|
|
if (!isPowerOfTwo(size.width()))
|
|
adjustedSize.setWidth(qNextPowerOfTwo(size.width()));
|
|
if (!isPowerOfTwo(size.height()))
|
|
adjustedSize.setHeight(qNextPowerOfTwo(size.height()));
|
|
|
|
return adjustedSize;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::releaseResources()
|
|
{
|
|
if (!initialized)
|
|
return;
|
|
|
|
mipmapper.releaseResources();
|
|
devLossTest.releaseResources();
|
|
|
|
frameCommandList = nullptr;
|
|
copyCommandList = nullptr;
|
|
|
|
copyCommandAllocator = nullptr;
|
|
for (int i = 0; i < frameInFlightCount; ++i) {
|
|
frameCommandAllocator[i] = nullptr;
|
|
pframeData[i].gpuCbvSrvUavHeap = nullptr;
|
|
delete frameFence[i];
|
|
}
|
|
|
|
defaultDS = nullptr;
|
|
for (int i = 0; i < swapChainBufferCount; ++i) {
|
|
backBufferRT[i] = nullptr;
|
|
defaultRT[i] = nullptr;
|
|
}
|
|
|
|
psoCache.clear();
|
|
rootSigCache.clear();
|
|
buffers.clear();
|
|
textures.clear();
|
|
renderTargets.clear();
|
|
|
|
cpuDescHeapManager.releaseResources();
|
|
|
|
commandQueue = nullptr;
|
|
copyCommandQueue = nullptr;
|
|
swapChain = nullptr;
|
|
|
|
delete presentFence;
|
|
textureUploadFence = nullptr;
|
|
|
|
deviceManager()->unref();
|
|
|
|
initialized = false;
|
|
|
|
// 'window' must be kept, may just be a device loss
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::initialize(WId w, const QSize &size, float dpr, int surfaceFormatSamples)
|
|
{
|
|
if (initialized)
|
|
return;
|
|
|
|
window = w;
|
|
windowSize = size;
|
|
windowDpr = dpr;
|
|
windowSamples = qMax(1, surfaceFormatSamples); // may be -1 or 0, whereas windowSamples is uint and >= 1
|
|
|
|
swapChainBufferCount = qMin(qEnvironmentVariableIntValue("QT_D3D_BUFFER_COUNT"), MAX_SWAP_CHAIN_BUFFER_COUNT);
|
|
if (swapChainBufferCount < 2)
|
|
swapChainBufferCount = DEFAULT_SWAP_CHAIN_BUFFER_COUNT;
|
|
|
|
frameInFlightCount = qMin(qEnvironmentVariableIntValue("QT_D3D_FRAME_COUNT"), MAX_FRAME_IN_FLIGHT_COUNT);
|
|
if (frameInFlightCount < 1)
|
|
frameInFlightCount = DEFAULT_FRAME_IN_FLIGHT_COUNT;
|
|
|
|
static const char *latReqEnvVar = "QT_D3D_WAITABLE_SWAP_CHAIN_MAX_LATENCY";
|
|
if (!qEnvironmentVariableIsSet(latReqEnvVar))
|
|
waitableSwapChainMaxLatency = DEFAULT_WAITABLE_SWAP_CHAIN_MAX_LATENCY;
|
|
else
|
|
waitableSwapChainMaxLatency = qBound(0, qEnvironmentVariableIntValue(latReqEnvVar), 16);
|
|
|
|
qDebug("d3d12 engine init. swap chain buffer count %d, max frames prepared without blocking %d",
|
|
swapChainBufferCount, frameInFlightCount);
|
|
if (waitableSwapChainMaxLatency)
|
|
qDebug("Swap chain frame latency waitable object enabled. Frame latency is %d", waitableSwapChainMaxLatency);
|
|
|
|
if (qEnvironmentVariableIntValue("QT_D3D_DEBUG") != 0) {
|
|
qDebug("Enabling debug layer");
|
|
ComPtr<ID3D12Debug> debugController;
|
|
if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController))))
|
|
debugController->EnableDebugLayer();
|
|
}
|
|
|
|
QSGD3D12DeviceManager *dev = deviceManager();
|
|
device = dev->ref();
|
|
dev->registerDeviceLossObserver(this);
|
|
|
|
D3D12_COMMAND_QUEUE_DESC queueDesc = {};
|
|
queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
|
|
if (FAILED(device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&commandQueue)))) {
|
|
qWarning("Failed to create command queue");
|
|
return;
|
|
}
|
|
|
|
queueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
|
|
if (FAILED(device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(©CommandQueue)))) {
|
|
qWarning("Failed to create copy command queue");
|
|
return;
|
|
}
|
|
|
|
#ifndef Q_OS_WINRT
|
|
HWND hwnd = reinterpret_cast<HWND>(w);
|
|
|
|
DXGI_SWAP_CHAIN_DESC swapChainDesc = {};
|
|
swapChainDesc.BufferCount = swapChainBufferCount;
|
|
swapChainDesc.BufferDesc.Width = windowSize.width() * windowDpr;
|
|
swapChainDesc.BufferDesc.Height = windowSize.height() * windowDpr;
|
|
swapChainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
|
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; // D3D12 requires the flip model
|
|
swapChainDesc.OutputWindow = hwnd;
|
|
swapChainDesc.SampleDesc.Count = 1; // Flip does not support MSAA so no choice here
|
|
swapChainDesc.Windowed = TRUE;
|
|
if (waitableSwapChainMaxLatency)
|
|
swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
|
|
|
|
ComPtr<IDXGISwapChain> baseSwapChain;
|
|
HRESULT hr = dev->dxgi()->CreateSwapChain(commandQueue.Get(), &swapChainDesc, &baseSwapChain);
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create swap chain: 0x%x", hr);
|
|
return;
|
|
}
|
|
if (FAILED(baseSwapChain.As(&swapChain))) {
|
|
qWarning("Failed to cast swap chain");
|
|
return;
|
|
}
|
|
|
|
dev->dxgi()->MakeWindowAssociation(hwnd, DXGI_MWA_NO_ALT_ENTER);
|
|
#else
|
|
DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
|
|
swapChainDesc.Width = windowSize.width() * windowDpr;
|
|
swapChainDesc.Height = windowSize.height() * windowDpr;
|
|
swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
swapChainDesc.SampleDesc.Count = 1;
|
|
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
|
swapChainDesc.BufferCount = swapChainBufferCount;
|
|
swapChainDesc.Scaling = DXGI_SCALING_STRETCH;
|
|
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
|
swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED;
|
|
if (waitableSwapChainMaxLatency)
|
|
swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
|
|
|
|
ComPtr<IDXGISwapChain1> baseSwapChain;
|
|
HRESULT hr = dev->dxgi()->CreateSwapChainForComposition(commandQueue.Get(), &swapChainDesc, nullptr, &baseSwapChain);
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create swap chain for composition: 0x%x", hr);
|
|
return;
|
|
}
|
|
if (FAILED(baseSwapChain.As(&swapChain))) {
|
|
qWarning("Failed to case swap chain");
|
|
return;
|
|
}
|
|
|
|
// The winrt platform plugin returns an ISwapChainPanel* from winId().
|
|
ComPtr<ABI::Windows::UI::Xaml::Controls::ISwapChainPanel> swapChainPanel
|
|
= reinterpret_cast<ABI::Windows::UI::Xaml::Controls::ISwapChainPanel *>(window);
|
|
ComPtr<ISwapChainPanelNative> swapChainPanelNative;
|
|
if (FAILED(swapChainPanel.As(&swapChainPanelNative))) {
|
|
qWarning("Failed to cast swap chain panel to native");
|
|
return;
|
|
}
|
|
hr = QEventDispatcherWinRT::runOnXamlThread([this, &swapChainPanelNative]() {
|
|
return swapChainPanelNative->SetSwapChain(swapChain.Get());
|
|
});
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to set swap chain on panel: 0x%x", hr);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
if (waitableSwapChainMaxLatency) {
|
|
if (FAILED(swapChain->SetMaximumFrameLatency(waitableSwapChainMaxLatency)))
|
|
qWarning("Failed to set maximum frame latency to %d", waitableSwapChainMaxLatency);
|
|
swapEvent = swapChain->GetFrameLatencyWaitableObject();
|
|
}
|
|
|
|
for (int i = 0; i < frameInFlightCount; ++i) {
|
|
if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frameCommandAllocator[i])))) {
|
|
qWarning("Failed to create command allocator");
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(©CommandAllocator)))) {
|
|
qWarning("Failed to create copy command allocator");
|
|
return;
|
|
}
|
|
|
|
for (int i = 0; i < frameInFlightCount; ++i) {
|
|
if (!createCbvSrvUavHeap(i, GPU_CBVSRVUAV_DESCRIPTORS))
|
|
return;
|
|
}
|
|
|
|
cpuDescHeapManager.initialize(device);
|
|
|
|
setupDefaultRenderTargets();
|
|
|
|
if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, frameCommandAllocator[0].Get(),
|
|
nullptr, IID_PPV_ARGS(&frameCommandList)))) {
|
|
qWarning("Failed to create command list");
|
|
return;
|
|
}
|
|
// created in recording state, close it for now
|
|
frameCommandList->Close();
|
|
|
|
if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, copyCommandAllocator.Get(),
|
|
nullptr, IID_PPV_ARGS(©CommandList)))) {
|
|
qWarning("Failed to create copy command list");
|
|
return;
|
|
}
|
|
copyCommandList->Close();
|
|
|
|
frameIndex = 0;
|
|
|
|
presentFence = createCPUWaitableFence();
|
|
for (int i = 0; i < frameInFlightCount; ++i)
|
|
frameFence[i] = createCPUWaitableFence();
|
|
|
|
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&textureUploadFence)))) {
|
|
qWarning("Failed to create fence");
|
|
return;
|
|
}
|
|
|
|
psoCache.setMaxCost(MAX_CACHED_PSO);
|
|
rootSigCache.setMaxCost(MAX_CACHED_ROOTSIG);
|
|
|
|
if (!mipmapper.initialize(this))
|
|
return;
|
|
|
|
if (!devLossTest.initialize(this))
|
|
return;
|
|
|
|
currentRenderTarget = 0;
|
|
|
|
initialized = true;
|
|
}
|
|
|
|
bool QSGD3D12EnginePrivate::createCbvSrvUavHeap(int pframeIndex, int descriptorCount)
|
|
{
|
|
D3D12_DESCRIPTOR_HEAP_DESC gpuDescHeapDesc = {};
|
|
gpuDescHeapDesc.NumDescriptors = descriptorCount;
|
|
gpuDescHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
|
gpuDescHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
|
|
|
|
if (FAILED(device->CreateDescriptorHeap(&gpuDescHeapDesc, IID_PPV_ARGS(&pframeData[pframeIndex].gpuCbvSrvUavHeap)))) {
|
|
qWarning("Failed to create shader-visible CBV-SRV-UAV heap");
|
|
return false;
|
|
}
|
|
|
|
pframeData[pframeIndex].gpuCbvSrvUavHeapSize = descriptorCount;
|
|
|
|
return true;
|
|
}
|
|
|
|
DXGI_SAMPLE_DESC QSGD3D12EnginePrivate::makeSampleDesc(DXGI_FORMAT format, uint samples)
|
|
{
|
|
DXGI_SAMPLE_DESC sampleDesc;
|
|
sampleDesc.Count = 1;
|
|
sampleDesc.Quality = 0;
|
|
|
|
if (samples > 1) {
|
|
D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msaaInfo = {};
|
|
msaaInfo.Format = format;
|
|
msaaInfo.SampleCount = samples;
|
|
if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &msaaInfo, sizeof(msaaInfo)))) {
|
|
if (msaaInfo.NumQualityLevels > 0) {
|
|
sampleDesc.Count = samples;
|
|
sampleDesc.Quality = msaaInfo.NumQualityLevels - 1;
|
|
} else {
|
|
qWarning("No quality levels for multisampling with sample count %d", samples);
|
|
}
|
|
} else {
|
|
qWarning("Failed to query multisample quality levels for sample count %d", samples);
|
|
}
|
|
}
|
|
|
|
return sampleDesc;
|
|
}
|
|
|
|
ID3D12Resource *QSGD3D12EnginePrivate::createColorBuffer(D3D12_CPU_DESCRIPTOR_HANDLE viewHandle, const QSize &size,
|
|
const QVector4D &clearColor, uint samples)
|
|
{
|
|
D3D12_CLEAR_VALUE clearValue = {};
|
|
clearValue.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
clearValue.Color[0] = clearColor.x();
|
|
clearValue.Color[1] = clearColor.y();
|
|
clearValue.Color[2] = clearColor.z();
|
|
clearValue.Color[3] = clearColor.w();
|
|
|
|
D3D12_HEAP_PROPERTIES heapProp = {};
|
|
heapProp.Type = D3D12_HEAP_TYPE_DEFAULT;
|
|
|
|
D3D12_RESOURCE_DESC rtDesc = {};
|
|
rtDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
|
rtDesc.Width = size.width();
|
|
rtDesc.Height = size.height();
|
|
rtDesc.DepthOrArraySize = 1;
|
|
rtDesc.MipLevels = 1;
|
|
rtDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
rtDesc.SampleDesc = makeSampleDesc(rtDesc.Format, samples);
|
|
rtDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
|
|
|
ID3D12Resource *resource = nullptr;
|
|
if (FAILED(device->CreateCommittedResource(&heapProp, D3D12_HEAP_FLAG_NONE, &rtDesc,
|
|
D3D12_RESOURCE_STATE_RENDER_TARGET, &clearValue, IID_PPV_ARGS(&resource)))) {
|
|
qWarning("Failed to create offscreen render target of size %dx%d", size.width(), size.height());
|
|
return nullptr;
|
|
}
|
|
|
|
device->CreateRenderTargetView(resource, nullptr, viewHandle);
|
|
|
|
return resource;
|
|
}
|
|
|
|
ID3D12Resource *QSGD3D12EnginePrivate::createDepthStencil(D3D12_CPU_DESCRIPTOR_HANDLE viewHandle, const QSize &size, uint samples)
|
|
{
|
|
D3D12_CLEAR_VALUE depthClearValue = {};
|
|
depthClearValue.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
|
|
depthClearValue.DepthStencil.Depth = 1.0f;
|
|
depthClearValue.DepthStencil.Stencil = 0;
|
|
|
|
D3D12_HEAP_PROPERTIES heapProp = {};
|
|
heapProp.Type = D3D12_HEAP_TYPE_DEFAULT;
|
|
|
|
D3D12_RESOURCE_DESC bufDesc = {};
|
|
bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
|
bufDesc.Width = size.width();
|
|
bufDesc.Height = size.height();
|
|
bufDesc.DepthOrArraySize = 1;
|
|
bufDesc.MipLevels = 1;
|
|
bufDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
|
|
bufDesc.SampleDesc = makeSampleDesc(bufDesc.Format, samples);
|
|
bufDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
|
bufDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
|
|
|
|
ID3D12Resource *resource = nullptr;
|
|
if (FAILED(device->CreateCommittedResource(&heapProp, D3D12_HEAP_FLAG_NONE, &bufDesc,
|
|
D3D12_RESOURCE_STATE_DEPTH_WRITE, &depthClearValue, IID_PPV_ARGS(&resource)))) {
|
|
qWarning("Failed to create depth-stencil buffer of size %dx%d", size.width(), size.height());
|
|
return nullptr;
|
|
}
|
|
|
|
D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilDesc = {};
|
|
depthStencilDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
|
|
depthStencilDesc.ViewDimension = bufDesc.SampleDesc.Count <= 1 ? D3D12_DSV_DIMENSION_TEXTURE2D : D3D12_DSV_DIMENSION_TEXTURE2DMS;
|
|
|
|
device->CreateDepthStencilView(resource, &depthStencilDesc, viewHandle);
|
|
|
|
return resource;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::setupDefaultRenderTargets()
|
|
{
|
|
for (int i = 0; i < swapChainBufferCount; ++i) {
|
|
if (FAILED(swapChain->GetBuffer(i, IID_PPV_ARGS(&backBufferRT[i])))) {
|
|
qWarning("Failed to get buffer %d from swap chain", i);
|
|
return;
|
|
}
|
|
defaultRTV[i] = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
|
if (windowSamples == 1) {
|
|
defaultRT[i] = backBufferRT[i];
|
|
device->CreateRenderTargetView(defaultRT[i].Get(), nullptr, defaultRTV[i]);
|
|
} else {
|
|
const QSize size(windowSize.width() * windowDpr, windowSize.height() * windowDpr);
|
|
const QColor cc(Qt::white); // ### what if setClearColor? non-fatal but debug layer warns...
|
|
const QVector4D clearColor(cc.redF(), cc.greenF(), cc.blueF(), cc.alphaF());
|
|
ID3D12Resource *msaaRT = createColorBuffer(defaultRTV[i], size, clearColor, windowSamples);
|
|
if (msaaRT)
|
|
defaultRT[i].Attach(msaaRT);
|
|
}
|
|
}
|
|
|
|
defaultDSV = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
|
const QSize size(windowSize.width() * windowDpr, windowSize.height() * windowDpr);
|
|
ID3D12Resource *ds = createDepthStencil(defaultDSV, size, windowSamples);
|
|
if (ds)
|
|
defaultDS.Attach(ds);
|
|
|
|
presentFrameIndex = 0;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::setWindowSize(const QSize &size, float dpr)
|
|
{
|
|
if (!initialized || (windowSize == size && windowDpr == dpr))
|
|
return;
|
|
|
|
waitGPU();
|
|
|
|
windowSize = size;
|
|
windowDpr = dpr;
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug() << "resize" << size << dpr;
|
|
|
|
// Clear these, otherwise resizing will fail.
|
|
defaultDS = nullptr;
|
|
cpuDescHeapManager.release(defaultDSV, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
|
for (int i = 0; i < swapChainBufferCount; ++i) {
|
|
backBufferRT[i] = nullptr;
|
|
defaultRT[i] = nullptr;
|
|
cpuDescHeapManager.release(defaultRTV[i], D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
|
}
|
|
|
|
const int w = windowSize.width() * windowDpr;
|
|
const int h = windowSize.height() * windowDpr;
|
|
HRESULT hr = swapChain->ResizeBuffers(swapChainBufferCount, w, h, DXGI_FORMAT_R8G8B8A8_UNORM,
|
|
waitableSwapChainMaxLatency ? DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT : 0);
|
|
if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
|
|
deviceManager()->deviceLossDetected();
|
|
return;
|
|
} else if (FAILED(hr)) {
|
|
qWarning("Failed to resize buffers: 0x%x", hr);
|
|
return;
|
|
}
|
|
|
|
setupDefaultRenderTargets();
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::deviceLost()
|
|
{
|
|
qWarning("D3D device lost, will attempt to reinitialize");
|
|
|
|
// Release all resources. This is important because otherwise reinitialization may fail.
|
|
releaseResources();
|
|
|
|
// Now in uninitialized state (but 'window' is still valid). Will recreate
|
|
// all the resources on the next beginFrame().
|
|
}
|
|
|
|
QSGD3D12CPUWaitableFence *QSGD3D12EnginePrivate::createCPUWaitableFence() const
|
|
{
|
|
QSGD3D12CPUWaitableFence *f = new QSGD3D12CPUWaitableFence;
|
|
HRESULT hr = device->CreateFence(f->value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&f->fence));
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create fence: 0x%x", hr);
|
|
return f;
|
|
}
|
|
f->event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
|
|
return f;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::waitForGPU(QSGD3D12CPUWaitableFence *f) const
|
|
{
|
|
const UINT64 newValue = f->value.fetchAndAddAcquire(1) + 1;
|
|
commandQueue->Signal(f->fence.Get(), newValue);
|
|
if (f->fence->GetCompletedValue() < newValue) {
|
|
HRESULT hr = f->fence->SetEventOnCompletion(newValue, f->event);
|
|
if (FAILED(hr)) {
|
|
qWarning("SetEventOnCompletion failed: 0x%x", hr);
|
|
return;
|
|
}
|
|
WaitForSingleObject(f->event, INFINITE);
|
|
}
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::transitionResource(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList,
|
|
D3D12_RESOURCE_STATES before, D3D12_RESOURCE_STATES after) const
|
|
{
|
|
D3D12_RESOURCE_BARRIER barrier;
|
|
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
barrier.Transition.pResource = resource;
|
|
barrier.Transition.StateBefore = before;
|
|
barrier.Transition.StateAfter = after;
|
|
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
|
|
commandList->ResourceBarrier(1, &barrier);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::resolveMultisampledTarget(ID3D12Resource *msaa,
|
|
ID3D12Resource *resolve,
|
|
D3D12_RESOURCE_STATES resolveUsage,
|
|
ID3D12GraphicsCommandList *commandList) const
|
|
{
|
|
D3D12_RESOURCE_BARRIER barriers[2];
|
|
for (int i = 0; i < _countof(barriers); ++i) {
|
|
barriers[i].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
barriers[i].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
barriers[i].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
}
|
|
|
|
barriers[0].Transition.pResource = msaa;
|
|
barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET;
|
|
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_RESOLVE_SOURCE;
|
|
barriers[1].Transition.pResource = resolve;
|
|
barriers[1].Transition.StateBefore = resolveUsage;
|
|
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_RESOLVE_DEST;
|
|
commandList->ResourceBarrier(2, barriers);
|
|
|
|
commandList->ResolveSubresource(resolve, 0, msaa, 0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
|
|
barriers[0].Transition.pResource = msaa;
|
|
barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_RESOLVE_SOURCE;
|
|
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
|
|
barriers[1].Transition.pResource = resolve;
|
|
barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_RESOLVE_DEST;
|
|
barriers[1].Transition.StateAfter = resolveUsage;
|
|
commandList->ResourceBarrier(2, barriers);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::uavBarrier(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList) const
|
|
{
|
|
D3D12_RESOURCE_BARRIER barrier = {};
|
|
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
|
|
barrier.UAV.pResource = resource;
|
|
|
|
commandList->ResourceBarrier(1, &barrier);
|
|
}
|
|
|
|
ID3D12Resource *QSGD3D12EnginePrivate::createBuffer(int size)
|
|
{
|
|
ID3D12Resource *buf;
|
|
|
|
D3D12_HEAP_PROPERTIES uploadHeapProp = {};
|
|
uploadHeapProp.Type = D3D12_HEAP_TYPE_UPLOAD;
|
|
|
|
D3D12_RESOURCE_DESC bufDesc = {};
|
|
bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
|
bufDesc.Width = size;
|
|
bufDesc.Height = 1;
|
|
bufDesc.DepthOrArraySize = 1;
|
|
bufDesc.MipLevels = 1;
|
|
bufDesc.Format = DXGI_FORMAT_UNKNOWN;
|
|
bufDesc.SampleDesc.Count = 1;
|
|
bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
|
|
|
HRESULT hr = device->CreateCommittedResource(&uploadHeapProp, D3D12_HEAP_FLAG_NONE, &bufDesc,
|
|
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&buf));
|
|
if (FAILED(hr))
|
|
qWarning("Failed to create buffer resource: 0x%x", hr);
|
|
|
|
return buf;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::ensureBuffer(Buffer *buf)
|
|
{
|
|
Buffer::InFlightData &bfd(buf->d[currentPFrameIndex]);
|
|
// Only enlarge, never shrink
|
|
const bool newBufferNeeded = bfd.buffer ? (buf->cpuDataRef.size > bfd.resourceSize) : true;
|
|
if (newBufferNeeded) {
|
|
// Round it up and overallocate a little bit so that a subsequent
|
|
// buffer contents rebuild with a slightly larger total size does
|
|
// not lead to creating a new buffer.
|
|
const quint32 sz = alignedSize(buf->cpuDataRef.size, 4096);
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("new buffer[pf=%d] of size %d (actual data size %d)", currentPFrameIndex, sz, buf->cpuDataRef.size);
|
|
bfd.buffer.Attach(createBuffer(sz));
|
|
bfd.resourceSize = sz;
|
|
}
|
|
// Cache the actual data size in the per-in-flight-frame data as well.
|
|
bfd.dataSize = buf->cpuDataRef.size;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::updateBuffer(Buffer *buf)
|
|
{
|
|
if (buf->cpuDataRef.dirty.isEmpty())
|
|
return;
|
|
|
|
Buffer::InFlightData &bfd(buf->d[currentPFrameIndex]);
|
|
quint8 *p = nullptr;
|
|
const D3D12_RANGE readRange = { 0, 0 };
|
|
if (FAILED(bfd.buffer->Map(0, &readRange, reinterpret_cast<void **>(&p)))) {
|
|
qWarning("Map failed for buffer of size %d", buf->cpuDataRef.size);
|
|
return;
|
|
}
|
|
for (const auto &r : qAsConst(buf->cpuDataRef.dirty)) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("%p o %d s %d", buf, r.first, r.second);
|
|
memcpy(p + r.first, buf->cpuDataRef.p + r.first, r.second);
|
|
}
|
|
bfd.buffer->Unmap(0, nullptr);
|
|
buf->cpuDataRef.dirty.clear();
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::ensureDevice()
|
|
{
|
|
if (!initialized && window)
|
|
initialize(window, windowSize, windowDpr, windowSamples);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::beginFrame()
|
|
{
|
|
if (inFrame && !activeLayers)
|
|
qFatal("beginFrame called again without an endFrame, frame index was %d", frameIndex);
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug() << "***** begin frame, logical" << frameIndex << "present" << presentFrameIndex << "layer" << activeLayers;
|
|
|
|
if (inFrame && activeLayers) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("frame %d already in progress", frameIndex);
|
|
if (!currentLayerDepth) {
|
|
// There are layers and the real frame preparation starts now. Prepare for present.
|
|
beginFrameDraw();
|
|
}
|
|
return;
|
|
}
|
|
|
|
inFrame = true;
|
|
|
|
// The device may have been lost. This is the point to attempt to start
|
|
// again from scratch. Except when it is not. Operations that can happen
|
|
// out of frame (e.g. textures, render targets) may trigger reinit earlier
|
|
// than beginFrame.
|
|
ensureDevice();
|
|
|
|
// Wait for a buffer to be available for Present, if the waitable event is in use.
|
|
if (waitableSwapChainMaxLatency)
|
|
WaitForSingleObject(swapEvent, INFINITE);
|
|
|
|
// Block if needed. With 2 frames in flight frame N waits for frame N - 2, but not N - 1, to finish.
|
|
currentPFrameIndex = frameIndex % frameInFlightCount;
|
|
if (frameIndex >= frameInFlightCount) {
|
|
ID3D12Fence *fence = frameFence[currentPFrameIndex]->fence.Get();
|
|
HANDLE event = frameFence[currentPFrameIndex]->event;
|
|
// Frame fence values start from 1, hence the +1.
|
|
const quint64 inFlightFenceValue = frameIndex - frameInFlightCount + 1;
|
|
if (fence->GetCompletedValue() < inFlightFenceValue) {
|
|
fence->SetEventOnCompletion(inFlightFenceValue, event);
|
|
WaitForSingleObject(event, INFINITE);
|
|
}
|
|
frameCommandAllocator[currentPFrameIndex]->Reset();
|
|
}
|
|
|
|
PersistentFrameData &pfd(pframeData[currentPFrameIndex]);
|
|
pfd.cbvSrvUavNextFreeDescriptorIndex = 0;
|
|
|
|
for (Buffer &b : buffers) {
|
|
if (b.entryInUse())
|
|
b.d[currentPFrameIndex].dirty.clear();
|
|
}
|
|
|
|
if (frameIndex >= frameInFlightCount - 1) {
|
|
// Now sync the buffer changes from the previous, potentially still in
|
|
// flight, frames. This is done by taking the ranges dirtied in those
|
|
// frames and adding them to the global CPU-side buffer's dirty list,
|
|
// as if this frame changed those ranges. (however, dirty ranges
|
|
// inherited this way are not added to this frame's persistent
|
|
// per-frame dirty list because the next frame after this one should
|
|
// inherit this frame's genuine changes only, the rest will come from
|
|
// the earlier ones)
|
|
for (int delta = frameInFlightCount - 1; delta >= 1; --delta) {
|
|
const int prevPFrameIndex = (frameIndex - delta) % frameInFlightCount;
|
|
PersistentFrameData &prevFrameData(pframeData[prevPFrameIndex]);
|
|
for (uint id : qAsConst(prevFrameData.buffersUsedInFrame)) {
|
|
Buffer &b(buffers[id - 1]);
|
|
if (b.d[currentPFrameIndex].buffer && b.d[currentPFrameIndex].dataSize == b.cpuDataRef.size) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug() << "frame" << frameIndex << "takes dirty" << b.d[prevPFrameIndex].dirty
|
|
<< "from frame" << frameIndex - delta << "for buffer" << id;
|
|
for (const auto &range : qAsConst(b.d[prevPFrameIndex].dirty))
|
|
addDirtyRange(&b.cpuDataRef.dirty, range.first, range.second, b.cpuDataRef.size);
|
|
} else {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug() << "frame" << frameIndex << "makes all dirty from frame" << frameIndex - delta
|
|
<< "for buffer" << id;
|
|
addDirtyRange(&b.cpuDataRef.dirty, 0, b.cpuDataRef.size, b.cpuDataRef.size);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (frameIndex >= frameInFlightCount) {
|
|
// Do some texture upload bookkeeping.
|
|
const quint64 finishedFrameIndex = frameIndex - frameInFlightCount; // we know since we just blocked for this
|
|
// pfd conveniently refers to the same slot that was used by that frame
|
|
if (!pfd.pendingTextureUploads.isEmpty()) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("Removing texture upload data for frame %d", finishedFrameIndex);
|
|
for (uint id : qAsConst(pfd.pendingTextureUploads)) {
|
|
const int idx = id - 1;
|
|
Texture &t(textures[idx]);
|
|
// fenceValue is 0 when the previous frame cleared it, skip in
|
|
// this case. Skip also when fenceValue > the value it was when
|
|
// adding the last GPU wait - this is the case when more
|
|
// uploads were queued for the same texture in the meantime.
|
|
if (t.fenceValue && t.fenceValue == t.lastWaitFenceValue) {
|
|
t.fenceValue = 0;
|
|
t.lastWaitFenceValue = 0;
|
|
t.stagingBuffers.clear();
|
|
t.stagingHeaps.clear();
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("Cleaned staging data for texture %u", id);
|
|
}
|
|
}
|
|
pfd.pendingTextureUploads.clear();
|
|
if (!pfd.pendingTextureMipMap.isEmpty()) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug() << "cleaning mipmap generation data for " << pfd.pendingTextureMipMap;
|
|
// no special cleanup is needed as mipmap generation uses the frame's resources
|
|
pfd.pendingTextureMipMap.clear();
|
|
}
|
|
bool hasPending = false;
|
|
for (int delta = 1; delta < frameInFlightCount; ++delta) {
|
|
const PersistentFrameData &prevFrameData(pframeData[(frameIndex - delta) % frameInFlightCount]);
|
|
if (!prevFrameData.pendingTextureUploads.isEmpty()) {
|
|
hasPending = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!hasPending) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("no more pending textures");
|
|
copyCommandAllocator->Reset();
|
|
}
|
|
}
|
|
|
|
// Do the deferred deletes.
|
|
if (!pfd.deleteQueue.isEmpty()) {
|
|
for (PersistentFrameData::DeleteQueueEntry &e : pfd.deleteQueue) {
|
|
e.res = nullptr;
|
|
e.descHeap = nullptr;
|
|
if (e.cpuDescriptorPtr) {
|
|
D3D12_CPU_DESCRIPTOR_HANDLE h = { e.cpuDescriptorPtr };
|
|
cpuDescHeapManager.release(h, e.descHeapType);
|
|
}
|
|
}
|
|
pfd.deleteQueue.clear();
|
|
}
|
|
// Deferred deletes issued outside a begin-endFrame go to the next
|
|
// frame's out-of-frame delete queue as these cannot be executed in the
|
|
// next beginFrame, only in next + frameInFlightCount. Move to the
|
|
// normal queue if this is the next beginFrame.
|
|
if (!pfd.outOfFrameDeleteQueue.isEmpty()) {
|
|
pfd.deleteQueue = pfd.outOfFrameDeleteQueue;
|
|
pfd.outOfFrameDeleteQueue.clear();
|
|
}
|
|
|
|
// Mark released texture, buffer, etc. slots free.
|
|
if (!pfd.pendingReleases.isEmpty()) {
|
|
for (const auto &pr : qAsConst(pfd.pendingReleases)) {
|
|
Q_ASSERT(pr.id);
|
|
if (pr.type == PersistentFrameData::PendingRelease::TypeTexture) {
|
|
Texture &t(textures[pr.id - 1]);
|
|
Q_ASSERT(t.entryInUse());
|
|
t.flags &= ~RenderTarget::EntryInUse; // createTexture() can now reuse this entry
|
|
t.texture = nullptr;
|
|
} else if (pr.type == PersistentFrameData::PendingRelease::TypeBuffer) {
|
|
Buffer &b(buffers[pr.id - 1]);
|
|
Q_ASSERT(b.entryInUse());
|
|
b.flags &= ~Buffer::EntryInUse;
|
|
for (int i = 0; i < frameInFlightCount; ++i)
|
|
b.d[i].buffer = nullptr;
|
|
} else {
|
|
qFatal("Corrupt pending release list, type %d", pr.type);
|
|
}
|
|
}
|
|
pfd.pendingReleases.clear();
|
|
}
|
|
if (!pfd.outOfFramePendingReleases.isEmpty()) {
|
|
pfd.pendingReleases = pfd.outOfFramePendingReleases;
|
|
pfd.outOfFramePendingReleases.clear();
|
|
}
|
|
}
|
|
|
|
pfd.buffersUsedInFrame.clear();
|
|
|
|
beginDrawCalls();
|
|
|
|
// Prepare for present if this is a frame without layers.
|
|
if (!activeLayers)
|
|
beginFrameDraw();
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::beginDrawCalls()
|
|
{
|
|
frameCommandList->Reset(frameCommandAllocator[frameIndex % frameInFlightCount].Get(), nullptr);
|
|
commandList = frameCommandList.Get();
|
|
invalidateCachedFrameState();
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::invalidateCachedFrameState()
|
|
{
|
|
tframeData.drawingMode = QSGGeometry::DrawingMode(-1);
|
|
tframeData.currentIndexBuffer = 0;
|
|
tframeData.activeTextureCount = 0;
|
|
tframeData.drawCount = 0;
|
|
tframeData.lastPso = nullptr;
|
|
tframeData.lastRootSig = nullptr;
|
|
tframeData.descHeapSet = false;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::restoreFrameState(bool minimal)
|
|
{
|
|
queueSetRenderTarget(currentRenderTarget);
|
|
if (!minimal) {
|
|
queueViewport(tframeData.viewport);
|
|
queueScissor(tframeData.scissor);
|
|
queueSetBlendFactor(tframeData.blendFactor);
|
|
queueSetStencilRef(tframeData.stencilRef);
|
|
}
|
|
finalizePipeline(tframeData.pipelineState);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::beginFrameDraw()
|
|
{
|
|
if (windowSamples == 1)
|
|
transitionResource(defaultRT[presentFrameIndex % swapChainBufferCount].Get(), commandList,
|
|
D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::endFrame()
|
|
{
|
|
if (!inFrame)
|
|
qFatal("endFrame called without beginFrame, frame index %d", frameIndex);
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("***** end frame");
|
|
|
|
endDrawCalls(true);
|
|
|
|
commandQueue->Signal(frameFence[frameIndex % frameInFlightCount]->fence.Get(), frameIndex + 1);
|
|
++frameIndex;
|
|
|
|
inFrame = false;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::endDrawCalls(bool lastInFrame)
|
|
{
|
|
PersistentFrameData &pfd(pframeData[currentPFrameIndex]);
|
|
|
|
// Now is the time to sync all the changed areas in the buffers.
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug() << "buffers used in drawcall set" << pfd.buffersUsedInDrawCallSet;
|
|
for (uint id : qAsConst(pfd.buffersUsedInDrawCallSet))
|
|
updateBuffer(&buffers[id - 1]);
|
|
|
|
pfd.buffersUsedInFrame += pfd.buffersUsedInDrawCallSet;
|
|
pfd.buffersUsedInDrawCallSet.clear();
|
|
|
|
// Add a wait on the 3D queue for the relevant texture uploads on the copy queue.
|
|
if (!pfd.pendingTextureUploads.isEmpty()) {
|
|
quint64 topFenceValue = 0;
|
|
for (uint id : qAsConst(pfd.pendingTextureUploads)) {
|
|
const int idx = id - 1;
|
|
Texture &t(textures[idx]);
|
|
Q_ASSERT(t.fenceValue);
|
|
// skip if already added a Wait in the previous frame
|
|
if (t.lastWaitFenceValue == t.fenceValue)
|
|
continue;
|
|
t.lastWaitFenceValue = t.fenceValue;
|
|
if (t.fenceValue > topFenceValue)
|
|
topFenceValue = t.fenceValue;
|
|
if (t.mipmap())
|
|
pfd.pendingTextureMipMap.insert(id);
|
|
}
|
|
if (topFenceValue) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("added wait for texture fence %llu", topFenceValue);
|
|
commandQueue->Wait(textureUploadFence.Get(), topFenceValue);
|
|
// Generate mipmaps after the wait, when necessary.
|
|
if (!pfd.pendingTextureMipMap.isEmpty()) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug() << "starting mipmap generation for" << pfd.pendingTextureMipMap;
|
|
for (uint id : qAsConst(pfd.pendingTextureMipMap))
|
|
mipmapper.queueGenerate(textures[id - 1]);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (lastInFrame) {
|
|
// Resolve and transition the backbuffer for present, if needed.
|
|
const int idx = presentFrameIndex % swapChainBufferCount;
|
|
if (windowSamples == 1) {
|
|
transitionResource(defaultRT[idx].Get(), commandList,
|
|
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT);
|
|
} else {
|
|
if (Q_UNLIKELY(debug_render())) {
|
|
const D3D12_RESOURCE_DESC desc = defaultRT[idx]->GetDesc();
|
|
qDebug("added resolve for multisampled render target (count %d, quality %d)",
|
|
desc.SampleDesc.Count, desc.SampleDesc.Quality);
|
|
}
|
|
resolveMultisampledTarget(defaultRT[idx].Get(), backBufferRT[idx].Get(),
|
|
D3D12_RESOURCE_STATE_PRESENT, commandList);
|
|
}
|
|
|
|
if (activeLayers) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("this frame had %d layers", activeLayers);
|
|
activeLayers = 0;
|
|
}
|
|
}
|
|
|
|
// Go!
|
|
HRESULT hr = frameCommandList->Close();
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to close command list: 0x%x", hr);
|
|
if (hr == E_INVALIDARG)
|
|
qWarning("Invalid arguments. Some of the commands in the list is invalid in some way.");
|
|
}
|
|
|
|
ID3D12CommandList *commandLists[] = { frameCommandList.Get() };
|
|
commandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);
|
|
|
|
commandList = nullptr;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::beginLayer()
|
|
{
|
|
if (inFrame && !activeLayers)
|
|
qFatal("Layer rendering cannot be started while a frame is active");
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("===== beginLayer active %d depth %d (inFrame=%d)", activeLayers, currentLayerDepth, inFrame);
|
|
|
|
++activeLayers;
|
|
++currentLayerDepth;
|
|
|
|
// Do an early beginFrame. With multiple layers this results in
|
|
// beginLayer - beginFrame - endLayer - beginLayer - beginFrame - endLayer - ... - (*) beginFrame - endFrame
|
|
// where (*) denotes the start of the preparation of the actual, non-layer frame.
|
|
|
|
if (activeLayers == 1)
|
|
beginFrame();
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::endLayer()
|
|
{
|
|
if (!inFrame || !activeLayers || !currentLayerDepth)
|
|
qFatal("Mismatched endLayer");
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("===== endLayer active %d depth %d", activeLayers, currentLayerDepth);
|
|
|
|
--currentLayerDepth;
|
|
|
|
// Do not touch activeLayers. It remains valid until endFrame.
|
|
}
|
|
|
|
// Root signature:
|
|
// [0] CBV - always present
|
|
// [1] table with 1 SRV per texture (optional)
|
|
// one static sampler per texture (optional)
|
|
//
|
|
// SRVs can be created freely via QSGD3D12CPUDescriptorHeapManager and stored
|
|
// in QSGD3D12TextureView. The engine will copy them onto a dedicated,
|
|
// shader-visible CBV-SRV-UAV heap in the correct order.
|
|
|
|
void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipelineState)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
tframeData.pipelineState = pipelineState;
|
|
|
|
RootSigCacheEntry *cachedRootSig = rootSigCache[pipelineState.shaders.rootSig];
|
|
if (!cachedRootSig) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("NEW ROOTSIG");
|
|
|
|
cachedRootSig = new RootSigCacheEntry;
|
|
|
|
D3D12_ROOT_PARAMETER rootParams[4];
|
|
int rootParamCount = 0;
|
|
|
|
rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
|
|
rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
rootParams[0].Descriptor.ShaderRegister = 0; // b0
|
|
rootParams[0].Descriptor.RegisterSpace = 0;
|
|
++rootParamCount;
|
|
|
|
if (pipelineState.shaders.rootSig.textureViewCount > 0) {
|
|
rootParams[rootParamCount].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
rootParams[rootParamCount].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
|
rootParams[rootParamCount].DescriptorTable.NumDescriptorRanges = 1;
|
|
D3D12_DESCRIPTOR_RANGE descRange;
|
|
descRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
|
descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViewCount;
|
|
descRange.BaseShaderRegister = 0; // t0, t1, ...
|
|
descRange.RegisterSpace = 0;
|
|
descRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
|
|
rootParams[rootParamCount].DescriptorTable.pDescriptorRanges = &descRange;
|
|
++rootParamCount;
|
|
}
|
|
|
|
Q_ASSERT(rootParamCount <= _countof(rootParams));
|
|
D3D12_ROOT_SIGNATURE_DESC desc;
|
|
desc.NumParameters = rootParamCount;
|
|
desc.pParameters = rootParams;
|
|
// Mixing up samplers and resource views in QSGD3D12TextureView means
|
|
// that the number of static samplers has to match the number of
|
|
// textures. This is not really ideal in general but works for Quick's use cases.
|
|
// The shaders can still choose to declare and use fewer samplers, if they want to.
|
|
desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViewCount;
|
|
D3D12_STATIC_SAMPLER_DESC staticSamplers[8];
|
|
int sdIdx = 0;
|
|
Q_ASSERT(pipelineState.shaders.rootSig.textureViewCount <= _countof(staticSamplers));
|
|
for (int i = 0; i < pipelineState.shaders.rootSig.textureViewCount; ++i) {
|
|
const QSGD3D12TextureView &tv(pipelineState.shaders.rootSig.textureViews[i]);
|
|
D3D12_STATIC_SAMPLER_DESC sd = {};
|
|
sd.Filter = D3D12_FILTER(tv.filter);
|
|
sd.AddressU = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeHoriz);
|
|
sd.AddressV = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeVert);
|
|
sd.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
|
|
sd.MinLOD = 0.0f;
|
|
sd.MaxLOD = D3D12_FLOAT32_MAX;
|
|
sd.ShaderRegister = sdIdx; // t0, t1, ...
|
|
sd.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
|
staticSamplers[sdIdx++] = sd;
|
|
}
|
|
desc.pStaticSamplers = staticSamplers;
|
|
desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
|
|
|
|
ComPtr<ID3DBlob> signature;
|
|
ComPtr<ID3DBlob> error;
|
|
if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) {
|
|
qWarning("Failed to serialize root signature");
|
|
return;
|
|
}
|
|
if (FAILED(device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(),
|
|
IID_PPV_ARGS(&cachedRootSig->rootSig)))) {
|
|
qWarning("Failed to create root signature");
|
|
return;
|
|
}
|
|
|
|
rootSigCache.insert(pipelineState.shaders.rootSig, cachedRootSig);
|
|
}
|
|
|
|
PSOCacheEntry *cachedPso = psoCache[pipelineState];
|
|
if (!cachedPso) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("NEW PSO");
|
|
|
|
cachedPso = new PSOCacheEntry;
|
|
|
|
D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
|
|
|
|
D3D12_INPUT_ELEMENT_DESC inputElements[QSGD3D12_MAX_INPUT_ELEMENTS];
|
|
int ieIdx = 0;
|
|
for (int i = 0; i < pipelineState.inputElementCount; ++i) {
|
|
const QSGD3D12InputElement &ie(pipelineState.inputElements[i]);
|
|
D3D12_INPUT_ELEMENT_DESC ieDesc = {};
|
|
ieDesc.SemanticName = ie.semanticName;
|
|
ieDesc.SemanticIndex = ie.semanticIndex;
|
|
ieDesc.Format = DXGI_FORMAT(ie.format);
|
|
ieDesc.InputSlot = ie.slot;
|
|
ieDesc.AlignedByteOffset = ie.offset;
|
|
ieDesc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("input [%d]: %s %d 0x%x %d", ieIdx, ie.semanticName, ie.offset, ie.format, ie.slot);
|
|
inputElements[ieIdx++] = ieDesc;
|
|
}
|
|
|
|
psoDesc.InputLayout = { inputElements, UINT(ieIdx) };
|
|
|
|
psoDesc.pRootSignature = cachedRootSig->rootSig.Get();
|
|
|
|
D3D12_SHADER_BYTECODE vshader;
|
|
vshader.pShaderBytecode = pipelineState.shaders.vs;
|
|
vshader.BytecodeLength = pipelineState.shaders.vsSize;
|
|
D3D12_SHADER_BYTECODE pshader;
|
|
pshader.pShaderBytecode = pipelineState.shaders.ps;
|
|
pshader.BytecodeLength = pipelineState.shaders.psSize;
|
|
|
|
psoDesc.VS = vshader;
|
|
psoDesc.PS = pshader;
|
|
|
|
D3D12_RASTERIZER_DESC rastDesc = {};
|
|
rastDesc.FillMode = D3D12_FILL_MODE_SOLID;
|
|
rastDesc.CullMode = D3D12_CULL_MODE(pipelineState.cullMode);
|
|
rastDesc.FrontCounterClockwise = pipelineState.frontCCW;
|
|
rastDesc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
|
|
rastDesc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
|
|
rastDesc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
|
|
rastDesc.DepthClipEnable = TRUE;
|
|
|
|
psoDesc.RasterizerState = rastDesc;
|
|
|
|
D3D12_BLEND_DESC blendDesc = {};
|
|
if (pipelineState.blend == QSGD3D12PipelineState::BlendNone) {
|
|
D3D12_RENDER_TARGET_BLEND_DESC noBlendDesc = {};
|
|
noBlendDesc.RenderTargetWriteMask = pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0;
|
|
blendDesc.RenderTarget[0] = noBlendDesc;
|
|
} else if (pipelineState.blend == QSGD3D12PipelineState::BlendPremul) {
|
|
const D3D12_RENDER_TARGET_BLEND_DESC premulBlendDesc = {
|
|
TRUE, FALSE,
|
|
D3D12_BLEND_ONE, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
|
|
D3D12_BLEND_ONE, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
|
|
D3D12_LOGIC_OP_NOOP,
|
|
UINT8(pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0)
|
|
};
|
|
blendDesc.RenderTarget[0] = premulBlendDesc;
|
|
} else if (pipelineState.blend == QSGD3D12PipelineState::BlendColor) {
|
|
const D3D12_RENDER_TARGET_BLEND_DESC colorBlendDesc = {
|
|
TRUE, FALSE,
|
|
D3D12_BLEND_BLEND_FACTOR, D3D12_BLEND_INV_SRC_COLOR, D3D12_BLEND_OP_ADD,
|
|
D3D12_BLEND_BLEND_FACTOR, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
|
|
D3D12_LOGIC_OP_NOOP,
|
|
UINT8(pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0)
|
|
};
|
|
blendDesc.RenderTarget[0] = colorBlendDesc;
|
|
}
|
|
psoDesc.BlendState = blendDesc;
|
|
|
|
psoDesc.DepthStencilState.DepthEnable = pipelineState.depthEnable;
|
|
psoDesc.DepthStencilState.DepthWriteMask = pipelineState.depthWrite ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
|
|
psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC(pipelineState.depthFunc);
|
|
|
|
psoDesc.DepthStencilState.StencilEnable = pipelineState.stencilEnable;
|
|
psoDesc.DepthStencilState.StencilReadMask = psoDesc.DepthStencilState.StencilWriteMask = 0xFF;
|
|
D3D12_DEPTH_STENCILOP_DESC stencilOpDesc = {
|
|
D3D12_STENCIL_OP(pipelineState.stencilFailOp),
|
|
D3D12_STENCIL_OP(pipelineState.stencilDepthFailOp),
|
|
D3D12_STENCIL_OP(pipelineState.stencilPassOp),
|
|
D3D12_COMPARISON_FUNC(pipelineState.stencilFunc)
|
|
};
|
|
psoDesc.DepthStencilState.FrontFace = psoDesc.DepthStencilState.BackFace = stencilOpDesc;
|
|
|
|
psoDesc.SampleMask = UINT_MAX;
|
|
psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE(pipelineState.topologyType);
|
|
psoDesc.NumRenderTargets = 1;
|
|
psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
psoDesc.DSVFormat = DXGI_FORMAT_D24_UNORM_S8_UINT;
|
|
psoDesc.SampleDesc = defaultRT[0]->GetDesc().SampleDesc;
|
|
|
|
HRESULT hr = device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&cachedPso->pso));
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create graphics pipeline state");
|
|
return;
|
|
}
|
|
|
|
psoCache.insert(pipelineState, cachedPso);
|
|
}
|
|
|
|
if (cachedPso->pso.Get() != tframeData.lastPso) {
|
|
tframeData.lastPso = cachedPso->pso.Get();
|
|
commandList->SetPipelineState(tframeData.lastPso);
|
|
}
|
|
|
|
if (cachedRootSig->rootSig.Get() != tframeData.lastRootSig) {
|
|
tframeData.lastRootSig = cachedRootSig->rootSig.Get();
|
|
commandList->SetGraphicsRootSignature(tframeData.lastRootSig);
|
|
}
|
|
|
|
if (pipelineState.shaders.rootSig.textureViewCount > 0)
|
|
setDescriptorHeaps();
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::setDescriptorHeaps(bool force)
|
|
{
|
|
if (force || !tframeData.descHeapSet) {
|
|
tframeData.descHeapSet = true;
|
|
ID3D12DescriptorHeap *heaps[] = { pframeData[currentPFrameIndex].gpuCbvSrvUavHeap.Get() };
|
|
commandList->SetDescriptorHeaps(_countof(heaps), heaps);
|
|
}
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueViewport(const QRect &rect)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
tframeData.viewport = rect;
|
|
const D3D12_VIEWPORT viewport = { float(rect.x()), float(rect.y()), float(rect.width()), float(rect.height()), 0, 1 };
|
|
commandList->RSSetViewports(1, &viewport);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueScissor(const QRect &rect)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
tframeData.scissor = rect;
|
|
const D3D12_RECT scissorRect = { rect.x(), rect.y(), rect.x() + rect.width(), rect.y() + rect.height() };
|
|
commandList->RSSetScissorRects(1, &scissorRect);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueSetRenderTarget(uint id)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle;
|
|
D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle;
|
|
|
|
if (!id) {
|
|
rtvHandle = defaultRTV[presentFrameIndex % swapChainBufferCount];
|
|
dsvHandle = defaultDSV;
|
|
} else {
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < renderTargets.count() && renderTargets[idx].entryInUse());
|
|
RenderTarget &rt(renderTargets[idx]);
|
|
rtvHandle = rt.rtv;
|
|
dsvHandle = rt.dsv;
|
|
if (!(rt.flags & RenderTarget::NeedsReadBarrier)) {
|
|
rt.flags |= RenderTarget::NeedsReadBarrier;
|
|
if (!(rt.flags & RenderTarget::Multisample))
|
|
transitionResource(rt.color.Get(), commandList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
|
|
D3D12_RESOURCE_STATE_RENDER_TARGET);
|
|
}
|
|
}
|
|
|
|
commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, &dsvHandle);
|
|
|
|
currentRenderTarget = id;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueClearRenderTarget(const QColor &color)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
const float clearColor[] = { float(color.redF()), float(color.blueF()), float(color.greenF()), float(color.alphaF()) };
|
|
D3D12_CPU_DESCRIPTOR_HANDLE rtv = !currentRenderTarget
|
|
? defaultRTV[presentFrameIndex % swapChainBufferCount]
|
|
: renderTargets[currentRenderTarget - 1].rtv;
|
|
commandList->ClearRenderTargetView(rtv, clearColor, 0, nullptr);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueClearDepthStencil(float depthValue, quint8 stencilValue, QSGD3D12Engine::ClearFlags which)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
D3D12_CPU_DESCRIPTOR_HANDLE dsv = !currentRenderTarget
|
|
? defaultDSV
|
|
: renderTargets[currentRenderTarget - 1].dsv;
|
|
commandList->ClearDepthStencilView(dsv, D3D12_CLEAR_FLAGS(int(which)), depthValue, stencilValue, 0, nullptr);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueSetBlendFactor(const QVector4D &factor)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
tframeData.blendFactor = factor;
|
|
const float f[4] = { factor.x(), factor.y(), factor.z(), factor.w() };
|
|
commandList->OMSetBlendFactor(f);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueSetStencilRef(quint32 ref)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
tframeData.stencilRef = ref;
|
|
commandList->OMSetStencilRef(ref);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams ¶ms)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
const bool skip = tframeData.scissor.isEmpty();
|
|
|
|
PersistentFrameData &pfd(pframeData[currentPFrameIndex]);
|
|
|
|
pfd.buffersUsedInDrawCallSet.insert(params.vertexBuf);
|
|
const int vertexBufIdx = params.vertexBuf - 1;
|
|
Q_ASSERT(params.vertexBuf && vertexBufIdx < buffers.count() && buffers[vertexBufIdx].entryInUse());
|
|
pfd.buffersUsedInDrawCallSet.insert(params.constantBuf);
|
|
const int constantBufIdx = params.constantBuf - 1;
|
|
Q_ASSERT(params.constantBuf && constantBufIdx < buffers.count() && buffers[constantBufIdx].entryInUse());
|
|
int indexBufIdx = -1;
|
|
if (params.indexBuf) {
|
|
pfd.buffersUsedInDrawCallSet.insert(params.indexBuf);
|
|
indexBufIdx = params.indexBuf - 1;
|
|
Q_ASSERT(indexBufIdx < buffers.count() && buffers[indexBufIdx].entryInUse());
|
|
}
|
|
|
|
// Ensure buffers are created but do not copy the data here, leave that to endDrawCalls().
|
|
ensureBuffer(&buffers[vertexBufIdx]);
|
|
ensureBuffer(&buffers[constantBufIdx]);
|
|
if (indexBufIdx >= 0)
|
|
ensureBuffer(&buffers[indexBufIdx]);
|
|
|
|
// Set the CBV.
|
|
if (!skip && params.cboOffset >= 0) {
|
|
ID3D12Resource *cbuf = buffers[constantBufIdx].d[currentPFrameIndex].buffer.Get();
|
|
if (cbuf)
|
|
commandList->SetGraphicsRootConstantBufferView(0, cbuf->GetGPUVirtualAddress() + params.cboOffset);
|
|
}
|
|
|
|
// Set up vertex and index buffers.
|
|
ID3D12Resource *vbuf = buffers[vertexBufIdx].d[currentPFrameIndex].buffer.Get();
|
|
ID3D12Resource *ibuf = indexBufIdx >= 0 && params.startIndexIndex >= 0
|
|
? buffers[indexBufIdx].d[currentPFrameIndex].buffer.Get() : nullptr;
|
|
|
|
if (!skip && params.mode != tframeData.drawingMode) {
|
|
D3D_PRIMITIVE_TOPOLOGY topology;
|
|
switch (params.mode) {
|
|
case QSGGeometry::DrawPoints:
|
|
topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
|
|
break;
|
|
case QSGGeometry::DrawLines:
|
|
topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
|
|
break;
|
|
case QSGGeometry::DrawLineStrip:
|
|
topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
|
|
break;
|
|
case QSGGeometry::DrawTriangles:
|
|
topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
|
break;
|
|
case QSGGeometry::DrawTriangleStrip:
|
|
topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
|
|
break;
|
|
default:
|
|
qFatal("Unsupported drawing mode 0x%x", params.mode);
|
|
break;
|
|
}
|
|
commandList->IASetPrimitiveTopology(topology);
|
|
tframeData.drawingMode = params.mode;
|
|
}
|
|
|
|
if (!skip) {
|
|
D3D12_VERTEX_BUFFER_VIEW vbv;
|
|
vbv.BufferLocation = vbuf->GetGPUVirtualAddress() + params.vboOffset;
|
|
vbv.SizeInBytes = params.vboSize;
|
|
vbv.StrideInBytes = params.vboStride;
|
|
|
|
// must be set after the topology
|
|
commandList->IASetVertexBuffers(0, 1, &vbv);
|
|
}
|
|
|
|
if (!skip && params.startIndexIndex >= 0 && ibuf && tframeData.currentIndexBuffer != params.indexBuf) {
|
|
tframeData.currentIndexBuffer = params.indexBuf;
|
|
D3D12_INDEX_BUFFER_VIEW ibv;
|
|
ibv.BufferLocation = ibuf->GetGPUVirtualAddress();
|
|
ibv.SizeInBytes = buffers[indexBufIdx].cpuDataRef.size;
|
|
ibv.Format = DXGI_FORMAT(params.indexFormat);
|
|
commandList->IASetIndexBuffer(&ibv);
|
|
}
|
|
|
|
// Copy the SRVs to a drawcall-dedicated area of the shader-visible descriptor heap.
|
|
Q_ASSERT(tframeData.activeTextureCount == tframeData.pipelineState.shaders.rootSig.textureViewCount);
|
|
if (tframeData.activeTextureCount > 0) {
|
|
if (!skip) {
|
|
ensureGPUDescriptorHeap(tframeData.activeTextureCount);
|
|
const uint stride = cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
D3D12_CPU_DESCRIPTOR_HANDLE dst = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart();
|
|
dst.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
|
|
for (int i = 0; i < tframeData.activeTextureCount; ++i) {
|
|
const TransientFrameData::ActiveTexture &t(tframeData.activeTextures[i]);
|
|
Q_ASSERT(t.id);
|
|
const int idx = t.id - 1;
|
|
const bool isTex = t.type == TransientFrameData::ActiveTexture::TypeTexture;
|
|
device->CopyDescriptorsSimple(1, dst, isTex ? textures[idx].srv : renderTargets[idx].srv,
|
|
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
dst.ptr += stride;
|
|
}
|
|
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpuAddr = pfd.gpuCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart();
|
|
gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
|
|
commandList->SetGraphicsRootDescriptorTable(1, gpuAddr);
|
|
|
|
pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextureCount;
|
|
}
|
|
tframeData.activeTextureCount = 0;
|
|
}
|
|
|
|
// Add the draw call.
|
|
if (!skip) {
|
|
++tframeData.drawCount;
|
|
if (params.startIndexIndex >= 0)
|
|
commandList->DrawIndexedInstanced(params.count, 1, params.startIndexIndex, 0, 0);
|
|
else
|
|
commandList->DrawInstanced(params.count, 1, 0, 0);
|
|
}
|
|
|
|
if (tframeData.drawCount == MAX_DRAW_CALLS_PER_LIST) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("Limit of %d draw calls reached, executing command list", MAX_DRAW_CALLS_PER_LIST);
|
|
// submit the command list
|
|
endDrawCalls();
|
|
// start a new one
|
|
beginDrawCalls();
|
|
// prepare for the upcoming drawcalls
|
|
restoreFrameState();
|
|
}
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::ensureGPUDescriptorHeap(int cbvSrvUavDescriptorCount)
|
|
{
|
|
PersistentFrameData &pfd(pframeData[currentPFrameIndex]);
|
|
int newSize = pfd.gpuCbvSrvUavHeapSize;
|
|
while (pfd.cbvSrvUavNextFreeDescriptorIndex + cbvSrvUavDescriptorCount > newSize)
|
|
newSize *= 2;
|
|
if (newSize != pfd.gpuCbvSrvUavHeapSize) {
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("Out of space for SRVs, creating new CBV-SRV-UAV descriptor heap with descriptor count %d", newSize);
|
|
deferredDelete(pfd.gpuCbvSrvUavHeap);
|
|
createCbvSrvUavHeap(currentPFrameIndex, newSize);
|
|
setDescriptorHeaps(true);
|
|
pfd.cbvSrvUavNextFreeDescriptorIndex = 0;
|
|
}
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::present()
|
|
{
|
|
if (!initialized)
|
|
return;
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("--- present with vsync ---");
|
|
|
|
// This call will not block the CPU unless at least 3 buffers are queued,
|
|
// unless the waitable frame latency event is enabled. Then the latency of
|
|
// 3 is changed to whatever value desired, and blocking happens in
|
|
// beginFrame. If none of these hold, the fence-based wait in beginFrame
|
|
// throttles. Vsync (interval 1) is always enabled.
|
|
HRESULT hr = swapChain->Present(1, 0);
|
|
if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
|
|
deviceManager()->deviceLossDetected();
|
|
return;
|
|
} else if (FAILED(hr)) {
|
|
qWarning("Present failed: 0x%x", hr);
|
|
return;
|
|
}
|
|
|
|
++presentFrameIndex;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::waitGPU()
|
|
{
|
|
if (!initialized)
|
|
return;
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("--- blocking wait for GPU ---");
|
|
|
|
waitForGPU(presentFence);
|
|
}
|
|
|
|
template<class T> uint newId(T *tbl)
|
|
{
|
|
uint id = 0;
|
|
for (int i = 0; i < tbl->count(); ++i) {
|
|
if (!(*tbl)[i].entryInUse()) {
|
|
id = i + 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!id) {
|
|
tbl->resize(tbl->size() + 1);
|
|
id = tbl->count();
|
|
}
|
|
|
|
(*tbl)[id - 1].flags = 0x01; // reset flags and set EntryInUse
|
|
|
|
return id;
|
|
}
|
|
|
|
template<class T> void syncEntryFlags(T *e, int flag, bool b)
|
|
{
|
|
if (b)
|
|
e->flags |= flag;
|
|
else
|
|
e->flags &= ~flag;
|
|
}
|
|
|
|
uint QSGD3D12EnginePrivate::genBuffer()
|
|
{
|
|
return newId(&buffers);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::releaseBuffer(uint id)
|
|
{
|
|
if (!id || !initialized)
|
|
return;
|
|
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < buffers.count());
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("releasing buffer %u", id);
|
|
|
|
Buffer &b(buffers[idx]);
|
|
if (!b.entryInUse())
|
|
return;
|
|
|
|
// Do not null out and do not mark the entry reusable yet.
|
|
// Do that only when the frames potentially in flight have finished for sure.
|
|
|
|
for (int i = 0; i < frameInFlightCount; ++i) {
|
|
if (b.d[i].buffer)
|
|
deferredDelete(b.d[i].buffer);
|
|
}
|
|
|
|
QSet<PersistentFrameData::PendingRelease> *pendingReleasesSet = inFrame
|
|
? &pframeData[currentPFrameIndex].pendingReleases
|
|
: &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFramePendingReleases;
|
|
|
|
pendingReleasesSet->insert(PersistentFrameData::PendingRelease(PersistentFrameData::PendingRelease::TypeBuffer, id));
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::resetBuffer(uint id, const quint8 *data, int size)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
Q_ASSERT(id);
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < buffers.count() && buffers[idx].entryInUse());
|
|
Buffer &b(buffers[idx]);
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("reset buffer %u, size %d", id, size);
|
|
|
|
b.cpuDataRef.p = data;
|
|
b.cpuDataRef.size = size;
|
|
|
|
b.cpuDataRef.dirty.clear();
|
|
b.d[currentPFrameIndex].dirty.clear();
|
|
|
|
if (size > 0) {
|
|
const QPair<int, int> range = qMakePair(0, size);
|
|
b.cpuDataRef.dirty.append(range);
|
|
b.d[currentPFrameIndex].dirty.append(range);
|
|
}
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::addDirtyRange(DirtyList *dirty, int offset, int size, int bufferSize)
|
|
{
|
|
// Bail out when the dirty list already spans the entire buffer.
|
|
if (!dirty->isEmpty()) {
|
|
if (dirty->at(0).first == 0 && dirty->at(0).second == bufferSize)
|
|
return;
|
|
}
|
|
|
|
const QPair<int, int> range = qMakePair(offset, size);
|
|
if (!dirty->contains(range))
|
|
dirty->append(range);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::markBufferDirty(uint id, int offset, int size)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
Q_ASSERT(id);
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < buffers.count() && buffers[idx].entryInUse());
|
|
Buffer &b(buffers[idx]);
|
|
|
|
addDirtyRange(&b.cpuDataRef.dirty, offset, size, b.cpuDataRef.size);
|
|
addDirtyRange(&b.d[currentPFrameIndex].dirty, offset, size, b.cpuDataRef.size);
|
|
}
|
|
|
|
uint QSGD3D12EnginePrivate::genTexture()
|
|
{
|
|
const uint id = newId(&textures);
|
|
textures[id - 1].fenceValue = 0;
|
|
return id;
|
|
}
|
|
|
|
static inline DXGI_FORMAT textureFormat(QImage::Format format, bool wantsAlpha, bool mipmap,
|
|
QImage::Format *imageFormat, int *bytesPerPixel)
|
|
{
|
|
DXGI_FORMAT f = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
QImage::Format convFormat = format;
|
|
int bpp = 4;
|
|
|
|
if (!mipmap) {
|
|
switch (format) {
|
|
case QImage::Format_Grayscale8:
|
|
case QImage::Format_Indexed8:
|
|
case QImage::Format_Alpha8:
|
|
f = DXGI_FORMAT_R8_UNORM;
|
|
bpp = 1;
|
|
break;
|
|
case QImage::Format_RGB32:
|
|
f = DXGI_FORMAT_B8G8R8A8_UNORM;
|
|
break;
|
|
case QImage::Format_ARGB32:
|
|
f = DXGI_FORMAT_B8G8R8A8_UNORM;
|
|
convFormat = wantsAlpha ? QImage::Format_ARGB32_Premultiplied : QImage::Format_RGB32;
|
|
break;
|
|
case QImage::Format_ARGB32_Premultiplied:
|
|
f = DXGI_FORMAT_B8G8R8A8_UNORM;
|
|
convFormat = wantsAlpha ? format : QImage::Format_RGB32;
|
|
break;
|
|
default:
|
|
convFormat = wantsAlpha ? QImage::Format_RGBA8888_Premultiplied : QImage::Format_RGBX8888;
|
|
break;
|
|
}
|
|
} else {
|
|
// Mipmap generation needs unordered access and BGRA is not an option for that. Stick to RGBA.
|
|
convFormat = wantsAlpha ? QImage::Format_RGBA8888_Premultiplied : QImage::Format_RGBX8888;
|
|
}
|
|
|
|
if (imageFormat)
|
|
*imageFormat = convFormat;
|
|
|
|
if (bytesPerPixel)
|
|
*bytesPerPixel = bpp;
|
|
|
|
return f;
|
|
}
|
|
|
|
static inline QImage::Format imageFormatForTexture(DXGI_FORMAT format)
|
|
{
|
|
QImage::Format f = QImage::Format_Invalid;
|
|
|
|
switch (format) {
|
|
case DXGI_FORMAT_R8G8B8A8_UNORM:
|
|
f = QImage::Format_RGBA8888_Premultiplied;
|
|
break;
|
|
case DXGI_FORMAT_B8G8R8A8_UNORM:
|
|
f = QImage::Format_ARGB32_Premultiplied;
|
|
break;
|
|
case DXGI_FORMAT_R8_UNORM:
|
|
f = QImage::Format_Grayscale8;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return f;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::createTexture(uint id, const QSize &size, QImage::Format format,
|
|
QSGD3D12Engine::TextureCreateFlags createFlags)
|
|
{
|
|
ensureDevice();
|
|
|
|
Q_ASSERT(id);
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
|
|
Texture &t(textures[idx]);
|
|
|
|
syncEntryFlags(&t, Texture::Alpha, createFlags & QSGD3D12Engine::TextureWithAlpha);
|
|
syncEntryFlags(&t, Texture::MipMap, createFlags & QSGD3D12Engine::TextureWithMipMaps);
|
|
|
|
const QSize adjustedSize = !t.mipmap() ? size : QSGD3D12Engine::mipMapAdjustedSourceSize(size);
|
|
|
|
D3D12_HEAP_PROPERTIES defaultHeapProp = {};
|
|
defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT;
|
|
|
|
D3D12_RESOURCE_DESC textureDesc = {};
|
|
textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
|
textureDesc.Width = adjustedSize.width();
|
|
textureDesc.Height = adjustedSize.height();
|
|
textureDesc.DepthOrArraySize = 1;
|
|
textureDesc.MipLevels = !t.mipmap() ? 1 : QSGD3D12Engine::mipMapLevels(adjustedSize);
|
|
textureDesc.Format = textureFormat(format, t.alpha(), t.mipmap(), nullptr, nullptr);
|
|
textureDesc.SampleDesc.Count = 1;
|
|
textureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
|
if (t.mipmap())
|
|
textureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
|
|
|
HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc,
|
|
D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&t.texture));
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create texture resource: 0x%x", hr);
|
|
return;
|
|
}
|
|
|
|
t.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
|
|
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
|
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
|
srvDesc.Format = textureDesc.Format;
|
|
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
|
srvDesc.Texture2D.MipLevels = textureDesc.MipLevels;
|
|
|
|
device->CreateShaderResourceView(t.texture.Get(), &srvDesc, t.srv);
|
|
|
|
if (t.mipmap()) {
|
|
// Mipmap generation will need an UAV for each level that needs to be generated.
|
|
t.mipUAVs.clear();
|
|
for (int level = 1; level < textureDesc.MipLevels; ++level) {
|
|
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
|
|
uavDesc.Format = textureDesc.Format;
|
|
uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
|
|
uavDesc.Texture2D.MipSlice = level;
|
|
D3D12_CPU_DESCRIPTOR_HANDLE h = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
device->CreateUnorderedAccessView(t.texture.Get(), nullptr, &uavDesc, h);
|
|
t.mipUAVs.append(h);
|
|
}
|
|
}
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("created texture %u, size %dx%d, miplevels %d", id, adjustedSize.width(), adjustedSize.height(), textureDesc.MipLevels);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueTextureResize(uint id, const QSize &size)
|
|
{
|
|
Q_ASSERT(id);
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
|
|
Texture &t(textures[idx]);
|
|
|
|
if (!t.texture) {
|
|
qWarning("Cannot resize non-created texture %u", id);
|
|
return;
|
|
}
|
|
|
|
if (t.mipmap()) {
|
|
qWarning("Cannot resize mipmapped texture %u", id);
|
|
return;
|
|
}
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("resizing texture %u, size %dx%d", id, size.width(), size.height());
|
|
|
|
D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc();
|
|
textureDesc.Width = size.width();
|
|
textureDesc.Height = size.height();
|
|
|
|
D3D12_HEAP_PROPERTIES defaultHeapProp = {};
|
|
defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT;
|
|
|
|
ComPtr<ID3D12Resource> oldTexture = t.texture;
|
|
deferredDelete(t.texture);
|
|
|
|
HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc,
|
|
D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&t.texture));
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create resized texture resource: 0x%x", hr);
|
|
return;
|
|
}
|
|
|
|
deferredDelete(t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
t.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
|
|
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
|
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
|
srvDesc.Format = textureDesc.Format;
|
|
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
|
|
srvDesc.Texture2D.MipLevels = textureDesc.MipLevels;
|
|
|
|
device->CreateShaderResourceView(t.texture.Get(), &srvDesc, t.srv);
|
|
|
|
D3D12_TEXTURE_COPY_LOCATION dstLoc;
|
|
dstLoc.pResource = t.texture.Get();
|
|
dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
|
dstLoc.SubresourceIndex = 0;
|
|
|
|
D3D12_TEXTURE_COPY_LOCATION srcLoc;
|
|
srcLoc.pResource = oldTexture.Get();
|
|
srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
|
srcLoc.SubresourceIndex = 0;
|
|
|
|
copyCommandList->Reset(copyCommandAllocator.Get(), nullptr);
|
|
|
|
copyCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr);
|
|
|
|
copyCommandList->Close();
|
|
ID3D12CommandList *commandLists[] = { copyCommandList.Get() };
|
|
copyCommandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);
|
|
|
|
t.fenceValue = nextTextureUploadFenceValue.fetchAndAddAcquire(1) + 1;
|
|
copyCommandQueue->Signal(textureUploadFence.Get(), t.fenceValue);
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("submitted old content copy for texture %u on the copy queue, fence %llu", id, t.fenceValue);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::queueTextureUpload(uint id, const QVector<QImage> &images, const QVector<QPoint> &dstPos)
|
|
{
|
|
Q_ASSERT(id);
|
|
Q_ASSERT(images.count() == dstPos.count());
|
|
if (images.isEmpty())
|
|
return;
|
|
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
|
|
Texture &t(textures[idx]);
|
|
Q_ASSERT(t.texture);
|
|
|
|
// When mipmapping is not in use, image can be smaller than the size passed
|
|
// to createTexture() and dstPos can specify a non-zero destination position.
|
|
|
|
if (t.mipmap() && (images.count() != 1 || dstPos.count() != 1 || !dstPos[0].isNull())) {
|
|
qWarning("Mipmapped textures (%u) do not support partial uploads", id);
|
|
return;
|
|
}
|
|
|
|
// Make life simpler by disallowing queuing a new mipmapped upload before the previous one finishes.
|
|
if (t.mipmap() && t.fenceValue) {
|
|
qWarning("Attempted to queue mipmapped texture upload (%u) while a previous upload is still in progress", id);
|
|
return;
|
|
}
|
|
|
|
t.fenceValue = nextTextureUploadFenceValue.fetchAndAddAcquire(1) + 1;
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("adding upload for texture %u on the copy queue, fence %llu", id, t.fenceValue);
|
|
|
|
D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc();
|
|
const QSize adjustedTextureSize(textureDesc.Width, textureDesc.Height);
|
|
|
|
int totalSize = 0;
|
|
for (const QImage &image : images) {
|
|
int bytesPerPixel;
|
|
textureFormat(image.format(), t.alpha(), t.mipmap(), nullptr, &bytesPerPixel);
|
|
const int w = !t.mipmap() ? image.width() : adjustedTextureSize.width();
|
|
const int h = !t.mipmap() ? image.height() : adjustedTextureSize.height();
|
|
const int stride = alignedSize(w * bytesPerPixel, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
|
|
totalSize += alignedSize(h * stride, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
|
|
}
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("%d sub-uploads, heap size %d bytes", images.count(), totalSize);
|
|
|
|
// Instead of individual committed resources for each upload buffer,
|
|
// allocate only once and use placed resources.
|
|
D3D12_HEAP_PROPERTIES uploadHeapProp = {};
|
|
uploadHeapProp.Type = D3D12_HEAP_TYPE_UPLOAD;
|
|
D3D12_HEAP_DESC uploadHeapDesc = {};
|
|
uploadHeapDesc.SizeInBytes = totalSize;
|
|
uploadHeapDesc.Properties = uploadHeapProp;
|
|
uploadHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
|
|
|
|
Texture::StagingHeap sheap;
|
|
if (FAILED(device->CreateHeap(&uploadHeapDesc, IID_PPV_ARGS(&sheap.heap)))) {
|
|
qWarning("Failed to create texture upload heap of size %d", totalSize);
|
|
return;
|
|
}
|
|
t.stagingHeaps.append(sheap);
|
|
|
|
copyCommandList->Reset(copyCommandAllocator.Get(), nullptr);
|
|
|
|
int placedOffset = 0;
|
|
for (int i = 0; i < images.count(); ++i) {
|
|
QImage::Format convFormat;
|
|
int bytesPerPixel;
|
|
textureFormat(images[i].format(), t.alpha(), t.mipmap(), &convFormat, &bytesPerPixel);
|
|
if (Q_UNLIKELY(debug_render() && i == 0))
|
|
qDebug("source image format %d, target format %d, bpp %d", images[i].format(), convFormat, bytesPerPixel);
|
|
|
|
QImage convImage = images[i].format() == convFormat ? images[i] : images[i].convertToFormat(convFormat);
|
|
|
|
if (t.mipmap() && adjustedTextureSize != convImage.size())
|
|
convImage = convImage.scaled(adjustedTextureSize, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
|
|
|
|
const int stride = alignedSize(convImage.width() * bytesPerPixel, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
|
|
|
|
D3D12_RESOURCE_DESC bufDesc = {};
|
|
bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
|
bufDesc.Width = stride * convImage.height();
|
|
bufDesc.Height = 1;
|
|
bufDesc.DepthOrArraySize = 1;
|
|
bufDesc.MipLevels = 1;
|
|
bufDesc.Format = DXGI_FORMAT_UNKNOWN;
|
|
bufDesc.SampleDesc.Count = 1;
|
|
bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
|
|
|
Texture::StagingBuffer sbuf;
|
|
if (FAILED(device->CreatePlacedResource(sheap.heap.Get(), placedOffset,
|
|
&bufDesc, D3D12_RESOURCE_STATE_GENERIC_READ,
|
|
nullptr, IID_PPV_ARGS(&sbuf.buffer)))) {
|
|
qWarning("Failed to create texture upload buffer");
|
|
return;
|
|
}
|
|
|
|
quint8 *p = nullptr;
|
|
const D3D12_RANGE readRange = { 0, 0 };
|
|
if (FAILED(sbuf.buffer->Map(0, &readRange, reinterpret_cast<void **>(&p)))) {
|
|
qWarning("Map failed (texture upload buffer)");
|
|
return;
|
|
}
|
|
for (int y = 0, ye = convImage.height(); y < ye; ++y) {
|
|
memcpy(p, convImage.constScanLine(y), convImage.width() * bytesPerPixel);
|
|
p += stride;
|
|
}
|
|
sbuf.buffer->Unmap(0, nullptr);
|
|
|
|
D3D12_TEXTURE_COPY_LOCATION dstLoc;
|
|
dstLoc.pResource = t.texture.Get();
|
|
dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
|
dstLoc.SubresourceIndex = 0;
|
|
|
|
D3D12_TEXTURE_COPY_LOCATION srcLoc;
|
|
srcLoc.pResource = sbuf.buffer.Get();
|
|
srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
|
srcLoc.PlacedFootprint.Offset = 0;
|
|
srcLoc.PlacedFootprint.Footprint.Format = textureDesc.Format;
|
|
srcLoc.PlacedFootprint.Footprint.Width = convImage.width();
|
|
srcLoc.PlacedFootprint.Footprint.Height = convImage.height();
|
|
srcLoc.PlacedFootprint.Footprint.Depth = 1;
|
|
srcLoc.PlacedFootprint.Footprint.RowPitch = stride;
|
|
|
|
copyCommandList->CopyTextureRegion(&dstLoc, dstPos[i].x(), dstPos[i].y(), 0, &srcLoc, nullptr);
|
|
|
|
t.stagingBuffers.append(sbuf);
|
|
placedOffset += alignedSize(bufDesc.Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
|
|
}
|
|
|
|
copyCommandList->Close();
|
|
ID3D12CommandList *commandLists[] = { copyCommandList.Get() };
|
|
copyCommandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);
|
|
copyCommandQueue->Signal(textureUploadFence.Get(), t.fenceValue);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::releaseTexture(uint id)
|
|
{
|
|
if (!id || !initialized)
|
|
return;
|
|
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < textures.count());
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("releasing texture %d", id);
|
|
|
|
Texture &t(textures[idx]);
|
|
if (!t.entryInUse())
|
|
return;
|
|
|
|
if (t.texture) {
|
|
deferredDelete(t.texture);
|
|
deferredDelete(t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
for (D3D12_CPU_DESCRIPTOR_HANDLE h : t.mipUAVs)
|
|
deferredDelete(h, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
}
|
|
|
|
QSet<PersistentFrameData::PendingRelease> *pendingReleasesSet = inFrame
|
|
? &pframeData[currentPFrameIndex].pendingReleases
|
|
: &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFramePendingReleases;
|
|
|
|
pendingReleasesSet->insert(PersistentFrameData::PendingRelease(PersistentFrameData::PendingRelease::TypeTexture, id));
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::useTexture(uint id)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
Q_ASSERT(id);
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
|
|
|
|
// Within one frame the order of calling this function determines the
|
|
// texture register (0, 1, ...) so fill up activeTextures accordingly.
|
|
tframeData.activeTextures[tframeData.activeTextureCount++]
|
|
= TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id);
|
|
|
|
if (textures[idx].fenceValue)
|
|
pframeData[currentPFrameIndex].pendingTextureUploads.insert(id);
|
|
}
|
|
|
|
bool QSGD3D12EnginePrivate::MipMapGen::initialize(QSGD3D12EnginePrivate *enginePriv)
|
|
{
|
|
engine = enginePriv;
|
|
|
|
D3D12_STATIC_SAMPLER_DESC sampler = {};
|
|
sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
|
|
sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
|
|
sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
|
|
sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
|
|
sampler.MinLOD = 0.0f;
|
|
sampler.MaxLOD = D3D12_FLOAT32_MAX;
|
|
|
|
D3D12_DESCRIPTOR_RANGE descRange[2];
|
|
descRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
|
descRange[0].NumDescriptors = 1;
|
|
descRange[0].BaseShaderRegister = 0; // t0
|
|
descRange[0].RegisterSpace = 0;
|
|
descRange[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
|
|
descRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
|
descRange[1].NumDescriptors = 4;
|
|
descRange[1].BaseShaderRegister = 0; // u0..u3
|
|
descRange[1].RegisterSpace = 0;
|
|
descRange[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
|
|
|
|
// Split into two to allow switching between the first and second set of UAVs later.
|
|
D3D12_ROOT_PARAMETER rootParameters[3];
|
|
rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
rootParameters[0].DescriptorTable.NumDescriptorRanges = 1;
|
|
rootParameters[0].DescriptorTable.pDescriptorRanges = &descRange[0];
|
|
|
|
rootParameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
rootParameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
rootParameters[1].DescriptorTable.NumDescriptorRanges = 1;
|
|
rootParameters[1].DescriptorTable.pDescriptorRanges = &descRange[1];
|
|
|
|
rootParameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
|
rootParameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
rootParameters[2].Constants.Num32BitValues = 4; // uint2 mip1Size, uint sampleLevel, uint totalMips
|
|
rootParameters[2].Constants.ShaderRegister = 0; // b0
|
|
rootParameters[2].Constants.RegisterSpace = 0;
|
|
|
|
D3D12_ROOT_SIGNATURE_DESC desc = {};
|
|
desc.NumParameters = 3;
|
|
desc.pParameters = rootParameters;
|
|
desc.NumStaticSamplers = 1;
|
|
desc.pStaticSamplers = &sampler;
|
|
|
|
ComPtr<ID3DBlob> signature;
|
|
ComPtr<ID3DBlob> error;
|
|
if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) {
|
|
QByteArray msg(static_cast<const char *>(error->GetBufferPointer()), error->GetBufferSize());
|
|
qWarning("Failed to serialize compute root signature: %s", qPrintable(msg));
|
|
return false;
|
|
}
|
|
if (FAILED(engine->device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(),
|
|
IID_PPV_ARGS(&rootSig)))) {
|
|
qWarning("Failed to create compute root signature");
|
|
return false;
|
|
}
|
|
|
|
D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
|
|
psoDesc.pRootSignature = rootSig.Get();
|
|
psoDesc.CS.pShaderBytecode = g_CS_Generate4MipMaps;
|
|
psoDesc.CS.BytecodeLength = sizeof(g_CS_Generate4MipMaps);
|
|
|
|
if (FAILED(engine->device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&pipelineState)))) {
|
|
qWarning("Failed to create compute pipeline state");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::MipMapGen::releaseResources()
|
|
{
|
|
pipelineState = nullptr;
|
|
rootSig = nullptr;
|
|
}
|
|
|
|
// The mipmap generator is used to insert commands on the main 3D queue. It is
|
|
// guaranteed that the queue has a wait for the base texture level upload
|
|
// before invoking queueGenerate(). There can be any number of invocations
|
|
// without waiting for earlier ones to finish. finished() is invoked when it is
|
|
// known for sure that frame containing the upload and mipmap generation has
|
|
// finished on the GPU.
|
|
|
|
void QSGD3D12EnginePrivate::MipMapGen::queueGenerate(const Texture &t)
|
|
{
|
|
D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc();
|
|
|
|
engine->commandList->SetPipelineState(pipelineState.Get());
|
|
engine->commandList->SetComputeRootSignature(rootSig.Get());
|
|
|
|
// 1 SRV + (miplevels - 1) UAVs
|
|
const int descriptorCount = 1 + (textureDesc.MipLevels - 1);
|
|
|
|
engine->ensureGPUDescriptorHeap(descriptorCount);
|
|
|
|
// The descriptor heap is set on the command list either because the
|
|
// ensure() call above resized, or, typically, due to a texture-dependent
|
|
// draw call earlier.
|
|
|
|
engine->transitionResource(t.texture.Get(), engine->commandList,
|
|
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
|
|
|
QSGD3D12EnginePrivate::PersistentFrameData &pfd(engine->pframeData[engine->currentPFrameIndex]);
|
|
|
|
const uint stride = engine->cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
D3D12_CPU_DESCRIPTOR_HANDLE h = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart();
|
|
h.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
|
|
|
|
engine->device->CopyDescriptorsSimple(1, h, t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
h.ptr += stride;
|
|
|
|
for (int level = 1; level < textureDesc.MipLevels; ++level, h.ptr += stride)
|
|
engine->device->CopyDescriptorsSimple(1, h, t.mipUAVs[level - 1], D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpuAddr = pfd.gpuCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart();
|
|
gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
|
|
|
|
engine->commandList->SetComputeRootDescriptorTable(0, gpuAddr);
|
|
gpuAddr.ptr += stride; // now points to the first UAV
|
|
|
|
for (int level = 1; level < textureDesc.MipLevels; level += 4, gpuAddr.ptr += stride * 4) {
|
|
engine->commandList->SetComputeRootDescriptorTable(1, gpuAddr);
|
|
|
|
QSize sz(textureDesc.Width, textureDesc.Height);
|
|
sz.setWidth(qMax(1, sz.width() >> level));
|
|
sz.setHeight(qMax(1, sz.height() >> level));
|
|
|
|
const quint32 constants[4] = { quint32(sz.width()), quint32(sz.height()),
|
|
quint32(level - 1),
|
|
quint32(textureDesc.MipLevels - 1) };
|
|
|
|
engine->commandList->SetComputeRoot32BitConstants(2, 4, constants, 0);
|
|
engine->commandList->Dispatch(sz.width(), sz.height(), 1);
|
|
engine->uavBarrier(t.texture.Get(), engine->commandList);
|
|
}
|
|
|
|
engine->transitionResource(t.texture.Get(), engine->commandList,
|
|
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
|
|
|
pfd.cbvSrvUavNextFreeDescriptorIndex += descriptorCount;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::deferredDelete(ComPtr<ID3D12Resource> res)
|
|
{
|
|
PersistentFrameData::DeleteQueueEntry e;
|
|
e.res = res;
|
|
QVector<PersistentFrameData::DeleteQueueEntry> *dq = inFrame
|
|
? &pframeData[currentPFrameIndex].deleteQueue
|
|
: &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue;
|
|
(*dq) << e;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::deferredDelete(ComPtr<ID3D12DescriptorHeap> dh)
|
|
{
|
|
PersistentFrameData::DeleteQueueEntry e;
|
|
e.descHeap = dh;
|
|
QVector<PersistentFrameData::DeleteQueueEntry> *dq = inFrame
|
|
? &pframeData[currentPFrameIndex].deleteQueue
|
|
: &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue;
|
|
(*dq) << e;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::deferredDelete(D3D12_CPU_DESCRIPTOR_HANDLE h, D3D12_DESCRIPTOR_HEAP_TYPE type)
|
|
{
|
|
PersistentFrameData::DeleteQueueEntry e;
|
|
e.cpuDescriptorPtr = h.ptr;
|
|
e.descHeapType = type;
|
|
QVector<PersistentFrameData::DeleteQueueEntry> *dq = inFrame
|
|
? &pframeData[currentPFrameIndex].deleteQueue
|
|
: &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue;
|
|
(*dq) << e;
|
|
}
|
|
|
|
uint QSGD3D12EnginePrivate::genRenderTarget()
|
|
{
|
|
return newId(&renderTargets);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::createRenderTarget(uint id, const QSize &size, const QVector4D &clearColor, uint samples)
|
|
{
|
|
ensureDevice();
|
|
|
|
Q_ASSERT(id);
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < renderTargets.count() && renderTargets[idx].entryInUse());
|
|
RenderTarget &rt(renderTargets[idx]);
|
|
|
|
rt.rtv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
|
rt.dsv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
|
rt.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
|
|
ID3D12Resource *res = createColorBuffer(rt.rtv, size, clearColor, samples);
|
|
if (res)
|
|
rt.color.Attach(res);
|
|
|
|
ID3D12Resource *dsres = createDepthStencil(rt.dsv, size, samples);
|
|
if (dsres)
|
|
rt.ds.Attach(dsres);
|
|
|
|
const bool multisample = rt.color->GetDesc().SampleDesc.Count > 1;
|
|
syncEntryFlags(&rt, RenderTarget::Multisample, multisample);
|
|
|
|
if (!multisample) {
|
|
device->CreateShaderResourceView(rt.color.Get(), nullptr, rt.srv);
|
|
} else {
|
|
D3D12_HEAP_PROPERTIES defaultHeapProp = {};
|
|
defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT;
|
|
|
|
D3D12_RESOURCE_DESC textureDesc = {};
|
|
textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
|
|
textureDesc.Width = size.width();
|
|
textureDesc.Height = size.height();
|
|
textureDesc.DepthOrArraySize = 1;
|
|
textureDesc.MipLevels = 1;
|
|
textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
textureDesc.SampleDesc.Count = 1;
|
|
textureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
|
|
|
HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc,
|
|
D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&rt.colorResolve));
|
|
if (FAILED(hr)) {
|
|
qWarning("Failed to create resolve buffer: 0x%x", hr);
|
|
return;
|
|
}
|
|
|
|
device->CreateShaderResourceView(rt.colorResolve.Get(), nullptr, rt.srv);
|
|
}
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("created new render target %u, size %dx%d, samples %d", id, size.width(), size.height(), samples);
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::releaseRenderTarget(uint id)
|
|
{
|
|
if (!id || !initialized)
|
|
return;
|
|
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < renderTargets.count());
|
|
RenderTarget &rt(renderTargets[idx]);
|
|
if (!rt.entryInUse())
|
|
return;
|
|
|
|
if (Q_UNLIKELY(debug_render()))
|
|
qDebug("releasing render target %u", id);
|
|
|
|
if (rt.colorResolve) {
|
|
deferredDelete(rt.colorResolve);
|
|
rt.colorResolve = nullptr;
|
|
}
|
|
if (rt.color) {
|
|
deferredDelete(rt.color);
|
|
rt.color = nullptr;
|
|
deferredDelete(rt.rtv, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
|
deferredDelete(rt.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
}
|
|
if (rt.ds) {
|
|
deferredDelete(rt.ds);
|
|
rt.ds = nullptr;
|
|
deferredDelete(rt.dsv, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
|
}
|
|
|
|
rt.flags &= ~RenderTarget::EntryInUse;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::useRenderTargetAsTexture(uint id)
|
|
{
|
|
if (!inFrame) {
|
|
qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__);
|
|
return;
|
|
}
|
|
|
|
Q_ASSERT(id);
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < renderTargets.count());
|
|
RenderTarget &rt(renderTargets[idx]);
|
|
Q_ASSERT(rt.entryInUse() && rt.color);
|
|
|
|
if (rt.flags & RenderTarget::NeedsReadBarrier) {
|
|
rt.flags &= ~RenderTarget::NeedsReadBarrier;
|
|
if (rt.flags & RenderTarget::Multisample)
|
|
resolveMultisampledTarget(rt.color.Get(), rt.colorResolve.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, commandList);
|
|
else
|
|
transitionResource(rt.color.Get(), commandList, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
|
}
|
|
|
|
tframeData.activeTextures[tframeData.activeTextureCount++] =
|
|
TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id);
|
|
}
|
|
|
|
QImage QSGD3D12EnginePrivate::executeAndWaitReadbackRenderTarget(uint id)
|
|
{
|
|
// Readback due to QQuickWindow::grabWindow() happens outside
|
|
// begin-endFrame, but QQuickItemGrabResult leads to rendering a layer
|
|
// without a real frame afterwards and triggering readback. This has to be
|
|
// supported as well.
|
|
if (inFrame && (!activeLayers || currentLayerDepth)) {
|
|
qWarning("%s: Cannot be called while frame preparation is active", __FUNCTION__);
|
|
return QImage();
|
|
}
|
|
|
|
// Due to the above we insert a fake "real" frame when a layer was just rendered into.
|
|
if (inFrame) {
|
|
beginFrame();
|
|
endFrame();
|
|
}
|
|
|
|
frameCommandList->Reset(frameCommandAllocator[frameIndex % frameInFlightCount].Get(), nullptr);
|
|
|
|
D3D12_RESOURCE_STATES bstate;
|
|
bool needsBarrier = false;
|
|
ID3D12Resource *rtRes;
|
|
if (id == 0) {
|
|
const int idx = presentFrameIndex % swapChainBufferCount;
|
|
if (windowSamples > 1) {
|
|
resolveMultisampledTarget(defaultRT[idx].Get(), backBufferRT[idx].Get(),
|
|
D3D12_RESOURCE_STATE_COPY_SOURCE, frameCommandList.Get());
|
|
} else {
|
|
bstate = D3D12_RESOURCE_STATE_PRESENT;
|
|
needsBarrier = true;
|
|
}
|
|
rtRes = backBufferRT[idx].Get();
|
|
} else {
|
|
const int idx = id - 1;
|
|
Q_ASSERT(idx < renderTargets.count());
|
|
RenderTarget &rt(renderTargets[idx]);
|
|
Q_ASSERT(rt.entryInUse() && rt.color);
|
|
|
|
if (rt.flags & RenderTarget::Multisample) {
|
|
resolveMultisampledTarget(rt.color.Get(), rt.colorResolve.Get(),
|
|
D3D12_RESOURCE_STATE_COPY_SOURCE, frameCommandList.Get());
|
|
rtRes = rt.colorResolve.Get();
|
|
} else {
|
|
rtRes = rt.color.Get();
|
|
bstate = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
|
needsBarrier = true;
|
|
}
|
|
}
|
|
|
|
ComPtr<ID3D12Resource> readbackBuf;
|
|
|
|
D3D12_RESOURCE_DESC rtDesc = rtRes->GetDesc();
|
|
UINT64 textureByteSize = 0;
|
|
D3D12_PLACED_SUBRESOURCE_FOOTPRINT textureLayout = {};
|
|
device->GetCopyableFootprints(&rtDesc, 0, 1, 0, &textureLayout, nullptr, nullptr, &textureByteSize);
|
|
|
|
D3D12_HEAP_PROPERTIES heapProp = {};
|
|
heapProp.Type = D3D12_HEAP_TYPE_READBACK;
|
|
|
|
D3D12_RESOURCE_DESC bufDesc = {};
|
|
bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
|
bufDesc.Width = textureByteSize;
|
|
bufDesc.Height = 1;
|
|
bufDesc.DepthOrArraySize = 1;
|
|
bufDesc.MipLevels = 1;
|
|
bufDesc.Format = DXGI_FORMAT_UNKNOWN;
|
|
bufDesc.SampleDesc.Count = 1;
|
|
bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
|
|
|
if (FAILED(device->CreateCommittedResource(&heapProp, D3D12_HEAP_FLAG_NONE, &bufDesc,
|
|
D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&readbackBuf)))) {
|
|
qWarning("Failed to create committed resource (readback buffer)");
|
|
return QImage();
|
|
}
|
|
|
|
D3D12_TEXTURE_COPY_LOCATION dstLoc;
|
|
dstLoc.pResource = readbackBuf.Get();
|
|
dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
|
dstLoc.PlacedFootprint = textureLayout;
|
|
D3D12_TEXTURE_COPY_LOCATION srcLoc;
|
|
srcLoc.pResource = rtRes;
|
|
srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
|
srcLoc.SubresourceIndex = 0;
|
|
|
|
ID3D12GraphicsCommandList *cl = frameCommandList.Get();
|
|
if (needsBarrier)
|
|
transitionResource(rtRes, cl, bstate, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
|
cl->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr);
|
|
if (needsBarrier)
|
|
transitionResource(rtRes, cl, D3D12_RESOURCE_STATE_COPY_SOURCE, bstate);
|
|
|
|
cl->Close();
|
|
ID3D12CommandList *commandLists[] = { cl };
|
|
commandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);
|
|
|
|
QScopedPointer<QSGD3D12CPUWaitableFence> f(createCPUWaitableFence());
|
|
waitForGPU(f.data()); // uh oh
|
|
|
|
QImage::Format fmt = imageFormatForTexture(rtDesc.Format);
|
|
if (fmt == QImage::Format_Invalid) {
|
|
qWarning("Could not map render target format %d to a QImage format", rtDesc.Format);
|
|
return QImage();
|
|
}
|
|
QImage img(rtDesc.Width, rtDesc.Height, fmt);
|
|
quint8 *p = nullptr;
|
|
const D3D12_RANGE readRange = { 0, 0 };
|
|
if (FAILED(readbackBuf->Map(0, &readRange, reinterpret_cast<void **>(&p)))) {
|
|
qWarning("Mapping the readback buffer failed");
|
|
return QImage();
|
|
}
|
|
const int bpp = 4; // ###
|
|
if (id == 0) {
|
|
for (UINT y = 0; y < rtDesc.Height; ++y) {
|
|
quint8 *dst = img.scanLine(y);
|
|
memcpy(dst, p, rtDesc.Width * bpp);
|
|
p += textureLayout.Footprint.RowPitch;
|
|
}
|
|
} else {
|
|
for (int y = rtDesc.Height - 1; y >= 0; --y) {
|
|
quint8 *dst = img.scanLine(y);
|
|
memcpy(dst, p, rtDesc.Width * bpp);
|
|
p += textureLayout.Footprint.RowPitch;
|
|
}
|
|
}
|
|
readbackBuf->Unmap(0, nullptr);
|
|
|
|
return img;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::simulateDeviceLoss()
|
|
{
|
|
qWarning("QSGD3D12Engine: Triggering device loss via TDR");
|
|
devLossTest.killDevice();
|
|
}
|
|
|
|
bool QSGD3D12EnginePrivate::DeviceLossTester::initialize(QSGD3D12EnginePrivate *enginePriv)
|
|
{
|
|
engine = enginePriv;
|
|
|
|
#ifdef DEVLOSS_TEST
|
|
D3D12_DESCRIPTOR_RANGE descRange[2];
|
|
descRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
|
descRange[0].NumDescriptors = 1;
|
|
descRange[0].BaseShaderRegister = 0;
|
|
descRange[0].RegisterSpace = 0;
|
|
descRange[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
|
|
descRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
|
descRange[1].NumDescriptors = 1;
|
|
descRange[1].BaseShaderRegister = 0;
|
|
descRange[1].RegisterSpace = 0;
|
|
descRange[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
|
|
|
|
D3D12_ROOT_PARAMETER param;
|
|
param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
param.DescriptorTable.NumDescriptorRanges = 2;
|
|
param.DescriptorTable.pDescriptorRanges = descRange;
|
|
|
|
D3D12_ROOT_SIGNATURE_DESC desc = {};
|
|
desc.NumParameters = 1;
|
|
desc.pParameters = ¶m;
|
|
|
|
ComPtr<ID3DBlob> signature;
|
|
ComPtr<ID3DBlob> error;
|
|
if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) {
|
|
QByteArray msg(static_cast<const char *>(error->GetBufferPointer()), error->GetBufferSize());
|
|
qWarning("Failed to serialize compute root signature: %s", qPrintable(msg));
|
|
return false;
|
|
}
|
|
if (FAILED(engine->device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(),
|
|
IID_PPV_ARGS(&computeRootSignature)))) {
|
|
qWarning("Failed to create compute root signature");
|
|
return false;
|
|
}
|
|
|
|
D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
|
|
psoDesc.pRootSignature = computeRootSignature.Get();
|
|
psoDesc.CS.pShaderBytecode = g_timeout;
|
|
psoDesc.CS.BytecodeLength = sizeof(g_timeout);
|
|
|
|
if (FAILED(engine->device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&computeState)))) {
|
|
qWarning("Failed to create compute pipeline state");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::DeviceLossTester::releaseResources()
|
|
{
|
|
computeState = nullptr;
|
|
computeRootSignature = nullptr;
|
|
}
|
|
|
|
void QSGD3D12EnginePrivate::DeviceLossTester::killDevice()
|
|
{
|
|
#ifdef DEVLOSS_TEST
|
|
ID3D12CommandAllocator *ca = engine->frameCommandAllocator[engine->frameIndex % engine->frameInFlightCount].Get();
|
|
ID3D12GraphicsCommandList *cl = engine->frameCommandList.Get();
|
|
cl->Reset(ca, computeState.Get());
|
|
|
|
cl->SetComputeRootSignature(computeRootSignature.Get());
|
|
cl->Dispatch(256, 1, 1);
|
|
|
|
cl->Close();
|
|
ID3D12CommandList *commandLists[] = { cl };
|
|
engine->commandQueue->ExecuteCommandLists(_countof(commandLists), commandLists);
|
|
|
|
engine->waitGPU();
|
|
#endif
|
|
}
|
|
|
|
void *QSGD3D12EnginePrivate::getResource(QSGRendererInterface::Resource resource) const
|
|
{
|
|
switch (resource) {
|
|
case QSGRendererInterface::Device:
|
|
return device;
|
|
case QSGRendererInterface::CommandQueue:
|
|
return commandQueue.Get();
|
|
case QSGRendererInterface::CommandList:
|
|
return commandList;
|
|
default:
|
|
break;
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
QT_END_NAMESPACE
|