D3D12: Drop QVectors from QSGD3D12Engine
Results in a more than 3x drop in CPU usage (now below GL, as expected) in the 500 Image elements test. There is zero value in using a dynamic data structure for these anyhow, apart from getting == and qHash for free. However, writing our own hash provides further opportunities for optimization: for instance, there is not much point in hashing the individual input elements, since identical shader code pointers imply that the input elements match too. Thus hashing becomes faster without causing more collisions in practice. Change-Id: Iae766bd44d30ec37080369c8b37677e633c37a88 Reviewed-by: Andy Nichols <andy.nichols@qt.io>
This commit is contained in:
parent
c9cffe6129
commit
ea22206baf
|
@ -221,7 +221,7 @@ void QSGD3D12TextureMaterial::preparePipeline(QSGD3D12PipelineState *pipelineSta
|
|||
pipelineState->shaders.ps = g_PS_Texture;
|
||||
pipelineState->shaders.psSize = sizeof(g_PS_Texture);
|
||||
|
||||
pipelineState->shaders.rootSig.textureViews.resize(1);
|
||||
pipelineState->shaders.rootSig.textureViewCount = 1;
|
||||
}
|
||||
|
||||
QSGD3D12Material::UpdateResults QSGD3D12TextureMaterial::updatePipeline(const RenderState &state,
|
||||
|
@ -304,7 +304,7 @@ void QSGD3D12SmoothTextureMaterial::preparePipeline(QSGD3D12PipelineState *pipel
|
|||
pipelineState->shaders.ps = g_PS_SmoothTexture;
|
||||
pipelineState->shaders.psSize = sizeof(g_PS_SmoothTexture);
|
||||
|
||||
pipelineState->shaders.rootSig.textureViews.resize(1);
|
||||
pipelineState->shaders.rootSig.textureViewCount = 1;
|
||||
}
|
||||
|
||||
QSGD3D12Material::UpdateResults QSGD3D12SmoothTextureMaterial::updatePipeline(const RenderState &state,
|
||||
|
@ -479,7 +479,7 @@ void QSGD3D12TextMaterial::preparePipeline(QSGD3D12PipelineState *pipelineState)
|
|||
pipelineState->shaders.psSize = sizeof(g_PS_StyledText);
|
||||
}
|
||||
|
||||
pipelineState->shaders.rootSig.textureViews.resize(1);
|
||||
pipelineState->shaders.rootSig.textureViewCount = 1;
|
||||
}
|
||||
|
||||
QSGD3D12Material::UpdateResults QSGD3D12TextMaterial::updatePipeline(const RenderState &state,
|
||||
|
|
|
@ -1414,6 +1414,7 @@ void QSGD3D12EnginePrivate::invalidateCachedFrameState()
|
|||
{
|
||||
tframeData.drawingMode = QSGGeometry::DrawingMode(-1);
|
||||
tframeData.currentIndexBuffer = 0;
|
||||
tframeData.activeTextureCount = 0;
|
||||
tframeData.drawCount = 0;
|
||||
tframeData.lastPso = nullptr;
|
||||
tframeData.lastRootSig = nullptr;
|
||||
|
@ -1601,13 +1602,13 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
|
|||
rootParams[0].Descriptor.RegisterSpace = 0;
|
||||
++rootParamCount;
|
||||
|
||||
if (!pipelineState.shaders.rootSig.textureViews.isEmpty()) {
|
||||
if (pipelineState.shaders.rootSig.textureViewCount > 0) {
|
||||
rootParams[rootParamCount].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
rootParams[rootParamCount].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
||||
rootParams[rootParamCount].DescriptorTable.NumDescriptorRanges = 1;
|
||||
D3D12_DESCRIPTOR_RANGE descRange;
|
||||
descRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViews.count();
|
||||
descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViewCount;
|
||||
descRange.BaseShaderRegister = 0; // t0, t1, ...
|
||||
descRange.RegisterSpace = 0;
|
||||
descRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
|
||||
|
@ -1623,11 +1624,12 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
|
|||
// that the number of static samplers has to match the number of
|
||||
// textures. This is not really ideal in general but works for Quick's use cases.
|
||||
// The shaders can still choose to declare and use fewer samplers, if they want to.
|
||||
desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViews.count();
|
||||
desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViewCount;
|
||||
D3D12_STATIC_SAMPLER_DESC staticSamplers[8];
|
||||
int sdIdx = 0;
|
||||
Q_ASSERT(pipelineState.shaders.rootSig.textureViews.count() <= _countof(staticSamplers));
|
||||
for (const QSGD3D12TextureView &tv : qAsConst(pipelineState.shaders.rootSig.textureViews)) {
|
||||
Q_ASSERT(pipelineState.shaders.rootSig.textureViewCount <= _countof(staticSamplers));
|
||||
for (int i = 0; i < pipelineState.shaders.rootSig.textureViewCount; ++i) {
|
||||
const QSGD3D12TextureView &tv(pipelineState.shaders.rootSig.textureViews[i]);
|
||||
D3D12_STATIC_SAMPLER_DESC sd = {};
|
||||
sd.Filter = D3D12_FILTER(tv.filter);
|
||||
sd.AddressU = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeHoriz);
|
||||
|
@ -1666,10 +1668,10 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
|
|||
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
|
||||
|
||||
D3D12_INPUT_ELEMENT_DESC inputElements[8];
|
||||
Q_ASSERT(pipelineState.inputElements.count() <= _countof(inputElements));
|
||||
D3D12_INPUT_ELEMENT_DESC inputElements[QSGD3D12_MAX_INPUT_ELEMENTS];
|
||||
int ieIdx = 0;
|
||||
for (const QSGD3D12InputElement &ie : pipelineState.inputElements) {
|
||||
for (int i = 0; i < pipelineState.inputElementCount; ++i) {
|
||||
const QSGD3D12InputElement &ie(pipelineState.inputElements[i]);
|
||||
D3D12_INPUT_ELEMENT_DESC ieDesc = {};
|
||||
ieDesc.SemanticName = ie.semanticName;
|
||||
ieDesc.SemanticIndex = ie.semanticIndex;
|
||||
|
@ -1773,7 +1775,7 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
|
|||
commandList->SetGraphicsRootSignature(tframeData.lastRootSig);
|
||||
}
|
||||
|
||||
if (!pipelineState.shaders.rootSig.textureViews.isEmpty())
|
||||
if (pipelineState.shaders.rootSig.textureViewCount > 0)
|
||||
setDescriptorHeaps();
|
||||
}
|
||||
|
||||
|
@ -1980,14 +1982,15 @@ void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams ¶ms)
|
|||
}
|
||||
|
||||
// Copy the SRVs to a drawcall-dedicated area of the shader-visible descriptor heap.
|
||||
Q_ASSERT(tframeData.activeTextures.count() == tframeData.pipelineState.shaders.rootSig.textureViews.count());
|
||||
if (!tframeData.activeTextures.isEmpty()) {
|
||||
Q_ASSERT(tframeData.activeTextureCount == tframeData.pipelineState.shaders.rootSig.textureViewCount);
|
||||
if (tframeData.activeTextureCount > 0) {
|
||||
if (!skip) {
|
||||
ensureGPUDescriptorHeap(tframeData.activeTextures.count());
|
||||
ensureGPUDescriptorHeap(tframeData.activeTextureCount);
|
||||
const uint stride = cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE dst = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart();
|
||||
dst.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
|
||||
for (const TransientFrameData::ActiveTexture &t : qAsConst(tframeData.activeTextures)) {
|
||||
for (int i = 0; i < tframeData.activeTextureCount; ++i) {
|
||||
const TransientFrameData::ActiveTexture &t(tframeData.activeTextures[i]);
|
||||
Q_ASSERT(t.id);
|
||||
const int idx = t.id - 1;
|
||||
const bool isTex = t.type == TransientFrameData::ActiveTexture::TypeTexture;
|
||||
|
@ -2000,9 +2003,9 @@ void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams ¶ms)
|
|||
gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
|
||||
commandList->SetGraphicsRootDescriptorTable(1, gpuAddr);
|
||||
|
||||
pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextures.count();
|
||||
pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextureCount;
|
||||
}
|
||||
tframeData.activeTextures.clear();
|
||||
tframeData.activeTextureCount = 0;
|
||||
}
|
||||
|
||||
// Add the draw call.
|
||||
|
@ -2593,8 +2596,10 @@ void QSGD3D12EnginePrivate::useTexture(uint id)
|
|||
const int idx = id - 1;
|
||||
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
|
||||
|
||||
// activeTextures is a vector because the order matters
|
||||
tframeData.activeTextures.append(TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id));
|
||||
// Within one frame the order of calling this function determines the
|
||||
// texture register (0, 1, ...) so fill up activeTextures accordingly.
|
||||
tframeData.activeTextures[tframeData.activeTextureCount++]
|
||||
= TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id);
|
||||
|
||||
if (textures[idx].fenceValue)
|
||||
pframeData[currentPFrameIndex].pendingTextureUploads.insert(id);
|
||||
|
@ -2890,7 +2895,8 @@ void QSGD3D12EnginePrivate::useRenderTargetAsTexture(uint id)
|
|||
transitionResource(rt.color.Get(), commandList, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
}
|
||||
|
||||
tframeData.activeTextures.append(TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id));
|
||||
tframeData.activeTextures[tframeData.activeTextureCount++] =
|
||||
TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id);
|
||||
}
|
||||
|
||||
QImage QSGD3D12EnginePrivate::executeAndWaitReadbackRenderTarget(uint id)
|
||||
|
|
|
@ -136,18 +136,26 @@ inline uint qHash(const QSGD3D12TextureView &key, uint seed = 0)
|
|||
return key.filter + key.addressModeHoriz + key.addressModeVert;
|
||||
}
|
||||
|
||||
const int QSGD3D12_MAX_TEXTURE_VIEWS = 8;
|
||||
|
||||
struct QSGD3D12RootSignature
|
||||
{
|
||||
QVector<QSGD3D12TextureView> textureViews;
|
||||
int textureViewCount = 0;
|
||||
QSGD3D12TextureView textureViews[QSGD3D12_MAX_TEXTURE_VIEWS];
|
||||
|
||||
bool operator==(const QSGD3D12RootSignature &other) const {
|
||||
return textureViews == other.textureViews;
|
||||
if (textureViewCount != other.textureViewCount)
|
||||
return false;
|
||||
for (int i = 0; i < textureViewCount; ++i)
|
||||
if (!(textureViews[i] == other.textureViews[i]))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
inline uint qHash(const QSGD3D12RootSignature &key, uint seed = 0)
|
||||
{
|
||||
return qHash(key.textureViews, seed);
|
||||
return key.textureViewCount + (key.textureViewCount > 0 ? qHash(key.textureViews[0], seed) : 0);
|
||||
}
|
||||
|
||||
// Shader bytecode blobs and root signature-related data.
|
||||
|
@ -172,6 +180,8 @@ inline uint qHash(const QSGD3D12ShaderState &key, uint seed = 0)
|
|||
return qHash(key.vs, seed) + key.vsSize + qHash(key.ps, seed) + key.psSize + qHash(key.rootSig, seed);
|
||||
}
|
||||
|
||||
const int QSGD3D12_MAX_INPUT_ELEMENTS = 8;
|
||||
|
||||
struct QSGD3D12PipelineState
|
||||
{
|
||||
enum CullMode {
|
||||
|
@ -216,7 +226,8 @@ struct QSGD3D12PipelineState
|
|||
|
||||
QSGD3D12ShaderState shaders;
|
||||
|
||||
QVector<QSGD3D12InputElement> inputElements;
|
||||
int inputElementCount = 0;
|
||||
QSGD3D12InputElement inputElements[QSGD3D12_MAX_INPUT_ELEMENTS];
|
||||
|
||||
CullMode cullMode = CullNone;
|
||||
bool frontCCW = true;
|
||||
|
@ -233,8 +244,8 @@ struct QSGD3D12PipelineState
|
|||
TopologyType topologyType = TopologyTypeTriangle;
|
||||
|
||||
bool operator==(const QSGD3D12PipelineState &other) const {
|
||||
return shaders == other.shaders
|
||||
&& inputElements == other.inputElements
|
||||
bool eq = shaders == other.shaders
|
||||
&& inputElementCount == other.inputElementCount
|
||||
&& cullMode == other.cullMode
|
||||
&& frontCCW == other.frontCCW
|
||||
&& colorWrite == other.colorWrite
|
||||
|
@ -248,12 +259,21 @@ struct QSGD3D12PipelineState
|
|||
&& (!stencilEnable || stencilDepthFailOp == other.stencilDepthFailOp)
|
||||
&& (!stencilEnable || stencilPassOp == other.stencilPassOp)
|
||||
&& topologyType == other.topologyType;
|
||||
if (eq) {
|
||||
for (int i = 0; i < inputElementCount; ++i) {
|
||||
if (!(inputElements[i] == other.inputElements[i])) {
|
||||
eq = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return eq;
|
||||
}
|
||||
};
|
||||
|
||||
inline uint qHash(const QSGD3D12PipelineState &key, uint seed = 0)
|
||||
{
|
||||
return qHash(key.shaders, seed) + qHash(key.inputElements, seed)
|
||||
return qHash(key.shaders, seed) + key.inputElementCount
|
||||
+ key.cullMode + key.frontCCW
|
||||
+ key.colorWrite + key.blend
|
||||
+ key.depthEnable + key.depthWrite
|
||||
|
|
|
@ -350,7 +350,8 @@ private:
|
|||
ActiveTexture(Type type, uint id) : type(type), id(id) { }
|
||||
ActiveTexture() { }
|
||||
};
|
||||
QVector<ActiveTexture> activeTextures;
|
||||
int activeTextureCount;
|
||||
ActiveTexture activeTextures[QSGD3D12_MAX_TEXTURE_VIEWS];
|
||||
int drawCount;
|
||||
ID3D12PipelineState *lastPso;
|
||||
ID3D12RootSignature *lastRootSig;
|
||||
|
|
|
@ -75,7 +75,6 @@ QSGD3D12Renderer::QSGD3D12Renderer(QSGRenderContext *context)
|
|||
m_cboData(4096)
|
||||
{
|
||||
setNodeUpdater(new DummyUpdater);
|
||||
m_freshPipelineState.shaders.rootSig.textureViews.reserve(4);
|
||||
}
|
||||
|
||||
QSGD3D12Renderer::~QSGD3D12Renderer()
|
||||
|
@ -554,7 +553,7 @@ void QSGD3D12Renderer::renderElement(int elementIndex)
|
|||
|
||||
void QSGD3D12Renderer::setInputLayout(const QSGGeometry *g, QSGD3D12PipelineState *pipelineState)
|
||||
{
|
||||
pipelineState->inputElements.resize(g->attributeCount());
|
||||
pipelineState->inputElementCount = g->attributeCount();
|
||||
const QSGGeometry::Attribute *attrs = g->attributes();
|
||||
quint32 offset = 0;
|
||||
for (int i = 0; i < g->attributeCount(); ++i) {
|
||||
|
|
|
@ -318,7 +318,7 @@ void QSGD3D12ShaderEffectMaterial::preparePipeline(QSGD3D12PipelineState *pipeli
|
|||
pipelineState->shaders.ps = reinterpret_cast<const quint8 *>(linker.fs.constData());
|
||||
pipelineState->shaders.psSize = linker.fs.size();
|
||||
|
||||
pipelineState->shaders.rootSig.textureViews.resize(textureProviders.count());
|
||||
pipelineState->shaders.rootSig.textureViewCount = textureProviders.count();
|
||||
}
|
||||
|
||||
static inline QColor qsg_premultiply_color(const QColor &c)
|
||||
|
|
Loading…
Reference in New Issue