D3D12: Drop QVectors from QSGD3D12Engine

Results in a more than 3x drop (and now below GL, as expected) in CPU usage
in the 500 Image elements test.

There is zero value in using a dynamic data structure for these anyhow,
apart from getting == and qHash. However, writing our own hash provides
further opportunities for optimizing. For instance, there is not much point
in hashing the individual input elements, since identical shader code
pointers imply that the input elements match too. Thus hashing becomes
faster without more collisions in practice.

Change-Id: Iae766bd44d30ec37080369c8b37677e633c37a88
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
This commit is contained in:
Laszlo Agocs 2016-05-26 12:03:19 +02:00
parent c9cffe6129
commit ea22206baf
6 changed files with 58 additions and 32 deletions

View File

@ -221,7 +221,7 @@ void QSGD3D12TextureMaterial::preparePipeline(QSGD3D12PipelineState *pipelineSta
pipelineState->shaders.ps = g_PS_Texture;
pipelineState->shaders.psSize = sizeof(g_PS_Texture);
pipelineState->shaders.rootSig.textureViews.resize(1);
pipelineState->shaders.rootSig.textureViewCount = 1;
}
QSGD3D12Material::UpdateResults QSGD3D12TextureMaterial::updatePipeline(const RenderState &state,
@ -304,7 +304,7 @@ void QSGD3D12SmoothTextureMaterial::preparePipeline(QSGD3D12PipelineState *pipel
pipelineState->shaders.ps = g_PS_SmoothTexture;
pipelineState->shaders.psSize = sizeof(g_PS_SmoothTexture);
pipelineState->shaders.rootSig.textureViews.resize(1);
pipelineState->shaders.rootSig.textureViewCount = 1;
}
QSGD3D12Material::UpdateResults QSGD3D12SmoothTextureMaterial::updatePipeline(const RenderState &state,
@ -479,7 +479,7 @@ void QSGD3D12TextMaterial::preparePipeline(QSGD3D12PipelineState *pipelineState)
pipelineState->shaders.psSize = sizeof(g_PS_StyledText);
}
pipelineState->shaders.rootSig.textureViews.resize(1);
pipelineState->shaders.rootSig.textureViewCount = 1;
}
QSGD3D12Material::UpdateResults QSGD3D12TextMaterial::updatePipeline(const RenderState &state,

View File

@ -1414,6 +1414,7 @@ void QSGD3D12EnginePrivate::invalidateCachedFrameState()
{
tframeData.drawingMode = QSGGeometry::DrawingMode(-1);
tframeData.currentIndexBuffer = 0;
tframeData.activeTextureCount = 0;
tframeData.drawCount = 0;
tframeData.lastPso = nullptr;
tframeData.lastRootSig = nullptr;
@ -1601,13 +1602,13 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
rootParams[0].Descriptor.RegisterSpace = 0;
++rootParamCount;
if (!pipelineState.shaders.rootSig.textureViews.isEmpty()) {
if (pipelineState.shaders.rootSig.textureViewCount > 0) {
rootParams[rootParamCount].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
rootParams[rootParamCount].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
rootParams[rootParamCount].DescriptorTable.NumDescriptorRanges = 1;
D3D12_DESCRIPTOR_RANGE descRange;
descRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViews.count();
descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViewCount;
descRange.BaseShaderRegister = 0; // t0, t1, ...
descRange.RegisterSpace = 0;
descRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
@ -1623,11 +1624,12 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
// that the number of static samplers has to match the number of
// textures. This is not really ideal in general but works for Quick's use cases.
// The shaders can still choose to declare and use fewer samplers, if they want to.
desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViews.count();
desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViewCount;
D3D12_STATIC_SAMPLER_DESC staticSamplers[8];
int sdIdx = 0;
Q_ASSERT(pipelineState.shaders.rootSig.textureViews.count() <= _countof(staticSamplers));
for (const QSGD3D12TextureView &tv : qAsConst(pipelineState.shaders.rootSig.textureViews)) {
Q_ASSERT(pipelineState.shaders.rootSig.textureViewCount <= _countof(staticSamplers));
for (int i = 0; i < pipelineState.shaders.rootSig.textureViewCount; ++i) {
const QSGD3D12TextureView &tv(pipelineState.shaders.rootSig.textureViews[i]);
D3D12_STATIC_SAMPLER_DESC sd = {};
sd.Filter = D3D12_FILTER(tv.filter);
sd.AddressU = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeHoriz);
@ -1666,10 +1668,10 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
D3D12_INPUT_ELEMENT_DESC inputElements[8];
Q_ASSERT(pipelineState.inputElements.count() <= _countof(inputElements));
D3D12_INPUT_ELEMENT_DESC inputElements[QSGD3D12_MAX_INPUT_ELEMENTS];
int ieIdx = 0;
for (const QSGD3D12InputElement &ie : pipelineState.inputElements) {
for (int i = 0; i < pipelineState.inputElementCount; ++i) {
const QSGD3D12InputElement &ie(pipelineState.inputElements[i]);
D3D12_INPUT_ELEMENT_DESC ieDesc = {};
ieDesc.SemanticName = ie.semanticName;
ieDesc.SemanticIndex = ie.semanticIndex;
@ -1773,7 +1775,7 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
commandList->SetGraphicsRootSignature(tframeData.lastRootSig);
}
if (!pipelineState.shaders.rootSig.textureViews.isEmpty())
if (pipelineState.shaders.rootSig.textureViewCount > 0)
setDescriptorHeaps();
}
@ -1980,14 +1982,15 @@ void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams &params)
}
// Copy the SRVs to a drawcall-dedicated area of the shader-visible descriptor heap.
Q_ASSERT(tframeData.activeTextures.count() == tframeData.pipelineState.shaders.rootSig.textureViews.count());
if (!tframeData.activeTextures.isEmpty()) {
Q_ASSERT(tframeData.activeTextureCount == tframeData.pipelineState.shaders.rootSig.textureViewCount);
if (tframeData.activeTextureCount > 0) {
if (!skip) {
ensureGPUDescriptorHeap(tframeData.activeTextures.count());
ensureGPUDescriptorHeap(tframeData.activeTextureCount);
const uint stride = cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
D3D12_CPU_DESCRIPTOR_HANDLE dst = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart();
dst.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
for (const TransientFrameData::ActiveTexture &t : qAsConst(tframeData.activeTextures)) {
for (int i = 0; i < tframeData.activeTextureCount; ++i) {
const TransientFrameData::ActiveTexture &t(tframeData.activeTextures[i]);
Q_ASSERT(t.id);
const int idx = t.id - 1;
const bool isTex = t.type == TransientFrameData::ActiveTexture::TypeTexture;
@ -2000,9 +2003,9 @@ void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams &params)
gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
commandList->SetGraphicsRootDescriptorTable(1, gpuAddr);
pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextures.count();
pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextureCount;
}
tframeData.activeTextures.clear();
tframeData.activeTextureCount = 0;
}
// Add the draw call.
@ -2593,8 +2596,10 @@ void QSGD3D12EnginePrivate::useTexture(uint id)
const int idx = id - 1;
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
// activeTextures is a vector because the order matters
tframeData.activeTextures.append(TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id));
// Within one frame the order of calling this function determines the
// texture register (0, 1, ...) so fill up activeTextures accordingly.
tframeData.activeTextures[tframeData.activeTextureCount++]
= TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id);
if (textures[idx].fenceValue)
pframeData[currentPFrameIndex].pendingTextureUploads.insert(id);
@ -2890,7 +2895,8 @@ void QSGD3D12EnginePrivate::useRenderTargetAsTexture(uint id)
transitionResource(rt.color.Get(), commandList, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
tframeData.activeTextures.append(TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id));
tframeData.activeTextures[tframeData.activeTextureCount++] =
TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id);
}
QImage QSGD3D12EnginePrivate::executeAndWaitReadbackRenderTarget(uint id)

View File

@ -136,18 +136,26 @@ inline uint qHash(const QSGD3D12TextureView &key, uint seed = 0)
return key.filter + key.addressModeHoriz + key.addressModeVert;
}
const int QSGD3D12_MAX_TEXTURE_VIEWS = 8;
struct QSGD3D12RootSignature
{
QVector<QSGD3D12TextureView> textureViews;
int textureViewCount = 0;
QSGD3D12TextureView textureViews[QSGD3D12_MAX_TEXTURE_VIEWS];
bool operator==(const QSGD3D12RootSignature &other) const {
return textureViews == other.textureViews;
if (textureViewCount != other.textureViewCount)
return false;
for (int i = 0; i < textureViewCount; ++i)
if (!(textureViews[i] == other.textureViews[i]))
return false;
return true;
}
};
inline uint qHash(const QSGD3D12RootSignature &key, uint seed = 0)
{
return qHash(key.textureViews, seed);
return key.textureViewCount + (key.textureViewCount > 0 ? qHash(key.textureViews[0], seed) : 0);
}
// Shader bytecode blobs and root signature-related data.
@ -172,6 +180,8 @@ inline uint qHash(const QSGD3D12ShaderState &key, uint seed = 0)
return qHash(key.vs, seed) + key.vsSize + qHash(key.ps, seed) + key.psSize + qHash(key.rootSig, seed);
}
const int QSGD3D12_MAX_INPUT_ELEMENTS = 8;
struct QSGD3D12PipelineState
{
enum CullMode {
@ -216,7 +226,8 @@ struct QSGD3D12PipelineState
QSGD3D12ShaderState shaders;
QVector<QSGD3D12InputElement> inputElements;
int inputElementCount = 0;
QSGD3D12InputElement inputElements[QSGD3D12_MAX_INPUT_ELEMENTS];
CullMode cullMode = CullNone;
bool frontCCW = true;
@ -233,8 +244,8 @@ struct QSGD3D12PipelineState
TopologyType topologyType = TopologyTypeTriangle;
bool operator==(const QSGD3D12PipelineState &other) const {
return shaders == other.shaders
&& inputElements == other.inputElements
bool eq = shaders == other.shaders
&& inputElementCount == other.inputElementCount
&& cullMode == other.cullMode
&& frontCCW == other.frontCCW
&& colorWrite == other.colorWrite
@ -248,12 +259,21 @@ struct QSGD3D12PipelineState
&& (!stencilEnable || stencilDepthFailOp == other.stencilDepthFailOp)
&& (!stencilEnable || stencilPassOp == other.stencilPassOp)
&& topologyType == other.topologyType;
if (eq) {
for (int i = 0; i < inputElementCount; ++i) {
if (!(inputElements[i] == other.inputElements[i])) {
eq = false;
break;
}
}
}
return eq;
}
};
inline uint qHash(const QSGD3D12PipelineState &key, uint seed = 0)
{
return qHash(key.shaders, seed) + qHash(key.inputElements, seed)
return qHash(key.shaders, seed) + key.inputElementCount
+ key.cullMode + key.frontCCW
+ key.colorWrite + key.blend
+ key.depthEnable + key.depthWrite

View File

@ -350,7 +350,8 @@ private:
ActiveTexture(Type type, uint id) : type(type), id(id) { }
ActiveTexture() { }
};
QVector<ActiveTexture> activeTextures;
int activeTextureCount;
ActiveTexture activeTextures[QSGD3D12_MAX_TEXTURE_VIEWS];
int drawCount;
ID3D12PipelineState *lastPso;
ID3D12RootSignature *lastRootSig;

View File

@ -75,7 +75,6 @@ QSGD3D12Renderer::QSGD3D12Renderer(QSGRenderContext *context)
m_cboData(4096)
{
setNodeUpdater(new DummyUpdater);
m_freshPipelineState.shaders.rootSig.textureViews.reserve(4);
}
QSGD3D12Renderer::~QSGD3D12Renderer()
@ -554,7 +553,7 @@ void QSGD3D12Renderer::renderElement(int elementIndex)
void QSGD3D12Renderer::setInputLayout(const QSGGeometry *g, QSGD3D12PipelineState *pipelineState)
{
pipelineState->inputElements.resize(g->attributeCount());
pipelineState->inputElementCount = g->attributeCount();
const QSGGeometry::Attribute *attrs = g->attributes();
quint32 offset = 0;
for (int i = 0; i < g->attributeCount(); ++i) {

View File

@ -318,7 +318,7 @@ void QSGD3D12ShaderEffectMaterial::preparePipeline(QSGD3D12PipelineState *pipeli
pipelineState->shaders.ps = reinterpret_cast<const quint8 *>(linker.fs.constData());
pipelineState->shaders.psSize = linker.fs.size();
pipelineState->shaders.rootSig.textureViews.resize(textureProviders.count());
pipelineState->shaders.rootSig.textureViewCount = textureProviders.count();
}
static inline QColor qsg_premultiply_color(const QColor &c)