• 在DirectX 12中使用compute shader


    compute shader是用来把一些适合在gpu上运算的任务从cpu挪到gpu去做。例如一些后处理的效果,对纹理上的每个像素分别进行各自运算,最后得到结果。我们以实现sobel边缘检测算法为例,首先把正常绘制的内容渲染到一张off screen的render target上;然后使用这张render target作为compute shader的输入,在gpu上运行sobel边缘检测算法,输出到一个新的buffer;最后,将buffer作为输入,绘制到屏幕上。

    首先,需要为off screen的render target创建对应的buffer和view:

    	D3D12_RESOURCE_DESC resDesc;
    	resDesc.Alignment = 0;
    	resDesc.DepthOrArraySize = 1;
    	resDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    	resDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
    	resDesc.Format = mBackBufferFormat;
    	resDesc.Height = windowHeight;
    	resDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    	resDesc.MipLevels = 1;
    	resDesc.SampleDesc.Count = 1;
    	resDesc.SampleDesc.Quality = 0;
    	resDesc.Width = windowWidth;
    	ThrowIfFailed(mDevice->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
    		D3D12_HEAP_FLAG_NONE, &resDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
    		IID_PPV_ARGS(&mRenderTargetBuffer)));
    
    	D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc;
    	srvDesc.Format = mBackBufferFormat;
    	srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    	srvDesc.Texture2D.MipLevels = 1;
    	srvDesc.Texture2D.MostDetailedMip = 0;
    	srvDesc.Texture2D.PlaneSlice = 0;
    	srvDesc.Texture2D.ResourceMinLODClamp = 0.0f;
    	srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    
    	mDevice->CreateShaderResourceView(mRenderTargetBuffer.Get(), &srvDesc, cpuSrvHandle);
    
    	D3D12_RENDER_TARGET_VIEW_DESC rtvDesc;
    	rtvDesc.Format = mBackBufferFormat;
    	rtvDesc.Texture2D.MipSlice = 0;
    	rtvDesc.Texture2D.PlaneSlice = 0;
    	rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
    
    	mDevice->CreateRenderTargetView(mRenderTargetBuffer.Get(), &rtvDesc, cpuRtvHandle);
    

    我们为buffer创建了一个shader resource view和render target view,前者是用来给后面的sobel算法的输入使用,后者是作为off screen buffer存储正常绘制的内容。

    接下来,需要为compute shader去创建对应的buffer和view:

    	D3D12_RESOURCE_DESC resDesc;
    	resDesc.Alignment = 0;
    	resDesc.DepthOrArraySize = 1;
    	resDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    	resDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
    	resDesc.Format = mBackBufferFormat;
    	resDesc.Height = windowHeight;
    	resDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    	resDesc.MipLevels = 1;
    	resDesc.SampleDesc.Count = 1;
    	resDesc.SampleDesc.Quality = 0;
    	resDesc.Width = windowWidth;
    
    	ThrowIfFailed(mDevice->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
    		D3D12_HEAP_FLAG_NONE, &resDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
    		IID_PPV_ARGS(&mComputeBuffer)));
    
    	D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc;
    	srvDesc.Format = mBackBufferFormat;
    	srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    	srvDesc.Texture2D.MipLevels = 1;
    	srvDesc.Texture2D.MostDetailedMip = 0;
    	srvDesc.Texture2D.PlaneSlice = 0;
    	srvDesc.Texture2D.ResourceMinLODClamp = 0.0f;
    	srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
    
    	mDevice->CreateShaderResourceView(mComputeBuffer.Get(), &srvDesc, cpuSrvHandle);
    
    	D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc;
    	uavDesc.Format = mBackBufferFormat;
    	uavDesc.Texture2D.MipSlice = 0;
    	uavDesc.Texture2D.PlaneSlice = 0;
    	uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
    
    	mDevice->CreateUnorderedAccessView(mComputeBuffer.Get(), nullptr, &uavDesc, cpuUavHandle);
    

    我们为buffer创建了一个shader resource view和unordered access view:前者是在compute shader执行完之后,提供给最终绘制到屏幕的shader读取结果使用的;后者是供compute shader写入使用的——compute shader如果需要写入buffer,就必须通过unordered access view来绑定这个buffer。

    其次,还要为它们创建对应的根签名和pipeline state object,用来在绘制过程中切换:

    	// Three descriptor ranges, one per root parameter: a single SRV at t0, a single SRV
    	// at t1, and a UAV range starting at u0.
    	CD3DX12_DESCRIPTOR_RANGE cbvSrvTable[3];
    	cbvSrvTable[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0);
    	cbvSrvTable[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 1);
    	// NOTE(review): this range declares 2 UAV descriptors (u0-u1), while the setup code
    	// creates only one UAV - confirm whether the count should be 1.
    	cbvSrvTable[2].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 2, 0);

    	// Each root parameter is a descriptor table wrapping exactly one of the ranges above.
    	CD3DX12_ROOT_PARAMETER rootParams[3];
    	rootParams[0].InitAsDescriptorTable(1, &cbvSrvTable[0]);
    	rootParams[1].InitAsDescriptorTable(1, &cbvSrvTable[1]);
    	rootParams[2].InitAsDescriptorTable(1, &cbvSrvTable[2]);

    	// The sampler count is converted explicitly instead of being truncated implicitly.
    	CD3DX12_ROOT_SIGNATURE_DESC sigDesc(_countof(rootParams), rootParams,
    		static_cast<UINT>(mStaticSamplers.size()), mStaticSamplers.data(),
    		D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);

    	// Serialize the description, then create the root signature from the resulting blob.
    	// The error blob is received but not reported; only the HRESULT is checked.
    	ComPtr<ID3DBlob> signature = nullptr;
    	ComPtr<ID3DBlob> error = nullptr;
    	HRESULT hr = D3D12SerializeRootSignature(&sigDesc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error);
    	ThrowIfFailed(hr);
    	ThrowIfFailed(mDevice->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(),
    		IID_PPV_ARGS(&mSignature)));

    	// Compute pipeline: the compiled compute shader `cs` plus the root signature above.
    	D3D12_COMPUTE_PIPELINE_STATE_DESC computePsoDesc = {};
    	computePsoDesc.pRootSignature = mSignature.Get();
    	computePsoDesc.CS = { cs->GetBufferPointer(), cs->GetBufferSize() };
    	computePsoDesc.NodeMask = 0;
    	computePsoDesc.CachedPSO.pCachedBlob = nullptr;
    	computePsoDesc.CachedPSO.CachedBlobSizeInBytes = 0;
    	computePsoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
    	ThrowIfFailed(mDevice->CreateComputePipelineState(&computePsoDesc,
    		IID_PPV_ARGS(&mComputePipelineState)));

    	// render target
    	// Graphics pipeline sharing the same root signature. Everything not set below stays zero.
    	D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPsoDesc = {};
    	graphicPsoDesc.pRootSignature = mSignature.Get();
    	graphicPsoDesc.VS = { vs->GetBufferPointer(), vs->GetBufferSize() };
    	graphicPsoDesc.PS = { ps->GetBufferPointer(), ps->GetBufferSize() };
    	graphicPsoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
    	graphicPsoDesc.SampleMask = UINT_MAX;

    	// Default rasterizer state with the configured fill and cull modes.
    	CD3DX12_RASTERIZER_DESC rastDesc = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
    	rastDesc.FillMode = mFillMode;
    	rastDesc.CullMode = mCullMode;
    	graphicPsoDesc.RasterizerState = rastDesc;

    	// Depth testing and depth writes are disabled for this pipeline.
    	CD3DX12_DEPTH_STENCIL_DESC dsDesc = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT);
    	dsDesc.DepthEnable = false;
    	dsDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
    	dsDesc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
    	graphicPsoDesc.DepthStencilState = dsDesc;

    	// NumElements is a UINT: convert explicitly, because a size_t inside a braced
    	// initializer is a narrowing conversion (ill-formed on 64-bit builds).
    	graphicPsoDesc.InputLayout = { mInputLayout.data(), static_cast<UINT>(mInputLayout.size()) };
    	graphicPsoDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
    	graphicPsoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
    	graphicPsoDesc.NumRenderTargets = 1;
    	graphicPsoDesc.RTVFormats[0] = mBackBufferFormat;
    	graphicPsoDesc.DSVFormat = mDepthStencilBufferFormat;
    	graphicPsoDesc.SampleDesc.Count = mEnableMsaa ? mMsaaCount : 1;
    	graphicPsoDesc.SampleDesc.Quality = mEnableMsaa ? mMsaaQuality - 1 : 0;
    	graphicPsoDesc.NodeMask = 0;
    	graphicPsoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
    	ThrowIfFailed(mDevice->CreateGraphicsPipelineState(&graphicPsoDesc,
    		IID_PPV_ARGS(&mCoreRenderTarget.mGraphicPipelineState)));
    

    这里为了方便处理,我们只创建了一个根签名给两个shader使用。因为这两个shader都只接收一个buffer的输入,输出一个buffer。

    准备过程完成后,我们就可以开始绘制了。先要对已有的逻辑进行修改,即渲染写入的对象从原先的back buffer转移到off screen的render target上:

    		mCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(
    			mRenderTargetBuffer.Get(), D3D12_RESOURCE_STATE_GENERIC_READ, 
    			D3D12_RESOURCE_STATE_RENDER_TARGET));
    		mCommandList->ClearRenderTargetView(rtv, LightSteelBlue, 0, nullptr);
    		mCommandList->OMSetRenderTargets(1, &rtv, true, &dsv);
    
    		// draw objects
    
    		mCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(
    			mRenderTargetBuffer.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, 
    			D3D12_RESOURCE_STATE_GENERIC_READ));
    

    正常绘制工作完成后,再执行一开始提到的后处理逻辑:先将off screen的render target传给compute shader处理,输出提取边缘后的buffer,然后将buffer直接绘制到屏幕上:

    	mCommandList->SetComputeRootSignature(mComputeSignature.Get());
    	mCommandList->SetComputeRootDescriptorTable(0, mGpuSrv);
    	mCommandList->SetComputeRootDescriptorTable(2, mGpuUav);
    	mCommandList->SetPipelineState(mComputePipelineState.Get());
    
    	mCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(mComputeBuffer.Get(),
    		D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
    
    	UINT numGroupsX = (UINT)ceilf(windowWidth / 16.0f);
    	UINT numGroupsY = (UINT)ceilf(windowHeight / 16.0f);
    	mCommandList->Dispatch(numGroupsX, numGroupsY, 1);
    
    	mCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(mComputeBuffer.Get(),
    		D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_GENERIC_READ));
    
    
    	mCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(mBackBuffer[mCurBackBuffer].Get(),
    		D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET));
    	CD3DX12_CPU_DESCRIPTOR_HANDLE rtv = CD3DX12_CPU_DESCRIPTOR_HANDLE(
    		mRtvHeap->GetCPUDescriptorHandleForHeapStart(), mCurBackBuffer, mRtvHeapIncSize);
    	CD3DX12_CPU_DESCRIPTOR_HANDLE dsv = CD3DX12_CPU_DESCRIPTOR_HANDLE(
    		mDsvHeap->GetCPUDescriptorHandleForHeapStart());
    	mCommandList->OMSetRenderTargets(1, &rtv, true, &dsv);
    
    	mCommandList->SetGraphicsRootSignature(mGraphicSignature.Get());
    	mCommandList->SetGraphicsRootDescriptorTable(0, mGpuSrv);
    	mCommandList->SetGraphicsRootDescriptorTable(1, mGpuSrv);
    	mCommandList->SetPipelineState(mGraphicPipelineState.Get());
    
    	mCommandList->IASetVertexBuffers(0, 1, nullptr);
    	mCommandList->IASetIndexBuffer(nullptr);
    	mCommandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    	mCommandList->DrawInstanced(6, 1, 0, 0);
    
    	mCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(mBackBuffer[mCurBackBuffer].Get(),
    		D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT));
    

    值得一提的是,在最后绘制边缘效果到屏幕上时,我们并没有传入vertex buffer和index buffer,而是把它们设置为nullptr,在shader中直接写死6个顶点的纹理坐标,再通过SV_VertexID取出对应的纹理坐标,并由它换算出顶点在NDC空间中的位置:

    // Texture coordinates for six vertices forming two triangles that cover the unit
    // square [0,1]^2; indexed by vertex ID.
    // Declared `static const` so the table is a shader-local constant. A plain global is
    // an external uniform (stored in the $Globals constant buffer), so its initializer
    // would not be a reliable compile-time value and a constant buffer would have to be bound.
    static const float2 gTexCoords[6] =
    {
    	float2(0.0f, 1.0f),
    	float2(0.0f, 0.0f),
    	float2(1.0f, 0.0f),
    	float2(0.0f, 1.0f),
    	float2(1.0f, 0.0f),
    	float2(1.0f, 1.0f)
    };
    
    // Vertex shader output: a position bound to SV_POSITION and a texture
    // coordinate bound to TEXCOORD.
    struct VertexOut
    {
    	float4 PosH    : SV_POSITION;
    	float2 TexC    : TEXCOORD;
    };
    
    // Builds a vertex purely from its vertex ID: looks up the texture coordinate in
    // gTexCoords and derives the output position from it.
    VertexOut VS(uint vid : SV_VertexID)
    {
    	const float2 uv = gTexCoords[vid];

    	// Map [0,1]^2 to NDC space: x goes to [-1,1]; y is flipped so that v = 0 lands at +1.
    	const float ndcX = 2.0f*uv.x - 1.0f;
    	const float ndcY = 1.0f - 2.0f*uv.y;

    	VertexOut result;
    	result.TexC = uv;
    	result.PosH = float4(ndcX, ndcY, 0.0f, 1.0f);
    	return result;
    }
    

    最后绘制的效果如下:

    如果你觉得我的文章有帮助,欢迎关注我的微信公众号(大龄社畜的游戏开发之路)。

  • 相关阅读:
    java中的上转型对象
    java工程项目里,在一个包里面,不能出现同名的类名,这问题是刚接触java才会遇到的,特别是新手一般都没有建立包,而是使用默认的,易出现同名的类名,导致eclipse提示错误
    接口作为参数,不同的接口调用不同的方法,例如:输出“I love Game”或输出“我喜欢游戏”
    JavaScript--原型链
    JavaScript--clientX,clientY、pageX,pageY、offsetLeft,offsetTop/offsetWidth,offsetHeight、scrollLeft,scrollTop/scrollWidth,scrollHeight、clientHeight,clientWidth区别
    JavaScript--结合CSS变形、缩放能拖拽的登录框
    JavaScript--放大镜
    JavaScript--返回顶部方法:锚链接、行内式js写法、外链式、内嵌式
    JavaScript--封装好的运动函数+旋转木马例子
    JavaScript--漏写var却还能使用标签
  • 原文地址:https://www.cnblogs.com/back-to-the-past/p/14920947.html
Copyright © 2020-2023  润新知