|
|
- #include "UnityCG.cginc"
-
- RWStructuredBuffer<uint4> _Histogram; // One uint4 per bin: x = red, y = green, z = blue, w = luminance
- Texture2D<float4> _Source; // Image read by KHistogramGather, indexed by dispatch thread id
-
- CBUFFER_START (Params)
- uint _IsLinear; // Non-zero: KHistogramGather runs colors through LinearToGammaSpace before binning
- float4 _Res; // xy bound the dispatch area in Gather/Clear; y is the scale target in KHistogramScale
- uint4 _Channels; // Per-channel enable flags (x = R, y = G, z = B, w = luminance); non-zero enables
- CBUFFER_END
-
- groupshared uint4 gs_histogram[256]; // Per-thread-group partial histogram accumulated by KHistogramGather
-
- #define GROUP_SIZE 16 // Thread-group edge length; GROUP_SIZE * GROUP_SIZE = 256 threads per group
-
- // Gather pass: each thread bins one source pixel into a group-shared histogram,
- // then the group folds its partial counts into _Histogram with atomic adds.
- #pragma kernel KHistogramGather
- [numthreads(GROUP_SIZE, GROUP_SIZE,1)]
- void KHistogramGather(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID)
- {
-     // Flattened index of this thread inside its group; also the bin slot it resets and merges
-     const uint binSlot = groupThreadId.x + groupThreadId.y * GROUP_SIZE;
-     const bool ownsBin = binSlot < 256;
-
-     // Zero the group-local histogram before any thread accumulates into it
-     if (ownsBin)
-         gs_histogram[binSlot] = uint4(0, 0, 0, 0);
-
-     GroupMemoryBarrierWithGroupSync();
-
-     // Threads outside the _Res.xy area contribute nothing but must still reach the barriers
-     const bool insideSource = dispatchThreadId.x < (uint)_Res.x && dispatchThreadId.y < (uint)_Res.y;
-     if (insideSource)
-     {
-         // The histogram is built from gamma-space values (like Photoshop & all)
-         float3 rgb = saturate(_Source[dispatchThreadId].xyz);
-         if (_IsLinear != 0u)
-             rgb = LinearToGammaSpace(rgb);
-
-         // Map each color component and the luminance onto a bin in [0, 255]
-         const float luma = dot(rgb.rgb, float3(0.2125, 0.7154, 0.0721));
-         const uint3 binRGB = (uint3)(round(rgb * 255.0));
-         const uint binLuma = (uint)(round(luma * 255.0));
-
-         // Accumulate into the group-shared histogram, enabled channels only
-         if (_Channels.x != 0u) InterlockedAdd(gs_histogram[binRGB.x].x, 1); // Red
-         if (_Channels.y != 0u) InterlockedAdd(gs_histogram[binRGB.y].y, 1); // Green
-         if (_Channels.z != 0u) InterlockedAdd(gs_histogram[binRGB.z].z, 1); // Blue
-         if (_Channels.w != 0u) InterlockedAdd(gs_histogram[binLuma].w, 1); // Luminance
-     }
-
-     GroupMemoryBarrierWithGroupSync();
-
-     // Merge this group's counts into the output buffer, skipping empty bins
-     if (ownsBin)
-     {
-         const uint4 groupCounts = gs_histogram[binSlot];
-         if (_Channels.x != 0u && groupCounts.x != 0u) InterlockedAdd(_Histogram[binSlot].x, groupCounts.x); // Red
-         if (_Channels.y != 0u && groupCounts.y != 0u) InterlockedAdd(_Histogram[binSlot].y, groupCounts.y); // Green
-         if (_Channels.z != 0u && groupCounts.z != 0u) InterlockedAdd(_Histogram[binSlot].z, groupCounts.z); // Blue
-         if (_Channels.w != 0u && groupCounts.w != 0u) InterlockedAdd(_Histogram[binSlot].w, groupCounts.w); // Luminance
-     }
- }
-
- // Scaling pass: rescales every channel so that its tallest bin maps to _Res.y.
- // Scratch storage for the per-channel max reduction, one slot per thread.
- groupshared uint4 gs_pyramid[256];
-
- #pragma kernel KHistogramScale
- [numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
- void KHistogramScale(uint2 groupThreadId : SV_GroupThreadID)
- {
-     // Flattened thread index inside the group; each thread handles the bin with the same index
-     const uint localThreadId = groupThreadId.y * GROUP_SIZE + groupThreadId.x;
-
-     // Read this thread's bin once: it seeds the reduction and is rescaled at the end
-     const uint4 binValue = _Histogram[localThreadId];
-     gs_pyramid[localThreadId] = binValue;
-
-     GroupMemoryBarrierWithGroupSync();
-
-     // Parallel reduction to find the per-channel max value; result ends up in gs_pyramid[0]
-     UNITY_UNROLL
-     for (uint i = 256 >> 1; i > 0; i >>= 1)
-     {
-         if (localThreadId < i)
-             gs_pyramid[localThreadId] = max(gs_pyramid[localThreadId], gs_pyramid[localThreadId + i]);
-
-         // Every thread must reach the barrier, including those idle in this step
-         GroupMemoryBarrierWithGroupSync();
-     }
-
-     // A channel whose bins are all zero has a peak of 0; clamp it to 1 so the
-     // division below cannot produce inf (and 0 * inf = NaN). Such a channel stays all zero.
-     const uint4 peak = max(gs_pyramid[0], uint4(1u, 1u, 1u, 1u));
-
-     // Actual scaling
-     const float4 factor = _Res.y / (float4)peak;
-     _Histogram[localThreadId] = (uint4)round((float4)binValue * factor);
- }
-
- // Clear pass: zeroes one _Histogram entry per thread inside the _Res.xy area,
- // addressing the buffer row-major with a row stride of _Res.x.
- // NOTE(review): the buffer length is not visible here; confirm the caller sets
- // _Res so that _Res.x * _Res.y matches the number of histogram entries.
- #pragma kernel KHistogramClear
- [numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
- void KHistogramClear(uint2 dispatchThreadId : SV_DispatchThreadID)
- {
-     // Convert once to integers so the bounds test and the index stride use the
-     // same width, and the index is never computed in floating point
-     const uint2 extent = (uint2)_Res.xy;
-
-     if (dispatchThreadId.x < extent.x && dispatchThreadId.y < extent.y)
-         _Histogram[dispatchThreadId.y * extent.x + dispatchThreadId.x] = uint4(0u, 0u, 0u, 0u);
- }
|