#include "UnityCG.cginc"

// Histogram bins. Each element packs the counts for one bin as
// (x = red, y = green, z = blue, w = luminance).
RWStructuredBuffer<uint4> _Histogram;

// Image the histogram is gathered from.
Texture2D<float4> _Source;

CBUFFER_START (Params)
    // Non-zero: colors are passed through LinearToGammaSpace before binning.
    uint _IsLinear;
    // KHistogramGather / KHistogramClear: xy are the exclusive upper bounds for
    // dispatchThreadId. KHistogramClear also uses x as the row stride of the index.
    // KHistogramScale: y is the value the largest bin of each channel is scaled to.
    float4 _Res;
    // Per-channel enable flags (x = red, y = green, z = blue, w = luminance).
    uint4 _Channels;
CBUFFER_END

// Per-group partial histogram, merged into _Histogram at the end of the gather kernel.
groupshared uint4 gs_histogram[256];

#define GROUP_SIZE 16

// Gathering pass: every thread bins one pixel of _Source into a group-local
// histogram, then the group adds its partial counts to _Histogram.
#pragma kernel KHistogramGather
[numthreads(GROUP_SIZE, GROUP_SIZE,1)]
void KHistogramGather(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID)
{
    const uint localThreadId = groupThreadId.y * GROUP_SIZE + groupThreadId.x;

    // Reset the group-local histogram, one bin per thread.
    if (localThreadId < 256)
        gs_histogram[localThreadId] = uint4(0, 0, 0, 0);

    GroupMemoryBarrierWithGroupSync();

    if (dispatchThreadId.x < (uint)_Res.x && dispatchThreadId.y < (uint)_Res.y)
    {
        // We want a gamma histogram (like Photoshop & all)
        float3 color = saturate(_Source[dispatchThreadId].xyz);
        if (_IsLinear > 0)
            color = LinearToGammaSpace(color);

        // Convert color & luminance to histogram bin
        uint3 idx_c = (uint3)(round(color * 255.0));
        uint idx_l = (uint)(round(dot(color.rgb, float3(0.2125, 0.7154, 0.0721)) * 255.0));

        // Fill the group shared histogram
        if (_Channels.x > 0u) InterlockedAdd(gs_histogram[idx_c.x].x, 1); // Red
        if (_Channels.y > 0u) InterlockedAdd(gs_histogram[idx_c.y].y, 1); // Green
        if (_Channels.z > 0u) InterlockedAdd(gs_histogram[idx_c.z].z, 1); // Blue
        if (_Channels.w > 0u) InterlockedAdd(gs_histogram[idx_l].w, 1);   // Luminance
    }

    GroupMemoryBarrierWithGroupSync();

    // Merge: each thread adds one group-local bin to the output buffer.
    // Empty bins are skipped so they do not issue an atomic.
    if (localThreadId < 256)
    {
        uint4 h = gs_histogram[localThreadId];
        if (_Channels.x > 0u && h.x > 0) InterlockedAdd(_Histogram[localThreadId].x, h.x); // Red
        if (_Channels.y > 0u && h.y > 0) InterlockedAdd(_Histogram[localThreadId].y, h.y); // Green
        if (_Channels.z > 0u && h.z > 0) InterlockedAdd(_Histogram[localThreadId].z, h.z); // Blue
        if (_Channels.w > 0u && h.w > 0) InterlockedAdd(_Histogram[localThreadId].w, h.w); // Luminance
    }
}

// Scaling pass
// Scratch storage for the per-channel max reduction.
groupshared uint4 gs_pyramid[256];

// Rescales the 256 bins so that the largest bin of each channel becomes _Res.y.
// The kernel only looks at SV_GroupThreadID, so every group works on the same
// 256 bins; presumably it is dispatched as a single group - confirm against the caller.
#pragma kernel KHistogramScale
[numthreads(16,16,1)]
void KHistogramScale(uint2 groupThreadId : SV_GroupThreadID)
{
    const uint localThreadId = groupThreadId.y * 16 + groupThreadId.x;

    // Keep this thread's bin in a register: gs_pyramid is overwritten by the reduction.
    const uint4 bin = _Histogram[localThreadId];
    gs_pyramid[localThreadId] = bin;

    GroupMemoryBarrierWithGroupSync();

    // Parallel reduction to find the max value
    UNITY_UNROLL
    for(uint i = 256 >> 1; i > 0; i >>= 1)
    {
        if(localThreadId < i)
            gs_pyramid[localThreadId] = max(gs_pyramid[localThreadId], gs_pyramid[localThreadId + i]);

        GroupMemoryBarrierWithGroupSync();
    }

    // Actual scaling
    // A channel whose max is 0 (disabled or empty) would otherwise divide by zero and
    // turn its bins into NaN. Clamping the divisor to 1 is safe because every bin of
    // such a channel is 0 and therefore stays 0.
    const uint4 peak = max(gs_pyramid[0], uint4(1u, 1u, 1u, 1u));
    float4 factor = _Res.y / (float4)peak;
    _Histogram[localThreadId] = (uint4)round((float4)bin * factor);
}

// Clearing pass: zeroes the bin addressed by each in-range thread, using
// _Res.x as the row stride of the flattened index.
#pragma kernel KHistogramClear
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void KHistogramClear(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // Integer width shared by the bounds check and the index computation, so the
    // index is not formed in floating point and truncated implicitly.
    const uint width = (uint)_Res.x;

    if (dispatchThreadId.x < width && dispatchThreadId.y < (uint)_Res.y)
        _Histogram[dispatchThreadId.y * width + dispatchThreadId.x] = uint4(0u, 0u, 0u, 0u);
}