You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

91 lines
3.1 KiB

  #include "UnityCG.cginc"

  // Thread-group edge length used by the gather and clear kernels
  // (GROUP_SIZE x GROUP_SIZE threads per group).
  #define GROUP_SIZE 16

  // Histogram counters. In each element, x/y/z/w are updated as the
  // red/green/blue/luminance counts by KHistogramGather.
  RWStructuredBuffer<uint4> _Histogram;

  // Image read by KHistogramGather, indexed by dispatch thread id.
  Texture2D<float4> _Source;

  CBUFFER_START(Params)
      // Non-zero: colors go through LinearToGammaSpace before being binned.
      uint _IsLinear;
      // x, y: extents the kernels bounds-check thread ids against;
      // y is also the target height used by KHistogramScale.
      float4 _Res;
      // Per-channel enable flags (x: red, y: green, z: blue, w: luminance);
      // a channel is processed when its flag is greater than zero.
      uint4 _Channels;
  CBUFFER_END

  // Per-thread-group partial histogram filled by KHistogramGather before it is
  // merged into _Histogram.
  groupshared uint4 gs_histogram[256];
  // Gather pass.
  //
  // Every thread group first accumulates a partial histogram in gs_histogram,
  // then adds that partial result into _Histogram with atomic adds. One thread
  // handles one texel of _Source; threads outside the _Res.xy extents contribute
  // nothing but still take part in both barriers.
  #pragma kernel KHistogramGather
  [numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
  void KHistogramGather(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID)
  {
      // Flatten the 2D in-group coordinate into a single slot index.
      const uint slot = groupThreadId.y * GROUP_SIZE + groupThreadId.x;

      // Reset this group's shared bins before anyone starts counting.
      if (slot < 256)
      {
          gs_histogram[slot] = uint4(0, 0, 0, 0);
      }

      GroupMemoryBarrierWithGroupSync();

      const bool insideImage = dispatchThreadId.x < (uint)_Res.x
                            && dispatchThreadId.y < (uint)_Res.y;

      if (insideImage)
      {
          // The histogram is built from gamma-space values (like Photoshop & all),
          // so linear input is converted first.
          float3 rgb = saturate(_Source[dispatchThreadId].xyz);

          if (_IsLinear != 0u)
          {
              rgb = LinearToGammaSpace(rgb);
          }

          // Map [0, 1] color and luminance onto bin indices.
          const float luma = dot(rgb, float3(0.2125, 0.7154, 0.0721));
          const uint3 binRGB = (uint3)round(rgb * 255.0);
          const uint binLuma = (uint)round(luma * 255.0);

          // Count the texel in each enabled channel of the shared histogram.
          if (_Channels.x != 0u) { InterlockedAdd(gs_histogram[binRGB.x].x, 1); } // Red
          if (_Channels.y != 0u) { InterlockedAdd(gs_histogram[binRGB.y].y, 1); } // Green
          if (_Channels.z != 0u) { InterlockedAdd(gs_histogram[binRGB.z].z, 1); } // Blue
          if (_Channels.w != 0u) { InterlockedAdd(gs_histogram[binLuma].w, 1); }  // Luminance
      }

      GroupMemoryBarrierWithGroupSync();

      // Merge: each thread publishes one shared bin, skipping empty counts so no
      // atomic is issued for them.
      if (slot < 256)
      {
          const uint4 partial = gs_histogram[slot];

          if (_Channels.x != 0u && partial.x != 0u) { InterlockedAdd(_Histogram[slot].x, partial.x); } // Red
          if (_Channels.y != 0u && partial.y != 0u) { InterlockedAdd(_Histogram[slot].y, partial.y); } // Green
          if (_Channels.z != 0u && partial.z != 0u) { InterlockedAdd(_Histogram[slot].z, partial.z); } // Blue
          if (_Channels.w != 0u && partial.w != 0u) { InterlockedAdd(_Histogram[slot].w, partial.w); } // Luminance
      }
  }
  // Scaling pass.
  //
  // Finds the per-channel maximum over the first 256 elements of _Histogram with
  // a parallel reduction in group-shared memory, then rescales every bin so that
  // the tallest bin of each channel becomes _Res.y.
  //
  // NOTE(review): the kernel indexes _Histogram by group-local thread id only, so
  // it presumably expects to be dispatched as a single 16x16 group - confirm
  // against the caller.
  groupshared uint4 gs_pyramid[256];

  #pragma kernel KHistogramScale
  [numthreads(16,16,1)]
  void KHistogramScale(uint2 groupThreadId : SV_GroupThreadID)
  {
      const uint localThreadId = groupThreadId.y * 16 + groupThreadId.x;

      // Keep this thread's own bin in a register: the shared copy gets overwritten
      // by the reduction below, and this avoids a second read of _Histogram.
      const uint4 bin = _Histogram[localThreadId];
      gs_pyramid[localThreadId] = bin;
      GroupMemoryBarrierWithGroupSync();

      // Parallel reduction to find the max value; after the last step the
      // per-channel maximum sits in gs_pyramid[0].
      UNITY_UNROLL
      for (uint i = 256 >> 1; i > 0; i >>= 1)
      {
          if (localThreadId < i)
              gs_pyramid[localThreadId] = max(gs_pyramid[localThreadId], gs_pyramid[localThreadId + i]);

          // Kept outside the branch so every thread in the group reaches it.
          GroupMemoryBarrierWithGroupSync();
      }

      // Actual scaling.
      // A channel whose bins are all zero has a maximum of 0. Clamp it to 1 so
      // the division stays finite; those bins are 0 and remain 0 after scaling
      // instead of going through a 0 * inf = NaN -> uint conversion.
      const float4 peak = (float4)max(gs_pyramid[0], uint4(1u, 1u, 1u, 1u));
      const float4 factor = _Res.y / peak;
      _Histogram[localThreadId] = (uint4)round((float4)bin * factor);
  }
  // Clear pass.
  //
  // Each thread whose dispatch id lies inside the _Res.xy extents zeroes one
  // element of _Histogram, at row-major offset y * width + x.
  //
  // NOTE(review): the addressed range covers a _Res.x by _Res.y grid; confirm
  // that the _Res value and dispatch size used for this kernel match the length
  // of the _Histogram buffer.
  #pragma kernel KHistogramClear
  [numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
  void KHistogramClear(uint2 dispatchThreadId : SV_DispatchThreadID)
  {
      // Truncate the resolution once and use the same integers for both the
      // bounds test and the element index, so the two always agree and the
      // offset is computed exactly rather than through floating point.
      const uint2 res = (uint2)_Res.xy;

      if (dispatchThreadId.x < res.x && dispatchThreadId.y < res.y)
          _Histogram[dispatchThreadId.y * res.x + dispatchThreadId.x] = uint4(0u, 0u, 0u, 0u);
  }