You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

58 lines
2.2 KiB

  // Vignette weighting switch. Leave it at 1 to weight pixels by their distance to
  // the screen center; set it to 0 (or comment the define out) to make every pixel
  // contribute the same weight of 1 to the histogram.
  #define USE_VIGNETTE_WEIGHTING 1

  #include "Common.cginc"
  #include "EyeAdaptation.cginc"

  // Destination histogram. Each thread group atomically adds its per-bin totals
  // into this buffer at the end of KEyeHistogram.
  RWStructuredBuffer<uint> _Histogram;

  // Image being analysed. KEyeHistogram reads the xyz channels of one texel per thread.
  Texture2D<float4> _Source;

  CBUFFER_START(Params)
      // x: scale, y: offset, z: width, w: height
      // xy is forwarded to GetHistogramBinFromLuminance; zw bounds the texels that
      // are processed and normalizes their coordinates for the vignette.
      float4 _ScaleOffsetRes;
  CBUFFER_END

  // Per-thread-group partial histogram, accumulated in group-shared memory before
  // being merged into _Histogram.
  // NOTE(review): HISTOGRAM_BINS is presumably defined by one of the includes above - confirm.
  groupshared uint gs_histogram[HISTOGRAM_BINS];

  #pragma kernel KEyeHistogram
  // Builds a luminance histogram of _Source.
  //
  // Every thread handles one texel: it derives a bin from the texel's brightest
  // channel and atomically adds a weight to that bin in the group-shared histogram.
  // Once the whole group is done, the group-shared bins are atomically added to the
  // global _Histogram buffer.
  //
  // dispatchThreadId : texel coordinate handled by this thread.
  // groupThreadId    : coordinate of this thread inside its thread group.
  //
  // Implementation note: this is a plain atomic-ops histogram gather. Alternatives
  // (no atomics / heavier LDS usage) were tried, but this variant was the fastest
  // on desktop (Nvidia - Kepler/Maxwell) and PS4. GCN/desktop still has to be
  // measured; since it runs very fast on PS4 it is expected to do well there (?).
  [numthreads(HISTOGRAM_THREAD_X, HISTOGRAM_THREAD_Y, 1)]
  void KEyeHistogram(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID)
  {
      // Flatten the 2D in-group coordinate so each thread can own at most one bin.
      const uint flatIndex = groupThreadId.x + groupThreadId.y * HISTOGRAM_THREAD_X;

      // Only threads whose flat index maps to a bin clear (and later merge) it, so
      // all bins are covered only when the group has at least HISTOGRAM_BINS threads.
      const bool ownsBin = flatIndex < HISTOGRAM_BINS;

      // Reset the group-shared histogram.
      if (ownsBin)
      {
          gs_histogram[flatIndex] = 0u;
      }

      // Every bin must be zeroed before any thread starts accumulating.
      GroupMemoryBarrierWithGroupSync();

      // Accumulate this group's histogram, skipping threads that fall outside the
      // width/height given in _ScaleOffsetRes.zw.
      const bool insideX = dispatchThreadId.x < (uint)_ScaleOffsetRes.z;
      const bool insideY = dispatchThreadId.y < (uint)_ScaleOffsetRes.w;

      if (insideX && insideY)
      {
  #if USE_VIGNETTE_WEIGHTING
          // Vignette weighting: texels near the screen center count for more.
          float2 normalizedPos = float2(dispatchThreadId) / _ScaleOffsetRes.zw;
          float2 centerOffset = abs(normalizedPos - 0.5);
          float falloff = Pow2(saturate(1.0 - dot(centerOffset, centerOffset)));
          uint contribution = (uint)(64.0 * falloff);
  #else
          uint contribution = 1u;
  #endif

          float3 rgb = _Source[dispatchThreadId].xyz;

          // The brightest channel looks more natural than a Rec.709 luminance for some reason.
          float brightness = Max3(rgb);

          // NOTE(review): the helper presumably returns a normalized bin position in
          // [0, 1]; the index below is only in range if it does - confirm in the include.
          float binPosition = GetHistogramBinFromLuminance(brightness, _ScaleOffsetRes.xy);
          uint bin = (uint)(binPosition * (HISTOGRAM_BINS - 1u));

          InterlockedAdd(gs_histogram[bin], contribution);
      }

      // Wait until the whole group has finished accumulating before merging.
      GroupMemoryBarrierWithGroupSync();

      // Merge the group's bins into the global histogram.
      if (ownsBin)
      {
          InterlockedAdd(_Histogram[flatIndex], gs_histogram[flatIndex]);
      }
  }