// This file is part of the FidelityFX SDK. // // Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #ifndef FFX_FSR2_SAMPLE_H #define FFX_FSR2_SAMPLE_H // suppress warnings #ifdef FFX_HLSL #pragma warning(disable: 4008) // potentially divide by zero #endif //FFX_HLSL struct FetchedBilinearSamples { FfxFloat32x4 fColor00; FfxFloat32x4 fColor10; FfxFloat32x4 fColor01; FfxFloat32x4 fColor11; }; struct FetchedBicubicSamples { FfxFloat32x4 fColor00; FfxFloat32x4 fColor10; FfxFloat32x4 fColor20; FfxFloat32x4 fColor30; FfxFloat32x4 fColor01; FfxFloat32x4 fColor11; FfxFloat32x4 fColor21; FfxFloat32x4 fColor31; FfxFloat32x4 fColor02; FfxFloat32x4 fColor12; FfxFloat32x4 fColor22; FfxFloat32x4 fColor32; FfxFloat32x4 fColor03; FfxFloat32x4 fColor13; FfxFloat32x4 fColor23; FfxFloat32x4 fColor33; }; #if FFX_HALF struct FetchedBilinearSamplesMin16 { FFX_MIN16_F4 fColor00; FFX_MIN16_F4 fColor10; FFX_MIN16_F4 fColor01; FFX_MIN16_F4 fColor11; }; struct FetchedBicubicSamplesMin16 { FFX_MIN16_F4 fColor00; FFX_MIN16_F4 fColor10; FFX_MIN16_F4 fColor20; FFX_MIN16_F4 fColor30; FFX_MIN16_F4 fColor01; FFX_MIN16_F4 fColor11; FFX_MIN16_F4 fColor21; FFX_MIN16_F4 fColor31; FFX_MIN16_F4 fColor02; FFX_MIN16_F4 fColor12; FFX_MIN16_F4 fColor22; FFX_MIN16_F4 fColor32; FFX_MIN16_F4 fColor03; FFX_MIN16_F4 fColor13; FFX_MIN16_F4 fColor23; FFX_MIN16_F4 fColor33; }; #else //FFX_HALF #define FetchedBicubicSamplesMin16 FetchedBicubicSamples #define FetchedBilinearSamplesMin16 FetchedBilinearSamples #endif //FFX_HALF FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) { return A + (B - A) * t; } FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac) { FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); return fColorXY; } #if FFX_HALF FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t) { return A + (B - A) * t; } FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac) { FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); return fColorXY; } #endif FfxFloat32 Lanczos2NoClamp(FfxFloat32 x) { const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); } FfxFloat32 Lanczos2(FfxFloat32 x) { x = ffxMin(abs(x), 2.0f); return Lanczos2NoClamp(x); } #if FFX_HALF #if 0 FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) { const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); } #endif FFX_MIN16_F Lanczos2(FFX_MIN16_F x) { x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); return FFX_MIN16_F(Lanczos2NoClamp(x)); } #endif //FFX_HALF // FSR1 lanczos approximation. Input is x*x and must be <= 4. FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2) { FfxFloat32 a = (2.0f / 5.0f) * x2 - 1; FfxFloat32 b = (1.0f / 4.0f) * x2 - 1; return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b); } #if FFX_HALF FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) { FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1); FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); } #endif //FFX_HALF FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) { x2 = ffxMin(x2, 4.0f); return Lanczos2ApproxSqNoClamp(x2); } #if FFX_HALF FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) { x2 = ffxMin(x2, FFX_MIN16_F(4.0f)); return Lanczos2ApproxSqNoClamp(x2); } #endif //FFX_HALF FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) { return Lanczos2ApproxSqNoClamp(x * x); } #if FFX_HALF FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x) { return Lanczos2ApproxSqNoClamp(x * x); } #endif //FFX_HALF FfxFloat32 Lanczos2Approx(FfxFloat32 x) { return Lanczos2ApproxSq(x * x); } #if FFX_HALF FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x) { return Lanczos2ApproxSq(x * x); } #endif //FFX_HALF FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) { return SampleLanczos2Weight(abs(x)); } #if FFX_HALF FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x) { return FFX_MIN16_F(SampleLanczos2Weight(abs(x))); } #endif //FFX_HALF FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) { FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t); FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t); FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t); FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t); return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); } #if FFX_HALF FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) { FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t); FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t); FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t); FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t); return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); } #endif FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) { FfxFloat32 fWeight0 = Lanczos2(-1.f - t); FfxFloat32 fWeight1 = Lanczos2(-0.f - t); FfxFloat32 fWeight2 = Lanczos2(+1.f - t); FfxFloat32 fWeight3 = Lanczos2(+2.f - t); return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); } FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) { FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); // Deringing // TODO: only use 4 by checking jitter const FfxInt32 iDeringingSampleCount = 4; const FfxFloat32x4 fDeringingSamples[4] = { Samples.fColor11, Samples.fColor21, Samples.fColor12, Samples.fColor22, }; FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; FFX_UNROLL for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); } fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); return fColorXY; } #if FFX_HALF FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) { FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t); FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t); FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t); FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t); return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); } FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) { FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); // Deringing // TODO: only use 4 by checking jitter const FfxInt32 iDeringingSampleCount = 4; const FFX_MIN16_F4 fDeringingSamples[4] = { Samples.fColor11, Samples.fColor21, Samples.fColor12, Samples.fColor22, }; FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; FFX_UNROLL for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); } fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); return fColorXY; } #endif //FFX_HALF FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) { FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); // Deringing // TODO: only use 4 by checking jitter const FfxInt32 iDeringingSampleCount = 4; const FfxFloat32x4 fDeringingSamples[4] = { Samples.fColor11, Samples.fColor21, Samples.fColor12, Samples.fColor22, }; FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; FFX_UNROLL for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); } fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); return fColorXY; } #if FFX_HALF FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) { FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); // Deringing // TODO: only use 4 by checking jitter const FfxInt32 iDeringingSampleCount = 4; const FFX_MIN16_F4 fDeringingSamples[4] = { Samples.fColor11, Samples.fColor21, Samples.fColor12, Samples.fColor22, }; FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; FFX_UNROLL for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); } fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); return fColorXY; } #endif //FFX_HALF FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) { FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t); FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t); FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t); FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t); return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); } #if FFX_HALF FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) { FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t); FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t); FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t); FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t); return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); } #endif //FFX_HALF FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) { FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); // Deringing // TODO: only use 4 by checking jitter const FfxInt32 iDeringingSampleCount = 4; const FfxFloat32x4 fDeringingSamples[4] = { Samples.fColor11, Samples.fColor21, Samples.fColor12, Samples.fColor22, }; FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; FFX_UNROLL for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); } fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); return fColorXY; } #if FFX_HALF FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) { FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); // Deringing // TODO: only use 4 by checking jitter const FfxInt32 iDeringingSampleCount = 4; const FFX_MIN16_F4 fDeringingSamples[4] = { Samples.fColor11, Samples.fColor21, Samples.fColor12, Samples.fColor22, }; FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; FFX_UNROLL for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); } fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); return fColorXY; } #endif // Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant. FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) { FfxInt32x2 result = iPxSample + iPxOffset; result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; return result; } #if FFX_HALF FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) { FFX_MIN16_I2 result = iPxSample + iPxOffset; result.x = (iPxOffset.x < FFX_MIN16_I(0)) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; result.x = (iPxOffset.x > FFX_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; result.y = (iPxOffset.y < FFX_MIN16_I(0)) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; result.y = (iPxOffset.y > FFX_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; return result; } #endif //FFX_HALF #define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \ SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ { \ SampleType Samples; \ \ Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \ Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \ Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \ Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \ \ Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \ Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \ \ Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \ Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \ \ Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \ Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \ Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \ Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \ \ return Samples; \ } #define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \ DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) #define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \ DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) #define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \ SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ { \ SampleType Samples; \ Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ return Samples; \ } #define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \ DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) #define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \ DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) // BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) // is common, so iPxSample can "jitter" #define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ { \ FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ /* Clamp base coords */ \ fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ /* */ \ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ return fColorXY; \ } #define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ { \ FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ /* Clamp base coords */ \ fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ /* */ \ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ return fColorXY; \ } #define FFX_FSR2_CONCAT_ID(x, y) x ## y #define FFX_FSR2_CONCAT(x, y) FFX_FSR2_CONCAT_ID(x, y) #define FFX_FSR2_SAMPLER_1D_0 Lanczos2 #define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT #define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx #define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x) #endif //!defined( FFX_FSR2_SAMPLE_H )