diff --git a/Ryujinx.Graphics.GAL/IProgram.cs b/Ryujinx.Graphics.GAL/IProgram.cs
index 5ab8346f2..272a2f7d6 100644
--- a/Ryujinx.Graphics.GAL/IProgram.cs
+++ b/Ryujinx.Graphics.GAL/IProgram.cs
@@ -4,6 +4,8 @@ namespace Ryujinx.Graphics.GAL
 {
     public interface IProgram : IDisposable
     {
+        ProgramLinkStatus CheckProgramLink(bool blocking); // Reports the program's link status; presumably waits for the link to finish when blocking is true - TODO confirm per backend.
+
         byte[] GetBinary();
     }
 }
diff --git a/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs b/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs
new file mode 100644
index 000000000..5ca1be8c3
--- /dev/null
+++ b/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs
@@ -0,0 +1,9 @@
+namespace Ryujinx.Graphics.GAL
+{
+    public enum ProgramLinkStatus // Value returned by IProgram.CheckProgramLink.
+    {
+        Incomplete, // The link has not finished yet; pollers treat the step as still in progress.
+        Success,    // The link finished; pollers report success to their callback.
+        Failure     // The link finished without success; pollers report failure to their callback.
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index cd20a5a23..96b836c52 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -10,6 +10,7 @@ using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Threading;
+using System.Threading.Tasks;
 
 namespace Ryujinx.Graphics.Gpu.Shader
 {
@@ -102,234 +103,327 @@ namespace Ryujinx.Graphics.Gpu.Shader
                     progressReportThread.Start(progressReportEvent);
                 }
 
-                for (int programIndex = 0; programIndex < guestProgramList.Length; programIndex++)
+                // Make sure these are initialized before doing compilation.
+                Capabilities caps = _context.Capabilities;
+
+                int maxTaskCount = Math.Min(Environment.ProcessorCount, 8);
+                int programIndex = 0;
+                List<ShaderCompileTask> activeTasks = new List<ShaderCompileTask>();
+
+                // This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background.
+                // The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once.
+                
+                while (programIndex < guestProgramList.Length || activeTasks.Count > 0)
                 {
-                    Hash128 key = guestProgramList[programIndex];
-
-                    byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key);
-                    bool hasHostCache = hostProgramBinary != null;
-
-                    IProgram hostProgram = null;
-
-                    // If the program sources aren't in the cache, compile from saved guest program.
-                    byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key);
-
-                    if (guestProgram == null)
+                    if (activeTasks.Count < maxTaskCount && programIndex < guestProgramList.Length)
                     {
-                        Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)");
+                        // Begin a new shader compilation.
+                        Hash128 key = guestProgramList[programIndex];
 
-                        // Should not happen, but if someone messed with the cache it's better to catch it.
-                        invalidEntries?.Add(key);
+                        byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key);
+                        bool hasHostCache = hostProgramBinary != null;
 
-                        continue;
-                    }
+                        IProgram hostProgram = null;
 
-                    ReadOnlySpan<byte> guestProgramReadOnlySpan = guestProgram;
+                        // If the program sources aren't in the cache, compile from saved guest program.
+                        byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key);
 
-                    ReadOnlySpan<GuestShaderCacheEntry> cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader);
-
-                    if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute)
-                    {
-                        Debug.Assert(cachedShaderEntries.Length == 1);
-
-                        GuestShaderCacheEntry entry = cachedShaderEntries[0];
-
-                        HostShaderCacheEntry[] hostShaderEntries = null;
-
-                        // Try loading host shader binary.
-                        if (hasHostCache)
+                        if (guestProgram == null)
                         {
-                            hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan<byte> hostProgramBinarySpan);
-                            hostProgramBinary = hostProgramBinarySpan.ToArray();
-                            hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary);
+                            Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)");
+
+                            // Should not happen, but if someone messed with the cache it's better to catch it.
+                            invalidEntries?.Add(key);
+                            _shaderCount = ++programIndex; // Advance past the orphan: `continue` skips the increment at the end of this branch, so the same hash would otherwise be retried forever.
+                            continue;
                         }
 
-                        bool isHostProgramValid = hostProgram != null;
+                        ReadOnlySpan<byte> guestProgramReadOnlySpan = guestProgram;
 
-                        ShaderProgram program;
-                        ShaderProgramInfo shaderProgramInfo;
+                        ReadOnlySpan<GuestShaderCacheEntry> cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader);
 
-                        // Reconstruct code holder.
-                        if (isHostProgramValid)
+                        if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute)
                         {
-                            program = new ShaderProgram(entry.Header.Stage, "");
-                            shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo();
+                            Debug.Assert(cachedShaderEntries.Length == 1);
+
+                            GuestShaderCacheEntry entry = cachedShaderEntries[0];
+
+                            HostShaderCacheEntry[] hostShaderEntries = null;
+
+                            // Try loading host shader binary.
+                            if (hasHostCache)
+                            {
+                                hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan<byte> hostProgramBinarySpan);
+                                hostProgramBinary = hostProgramBinarySpan.ToArray();
+                                hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary);
+                            }
+
+                            ShaderCompileTask task = new ShaderCompileTask();
+                            activeTasks.Add(task);
+
+                            task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) =>
+                            {
+                                ShaderProgram program = null;
+                                ShaderProgramInfo shaderProgramInfo = null;
+
+                                Task compileTask = Task.Run(() =>
+                                {
+                                    // Reconstruct code holder.
+                                    if (isHostProgramValid)
+                                    {
+                                        program = new ShaderProgram(entry.Header.Stage, "");
+                                        shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo();
+                                    }
+                                    else
+                                    {
+                                        IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+
+                                        program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo);
+                                    }
+                                });
+
+                                task.OnTask(compileTask, (bool _, ShaderCompileTask task) =>
+                                {
+                                    ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
+
+                                    // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
+                                    if (!isHostProgramValid)
+                                    {
+                                        Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
+
+                                        // Compile shader and create program as the shader program binary got invalidated.
+                                        shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
+                                        hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
+
+                                        task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) =>
+                                        {
+                                            if (!isNewProgramValid)
+                                            {
+                                                return true;
+                                            }
+
+                                            // As the host program was invalidated, save the new entry in the cache.
+                                            hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader });
+
+                                            if (!isReadOnly)
+                                            {
+                                                if (hasHostCache)
+                                                {
+                                                    _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary);
+                                                }
+                                                else
+                                                {
+                                                    Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)");
+
+                                                    _cacheManager.AddHostProgram(ref key, hostProgramBinary);
+                                                }
+                                            }
+
+                                            _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader));
+
+                                            return true;
+                                        });
+
+                                        return false; // Not finished: still need to compile the host program.
+                                    }
+                                    else
+                                    {
+                                        _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader));
+
+                                        return true;
+                                    }
+                                });
+
+                                return false; // Not finished: translating the shaders.
+                            });
+                            
                         }
                         else
                         {
-                            IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+                            Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages);
 
-                            program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo);
-                        }
+                            ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length];
+                            List<ShaderProgram> shaderPrograms = new List<ShaderProgram>();
 
-                        ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
+                            TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader);
 
-                        // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
-                        if (hostProgram == null)
-                        {
-                            Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
+                            TranslationFlags flags = DefaultFlags;
 
-                            // Compile shader and create program as the shader program binary got invalidated.
-                            shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
-                            hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
-
-                            // As the host program was invalidated, save the new entry in the cache.
-                            hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader });
-
-                            if (!isReadOnly)
+                            if (tfd != null)
                             {
-                                if (hasHostCache)
-                                {
-                                    _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary);
-                                }
-                                else
-                                {
-                                    Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)");
-
-                                    _cacheManager.AddHostProgram(ref key, hostProgramBinary);
-                                }
+                                flags |= TranslationFlags.Feedback;
                             }
+
+                            TranslationCounts counts = new TranslationCounts();
+
+                            HostShaderCacheEntry[] hostShaderEntries = null;
+
+                            // Try loading host shader binary.
+                            if (hasHostCache)
+                            {
+                                hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan<byte> hostProgramBinarySpan);
+                                hostProgramBinary = hostProgramBinarySpan.ToArray();
+                                hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary);
+                            }
+
+                            ShaderCompileTask task = new ShaderCompileTask();
+                            activeTasks.Add(task);
+
+                            GuestShaderCacheEntry[] entries = cachedShaderEntries.ToArray();
+
+                            task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) =>
+                            {
+                                Task compileTask = Task.Run(() =>
+                                {
+                                    // Reconstruct code holder.
+                                    for (int i = 0; i < entries.Length; i++)
+                                    {
+                                        GuestShaderCacheEntry entry = entries[i];
+
+                                        if (entry == null)
+                                        {
+                                            continue;
+                                        }
+
+                                        ShaderProgram program;
+
+                                        if (entry.Header.SizeA != 0)
+                                        {
+                                            ShaderProgramInfo shaderProgramInfo;
+
+                                            if (isHostProgramValid)
+                                            {
+                                                program = new ShaderProgram(entry.Header.Stage, "");
+                                                shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
+                                            }
+                                            else
+                                            {
+                                                IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+
+                                                TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, flags, counts);
+                                                TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, flags | TranslationFlags.VertexA, counts);
+
+                                                program = translatorContext.Translate(out shaderProgramInfo, translatorContext2);
+                                            }
+
+                                            // NOTE: Vertex B comes first in the shader cache.
+                                            byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray();
+                                            byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray();
+
+                                            shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
+                                        }
+                                        else
+                                        {
+                                            ShaderProgramInfo shaderProgramInfo;
+
+                                            if (isHostProgramValid)
+                                            {
+                                                program = new ShaderProgram(entry.Header.Stage, "");
+                                                shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
+                                            }
+                                            else
+                                            {
+                                                IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+
+                                                program = Translator.CreateContext(0, gpuAccessor, flags, counts).Translate(out shaderProgramInfo);
+                                            }
+
+                                            shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
+                                        }
+
+                                        shaderPrograms.Add(program);
+                                    }
+                                });
+
+                                task.OnTask(compileTask, (bool _, ShaderCompileTask task) =>
+                                {
+                                    // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
+                                    if (!isHostProgramValid)
+                                    {
+                                        Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
+
+                                        List<IShader> hostShaders = new List<IShader>();
+
+                                        // Compile shaders and create program as the shader program binary got invalidated.
+                                        for (int stage = 0; stage < Constants.ShaderStages; stage++)
+                                        {
+                                            ShaderProgram program = shaders[stage]?.Program;
+
+                                            if (program == null)
+                                            {
+                                                continue;
+                                            }
+
+                                            IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code);
+
+                                            shaders[stage].HostShader = hostShader;
+
+                                            hostShaders.Add(hostShader);
+                                        }
+
+                                        hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd);
+
+                                        task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) =>
+                                        {
+                                            if (!isNewProgramValid)
+                                            {
+                                                return true;
+                                            }
+
+                                            // As the host program was invalidated, save the new entry in the cache.
+                                            hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders);
+
+                                            if (!isReadOnly)
+                                            {
+                                                if (hasHostCache)
+                                                {
+                                                    _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary);
+                                                }
+                                                else
+                                                {
+                                                    Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)");
+
+                                                    _cacheManager.AddHostProgram(ref key, hostProgramBinary);
+                                                }
+                                            }
+
+                                            _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders));
+
+                                            return true;
+                                        });
+
+                                        return false; // Not finished: still need to compile the host program.
+                                    }
+                                    else
+                                    {
+                                        _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders));
+
+                                        return true;
+                                    }
+                                });
+
+                                return false; // Not finished: translating the shaders.
+                            });
                         }
 
-                        _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader));
+                        _shaderCount = ++programIndex;
                     }
-                    else
+
+                    // Process the queue.
+                    for (int i = 0; i < activeTasks.Count; i++)
                     {
-                        Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages);
+                        ShaderCompileTask task = activeTasks[i];
 
-                        ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length];
-                        List<ShaderProgram> shaderPrograms = new List<ShaderProgram>();
-
-                        TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader);
-
-                        TranslationFlags flags = DefaultFlags;
-
-                        if (tfd != null)
+                        if (task.IsDone())
                         {
-                            flags |= TranslationFlags.Feedback;
+                            activeTasks.RemoveAt(i--);
                         }
-
-                        TranslationCounts counts = new TranslationCounts();
-
-                        HostShaderCacheEntry[] hostShaderEntries = null;
-
-                        // Try loading host shader binary.
-                        if (hasHostCache)
-                        {
-                            hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan<byte> hostProgramBinarySpan);
-                            hostProgramBinary = hostProgramBinarySpan.ToArray();
-                            hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary);
-                        }
-
-                        bool isHostProgramValid = hostProgram != null;
-
-                        // Reconstruct code holder.
-                        for (int i = 0; i < cachedShaderEntries.Length; i++)
-                        {
-                            GuestShaderCacheEntry entry = cachedShaderEntries[i];
-
-                            if (entry == null)
-                            {
-                                continue;
-                            }
-
-                            ShaderProgram program;
-
-                            if (entry.Header.SizeA != 0)
-                            {
-                                ShaderProgramInfo shaderProgramInfo;
-
-                                if (isHostProgramValid)
-                                {
-                                    program = new ShaderProgram(entry.Header.Stage, "");
-                                    shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
-                                }
-                                else
-                                {
-                                    IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
-
-                                    TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, flags, counts);
-                                    TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, flags | TranslationFlags.VertexA, counts);
-
-                                    program = translatorContext.Translate(out shaderProgramInfo, translatorContext2);
-                                }
-
-                                // NOTE: Vertex B comes first in the shader cache.
-                                byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray();
-                                byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray();
-
-                                shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
-                            }
-                            else
-                            {
-                                ShaderProgramInfo shaderProgramInfo;
-
-                                if (isHostProgramValid)
-                                {
-                                    program = new ShaderProgram(entry.Header.Stage, "");
-                                    shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
-                                }
-                                else
-                                {
-                                    IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
-
-                                    program = Translator.CreateContext(0, gpuAccessor, flags, counts).Translate(out shaderProgramInfo);
-                                }
-
-                                shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
-                            }
-
-                            shaderPrograms.Add(program);
-                        }
-
-                        // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
-                        if (!isHostProgramValid)
-                        {
-                            Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
-
-                            List<IShader> hostShaders = new List<IShader>();
-
-                            // Compile shaders and create program as the shader program binary got invalidated.
-                            for (int stage = 0; stage < Constants.ShaderStages; stage++)
-                            {
-                                ShaderProgram program = shaders[stage]?.Program;
-
-                                if (program == null)
-                                {
-                                    continue;
-                                }
-
-                                IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code);
-
-                                shaders[stage].HostShader = hostShader;
-
-                                hostShaders.Add(hostShader);
-                            }
-
-                            hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd);
-
-                            // As the host program was invalidated, save the new entry in the cache.
-                            hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders);
-
-                            if (!isReadOnly)
-                            {
-                                if (hasHostCache)
-                                {
-                                    _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary);
-                                }
-                                else
-                                {
-                                    Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)");
-
-                                    _cacheManager.AddHostProgram(ref key, hostProgramBinary);
-                                }
-                            }
-                        }
-
-                        _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders));
                     }
 
-                    _shaderCount = programIndex + 1;
+                    if (activeTasks.Count == maxTaskCount)
+                    {
+                        Thread.Sleep(1);
+                    }
                 }
 
                 if (!isReadOnly)
@@ -458,6 +552,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
                 IProgram hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
 
+                hostProgram.CheckProgramLink(true);
+
                 byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader });
 
                 cpShader = new ShaderBundle(hostProgram, shader);
@@ -598,6 +694,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
                 IProgram hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd);
 
+                hostProgram.CheckProgramLink(true);
+
                 byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders);
 
                 gpShaders = new ShaderBundle(hostProgram, shaders);
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs
new file mode 100644
index 000000000..cc1b322ba
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs
@@ -0,0 +1,81 @@
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Threading.Tasks;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    delegate bool ShaderCompileTaskCallback(bool success, ShaderCompileTask task);
+
+    /// <summary>
+    /// A class that represents a shader compilation.
+    /// </summary>
+    class ShaderCompileTask
+    {
+        private bool _compiling;
+
+        private Task _programsTask;
+        private IProgram _program;
+
+        private ShaderCompileTaskCallback _action;
+
+        /// <summary>
+        /// Check the completion status of the shader compile task, and run callbacks on step completion.
+        /// Calling this periodically is required to progress through steps of the compilation.
+        /// </summary>
+        /// <returns>True if the task is complete, false if it is in progress</returns>
+        public bool IsDone()
+        {
+            if (_compiling)
+            {
+                ProgramLinkStatus status = _program.CheckProgramLink(false);
+
+                if (status != ProgramLinkStatus.Incomplete)
+                {
+                    return _action(status == ProgramLinkStatus.Success, this);
+                }
+            }
+            else
+            {
+                // Waiting on the task.
+
+                if (_programsTask.IsCompleted)
+                {
+                    return _action(true, this);
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Register a callback that IsDone runs once the specified task has completed.
+        /// </summary>
+        /// <param name="task">The task object that needs to complete</param>
+        /// <param name="action">The action to perform when it is complete</param>
+        public void OnTask(Task task, ShaderCompileTaskCallback action)
+        {
+            _compiling = false;
+
+            _programsTask = task;
+            _action = action;
+        }
+
+        /// <summary>
+        /// Register a callback that IsDone runs once the program has linked; a null program invokes it immediately with success = false.
+        /// </summary>
+        /// <param name="program">The program that needs to be linked</param>
+        /// <param name="action">The action to perform when linking is complete</param>
+        public void OnCompiled(IProgram program, ShaderCompileTaskCallback action)
+        {
+            _compiling = true;
+
+            _program = program;
+            _action = action;
+
+            if (program == null)
+            {
+                action(false, this);
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs
index 6795b4234..08b0e6af8 100644
--- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs
+++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs
@@ -10,6 +10,7 @@ namespace Ryujinx.Graphics.OpenGL
         private static readonly Lazy<bool> _supportsPolygonOffsetClamp        = new Lazy<bool>(() => HasExtension("GL_EXT_polygon_offset_clamp"));
         private static readonly Lazy<bool> _supportsViewportSwizzle           = new Lazy<bool>(() => HasExtension("GL_NV_viewport_swizzle"));
         private static readonly Lazy<bool> _supportsSeamlessCubemapPerTexture = new Lazy<bool>(() => HasExtension("GL_ARB_seamless_cubemap_per_texture"));
+        private static readonly Lazy<bool> _supportsParallelShaderCompile     = new Lazy<bool>(() => HasExtension("GL_ARB_parallel_shader_compile"));
 
         private static readonly Lazy<int> _maximumComputeSharedMemorySize = new Lazy<int>(() => GetLimit(All.MaxComputeSharedMemorySize));
         private static readonly Lazy<int> _storageBufferOffsetAlignment   = new Lazy<int>(() => GetLimit(All.ShaderStorageBufferOffsetAlignment));
@@ -34,6 +35,7 @@ namespace Ryujinx.Graphics.OpenGL
         public static bool SupportsPolygonOffsetClamp        => _supportsPolygonOffsetClamp.Value;
         public static bool SupportsViewportSwizzle           => _supportsViewportSwizzle.Value;
         public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value;
+        public static bool SupportsParallelShaderCompile     => _supportsParallelShaderCompile.Value;
         public static bool SupportsNonConstantTextureOffset  => _gpuVendor.Value == GpuVendor.Nvidia;
         public static bool RequiresSyncFlush                 => _gpuVendor.Value == GpuVendor.Amd || _gpuVendor.Value == GpuVendor.IntelWindows || _gpuVendor.Value == GpuVendor.IntelUnix;
 
diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs
index d39e181d9..decc75b1e 100644
--- a/Ryujinx.Graphics.OpenGL/Program.cs
+++ b/Ryujinx.Graphics.OpenGL/Program.cs
@@ -13,11 +13,26 @@ namespace Ryujinx.Graphics.OpenGL
     {
         public int Handle { get; private set; }
 
-        public int FragmentIsBgraUniform { get; }
-        public int FragmentRenderScaleUniform { get; }
-        public int ComputeRenderScaleUniform { get; }
+        public int FragmentIsBgraUniform { get; private set; }
+        public int FragmentRenderScaleUniform { get; private set; }
+        public int ComputeRenderScaleUniform { get; private set; }
 
-        public bool IsLinked { get; private set; }
+        public bool IsLinked
+        {
+            get
+            {
+                if (_status == ProgramLinkStatus.Incomplete)
+                {
+                    CheckProgramLink(true);
+                }
+
+                return _status == ProgramLinkStatus.Success;
+            }
+        }
+
+        private bool _initialized;
+        private ProgramLinkStatus _status = ProgramLinkStatus.Incomplete;
+        private IShader[] _shaders;
 
         public Program(IShader[] shaders, TransformFeedbackDescriptor[] transformFeedbackDescriptors)
         {
@@ -82,18 +97,7 @@ namespace Ryujinx.Graphics.OpenGL
 
             GL.LinkProgram(Handle);
 
-            for (int index = 0; index < shaders.Length; index++)
-            {
-                int shaderHandle = ((Shader)shaders[index]).Handle;
-
-                GL.DetachShader(Handle, shaderHandle);
-            }
-
-            CheckProgramLink();
-
-            FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra");
-            FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale");
-            ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale");
+            _shaders = shaders;
         }
 
         public Program(ReadOnlySpan<byte> code)
@@ -109,32 +113,60 @@ namespace Ryujinx.Graphics.OpenGL
                     GL.ProgramBinary(Handle, binaryFormat, (IntPtr)ptr, code.Length - 4);
                 }
             }
-
-            CheckProgramLink();
-
-            FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra");
-            FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale");
-            ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale");
         }
 
         public void Bind()
         {
+            if (!_initialized)
+            {
+                FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra");
+                FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale");
+                ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale");
+
+                _initialized = true;
+            }
+
             GL.UseProgram(Handle);
         }
 
-        private void CheckProgramLink()
+        public ProgramLinkStatus CheckProgramLink(bool blocking)
         {
+            if (!blocking && HwCapabilities.SupportsParallelShaderCompile)
+            {
+                GL.GetProgram(Handle, (GetProgramParameterName)ArbParallelShaderCompile.CompletionStatusArb, out int completed);
+
+                if (completed == 0)
+                {
+                    return ProgramLinkStatus.Incomplete;
+                }
+            }
+
             GL.GetProgram(Handle, GetProgramParameterName.LinkStatus, out int status);
 
+            if (_shaders != null)
+            {
+                for (int index = 0; index < _shaders.Length; index++)
+                {
+                    int shaderHandle = ((Shader)_shaders[index]).Handle;
+
+                    GL.DetachShader(Handle, shaderHandle);
+                }
+
+                _shaders = null;
+            }
+
             if (status == 0)
             {
                 // Use GL.GetProgramInfoLog(Handle), it may be too long to print on the log.
+                _status = ProgramLinkStatus.Failure;
                 Logger.Debug?.Print(LogClass.Gpu, "Shader linking failed.");
             }
             else
             {
-                IsLinked = true;
+                _status = ProgramLinkStatus.Success;
             }
+
+            return _status;
         }
 
         public byte[] GetBinary()
diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs
index cc8fa195d..0382ba86e 100644
--- a/Ryujinx.Graphics.OpenGL/Renderer.cs
+++ b/Ryujinx.Graphics.OpenGL/Renderer.cs
@@ -130,6 +130,11 @@ namespace Ryujinx.Graphics.OpenGL
 
             PrintGpuInformation();
 
+            if (HwCapabilities.SupportsParallelShaderCompile)
+            {
+                GL.Arb.MaxShaderCompilerThreads(Math.Min(Environment.ProcessorCount, 8));
+            }
+
             _counters.Initialize();
         }
 
@@ -177,16 +182,7 @@ namespace Ryujinx.Graphics.OpenGL
 
         public IProgram LoadProgramBinary(byte[] programBinary)
         {
-            Program program = new Program(programBinary);
-
-            if (program.IsLinked)
-            {
-                return program;
-            }
-
-            program.Dispose();
-
-            return null;
+            return new Program(programBinary);
         }
 
         public void CreateSync(ulong id)