From 6ebe5bb4066f3f00ef8fce9ce49d5ab805324aeb Mon Sep 17 00:00:00 2001 From: Isaac Marovitz <42140194+IsaacMarovitz@users.noreply.github.com> Date: Thu, 20 Jun 2024 12:59:29 +0100 Subject: [PATCH] Buffer Conversions (#23) * Why is this not working * Revert helper shader changes for now * Byte Index Buffer Restride --- src/Ryujinx.Graphics.Metal/BufferHolder.cs | 59 ++++ src/Ryujinx.Graphics.Metal/BufferManager.cs | 10 + src/Ryujinx.Graphics.Metal/CacheByRange.cs | 333 ++++++++++++++++++ src/Ryujinx.Graphics.Metal/EncoderState.cs | 27 +- .../EncoderStateManager.cs | 164 +++++++-- src/Ryujinx.Graphics.Metal/HelperShader.cs | 72 +++- src/Ryujinx.Graphics.Metal/MetalRenderer.cs | 2 +- src/Ryujinx.Graphics.Metal/Pipeline.cs | 11 +- .../Ryujinx.Graphics.Metal.csproj | 1 + .../Shaders/ChangeBufferStride.metal | 52 +++ 10 files changed, 692 insertions(+), 39 deletions(-) create mode 100644 src/Ryujinx.Graphics.Metal/CacheByRange.cs create mode 100644 src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal diff --git a/src/Ryujinx.Graphics.Metal/BufferHolder.cs b/src/Ryujinx.Graphics.Metal/BufferHolder.cs index 7fe3530eb..c44c59eec 100644 --- a/src/Ryujinx.Graphics.Metal/BufferHolder.cs +++ b/src/Ryujinx.Graphics.Metal/BufferHolder.cs @@ -10,6 +10,8 @@ namespace Ryujinx.Graphics.Metal [SupportedOSPlatform("macos")] public class BufferHolder : IDisposable { + private CacheByRange _cachedConvertedBuffers; + public int Size { get; } private readonly IntPtr _map; @@ -271,9 +273,66 @@ namespace Ryujinx.Graphics.Metal _waitable.WaitForFences(offset, size); } + private bool BoundToRange(int offset, ref int size) + { + if (offset >= Size) + { + return false; + } + + size = Math.Min(Size - offset, size); + + return true; + } + + public Auto GetBufferI8ToI16(CommandBufferScoped cbs, int offset, int size) + { + if (!BoundToRange(offset, ref size)) + { + return null; + } + + var key = new I8ToI16CacheKey(_renderer); + + if (!_cachedConvertedBuffers.TryGetValue(offset, size, key, out var holder)) + { + holder = _renderer.BufferManager.Create((size * 2 + 3) & ~3); + + _renderer.HelperShader.ConvertI8ToI16(cbs, this, holder, offset, size); + + key.SetBuffer(holder.GetBuffer()); + + _cachedConvertedBuffers.Add(offset, size, key, holder); + } + + return holder.GetBuffer(); + } + + public bool TryGetCachedConvertedBuffer(int offset, int size, ICacheKey key, out BufferHolder holder) + { + return _cachedConvertedBuffers.TryGetValue(offset, size, key, out holder); + } + + public void AddCachedConvertedBuffer(int offset, int size, ICacheKey key, BufferHolder holder) + { + _cachedConvertedBuffers.Add(offset, size, key, holder); + } + + public void AddCachedConvertedBufferDependency(int offset, int size, ICacheKey key, Dependency dependency) + { + _cachedConvertedBuffers.AddDependency(offset, size, key, dependency); + } + + public void RemoveCachedConvertedBuffer(int offset, int size, ICacheKey key) + { + _cachedConvertedBuffers.Remove(offset, size, key); + } + + public void Dispose() { _buffer.Dispose(); + _cachedConvertedBuffers.Dispose(); _flushLock.EnterWriteLock(); diff --git a/src/Ryujinx.Graphics.Metal/BufferManager.cs b/src/Ryujinx.Graphics.Metal/BufferManager.cs index 8f6c2daa7..bf7f00901 100644 --- a/src/Ryujinx.Graphics.Metal/BufferManager.cs +++ b/src/Ryujinx.Graphics.Metal/BufferManager.cs @@ -153,6 +153,16 @@ namespace Ryujinx.Graphics.Metal return null; } + public Auto GetBufferI8ToI16(CommandBufferScoped cbs, BufferHandle handle, int offset, int size) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetBufferI8ToI16(cbs, offset, size); + } + + return null; + } + public PinnedSpan GetData(BufferHandle handle, int offset, int size) { if (TryGetBuffer(handle, out var holder)) diff --git a/src/Ryujinx.Graphics.Metal/CacheByRange.cs b/src/Ryujinx.Graphics.Metal/CacheByRange.cs new file mode 100644 index 000000000..d507dcaeb --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/CacheByRange.cs @@ -0,0 +1,333 @@ +using SharpMetal.Metal; +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; + +namespace Ryujinx.Graphics.Metal +{ + public interface ICacheKey : IDisposable + { + bool KeyEqual(ICacheKey other); + } + + [SupportedOSPlatform("macos")] + struct I8ToI16CacheKey : ICacheKey + { + // Used to notify the pipeline that bindings have invalidated on dispose. + private readonly MetalRenderer _renderer; + private Auto _buffer; + + public I8ToI16CacheKey(MetalRenderer renderer) + { + _renderer = renderer; + _buffer = null; + } + + public readonly bool KeyEqual(ICacheKey other) + { + return other is I8ToI16CacheKey; + } + + public void SetBuffer(Auto buffer) + { + _buffer = buffer; + } + + public void Dispose() + { + // TODO: Tell pipeline buffer is dirty! + // _renderer.PipelineInternal.DirtyIndexBuffer(_buffer); + } + } + + [SupportedOSPlatform("macos")] + struct AlignedVertexBufferCacheKey : ICacheKey + { + private readonly int _stride; + private readonly int _alignment; + + // Used to notify the pipeline that bindings have invalidated on dispose. + private readonly MetalRenderer _renderer; + private Auto _buffer; + + public AlignedVertexBufferCacheKey(MetalRenderer renderer, int stride, int alignment) + { + _renderer = renderer; + _stride = stride; + _alignment = alignment; + _buffer = null; + } + + public readonly bool KeyEqual(ICacheKey other) + { + return other is AlignedVertexBufferCacheKey entry && + entry._stride == _stride && + entry._alignment == _alignment; + } + + public void SetBuffer(Auto buffer) + { + _buffer = buffer; + } + + public readonly void Dispose() + { + // TODO: Tell pipeline buffer is dirty! + // _renderer.PipelineInternal.DirtyVertexBuffer(_buffer); + } + } + + [SupportedOSPlatform("macos")] + struct TopologyConversionCacheKey : ICacheKey + { + // TODO: Patterns + // private readonly IndexBufferPattern _pattern; + private readonly int _indexSize; + + // Used to notify the pipeline that bindings have invalidated on dispose. + private readonly MetalRenderer _renderer; + private Auto _buffer; + + public TopologyConversionCacheKey(MetalRenderer renderer, /*IndexBufferPattern pattern, */int indexSize) + { + _renderer = renderer; + // _pattern = pattern; + _indexSize = indexSize; + _buffer = null; + } + + public readonly bool KeyEqual(ICacheKey other) + { + return other is TopologyConversionCacheKey entry && + // entry._pattern == _pattern && + entry._indexSize == _indexSize; + } + + public void SetBuffer(Auto buffer) + { + _buffer = buffer; + } + + public readonly void Dispose() + { + // TODO: Tell pipeline buffer is dirty! + // _renderer.PipelineInternal.DirtyVertexBuffer(_buffer); + } + } + + [SupportedOSPlatform("macos")] + public readonly struct Dependency + { + private readonly BufferHolder _buffer; + private readonly int _offset; + private readonly int _size; + private readonly ICacheKey _key; + + public Dependency(BufferHolder buffer, int offset, int size, ICacheKey key) + { + _buffer = buffer; + _offset = offset; + _size = size; + _key = key; + } + + public void RemoveFromOwner() + { + _buffer.RemoveCachedConvertedBuffer(_offset, _size, _key); + } + } + + [SupportedOSPlatform("macos")] + struct CacheByRange where T : IDisposable + { + private struct Entry + { + public ICacheKey Key; + public T Value; + public List DependencyList; + + public Entry(ICacheKey key, T value) + { + Key = key; + Value = value; + DependencyList = null; + } + + public readonly void InvalidateDependencies() + { + if (DependencyList != null) + { + foreach (Dependency dependency in DependencyList) + { + dependency.RemoveFromOwner(); + } + + DependencyList.Clear(); + } + } + } + + private Dictionary> _ranges; + + public void Add(int offset, int size, ICacheKey key, T value) + { + List entries = GetEntries(offset, size); + + entries.Add(new Entry(key, value)); + } + + public void AddDependency(int offset, int size, ICacheKey key, Dependency dependency) + { + List entries = GetEntries(offset, size); + + for (int i = 0; i < entries.Count; i++) + { + Entry entry = entries[i]; + + if (entry.Key.KeyEqual(key)) + { + if (entry.DependencyList == null) + { + entry.DependencyList = new List(); + entries[i] = entry; + } + + entry.DependencyList.Add(dependency); + + break; + } + } + } + + public void Remove(int offset, int size, ICacheKey key) + { + List entries = GetEntries(offset, size); + + for (int i = 0; i < entries.Count; i++) + { + Entry entry = entries[i]; + + if (entry.Key.KeyEqual(key)) + { + entries.RemoveAt(i--); + + DestroyEntry(entry); + } + } + + if (entries.Count == 0) + { + _ranges.Remove(PackRange(offset, size)); + } + } + + public bool TryGetValue(int offset, int size, ICacheKey key, out T value) + { + List entries = GetEntries(offset, size); + + foreach (Entry entry in entries) + { + if (entry.Key.KeyEqual(key)) + { + value = entry.Value; + + return true; + } + } + + value = default; + return false; + } + + public void Clear() + { + if (_ranges != null) + { + foreach (List entries in _ranges.Values) + { + foreach (Entry entry in entries) + { + DestroyEntry(entry); + } + } + + _ranges.Clear(); + _ranges = null; + } + } + + public readonly void ClearRange(int offset, int size) + { + if (_ranges != null && _ranges.Count > 0) + { + int end = offset + size; + + List toRemove = null; + + foreach (KeyValuePair> range in _ranges) + { + (int rOffset, int rSize) = UnpackRange(range.Key); + + int rEnd = rOffset + rSize; + + if (rEnd > offset && rOffset < end) + { + List entries = range.Value; + + foreach (Entry entry in entries) + { + DestroyEntry(entry); + } + + (toRemove ??= new List()).Add(range.Key); + } + } + + if (toRemove != null) + { + foreach (ulong range in toRemove) + { + _ranges.Remove(range); + } + } + } + } + + private List GetEntries(int offset, int size) + { + _ranges ??= new Dictionary>(); + + ulong key = PackRange(offset, size); + + if (!_ranges.TryGetValue(key, out List value)) + { + value = new List(); + _ranges.Add(key, value); + } + + return value; + } + + private static void DestroyEntry(Entry entry) + { + entry.Key.Dispose(); + entry.Value?.Dispose(); + entry.InvalidateDependencies(); + } + + private static ulong PackRange(int offset, int size) + { + return (uint)offset | ((ulong)size << 32); + } + + private static (int offset, int size) UnpackRange(ulong range) + { + return ((int)range, (int)(range >> 32)); + } + + public void Dispose() + { + Clear(); + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/EncoderState.cs b/src/Ryujinx.Graphics.Metal/EncoderState.cs index d0d963ae1..811c68995 100644 --- a/src/Ryujinx.Graphics.Metal/EncoderState.cs +++ b/src/Ryujinx.Graphics.Metal/EncoderState.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using SharpMetal.Metal; using System.Linq; @@ -21,6 +22,26 @@ namespace Ryujinx.Graphics.Metal } } + public record struct BufferRef + { + public Auto Buffer; + public int Index; + public BufferRange? Range; + + public BufferRef(Auto buffer, int index) + { + Buffer = buffer; + Index = index; + } + + public BufferRef(Auto buffer, int index, ref BufferRange range) + { + Buffer = buffer; + Index = index; + Range = range; + } + } + [SupportedOSPlatform("macos")] struct EncoderState { @@ -37,10 +58,10 @@ namespace Ryujinx.Graphics.Metal public TextureBase[] ComputeTextures = new TextureBase[Constants.MaxTextures]; public MTLSamplerState[] ComputeSamplers = new MTLSamplerState[Constants.MaxSamplers]; - public BufferAssignment[] UniformBuffers = []; - public BufferAssignment[] StorageBuffers = []; + public BufferRef[] UniformBuffers = []; + public BufferRef[] StorageBuffers = []; - public BufferRange IndexBuffer = default; + public Auto IndexBuffer = default; public MTLIndexType IndexType = MTLIndexType.UInt16; public ulong IndexBufferOffset = 0; diff --git a/src/Ryujinx.Graphics.Metal/EncoderStateManager.cs b/src/Ryujinx.Graphics.Metal/EncoderStateManager.cs index 641d1e2ac..e6933eeb2 100644 --- a/src/Ryujinx.Graphics.Metal/EncoderStateManager.cs +++ b/src/Ryujinx.Graphics.Metal/EncoderStateManager.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Metal private EncoderState _currentState = new(); private readonly Stack _backStates = []; - public readonly BufferRange IndexBuffer => _currentState.IndexBuffer; + public readonly Auto IndexBuffer => _currentState.IndexBuffer; public readonly MTLIndexType IndexType => _currentState.IndexType; public readonly ulong IndexBufferOffset => _currentState.IndexBufferOffset; public readonly PrimitiveTopology Topology => _currentState.Topology; @@ -356,9 +356,18 @@ namespace Ryujinx.Graphics.Metal { if (buffer.Handle != BufferHandle.Null) { - _currentState.IndexType = type.Convert(); - _currentState.IndexBufferOffset = (ulong)buffer.Offset; - _currentState.IndexBuffer = buffer; + if (type == GAL.IndexType.UByte) + { + _currentState.IndexType = MTLIndexType.UInt16; + _currentState.IndexBufferOffset = (ulong)buffer.Offset; + _currentState.IndexBuffer = _bufferManager.GetBufferI8ToI16(_pipeline.CurrentCommandBuffer, buffer.Handle, buffer.Offset, buffer.Size); + } + else + { + _currentState.IndexType = type.Convert(); + _currentState.IndexBufferOffset = (ulong)buffer.Offset; + _currentState.IndexBuffer = _bufferManager.GetBuffer(buffer.Handle, false); + } } } @@ -659,7 +668,20 @@ namespace Ryujinx.Graphics.Metal // Inlineable public void UpdateUniformBuffers(ReadOnlySpan buffers) { - _currentState.UniformBuffers = buffers.ToArray(); + _currentState.UniformBuffers = new BufferRef[buffers.Length]; + + for (int i = 0; i < buffers.Length; i++) + { + var assignment = buffers[i]; + var buffer = assignment.Range; + int index = assignment.Binding; + + Auto mtlBuffer = buffer.Handle == BufferHandle.Null + ? null + : _bufferManager.GetBuffer(buffer.Handle, buffer.Write); + + _currentState.UniformBuffers[i] = new BufferRef(mtlBuffer, index, ref buffer); + } // Inline update if (_pipeline.CurrentEncoder != null) @@ -680,13 +702,49 @@ namespace Ryujinx.Graphics.Metal // Inlineable public void UpdateStorageBuffers(ReadOnlySpan buffers) { - _currentState.StorageBuffers = buffers.ToArray(); + _currentState.StorageBuffers = new BufferRef[buffers.Length]; - for (int i = 0; i < _currentState.StorageBuffers.Length; i++) + for (int i = 0; i < buffers.Length; i++) { - BufferAssignment buffer = _currentState.StorageBuffers[i]; - // TODO: DONT offset the binding by 15 - _currentState.StorageBuffers[i] = new BufferAssignment(buffer.Binding + 15, buffer.Range); + var assignment = buffers[i]; + var buffer = assignment.Range; + // TODO: Dont do this + int index = assignment.Binding + 15; + + Auto mtlBuffer = buffer.Handle == BufferHandle.Null + ? null + : _bufferManager.GetBuffer(buffer.Handle, buffer.Write); + + _currentState.StorageBuffers[i] = new BufferRef(mtlBuffer, index, ref buffer); + } + + // Inline update + if (_pipeline.CurrentEncoder != null) + { + if (_pipeline.CurrentEncoderType == EncoderType.Render) + { + var renderCommandEncoder = new MTLRenderCommandEncoder(_pipeline.CurrentEncoder.Value); + SetRenderBuffers(renderCommandEncoder, _currentState.StorageBuffers, true); + } + else if (_pipeline.CurrentEncoderType == EncoderType.Compute) + { + var computeCommandEncoder = new MTLComputeCommandEncoder(_pipeline.CurrentEncoder.Value); + SetComputeBuffers(computeCommandEncoder, _currentState.StorageBuffers); + } + } + } + + // Inlineable + public void UpdateStorageBuffers(int first, ReadOnlySpan> buffers) + { + _currentState.StorageBuffers = new BufferRef[buffers.Length]; + + for (int i = 0; i < buffers.Length; i++) + { + var mtlBuffer = buffers[i]; + int index = first + i; + + _currentState.StorageBuffers[i] = new BufferRef(mtlBuffer, index); } // Inline update @@ -938,51 +996,95 @@ namespace Ryujinx.Graphics.Metal private void SetVertexBuffers(MTLRenderCommandEncoder renderCommandEncoder, VertexBufferDescriptor[] bufferDescriptors) { - var buffers = new List(); + var buffers = new List(); for (int i = 0; i < bufferDescriptors.Length; i++) { - buffers.Add(new BufferAssignment(i, bufferDescriptors[i].Buffer)); + Auto mtlBuffer = bufferDescriptors[i].Buffer.Handle == BufferHandle.Null + ? null + : _bufferManager.GetBuffer(bufferDescriptors[i].Buffer.Handle, bufferDescriptors[i].Buffer.Write); + + var range = bufferDescriptors[i].Buffer; + + buffers.Add(new BufferRef(mtlBuffer, i, ref range)); } + var zeroBufferRange = new BufferRange(_zeroBuffer, 0, ZeroBufferSize); + + Auto zeroBuffer = _zeroBuffer == BufferHandle.Null + ? null + : _bufferManager.GetBuffer(_zeroBuffer, false); + // Zero buffer - buffers.Add(new BufferAssignment( - bufferDescriptors.Length, - new BufferRange(_zeroBuffer, 0, ZeroBufferSize))); + buffers.Add(new BufferRef(zeroBuffer, bufferDescriptors.Length, ref zeroBufferRange)); SetRenderBuffers(renderCommandEncoder, buffers.ToArray()); } - private readonly void SetRenderBuffers(MTLRenderCommandEncoder renderCommandEncoder, BufferAssignment[] buffers, bool fragment = false) + private readonly void SetRenderBuffers(MTLRenderCommandEncoder renderCommandEncoder, BufferRef[] buffers, bool fragment = false) { - foreach (var buffer in buffers) + for (int i = 0; i < buffers.Length; i++) { - var range = buffer.Range; - var autoBuffer = _bufferManager.GetBuffer(range.Handle, range.Offset, range.Size, range.Write); + var range = buffers[i].Range; + var autoBuffer = buffers[i].Buffer; + var offset = 0; + var index = buffers[i].Index; - if (autoBuffer != null) + if (autoBuffer == null) { - var mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer).Value; + continue; + } - renderCommandEncoder.SetVertexBuffer(mtlBuffer, (ulong)range.Offset, (ulong)buffer.Binding); + MTLBuffer mtlBuffer; - if (fragment) - { - renderCommandEncoder.SetFragmentBuffer(mtlBuffer, (ulong)range.Offset, (ulong)buffer.Binding); - } + if (range.HasValue) + { + offset = range.Value.Offset; + mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer, offset, range.Value.Size, range.Value.Write).Value; + + } + else + { + mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer).Value; + } + + renderCommandEncoder.SetVertexBuffer(mtlBuffer, (ulong)offset, (ulong)index); + + if (fragment) + { + renderCommandEncoder.SetFragmentBuffer(mtlBuffer, (ulong)offset, (ulong)index); } } } - private readonly void SetComputeBuffers(MTLComputeCommandEncoder computeCommandEncoder, BufferAssignment[] buffers) + private readonly void SetComputeBuffers(MTLComputeCommandEncoder computeCommandEncoder, BufferRef[] buffers) { - foreach (var buffer in buffers) + for (int i = 0; i < buffers.Length; i++) { - var range = buffer.Range; - var mtlBuffer = _bufferManager.GetBuffer(range.Handle, range.Offset, range.Size, range.Write).Get(_pipeline.CurrentCommandBuffer).Value; + var range = buffers[i].Range; + var autoBuffer = buffers[i].Buffer; + var offset = 0; + var index = buffers[i].Index; - computeCommandEncoder.SetBuffer(mtlBuffer, (ulong)range.Offset, (ulong)buffer.Binding); + if (autoBuffer == null) + { + continue; + } + MTLBuffer mtlBuffer; + + if (range.HasValue) + { + offset = range.Value.Offset; + mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer, offset, range.Value.Size, range.Value.Write).Value; + + } + else + { + mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer).Value; + } + + computeCommandEncoder.SetBuffer(mtlBuffer, (ulong)offset, (ulong)index); } } diff --git a/src/Ryujinx.Graphics.Metal/HelperShader.cs b/src/Ryujinx.Graphics.Metal/HelperShader.cs index ea6c4e31b..9b9d5d73e 100644 --- a/src/Ryujinx.Graphics.Metal/HelperShader.cs +++ b/src/Ryujinx.Graphics.Metal/HelperShader.cs @@ -12,7 +12,9 @@ namespace Ryujinx.Graphics.Metal [SupportedOSPlatform("macos")] public class HelperShader : IDisposable { + private const int ConvertElementsPerWorkgroup = 32 * 100; // Work group size of 32 times 100 elements. private const string ShadersSourcePath = "/Ryujinx.Graphics.Metal/Shaders"; + private readonly MetalRenderer _renderer; private readonly Pipeline _pipeline; private MTLDevice _device; @@ -21,10 +23,12 @@ namespace Ryujinx.Graphics.Metal private readonly IProgram _programColorBlit; private readonly List _programsColorClear = new(); private readonly IProgram _programDepthStencilClear; + private readonly IProgram _programStrideChange; - public HelperShader(MTLDevice device, Pipeline pipeline) + public HelperShader(MTLDevice device, MetalRenderer renderer, Pipeline pipeline) { _device = device; + _renderer = renderer; _pipeline = pipeline; _samplerNearest = new Sampler(_device, SamplerCreateInfo.Create(MinFilter.Nearest, MagFilter.Nearest)); @@ -54,6 +58,12 @@ namespace Ryujinx.Graphics.Metal new ShaderSource(depthStencilClearSource, ShaderStage.Fragment, TargetLanguage.Msl), new ShaderSource(depthStencilClearSource, ShaderStage.Vertex, TargetLanguage.Msl) ], device); + + var strideChangeSource = ReadMsl("ChangeBufferStride.metal"); + _programStrideChange = new Program( + [ + new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl) + ], device); } private static string ReadMsl(string fileName) @@ -62,6 +72,7 @@ namespace Ryujinx.Graphics.Metal } public unsafe void BlitColor( + CommandBufferScoped cbs, ITexture src, ITexture dst, Extents2D srcRegion, @@ -89,6 +100,10 @@ namespace Ryujinx.Graphics.Metal (region[2], region[3]) = (region[3], region[2]); } + // using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, RegionBufferSize); + // buffer.Holder.SetDataUnchecked(buffer.Offset, region); + // _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + var rect = new Rectangle( MathF.Min(dstRegion.X1, dstRegion.X2), MathF.Min(dstRegion.Y1, dstRegion.Y2), @@ -156,6 +171,10 @@ namespace Ryujinx.Graphics.Metal (region[2], region[3]) = (region[3], region[2]); } + // var bufferHandle = _renderer.BufferManager.CreateWithHandle(RegionBufferSize); + // _renderer.BufferManager.SetData(bufferHandle, 0, region); + // _pipeline.SetUniformBuffers([new BufferAssignment(0, new BufferRange(bufferHandle, 0, RegionBufferSize))]); + Span viewports = stackalloc Viewport[1]; Span> scissors = stackalloc Rectangle[1]; @@ -200,6 +219,57 @@ namespace Ryujinx.Graphics.Metal _pipeline.RestoreState(); } + public void ConvertI8ToI16(CommandBufferScoped cbs, BufferHolder src, BufferHolder dst, int srcOffset, int size) + { + ChangeStride(cbs, src, dst, srcOffset, size, 1, 2); + } + + public unsafe void ChangeStride( + CommandBufferScoped cbs, + BufferHolder src, + BufferHolder dst, + int srcOffset, + int size, + int stride, + int newStride) + { + int elems = size / stride; + + var srcBuffer = src.GetBuffer(); + var dstBuffer = dst.GetBuffer(); + + const int ParamsBufferSize = 16; + + // Save current state + _pipeline.SaveAndResetState(); + + Span shaderParams = stackalloc int[ParamsBufferSize / sizeof(int)]; + + shaderParams[0] = stride; + shaderParams[1] = newStride; + shaderParams[2] = size; + shaderParams[3] = srcOffset; + + using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize); + + buffer.Holder.SetDataUnchecked(buffer.Offset, shaderParams); + + _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + + Span> sbRanges = new Auto[2]; + + sbRanges[0] = srcBuffer; + sbRanges[1] = dstBuffer; + + _pipeline.SetStorageBuffers(1, sbRanges); + + _pipeline.SetProgram(_programStrideChange); + _pipeline.DispatchCompute(1 + elems / ConvertElementsPerWorkgroup, 1, 1, 64, 1, 1); + + // Restore previous state + _pipeline.RestoreState(); + } + public unsafe void ClearColor( int index, ReadOnlySpan clearColor, diff --git a/src/Ryujinx.Graphics.Metal/MetalRenderer.cs b/src/Ryujinx.Graphics.Metal/MetalRenderer.cs index 1f61090f0..aac88587d 100644 --- a/src/Ryujinx.Graphics.Metal/MetalRenderer.cs +++ b/src/Ryujinx.Graphics.Metal/MetalRenderer.cs @@ -58,7 +58,7 @@ namespace Ryujinx.Graphics.Metal _pipeline.InitEncoderStateManager(_bufferManager); - _helperShader = new HelperShader(_device, _pipeline); + _helperShader = new HelperShader(_device, this, _pipeline); SyncManager = new SyncManager(this); } diff --git a/src/Ryujinx.Graphics.Metal/Pipeline.cs b/src/Ryujinx.Graphics.Metal/Pipeline.cs index 1230bb120..7a83a02a7 100644 --- a/src/Ryujinx.Graphics.Metal/Pipeline.cs +++ b/src/Ryujinx.Graphics.Metal/Pipeline.cs @@ -193,7 +193,7 @@ namespace Ryujinx.Graphics.Metal var textureInfo = new TextureCreateInfo((int)drawable.Texture.Width, (int)drawable.Texture.Height, (int)drawable.Texture.Depth, (int)drawable.Texture.MipmapLevelCount, (int)drawable.Texture.SampleCount, 0, 0, 0, Format.B8G8R8A8Unorm, 0, Target.Texture2D, SwizzleComponent.Red, SwizzleComponent.Green, SwizzleComponent.Blue, SwizzleComponent.Alpha); var dst = new Texture(_device, _renderer, this, textureInfo, drawable.Texture, 0, 0); - _renderer.HelperShader.BlitColor(src, dst, srcRegion, dstRegion, isLinear); + _renderer.HelperShader.BlitColor(Cbs, src, dst, srcRegion, dstRegion, isLinear); EndCurrentPass(); @@ -227,7 +227,7 @@ namespace Ryujinx.Graphics.Metal Extents2D dstRegion, bool linearFilter) { - _renderer.HelperShader.BlitColor(src, dst, srcRegion, dstRegion, linearFilter); + _renderer.HelperShader.BlitColor(Cbs, src, dst, srcRegion, dstRegion, linearFilter); } public void Barrier() @@ -348,7 +348,7 @@ namespace Ryujinx.Graphics.Metal // TODO: Support topology re-indexing to provide support for TriangleFans var primitiveType = _encoderStateManager.Topology.Convert(); - var indexBuffer = _renderer.BufferManager.GetBuffer(_encoderStateManager.IndexBuffer.Handle, false); + var indexBuffer = _encoderStateManager.IndexBuffer; renderCommandEncoder.DrawIndexedPrimitives( primitiveType, @@ -546,6 +546,11 @@ namespace Ryujinx.Graphics.Metal _encoderStateManager.UpdateStorageBuffers(buffers); } + public void SetStorageBuffers(int first, ReadOnlySpan> buffers) + { + _encoderStateManager.UpdateStorageBuffers(first, buffers); + } + public void SetTextureAndSampler(ShaderStage stage, int binding, ITexture texture, ISampler sampler) { if (texture is TextureBase tex) diff --git a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj index 0824accc1..f4e98cd45 100644 --- a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj +++ b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj @@ -16,6 +16,7 @@ + diff --git a/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal b/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal new file mode 100644 index 000000000..64e832092 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal @@ -0,0 +1,52 @@ +#include + +using namespace metal; + +kernel void kernelMain(constant int4& stride_arguments [[buffer(0)]], + device uint8_t* in_data [[buffer(1)]], + device uint8_t* out_data [[buffer(2)]], + uint3 thread_position_in_grid [[thread_position_in_grid]], + uint3 threads_per_threadgroup [[threads_per_threadgroup]], + uint3 threadgroups_per_grid [[threads_per_grid]]) +{ + // Determine what slice of the stride copies this invocation will perform. + + int sourceStride = stride_arguments.x; + int targetStride = stride_arguments.y; + int bufferSize = stride_arguments.z; + int sourceOffset = stride_arguments.w; + + int strideRemainder = targetStride - sourceStride; + int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x); + + int copiesRequired = bufferSize / sourceStride; + + // Find the copies that this invocation should perform. + + // - Copies that all invocations perform. + int allInvocationCopies = copiesRequired / invocations; + + // - Extra remainder copy that this invocation performs. + int index = int(thread_position_in_grid.x); + int extra = (index < (copiesRequired % invocations)) ? 1 : 0; + + int copyCount = allInvocationCopies + extra; + + // Finally, get the starting offset. Make sure to count extra copies. + + int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index); + + int srcOffset = sourceOffset + startCopy * sourceStride; + int dstOffset = startCopy * targetStride; + + // Perform the copies for this region + for (int i = 0; i < copyCount; i++) { + for (int j = 0; j < sourceStride; j++) { + out_data[dstOffset++] = in_data[srcOffset++]; + } + + for (int j = 0; j < strideRemainder; j++) { + out_data[dstOffset++] = uint8_t(0); + } + } +}