mirror of
https://github.com/GreemDev/Ryujinx
synced 2025-01-13 14:23:24 +01:00
Fix EmitHighNarrow(), EmitSaturatingNarrowOp() when Rd == Rn || Rd == Rm (& Part != 0). Optimization of EmitVectorTranspose(), EmitVectorUnzip(), EmitVectorZip() algorithms (reduction of the number of operations and their complexity). Add 12 Tests about Trn1/2, Uzp1/2, Zip1/2 (V) instructions. (#268)
* Update CpuTestSimdArithmetic.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update Instructions.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Update AInstEmitSimdMove.cs * Delete CpuTestSimdMove.cs
This commit is contained in:
parent
50b706e2ba
commit
063fae50fe
8 changed files with 2385 additions and 1516 deletions
|
@ -163,12 +163,19 @@ namespace ChocolArm64.Instruction
|
|||
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
|
||||
|
||||
int Elems = 8 >> Op.Size;
|
||||
|
||||
int ESize = 8 << Op.Size;
|
||||
|
||||
int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
|
||||
|
||||
long RoundConst = 1L << (ESize - 1);
|
||||
|
||||
if (Part != 0)
|
||||
{
|
||||
Context.EmitLdvec(Op.Rd);
|
||||
Context.EmitStvectmp();
|
||||
}
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
{
|
||||
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1);
|
||||
|
@ -185,9 +192,12 @@ namespace ChocolArm64.Instruction
|
|||
|
||||
Context.EmitLsr(ESize);
|
||||
|
||||
EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Part + Index, Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
Context.EmitStvec(Op.Rd);
|
||||
|
||||
if (Part == 0)
|
||||
{
|
||||
EmitVectorZeroUpper(Context, Op.Rd);
|
||||
|
|
|
@ -813,6 +813,7 @@ namespace ChocolArm64.Instruction
|
|||
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
|
||||
|
||||
int Elems = !Scalar ? 8 >> Op.Size : 1;
|
||||
|
||||
int ESize = 8 << Op.Size;
|
||||
|
||||
int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0;
|
||||
|
@ -823,6 +824,12 @@ namespace ChocolArm64.Instruction
|
|||
Context.EmitLdc_I8(0L);
|
||||
Context.EmitSttmp();
|
||||
|
||||
if (Part != 0)
|
||||
{
|
||||
Context.EmitLdvec(Op.Rd);
|
||||
Context.EmitStvectmp();
|
||||
}
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
{
|
||||
AILLabel LblLe = new AILLabel();
|
||||
|
@ -867,9 +874,12 @@ namespace ChocolArm64.Instruction
|
|||
EmitVectorZeroLower(Context, Op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Part + Index, Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
Context.EmitStvec(Op.Rd);
|
||||
|
||||
if (Part == 0)
|
||||
{
|
||||
EmitVectorZeroUpper(Context, Op.Rd);
|
||||
|
|
|
@ -331,17 +331,18 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
|
||||
|
||||
int Bytes = Op.GetBitsCount() >> 3;
|
||||
int Words = Op.GetBitsCount() >> 4;
|
||||
int Pairs = Words >> Op.Size;
|
||||
|
||||
int Elems = Bytes >> Op.Size;
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
for (int Index = 0; Index < Pairs; Index++)
|
||||
{
|
||||
int Elem = (Index & ~1) + Part;
|
||||
int Idx = Index << 1;
|
||||
|
||||
EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
|
||||
|
||||
EmitVectorInsertTmp(Context, Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Idx , Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
|
@ -357,18 +358,18 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
|
||||
|
||||
int Bytes = Op.GetBitsCount() >> 3;
|
||||
int Words = Op.GetBitsCount() >> 4;
|
||||
int Pairs = Words >> Op.Size;
|
||||
|
||||
int Elems = Bytes >> Op.Size;
|
||||
int Half = Elems >> 1;
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
for (int Index = 0; Index < Pairs; Index++)
|
||||
{
|
||||
int Elem = Part + ((Index & (Half - 1)) << 1);
|
||||
int Idx = Index << 1;
|
||||
|
||||
EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
|
||||
|
||||
EmitVectorInsertTmp(Context, Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Pairs + Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Index, Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
|
@ -384,18 +385,20 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
|
||||
|
||||
int Bytes = Op.GetBitsCount() >> 3;
|
||||
int Words = Op.GetBitsCount() >> 4;
|
||||
int Pairs = Words >> Op.Size;
|
||||
|
||||
int Elems = Bytes >> Op.Size;
|
||||
int Half = Elems >> 1;
|
||||
int Base = Part != 0 ? Pairs : 0;
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
for (int Index = 0; Index < Pairs; Index++)
|
||||
{
|
||||
int Elem = Part * Half + (Index >> 1);
|
||||
int Idx = Index << 1;
|
||||
|
||||
EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size);
|
||||
|
||||
EmitVectorInsertTmp(Context, Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Idx , Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -9,46 +9,6 @@ namespace Ryujinx.Tests.Cpu
|
|||
{
|
||||
public class CpuTestSimdArithmetic : CpuTest
|
||||
{
|
||||
[TestCase(0xE228420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0xE228420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFFFF00ul, 0x0000000000000000ul)]
|
||||
[TestCase(0xE228420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFEFEFEFEFEFEFEFEul, 0x0000000000000000ul)]
|
||||
[TestCase(0xE228420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x4E228420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x4E228420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFFFF00ul, 0x00000000FFFFFF00ul)]
|
||||
[TestCase(0x4E228420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFEFEFEFEFEFEFEFEul, 0xFEFEFEFEFEFEFEFEul)]
|
||||
[TestCase(0x4E228420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
|
||||
[TestCase(0xE628420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0xE628420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFF0000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0xE628420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFEFFFEFFFEFFFEul, 0x0000000000000000ul)]
|
||||
[TestCase(0xE628420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x4E628420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x4E628420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x00000000FFFF0000ul, 0x00000000FFFF0000ul)]
|
||||
[TestCase(0x4E628420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFEFFFEFFFEFFFEul, 0xFFFEFFFEFFFEFFFEul)]
|
||||
[TestCase(0x4E628420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
|
||||
[TestCase(0xEA28420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0xEA28420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0xEA28420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFEFFFFFFFEul, 0x0000000000000000ul)]
|
||||
[TestCase(0xEA28420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x4EA28420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x4EA28420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x4EA28420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFEFFFFFFFEul, 0xFFFFFFFEFFFFFFFEul)]
|
||||
[TestCase(0x4EA28420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
|
||||
[TestCase(0x4EE28420u, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x4EE28420u, 0x00000000FFFFFFFFul, 0x00000000FFFFFFFFul, 0x0000000000000001ul, 0x0000000000000001ul, 0x0000000100000000ul, 0x0000000100000000ul)]
|
||||
[TestCase(0x4EE28420u, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFEul, 0xFFFFFFFFFFFFFFFEul)]
|
||||
[TestCase(0x4EE28420u, 0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
|
||||
public void Add_V(uint Opcode, ulong A0, ulong A1, ulong B0, ulong B1, ulong Result0, ulong Result1)
|
||||
{
|
||||
Vector128<float> V1 = MakeVectorE0E1(A0, A1);
|
||||
Vector128<float> V2 = MakeVectorE0E1(B0, B1);
|
||||
AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0));
|
||||
Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0));
|
||||
});
|
||||
}
|
||||
|
||||
[TestCase(0x1E224820u, 0x0000000000000000ul, 0x0000000080000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x1E224820u, 0x0000000080000000ul, 0x0000000000000000ul, 0x0000000000000000ul)]
|
||||
[TestCase(0x1E224820u, 0x0000000080000000ul, 0x0000000080000000ul, 0x0000000080000000ul)]
|
||||
|
@ -195,6 +155,7 @@ namespace Ryujinx.Tests.Cpu
|
|||
V0: Sse.SetAllVector128(B));
|
||||
|
||||
float Result = (float)(2 - ((double)A * (double)B));
|
||||
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result));
|
||||
|
|
|
@ -1,136 +0,0 @@
|
|||
using ChocolArm64.State;
|
||||
|
||||
using NUnit.Framework;
|
||||
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
namespace Ryujinx.Tests.Cpu
|
||||
{
|
||||
public class CpuTestSimdMove : CpuTest
|
||||
{
|
||||
[Test, Description("TRN1 V0.4S, V1.4S, V2.4S")]
|
||||
public void Trn1_V_4S([Random(2)] uint A0, [Random(2)] uint A1, [Random(2)] uint A2, [Random(2)] uint A3,
|
||||
[Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3)
|
||||
{
|
||||
uint Opcode = 0x4E822820;
|
||||
Vector128<float> V1 = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
|
||||
Vector128<float> V2 = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));
|
||||
|
||||
AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
|
||||
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)0), Is.EqualTo(A0));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)1), Is.EqualTo(B0));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)2), Is.EqualTo(A2));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)3), Is.EqualTo(B2));
|
||||
});
|
||||
}
|
||||
|
||||
[Test, Description("TRN1 V0.8B, V1.8B, V2.8B")]
|
||||
public void Trn1_V_8B([Random(2)] byte A0, [Random(1)] byte A1, [Random(2)] byte A2, [Random(1)] byte A3,
|
||||
[Random(2)] byte A4, [Random(1)] byte A5, [Random(2)] byte A6, [Random(1)] byte A7,
|
||||
[Random(2)] byte B0, [Random(1)] byte B1, [Random(2)] byte B2, [Random(1)] byte B3,
|
||||
[Random(2)] byte B4, [Random(1)] byte B5, [Random(2)] byte B6, [Random(1)] byte B7)
|
||||
{
|
||||
uint Opcode = 0x0E022820;
|
||||
Vector128<float> V1 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, A7, A6, A5, A4, A3, A2, A1, A0));
|
||||
Vector128<float> V2 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, B7, B6, B5, B4, B3, B2, B1, B0));
|
||||
|
||||
AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
|
||||
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)0), Is.EqualTo(A0));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)1), Is.EqualTo(B0));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)2), Is.EqualTo(A2));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)3), Is.EqualTo(B2));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)4), Is.EqualTo(A4));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)5), Is.EqualTo(B4));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)6), Is.EqualTo(A6));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)7), Is.EqualTo(B6));
|
||||
});
|
||||
}
|
||||
|
||||
[Test, Description("TRN2 V0.4S, V1.4S, V2.4S")]
|
||||
public void Trn2_V_4S([Random(2)] uint A0, [Random(2)] uint A1, [Random(2)] uint A2, [Random(2)] uint A3,
|
||||
[Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3)
|
||||
{
|
||||
uint Opcode = 0x4E826820;
|
||||
Vector128<float> V1 = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
|
||||
Vector128<float> V2 = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));
|
||||
|
||||
AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
|
||||
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)0), Is.EqualTo(A1));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)1), Is.EqualTo(B1));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)2), Is.EqualTo(A3));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)3), Is.EqualTo(B3));
|
||||
});
|
||||
}
|
||||
|
||||
[Test, Description("TRN2 V0.8B, V1.8B, V2.8B")]
|
||||
public void Trn2_V_8B([Random(1)] byte A0, [Random(2)] byte A1, [Random(1)] byte A2, [Random(2)] byte A3,
|
||||
[Random(1)] byte A4, [Random(2)] byte A5, [Random(1)] byte A6, [Random(2)] byte A7,
|
||||
[Random(1)] byte B0, [Random(2)] byte B1, [Random(1)] byte B2, [Random(2)] byte B3,
|
||||
[Random(1)] byte B4, [Random(2)] byte B5, [Random(1)] byte B6, [Random(2)] byte B7)
|
||||
{
|
||||
uint Opcode = 0x0E026820;
|
||||
Vector128<float> V1 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, A7, A6, A5, A4, A3, A2, A1, A0));
|
||||
Vector128<float> V2 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, B7, B6, B5, B4, B3, B2, B1, B0));
|
||||
|
||||
AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
|
||||
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)0), Is.EqualTo(A1));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)1), Is.EqualTo(B1));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)2), Is.EqualTo(A3));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)3), Is.EqualTo(B3));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)4), Is.EqualTo(A5));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)5), Is.EqualTo(B5));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)6), Is.EqualTo(A7));
|
||||
Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)7), Is.EqualTo(B7));
|
||||
});
|
||||
}
|
||||
|
||||
[TestCase(0u, 0u, 0x2313221221112010ul, 0x0000000000000000ul)]
|
||||
[TestCase(1u, 0u, 0x2313221221112010ul, 0x2717261625152414ul)]
|
||||
[TestCase(0u, 1u, 0x2322131221201110ul, 0x0000000000000000ul)]
|
||||
[TestCase(1u, 1u, 0x2322131221201110ul, 0x2726171625241514ul)]
|
||||
[TestCase(0u, 2u, 0x2322212013121110ul, 0x0000000000000000ul)]
|
||||
[TestCase(1u, 2u, 0x2322212013121110ul, 0x2726252417161514ul)]
|
||||
[TestCase(1u, 3u, 0x1716151413121110ul, 0x2726252423222120ul)]
|
||||
public void Zip1_V(uint Q, uint size, ulong Result_0, ulong Result_1)
|
||||
{
|
||||
// ZIP1 V0.<T>, V1.<T>, V2.<T>
|
||||
uint Opcode = 0x0E023820 | (Q << 30) | (size << 22);
|
||||
Vector128<float> V1 = MakeVectorE0E1(0x1716151413121110, 0x1F1E1D1C1B1A1918);
|
||||
Vector128<float> V2 = MakeVectorE0E1(0x2726252423222120, 0x2F2E2D2C2B2A2928);
|
||||
AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
|
||||
Assert.AreEqual(Result_0, GetVectorE0(ThreadState.V0));
|
||||
Assert.AreEqual(Result_1, GetVectorE1(ThreadState.V0));
|
||||
}
|
||||
|
||||
[TestCase(0u, 0u, 0x2717261625152414ul, 0x0000000000000000ul)]
|
||||
[TestCase(1u, 0u, 0x2B1B2A1A29192818ul, 0x2F1F2E1E2D1D2C1Cul)]
|
||||
[TestCase(0u, 1u, 0x2726171625241514ul, 0x0000000000000000ul)]
|
||||
[TestCase(1u, 1u, 0x2B2A1B1A29281918ul, 0x2F2E1F1E2D2C1D1Cul)]
|
||||
[TestCase(0u, 2u, 0x2726252417161514ul, 0x0000000000000000ul)]
|
||||
[TestCase(1u, 2u, 0x2B2A29281B1A1918ul, 0x2F2E2D2C1F1E1D1Cul)]
|
||||
[TestCase(1u, 3u, 0x1F1E1D1C1B1A1918ul, 0x2F2E2D2C2B2A2928ul)]
|
||||
public void Zip2_V(uint Q, uint size, ulong Result_0, ulong Result_1)
|
||||
{
|
||||
// ZIP2 V0.<T>, V1.<T>, V2.<T>
|
||||
uint Opcode = 0x0E027820 | (Q << 30) | (size << 22);
|
||||
Vector128<float> V1 = MakeVectorE0E1(0x1716151413121110, 0x1F1E1D1C1B1A1918);
|
||||
Vector128<float> V2 = MakeVectorE0E1(0x2726252423222120, 0x2F2E2D2C2B2A2928);
|
||||
AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
|
||||
Assert.AreEqual(Result_0, GetVectorE0(ThreadState.V0));
|
||||
Assert.AreEqual(Result_1, GetVectorE1(ThreadState.V0));
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -4655,6 +4655,74 @@ namespace Ryujinx.Tests.Cpu.Tester
|
|||
Vpart(d, part, result);
|
||||
}
|
||||
|
||||
// trn1_advsimd.html
|
||||
public static void Trn1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
|
||||
{
|
||||
const bool op = false;
|
||||
|
||||
/* Decode */
|
||||
int d = (int)UInt(Rd);
|
||||
int n = (int)UInt(Rn);
|
||||
int m = (int)UInt(Rm);
|
||||
|
||||
/* if size:Q == '110' then ReservedValue(); */
|
||||
|
||||
int esize = 8 << (int)UInt(size);
|
||||
int datasize = (Q ? 128 : 64);
|
||||
int elements = datasize / esize;
|
||||
int part = (int)UInt(op);
|
||||
int pairs = elements / 2;
|
||||
|
||||
/* Operation */
|
||||
/* CheckFPAdvSIMDEnabled64(); */
|
||||
|
||||
Bits result = new Bits(datasize);
|
||||
Bits operand1 = V(datasize, n);
|
||||
Bits operand2 = V(datasize, m);
|
||||
|
||||
for (int p = 0; p <= pairs - 1; p++)
|
||||
{
|
||||
Elem(result, 2 * p + 0, esize, Elem(operand1, 2 * p + part, esize));
|
||||
Elem(result, 2 * p + 1, esize, Elem(operand2, 2 * p + part, esize));
|
||||
}
|
||||
|
||||
V(d, result);
|
||||
}
|
||||
|
||||
// trn2_advsimd.html
|
||||
public static void Trn2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
|
||||
{
|
||||
const bool op = true;
|
||||
|
||||
/* Decode */
|
||||
int d = (int)UInt(Rd);
|
||||
int n = (int)UInt(Rn);
|
||||
int m = (int)UInt(Rm);
|
||||
|
||||
/* if size:Q == '110' then ReservedValue(); */
|
||||
|
||||
int esize = 8 << (int)UInt(size);
|
||||
int datasize = (Q ? 128 : 64);
|
||||
int elements = datasize / esize;
|
||||
int part = (int)UInt(op);
|
||||
int pairs = elements / 2;
|
||||
|
||||
/* Operation */
|
||||
/* CheckFPAdvSIMDEnabled64(); */
|
||||
|
||||
Bits result = new Bits(datasize);
|
||||
Bits operand1 = V(datasize, n);
|
||||
Bits operand2 = V(datasize, m);
|
||||
|
||||
for (int p = 0; p <= pairs - 1; p++)
|
||||
{
|
||||
Elem(result, 2 * p + 0, esize, Elem(operand1, 2 * p + part, esize));
|
||||
Elem(result, 2 * p + 1, esize, Elem(operand2, 2 * p + part, esize));
|
||||
}
|
||||
|
||||
V(d, result);
|
||||
}
|
||||
|
||||
// uaba_advsimd.html
|
||||
public static void Uaba_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
|
||||
{
|
||||
|
@ -4832,6 +4900,146 @@ namespace Ryujinx.Tests.Cpu.Tester
|
|||
|
||||
V(d, result);
|
||||
}
|
||||
|
||||
// uzp1_advsimd.html
|
||||
public static void Uzp1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
|
||||
{
|
||||
const bool op = false;
|
||||
|
||||
/* Decode */
|
||||
int d = (int)UInt(Rd);
|
||||
int n = (int)UInt(Rn);
|
||||
int m = (int)UInt(Rm);
|
||||
|
||||
/* if size:Q == '110' then ReservedValue(); */
|
||||
|
||||
int esize = 8 << (int)UInt(size);
|
||||
int datasize = (Q ? 128 : 64);
|
||||
int elements = datasize / esize;
|
||||
int part = (int)UInt(op);
|
||||
|
||||
/* Operation */
|
||||
/* CheckFPAdvSIMDEnabled64(); */
|
||||
|
||||
Bits result = new Bits(datasize);
|
||||
Bits operandl = V(datasize, n);
|
||||
Bits operandh = V(datasize, m);
|
||||
|
||||
Bits zipped = Bits.Concat(operandh, operandl);
|
||||
|
||||
for (int e = 0; e <= elements - 1; e++)
|
||||
{
|
||||
Elem(result, e, esize, Elem(zipped, 2 * e + part, esize));
|
||||
}
|
||||
|
||||
V(d, result);
|
||||
}
|
||||
|
||||
// uzp2_advsimd.html
|
||||
public static void Uzp2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
|
||||
{
|
||||
const bool op = true;
|
||||
|
||||
/* Decode */
|
||||
int d = (int)UInt(Rd);
|
||||
int n = (int)UInt(Rn);
|
||||
int m = (int)UInt(Rm);
|
||||
|
||||
/* if size:Q == '110' then ReservedValue(); */
|
||||
|
||||
int esize = 8 << (int)UInt(size);
|
||||
int datasize = (Q ? 128 : 64);
|
||||
int elements = datasize / esize;
|
||||
int part = (int)UInt(op);
|
||||
|
||||
/* Operation */
|
||||
/* CheckFPAdvSIMDEnabled64(); */
|
||||
|
||||
Bits result = new Bits(datasize);
|
||||
Bits operandl = V(datasize, n);
|
||||
Bits operandh = V(datasize, m);
|
||||
|
||||
Bits zipped = Bits.Concat(operandh, operandl);
|
||||
|
||||
for (int e = 0; e <= elements - 1; e++)
|
||||
{
|
||||
Elem(result, e, esize, Elem(zipped, 2 * e + part, esize));
|
||||
}
|
||||
|
||||
V(d, result);
|
||||
}
|
||||
|
||||
// zip1_advsimd.html
|
||||
public static void Zip1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
|
||||
{
|
||||
const bool op = false;
|
||||
|
||||
/* Decode */
|
||||
int d = (int)UInt(Rd);
|
||||
int n = (int)UInt(Rn);
|
||||
int m = (int)UInt(Rm);
|
||||
|
||||
/* if size:Q == '110' then ReservedValue(); */
|
||||
|
||||
int esize = 8 << (int)UInt(size);
|
||||
int datasize = (Q ? 128 : 64);
|
||||
int elements = datasize / esize;
|
||||
int part = (int)UInt(op);
|
||||
int pairs = elements / 2;
|
||||
|
||||
/* Operation */
|
||||
/* CheckFPAdvSIMDEnabled64(); */
|
||||
|
||||
Bits result = new Bits(datasize);
|
||||
Bits operand1 = V(datasize, n);
|
||||
Bits operand2 = V(datasize, m);
|
||||
|
||||
int @base = part * pairs;
|
||||
|
||||
for (int p = 0; p <= pairs - 1; p++)
|
||||
{
|
||||
Elem(result, 2 * p + 0, esize, Elem(operand1, @base + p, esize));
|
||||
Elem(result, 2 * p + 1, esize, Elem(operand2, @base + p, esize));
|
||||
}
|
||||
|
||||
V(d, result);
|
||||
}
|
||||
|
||||
// zip2_advsimd.html
|
||||
public static void Zip2_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd)
|
||||
{
|
||||
const bool op = true;
|
||||
|
||||
/* Decode */
|
||||
int d = (int)UInt(Rd);
|
||||
int n = (int)UInt(Rn);
|
||||
int m = (int)UInt(Rm);
|
||||
|
||||
/* if size:Q == '110' then ReservedValue(); */
|
||||
|
||||
int esize = 8 << (int)UInt(size);
|
||||
int datasize = (Q ? 128 : 64);
|
||||
int elements = datasize / esize;
|
||||
int part = (int)UInt(op);
|
||||
int pairs = elements / 2;
|
||||
|
||||
/* Operation */
|
||||
/* CheckFPAdvSIMDEnabled64(); */
|
||||
|
||||
Bits result = new Bits(datasize);
|
||||
Bits operand1 = V(datasize, n);
|
||||
Bits operand2 = V(datasize, m);
|
||||
|
||||
int @base = part * pairs;
|
||||
|
||||
for (int p = 0; p <= pairs - 1; p++)
|
||||
{
|
||||
Elem(result, 2 * p + 0, esize, Elem(operand1, @base + p, esize));
|
||||
Elem(result, 2 * p + 1, esize, Elem(operand2, @base + p, esize));
|
||||
}
|
||||
|
||||
V(d, result);
|
||||
}
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue