about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: zmt00 <[email protected]> 2023-12-09 15:43:08 -0800
committer: merry <[email protected]> 2023-12-11 13:18:19 +0000
commit: a43c176fc3984446d15e473fd2803493536412b6 (patch)
tree: e00fc8d1ab9f9f60e0c5a9df05b7794840a3e819
parent: 7ef11ee31174d22c5238969eee59d8aab911c3a6 (diff)
download: dynarmic-a43c176fc3984446d15e473fd2803493536412b6.tar.gz
dynarmic-a43c176fc3984446d15e473fd2803493536412b6.zip
emit_x64_vector: Add SSSE3 implementation of VUZP{1,2}.4H
-rw-r--r-- src/dynarmic/backend/x64/emit_x64_vector.cpp 44
1 files changed, 30 insertions, 14 deletions
diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp
index 5c7c53ee..bd393dc7 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp
@@ -1129,17 +1129,25 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst)
void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
- code.pslld(lhs, 16);
- code.psrad(lhs, 16);
+ if (code.HasHostFeature(HostFeature::SSSE3)) {
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
- code.pslld(rhs, 16);
- code.psrad(rhs, 16);
+ code.punpcklwd(lhs, rhs);
+ code.pshufb(lhs, code.MConst(xword, 0x0B0A'0302'0908'0100, 0x8080'8080'8080'8080));
+ } else {
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
- code.packssdw(lhs, rhs);
- code.pshufd(lhs, lhs, 0b11011000);
- code.movq(lhs, lhs);
+ code.pslld(lhs, 16);
+ code.psrad(lhs, 16);
+
+ code.pslld(rhs, 16);
+ code.psrad(rhs, 16);
+
+ code.packssdw(lhs, rhs);
+ code.pshufd(lhs, lhs, 0b11011000);
+ code.movq(lhs, lhs);
+ }
ctx.reg_alloc.DefineValue(inst, lhs);
}
@@ -1221,13 +1229,21 @@ void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst)
void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
- code.psrad(lhs, 16);
- code.psrad(rhs, 16);
- code.packssdw(lhs, rhs);
- code.pshufd(lhs, lhs, 0b11011000);
- code.movq(lhs, lhs);
+ if (code.HasHostFeature(HostFeature::SSSE3)) {
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
+
+ code.punpcklwd(lhs, rhs);
+ code.pshufb(lhs, code.MConst(xword, 0x0F0E'0706'0D0C'0504, 0x8080'8080'8080'8080));
+ } else {
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
+
+ code.psrad(lhs, 16);
+ code.psrad(rhs, 16);
+ code.packssdw(lhs, rhs);
+ code.pshufd(lhs, lhs, 0b11011000);
+ code.movq(lhs, lhs);
+ }
ctx.reg_alloc.DefineValue(inst, lhs);
}