aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorzmt00 <[email protected]>2023-12-14 18:06:21 -0800
committermerry <[email protected]>2023-12-17 21:56:55 +0000
commit8ef0f2b54f83f0c359e05fc9f7711d4e83dcd210 (patch)
tree1dfd2b1748961c4fad829b659ea1b12688ce796c
parent521bf64ef216285dc1a5fbf4b8a77eebf5093499 (diff)
downloaddynarmic-8ef0f2b54f83f0c359e05fc9f7711d4e83dcd210.tar.gz
dynarmic-8ef0f2b54f83f0c359e05fc9f7711d4e83dcd210.zip
emit_x64_vector: Add SSSE3 implementation of VUZP{1,2}.8B
-rw-r--r--src/dynarmic/backend/x64/emit_x64_vector.cpp44
1 files changed, 30 insertions, 14 deletions
diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp
index fbcfc8c5..032bb121 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp
@@ -1122,15 +1122,23 @@ void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) {
// Emits host code that gathers the even-indexed bytes of the low 8 bytes of
// args[0] followed by the even-indexed bytes of the low 8 bytes of args[1]
// into the low 8 bytes of the result; the upper 8 bytes are zeroed.
// Two code paths are emitted depending on whether the host reports SSSE3.
// NOTE(review): per the commit title this backs VUZP1.8B — confirm against the IR opcode users.
void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// lhs is modified in place by both paths and becomes the result register.
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
- code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
- code.pand(lhs, tmp);
- code.pand(rhs, tmp);
- code.packuswb(lhs, rhs);
- code.pshufd(lhs, lhs, 0b11011000);
- code.movq(lhs, lhs);
+ if (code.HasHostFeature(HostFeature::SSSE3)) {
// rhs is only read on this path, so it is requested with UseXmm rather than UseScratchXmm.
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
+
// punpcklbw interleaves the low 8 bytes: lhs = l0 r0 l1 r1 ... l7 r7,
// so l[i] sits at byte 2*i and r[i] at byte 2*i + 1.
+ code.punpcklbw(lhs, rhs);
// pshufb selects source bytes 00,04,08,0C (l0,l2,l4,l6) then 01,05,09,0D (r0,r2,r4,r6);
// a control byte of 0x80 writes zero, which clears the remaining 8 bytes.
// NOTE(review): assumes MConst takes (low qword, high qword) — confirm against its declaration.
+ code.pshufb(lhs, code.MConst(xword, 0x0D'09'05'01'0C'08'04'00, 0x8080808080808080));
+ } else {
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
+
// Keep only the low (even-indexed) byte of every 16-bit lane in both inputs.
+ code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
+ code.pand(lhs, tmp);
+ code.pand(rhs, tmp);
// Every word is now <= 0xFF, so the unsigned-saturating pack narrows without changing values:
// low qword = even bytes of lhs, high qword = even bytes of rhs.
+ code.packuswb(lhs, rhs);
// Reorder dwords as 0,2,1,3 so the low-half results of lhs and rhs become adjacent in the low qword.
+ code.pshufd(lhs, lhs, 0b11011000);
// movq reg,reg zeroes the upper 64 bits.
+ code.movq(lhs, lhs);
+ }
ctx.reg_alloc.DefineValue(inst, lhs);
}
@@ -1224,13 +1232,21 @@ void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) {
// Emits host code that gathers the odd-indexed bytes of the low 8 bytes of
// args[0] followed by the odd-indexed bytes of the low 8 bytes of args[1]
// into the low 8 bytes of the result; the upper 8 bytes are zeroed.
// Two code paths are emitted depending on whether the host reports SSSE3.
// NOTE(review): per the commit title this backs VUZP2.8B — confirm against the IR opcode users.
void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// lhs is modified in place by both paths and becomes the result register.
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
- code.psraw(lhs, 8);
- code.psraw(rhs, 8);
- code.packsswb(lhs, rhs);
- code.pshufd(lhs, lhs, 0b11011000);
- code.movq(lhs, lhs);
+ if (code.HasHostFeature(HostFeature::SSSE3)) {
// rhs is only read on this path, so it is requested with UseXmm rather than UseScratchXmm.
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
+
// punpcklbw interleaves the low 8 bytes: lhs = l0 r0 l1 r1 ... l7 r7,
// so l[i] sits at byte 2*i and r[i] at byte 2*i + 1.
+ code.punpcklbw(lhs, rhs);
// pshufb selects source bytes 02,06,0A,0E (l1,l3,l5,l7) then 03,07,0B,0F (r1,r3,r5,r7);
// a control byte of 0x80 writes zero, which clears the remaining 8 bytes.
// NOTE(review): assumes MConst takes (low qword, high qword) — confirm against its declaration.
+ code.pshufb(lhs, code.MConst(xword, 0x0F'0B'07'03'0E'0A'06'02, 0x8080808080808080));
+ } else {
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
+
// Arithmetic shift moves the high (odd-indexed) byte of each 16-bit lane into the low byte,
// sign-extended, so each word lies in [-128, 127].
+ code.psraw(lhs, 8);
+ code.psraw(rhs, 8);
// Because of that range the signed-saturating pack narrows without changing any byte:
// low qword = odd bytes of lhs, high qword = odd bytes of rhs.
+ code.packsswb(lhs, rhs);
// Reorder dwords as 0,2,1,3 so the low-half results of lhs and rhs become adjacent in the low qword.
+ code.pshufd(lhs, lhs, 0b11011000);
// movq reg,reg zeroes the upper 64 bits.
+ code.movq(lhs, lhs);
+ }
ctx.reg_alloc.DefineValue(inst, lhs);
}