aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: zmt00 <[email protected]> 2023-12-02 10:36:27 -0800
committer: merry <[email protected]> 2023-12-02 20:33:45 +0000
commit: 4c2bd4ed296170be02ee67d5164d75f5af84ac40 (patch)
tree: d52554991c17987abd4b35d53d76efacec5e7a83
parent: d68b916f574d23fe0775da2be387466186c96fe4 (diff)
download: dynarmic-4c2bd4ed296170be02ee67d5164d75f5af84ac40.tar.gz
dynarmic-4c2bd4ed296170be02ee67d5164d75f5af84ac40.zip
emit_x64_vector: Add SSE4.1 implementation of VUZP{1,2}.2S
-rw-r--r-- src/dynarmic/backend/x64/emit_x64_vector.cpp | 34
1 files changed, 25 insertions, 9 deletions
diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp
index 0da5ca51..5c7c53ee 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp
@@ -1149,8 +1149,13 @@ void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
- code.unpcklps(lhs, rhs);
- code.movq(lhs, lhs);
+ if (code.HasHostFeature(HostFeature::SSE41)) {
+ // copy bytes 0:3 of rhs to bytes 4:7 of lhs, zero out upper 8 bytes
+ code.insertps(lhs, rhs, 0b00011100);
+ } else {
+ code.unpcklps(lhs, rhs);
+ code.movq(lhs, lhs);
+ }
ctx.reg_alloc.DefineValue(inst, lhs);
}
@@ -1229,15 +1234,26 @@ void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst)
void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
- code.xorps(zero, zero);
- code.unpcklps(lhs, rhs);
- code.unpckhpd(lhs, zero);
+ if (code.HasHostFeature(HostFeature::SSE41)) {
+ const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(args[0]);
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
- ctx.reg_alloc.DefineValue(inst, lhs);
+ // copy bytes 4:7 of lhs to bytes 0:3 of rhs, zero out upper 8 bytes
+ code.insertps(rhs, lhs, 0b01001100);
+
+ ctx.reg_alloc.DefineValue(inst, rhs);
+ } else {
+ const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
+ const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
+
+ code.xorps(zero, zero);
+ code.unpcklps(lhs, rhs);
+ code.unpckhpd(lhs, zero);
+
+ ctx.reg_alloc.DefineValue(inst, lhs);
+ }
}
void EmitX64::EmitVectorEor(EmitContext& ctx, IR::Inst* inst) {