diff options
author | zmt00 <[email protected]> | 2024-01-20 17:34:54 -0800 |
---|---|---|
committer | merry <[email protected]> | 2024-01-23 18:28:19 +0000 |
commit | ba9009abd87285c8088f39d4c9301a31b74d4da1 (patch) | |
tree | 9a1991b8c508a4c3f9f972fbf1572ba24ed38283 | |
parent | 7e66e082fdb54d4aef34c222363e5be976ec3789 (diff) | |
download | dynarmic-ba9009abd87285c8088f39d4c9301a31b74d4da1.tar.gz dynarmic-ba9009abd87285c8088f39d4c9301a31b74d4da1.zip |
emit_x64_vector: Optimize VectorSignedAbsoluteDifference
-rw-r--r-- | src/dynarmic/backend/x64/emit_x64_vector.cpp | 68 |
1 files changed, 42 insertions, 26 deletions
diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp index a878746c..85fa8d38 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -3744,35 +3744,51 @@ static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, I auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code.movdqa(mask, x); - code.movdqa(tmp1, y); + // only signed 16-bit min/max are available below SSE4.1 + if (code.HasHostFeature(HostFeature::SSE41) || esize == 16) { + code.movdqa(tmp, x); - switch (esize) { - case 8: - code.pcmpgtb(mask, y); - code.psubb(tmp1, x); - code.psubb(x, y); - break; - case 16: - code.pcmpgtw(mask, y); - code.psubw(tmp1, x); - code.psubw(x, y); - break; - case 32: - code.pcmpgtd(mask, y); - code.psubd(tmp1, x); - code.psubd(x, y); - break; - } + switch (esize) { + case 8: + code.pminsb(tmp, y); + code.pmaxsb(x, y); + code.psubb(x, tmp); + break; + case 16: + code.pminsw(tmp, y); + code.pmaxsw(x, y); + code.psubw(x, tmp); + break; + case 32: + code.pminsd(tmp, y); + code.pmaxsd(x, y); + code.psubd(x, tmp); + break; + default: + UNREACHABLE(); + } + } else { + code.movdqa(tmp, y); - code.movdqa(tmp2, mask); - code.pand(x, mask); - code.pandn(tmp2, tmp1); - code.por(x, tmp2); + switch (esize) { + case 8: + code.pcmpgtb(tmp, x); + code.psubb(x, y); + code.pxor(x, tmp); + code.psubb(x, tmp); + break; + case 32: + code.pcmpgtd(tmp, x); + code.psubd(x, y); + code.pxor(x, tmp); + code.psubd(x, tmp); + break; + default: + UNREACHABLE(); + } + } ctx.reg_alloc.DefineValue(inst, x); } |