From eb5eb9cdf7c50879ab313a99be0d8f0bae50a2f0 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sun, 4 Feb 2024 12:23:29 -0800 Subject: emit_x64_vector: GFNI implementation of EmitVectorCountLeadingZeros8 --- src/dynarmic/backend/x64/emit_x64_vector.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp index 6e68b862..4f47f2e0 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -946,6 +946,27 @@ static void EmitVectorCountLeadingZeros(VectorArray& result, const VectorArra } void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { + if (code.HasHostFeature(HostFeature::GFNI)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + + // Reverse bits: + code.gf2p8affineqb(data, code.BConst<64>(xword, 0x8040201008040201), 0); + + // Perform a tzcnt: + // Isolate lowest set bit + code.pcmpeqb(result, result); + code.paddb(result, data); + code.pandn(result, data); + // Convert lowest set bit into an index + code.gf2p8affineqb(result, code.BConst<64>(xword, 0xaaccf0ff'00000000), 8); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); -- cgit v1.2.3