diff options
-rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 102 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 71 |
2 files changed, 86 insertions, 87 deletions
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index c6b2b2109..1934039c0 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -2,9 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> +#include <bitset> +#include <optional> + +#include <xbyak/xbyak.h> + #include "common/assert.h" +#include "common/bit_field.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/x64/xbyak_abi.h" #include "common/x64/xbyak_util.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro_interpreter.h" @@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { +namespace { constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; -static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ +const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, RESULT, PARAMETERS, @@ -28,19 +37,73 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ BRANCH_HOLDER, }); -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} +// Arbitrarily chosen based on current booting games. +constexpr size_t MAX_CODE_SIZE = 0x10000; -std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { - return std::make_unique<MacroJITx64Impl>(maxwell3d, code); -} +class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { +public: + explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) + : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { + Compile(); + } -MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) - : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { - Compile(); -} + void Execute(const std::vector<u32>& parameters, u32 method) override; + + void Compile_ALU(Macro::Opcode opcode); + void Compile_AddImmediate(Macro::Opcode opcode); + void Compile_ExtractInsert(Macro::Opcode opcode); + void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); + void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); + void Compile_Read(Macro::Opcode opcode); + void Compile_Branch(Macro::Opcode opcode); + +private: + void Optimizer_ScanFlags(); + + void Compile(); + bool Compile_NextInstruction(); + + Xbyak::Reg32 Compile_FetchParameter(); + Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); + + void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); + void Compile_Send(Xbyak::Reg32 value); -MacroJITx64Impl::~MacroJITx64Impl() = default; + Macro::Opcode GetOpCode() const; + std::bitset<32> PersistentCallerSavedRegs() const; + + struct JITState { + Engines::Maxwell3D* maxwell3d{}; + std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; + u32 carry_flag{}; + }; + static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); + using ProgramType = void (*)(JITState*, const u32*); + + struct OptimizerState { + bool can_skip_carry{}; + bool has_delayed_pc{}; + bool zero_reg_skip{}; + bool skip_dummy_addimmediate{}; + bool optimize_for_method_move{}; + bool enable_asserts{}; + }; + OptimizerState optimizer{}; + + std::optional<Macro::Opcode> next_opcode{}; + ProgramType program{nullptr}; + + std::array<Xbyak::Label, MAX_CODE_SIZE> labels; + std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; + Xbyak::Label end_of_code{}; + + bool is_delay_slot{}; + u32 pc{}; + std::optional<u32> delayed_pc; + + const std::vector<u32>& code; + Engines::Maxwell3D& maxwell3d; +}; void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { MICROPROFILE_SCOPE(MacroJitExecute); @@ -307,11 +370,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } -static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { +void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { maxwell3d->CallMethodFromMME(method_address.address, value); } -void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { +void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); @@ -338,7 +401,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { L(dont_process); } -void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { +void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); const s32 jump_address = static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); @@ -392,7 +455,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { L(end); } -void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { +void MacroJITx64Impl::Optimizer_ScanFlags() { optimizer.can_skip_carry = true; optimizer.has_delayed_pc = false; for (auto raw_op : code) { @@ -534,7 +597,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() { return true; } -Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { +Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { mov(eax, dword[PARAMETERS]); add(PARAMETERS, sizeof(u32)); return eax; @@ -615,5 +678,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const { std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; } +} // Anonymous namespace +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) + : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} + +std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { + return std::make_unique<MacroJITx64Impl>(maxwell3d, code); +} } // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index d03d480b4..773b037ae 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -4,12 +4,7 @@ #pragma once -#include <array> -#include <bitset> -#include <xbyak/xbyak.h> -#include "common/bit_field.h" #include "common/common_types.h" -#include "common/x64/xbyak_abi.h" #include "video_core/macro/macro.h" namespace Tegra { @@ -18,9 +13,6 @@ namespace Engines { class Maxwell3D; } -/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games -constexpr size_t MAX_CODE_SIZE = 0x10000; - class MacroJITx64 final : public MacroEngine { public: explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); @@ -32,67 +24,4 @@ private: Engines::Maxwell3D& maxwell3d; }; -class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { -public: - explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); - ~MacroJITx64Impl(); - - void Execute(const std::vector<u32>& parameters, u32 method) override; - - void Compile_ALU(Macro::Opcode opcode); - void Compile_AddImmediate(Macro::Opcode opcode); - void Compile_ExtractInsert(Macro::Opcode opcode); - void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); - void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); - void Compile_Read(Macro::Opcode opcode); - void Compile_Branch(Macro::Opcode opcode); - -private: - void Optimizer_ScanFlags(); - - void Compile(); - bool Compile_NextInstruction(); - - Xbyak::Reg32 Compile_FetchParameter(); - Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - - void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); - void Compile_Send(Xbyak::Reg32 value); - - Macro::Opcode GetOpCode() const; - std::bitset<32> PersistentCallerSavedRegs() const; - - struct JITState { - Engines::Maxwell3D* maxwell3d{}; - std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; - u32 carry_flag{}; - }; - static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*, const u32*); - - struct OptimizerState { - bool can_skip_carry{}; - bool has_delayed_pc{}; - bool zero_reg_skip{}; - bool skip_dummy_addimmediate{}; - bool optimize_for_method_move{}; - bool enable_asserts{}; - }; - OptimizerState optimizer{}; - - std::optional<Macro::Opcode> next_opcode{}; - ProgramType program{nullptr}; - - std::array<Xbyak::Label, MAX_CODE_SIZE> labels; - std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; - Xbyak::Label end_of_code{}; - - bool is_delay_slot{}; - u32 pc{}; - std::optional<u32> delayed_pc; - - const std::vector<u32>& code; - Engines::Maxwell3D& maxwell3d; -}; - } // namespace Tegra |