diff options
Diffstat (limited to 'ext/detours/src/disasm.cpp')
-rw-r--r-- | ext/detours/src/disasm.cpp | 4301 |
1 files changed, 4301 insertions, 0 deletions
diff --git a/ext/detours/src/disasm.cpp b/ext/detours/src/disasm.cpp new file mode 100644 index 0000000..c12c1c1 --- /dev/null +++ b/ext/detours/src/disasm.cpp @@ -0,0 +1,4301 @@ +////////////////////////////////////////////////////////////////////////////// +// +// Detours Disassembler (disasm.cpp of detours.lib) +// +// Microsoft Research Detours Package, Version 4.0.1 +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// + +// #define DETOUR_DEBUG 1 +#define DETOURS_INTERNAL +#include "detours.h" +#include <limits.h> + +#if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH +#error detours.h version mismatch +#endif + +#undef ASSERT +#define ASSERT(x) + +////////////////////////////////////////////////////////////////////////////// +// +// Special macros to handle the case when we are building disassembler for +// offline processing. +// + + +#if defined(DETOURS_X86_OFFLINE_LIBRARY) \ + || defined(DETOURS_X64_OFFLINE_LIBRARY) \ + || defined(DETOURS_ARM_OFFLINE_LIBRARY) \ + || defined(DETOURS_ARM64_OFFLINE_LIBRARY) \ + || defined(DETOURS_IA64_OFFLINE_LIBRARY) + +#undef DETOURS_X64 +#undef DETOURS_X86 +#undef DETOURS_IA64 +#undef DETOURS_ARM +#undef DETOURS_ARM64 + +#if defined(DETOURS_X86_OFFLINE_LIBRARY) + +#define DetourCopyInstruction DetourCopyInstructionX86 +#define DetourSetCodeModule DetourSetCodeModuleX86 +#define CDetourDis CDetourDisX86 +#define DETOURS_X86 + +#elif defined(DETOURS_X64_OFFLINE_LIBRARY) + +#if !defined(DETOURS_64BIT) +// Fix this as/if bugs are discovered. +//#error X64 disassembler can only build for 64-bit. 
+#endif + +#define DetourCopyInstruction DetourCopyInstructionX64 +#define DetourSetCodeModule DetourSetCodeModuleX64 +#define CDetourDis CDetourDisX64 +#define DETOURS_X64 + +#elif defined(DETOURS_ARM_OFFLINE_LIBRARY) + +#define DetourCopyInstruction DetourCopyInstructionARM +#define DetourSetCodeModule DetourSetCodeModuleARM +#define CDetourDis CDetourDisARM +#define DETOURS_ARM + +#elif defined(DETOURS_ARM64_OFFLINE_LIBRARY) + +#define DetourCopyInstruction DetourCopyInstructionARM64 +#define DetourSetCodeModule DetourSetCodeModuleARM64 +#define CDetourDis CDetourDisARM64 +#define DETOURS_ARM64 + +#elif defined(DETOURS_IA64_OFFLINE_LIBRARY) + +#define DetourCopyInstruction DetourCopyInstructionIA64 +#define DetourSetCodeModule DetourSetCodeModuleIA64 +#define DETOURS_IA64 + +#else + +#error + +#endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// Function: +// DetourCopyInstruction(PVOID pDst, +// PVOID *ppDstPool, +// PVOID pSrc, +// PVOID *ppTarget, +// LONG *plExtra) +// Purpose: +// Copy a single instruction from pSrc to pDst. +// +// Arguments: +// pDst: +// Destination address for the instruction. May be NULL in which +// case DetourCopyInstruction is used to measure an instruction. +// If not NULL then the source instruction is copied to the +// destination instruction and any relative arguments are adjusted. +// ppDstPool: +// Destination address for the end of the constant pool. The +// constant pool works backwards toward pDst. All memory between +// pDst and *ppDstPool must be available for use by this function. +// ppDstPool may be NULL if pDst is NULL. +// pSrc: +// Source address of the instruction. +// ppTarget: +// Out parameter for any target instruction address pointed to by +// the instruction. For example, a branch or a jump instruction has +// a target, but a load or store instruction doesn't. A target is +// another instruction that may be executed as a result of this +// instruction. 
ppTarget may be NULL. +// plExtra: +// Out parameter for the number of extra bytes needed by the +// instruction to reach the target. For example, lExtra = 3 if the +// instruction had an 8-bit relative offset, but needs a 32-bit +// relative offset. +// +// Returns: +// Returns the address of the next instruction (the one following this +// instruction in the source). By subtracting pSrc from the return value, +// the caller can determine the size of the instruction copied. +// +// Comments: +// By following the pTarget, the caller can follow alternate +// instruction streams. However, it is not always possible to determine +// the target based on static analysis. For example, the destination of +// a jump relative to a register cannot be determined from just the +// instruction stream. The output value, pTarget, can have any of the +// following outputs: +// DETOUR_INSTRUCTION_TARGET_NONE: +// The instruction has no targets. +// DETOUR_INSTRUCTION_TARGET_DYNAMIC: +// The instruction has a non-deterministic (dynamic) target. +// (i.e. the jump is to an address held in a register.) +// Address: The instruction has the specified target. +// +// When copying instructions, DetourCopyInstruction ensures that any +// targets remain constant. It does so by adjusting any IP relative +// offsets. +// + +#pragma data_seg(".detourd") +#pragma const_seg(".detourc") + +//////////////////////////////////////////////////// X86 and X64 Disassembler. +// +// Includes full support for all x86 chips prior to the Pentium III, and some newer stuff. 
+// +#if defined(DETOURS_X64) || defined(DETOURS_X86) + +class CDetourDis +{ + public: + CDetourDis(_Out_opt_ PBYTE *ppbTarget, + _Out_opt_ LONG *plExtra); + + PBYTE CopyInstruction(PBYTE pbDst, PBYTE pbSrc); + static BOOL SanityCheckSystem(); + static BOOL SetCodeModule(PBYTE pbBeg, PBYTE pbEnd, BOOL fLimitReferencesToModule); + + public: + struct COPYENTRY; + typedef const COPYENTRY * REFCOPYENTRY; + + typedef PBYTE (CDetourDis::* COPYFUNC)(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + + // nFlagBits flags. + enum { + DYNAMIC = 0x1u, + ADDRESS = 0x2u, + NOENLARGE = 0x4u, + RAX = 0x8u, + }; + + // ModR/M Flags + enum { + SIB = 0x10u, + RIP = 0x20u, + NOTSIB = 0x0fu, + }; + + struct COPYENTRY + { + // Many of these fields are often ignored. See ENTRY_DataIgnored. + ULONG nFixedSize : 4; // Fixed size of opcode + ULONG nFixedSize16 : 4; // Fixed size when 16 bit operand + ULONG nModOffset : 4; // Offset to mod/rm byte (0=none) + ULONG nRelOffset : 4; // Offset to relative target. + ULONG nFlagBits : 4; // Flags for DYNAMIC, etc. + COPYFUNC pfCopy; // Function pointer. + }; + + protected: +// These macros define common uses of nFixedSize, nFixedSize16, nModOffset, nRelOffset, nFlagBits, pfCopy. 
+#define ENTRY_DataIgnored 0, 0, 0, 0, 0, +#define ENTRY_CopyBytes1 { 1, 1, 0, 0, 0, &CDetourDis::CopyBytes } +#ifdef DETOURS_X64 +#define ENTRY_CopyBytes1Address { 9, 5, 0, 0, ADDRESS, &CDetourDis::CopyBytes } +#else +#define ENTRY_CopyBytes1Address { 5, 3, 0, 0, ADDRESS, &CDetourDis::CopyBytes } +#endif +#define ENTRY_CopyBytes1Dynamic { 1, 1, 0, 0, DYNAMIC, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes2 { 2, 2, 0, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes2Jump { ENTRY_DataIgnored &CDetourDis::CopyBytesJump } +#define ENTRY_CopyBytes2CantJump { 2, 2, 0, 1, NOENLARGE, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes2Dynamic { 2, 2, 0, 0, DYNAMIC, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes3 { 3, 3, 0, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes3Dynamic { 3, 3, 0, 0, DYNAMIC, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes3Or5 { 5, 3, 0, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes3Or5Dynamic { 5, 3, 0, 0, DYNAMIC, &CDetourDis::CopyBytes }// x86 only +#ifdef DETOURS_X64 +#define ENTRY_CopyBytes3Or5Rax { 5, 3, 0, 0, RAX, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes3Or5Target { 5, 5, 0, 1, 0, &CDetourDis::CopyBytes } +#else +#define ENTRY_CopyBytes3Or5Rax { 5, 3, 0, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes3Or5Target { 5, 3, 0, 1, 0, &CDetourDis::CopyBytes } +#endif +#define ENTRY_CopyBytes4 { 4, 4, 0, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes5 { 5, 5, 0, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes5Or7Dynamic { 7, 5, 0, 0, DYNAMIC, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes7 { 7, 7, 0, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes2Mod { 2, 2, 1, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes2ModDynamic { 2, 2, 1, 0, DYNAMIC, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes2Mod1 { 3, 3, 1, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes2ModOperand { 6, 4, 1, 0, 0, &CDetourDis::CopyBytes } +#define ENTRY_CopyBytes3Mod { 3, 3, 2, 0, 0, 
&CDetourDis::CopyBytes } // SSE3 0F 38 opcode modrm +#define ENTRY_CopyBytes3Mod1 { 4, 4, 2, 0, 0, &CDetourDis::CopyBytes } // SSE3 0F 3A opcode modrm .. imm8 +#define ENTRY_CopyBytesPrefix { ENTRY_DataIgnored &CDetourDis::CopyBytesPrefix } +#define ENTRY_CopyBytesSegment { ENTRY_DataIgnored &CDetourDis::CopyBytesSegment } +#define ENTRY_CopyBytesRax { ENTRY_DataIgnored &CDetourDis::CopyBytesRax } +#define ENTRY_CopyF2 { ENTRY_DataIgnored &CDetourDis::CopyF2 } +#define ENTRY_CopyF3 { ENTRY_DataIgnored &CDetourDis::CopyF3 } // 32bit x86 only +#define ENTRY_Copy0F { ENTRY_DataIgnored &CDetourDis::Copy0F } +#define ENTRY_Copy0F78 { ENTRY_DataIgnored &CDetourDis::Copy0F78 } +#define ENTRY_Copy0F00 { ENTRY_DataIgnored &CDetourDis::Copy0F00 } // 32bit x86 only +#define ENTRY_Copy0FB8 { ENTRY_DataIgnored &CDetourDis::Copy0FB8 } // 32bit x86 only +#define ENTRY_Copy66 { ENTRY_DataIgnored &CDetourDis::Copy66 } +#define ENTRY_Copy67 { ENTRY_DataIgnored &CDetourDis::Copy67 } +#define ENTRY_CopyF6 { ENTRY_DataIgnored &CDetourDis::CopyF6 } +#define ENTRY_CopyF7 { ENTRY_DataIgnored &CDetourDis::CopyF7 } +#define ENTRY_CopyFF { ENTRY_DataIgnored &CDetourDis::CopyFF } +#define ENTRY_CopyVex2 { ENTRY_DataIgnored &CDetourDis::CopyVex2 } +#define ENTRY_CopyVex3 { ENTRY_DataIgnored &CDetourDis::CopyVex3 } +#define ENTRY_CopyEvex { ENTRY_DataIgnored &CDetourDis::CopyEvex } // 62, 3 byte payload, then normal with implied prefixes like vex +#define ENTRY_CopyXop { ENTRY_DataIgnored &CDetourDis::CopyXop } // 0x8F ... POP /0 or AMD XOP +#define ENTRY_CopyBytesXop { 5, 5, 4, 0, 0, &CDetourDis::CopyBytes } // 0x8F xop1 xop2 opcode modrm +#define ENTRY_CopyBytesXop1 { 6, 6, 4, 0, 0, &CDetourDis::CopyBytes } // 0x8F xop1 xop2 opcode modrm ... imm8 +#define ENTRY_CopyBytesXop4 { 9, 9, 4, 0, 0, &CDetourDis::CopyBytes } // 0x8F xop1 xop2 opcode modrm ... 
imm32 +#define ENTRY_Invalid { ENTRY_DataIgnored &CDetourDis::Invalid } + + PBYTE CopyBytes(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyBytesPrefix(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyBytesSegment(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyBytesRax(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyBytesJump(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + + PBYTE Invalid(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + + PBYTE AdjustTarget(PBYTE pbDst, PBYTE pbSrc, UINT cbOp, + UINT cbTargetOffset, UINT cbTargetSize); + + protected: + PBYTE Copy0F(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE Copy0F00(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // x86 only sldt/0 str/1 lldt/2 ltr/3 err/4 verw/5 jmpe/6/dynamic invalid/7 + PBYTE Copy0F78(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // vmread, 66/extrq/ib/ib, F2/insertq/ib/ib + PBYTE Copy0FB8(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // jmpe or F3/popcnt + PBYTE Copy66(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE Copy67(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyF2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyF3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // x86 only + PBYTE CopyF6(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyF7(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyFF(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyVex2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyVex3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p); + PBYTE CopyEvex(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyXop(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + + protected: + static const COPYENTRY s_rceCopyTable[]; + static const COPYENTRY s_rceCopyTable0F[]; + static const BYTE s_rbModRm[256]; + static PBYTE 
s_pbModuleBeg; + static PBYTE s_pbModuleEnd; + static BOOL s_fLimitReferencesToModule; + + protected: + BOOL m_bOperandOverride; + BOOL m_bAddressOverride; + BOOL m_bRaxOverride; // AMD64 only + BOOL m_bVex; + BOOL m_bEvex; + BOOL m_bF2; + BOOL m_bF3; // x86 only + BYTE m_nSegmentOverride; + + PBYTE * m_ppbTarget; + LONG * m_plExtra; + + LONG m_lScratchExtra; + PBYTE m_pbScratchTarget; + BYTE m_rbScratchDst[64]; // matches or exceeds rbCode +}; + +PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst, + _Inout_opt_ PVOID *ppDstPool, + _In_ PVOID pSrc, + _Out_opt_ PVOID *ppTarget, + _Out_opt_ LONG *plExtra) +{ + UNREFERENCED_PARAMETER(ppDstPool); // x86 & x64 don't use a constant pool. + + CDetourDis oDetourDisasm((PBYTE*)ppTarget, plExtra); + return oDetourDisasm.CopyInstruction((PBYTE)pDst, (PBYTE)pSrc); +} + +/////////////////////////////////////////////////////////// Disassembler Code. +// +CDetourDis::CDetourDis(_Out_opt_ PBYTE *ppbTarget, _Out_opt_ LONG *plExtra) : + m_bOperandOverride(FALSE), + m_bAddressOverride(FALSE), + m_bRaxOverride(FALSE), + m_bF2(FALSE), + m_bF3(FALSE), + m_bVex(FALSE), + m_bEvex(FALSE) +{ + m_ppbTarget = ppbTarget ? ppbTarget : &m_pbScratchTarget; + m_plExtra = plExtra ? plExtra : &m_lScratchExtra; + + *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_NONE; + *m_plExtra = 0; +} + +PBYTE CDetourDis::CopyInstruction(PBYTE pbDst, PBYTE pbSrc) +{ + // Configure scratch areas if real areas are not available. + if (NULL == pbDst) { + pbDst = m_rbScratchDst; + } + if (NULL == pbSrc) { + // We can't copy a non-existent instruction. + SetLastError(ERROR_INVALID_DATA); + return NULL; + } + + // Figure out how big the instruction is, do the appropriate copy, + // and figure out what the target of the instruction is if any. 
+ // + REFCOPYENTRY pEntry = &s_rceCopyTable[pbSrc[0]]; + return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); +} + +PBYTE CDetourDis::CopyBytes(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ + UINT nBytesFixed; + + if (m_bVex || m_bEvex) + { + ASSERT(pEntry->nFlagBits == 0); + ASSERT(pEntry->nFixedSize == pEntry->nFixedSize16); + } + + UINT const nModOffset = pEntry->nModOffset; + UINT const nFlagBits = pEntry->nFlagBits; + UINT const nFixedSize = pEntry->nFixedSize; + UINT const nFixedSize16 = pEntry->nFixedSize16; + + if (nFlagBits & ADDRESS) { + nBytesFixed = m_bAddressOverride ? nFixedSize16 : nFixedSize; + } +#ifdef DETOURS_X64 + // REX.W trumps 66 + else if (m_bRaxOverride) { + nBytesFixed = nFixedSize + ((nFlagBits & RAX) ? 4 : 0); + } +#endif + else { + nBytesFixed = m_bOperandOverride ? nFixedSize16 : nFixedSize; + } + + UINT nBytes = nBytesFixed; + UINT nRelOffset = pEntry->nRelOffset; + UINT cbTarget = nBytes - nRelOffset; + if (nModOffset > 0) { + ASSERT(nRelOffset == 0); + BYTE const bModRm = pbSrc[nModOffset]; + BYTE const bFlags = s_rbModRm[bModRm]; + + nBytes += bFlags & NOTSIB; + + if (bFlags & SIB) { + BYTE const bSib = pbSrc[nModOffset + 1]; + + if ((bSib & 0x07) == 0x05) { + if ((bModRm & 0xc0) == 0x00) { + nBytes += 4; + } + else if ((bModRm & 0xc0) == 0x40) { + nBytes += 1; + } + else if ((bModRm & 0xc0) == 0x80) { + nBytes += 4; + } + } + cbTarget = nBytes - nRelOffset; + } +#ifdef DETOURS_X64 + else if (bFlags & RIP) { + nRelOffset = nModOffset + 1; + cbTarget = 4; + } +#endif + } + CopyMemory(pbDst, pbSrc, nBytes); + + if (nRelOffset) { + *m_ppbTarget = AdjustTarget(pbDst, pbSrc, nBytes, nRelOffset, cbTarget); +#ifdef DETOURS_X64 + if (pEntry->nRelOffset == 0) { + // This is a data target, not a code target, so we shouldn't return it. 
+ *m_ppbTarget = NULL; + } +#endif + } + if (nFlagBits & NOENLARGE) { + *m_plExtra = -*m_plExtra; + } + if (nFlagBits & DYNAMIC) { + *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + return pbSrc + nBytes; +} + +PBYTE CDetourDis::CopyBytesPrefix(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ + pbDst[0] = pbSrc[0]; + pEntry = &s_rceCopyTable[pbSrc[1]]; + return (this->*pEntry->pfCopy)(pEntry, pbDst + 1, pbSrc + 1); +} + +PBYTE CDetourDis::CopyBytesSegment(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +{ + m_nSegmentOverride = pbSrc[0]; + return CopyBytesPrefix(0, pbDst, pbSrc); +} + +PBYTE CDetourDis::CopyBytesRax(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +{ // AMD64 only + if (pbSrc[0] & 0x8) { + m_bRaxOverride = TRUE; + } + return CopyBytesPrefix(0, pbDst, pbSrc); +} + +PBYTE CDetourDis::CopyBytesJump(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ + (void)pEntry; + + PVOID pvSrcAddr = &pbSrc[1]; + PVOID pvDstAddr = NULL; + LONG_PTR nOldOffset = (LONG_PTR)*(signed char*&)pvSrcAddr; + LONG_PTR nNewOffset = 0; + + *m_ppbTarget = pbSrc + 2 + nOldOffset; + + if (pbSrc[0] == 0xeb) { + pbDst[0] = 0xe9; + pvDstAddr = &pbDst[1]; + nNewOffset = nOldOffset - ((pbDst - pbSrc) + 3); + *(UNALIGNED LONG*&)pvDstAddr = (LONG)nNewOffset; + + *m_plExtra = 3; + return pbSrc + 2; + } + + ASSERT(pbSrc[0] >= 0x70 && pbSrc[0] <= 0x7f); + + pbDst[0] = 0x0f; + pbDst[1] = 0x80 | (pbSrc[0] & 0xf); + pvDstAddr = &pbDst[2]; + nNewOffset = nOldOffset - ((pbDst - pbSrc) + 4); + *(UNALIGNED LONG*&)pvDstAddr = (LONG)nNewOffset; + + *m_plExtra = 4; + return pbSrc + 2; +} + +PBYTE CDetourDis::AdjustTarget(PBYTE pbDst, PBYTE pbSrc, UINT cbOp, + UINT cbTargetOffset, UINT cbTargetSize) +{ + PBYTE pbTarget = NULL; +#if 1 // fault injection to test test code +#if defined(DETOURS_X64) + typedef LONGLONG T; +#else + typedef LONG T; +#endif + T nOldOffset; + T nNewOffset; + PVOID pvTargetAddr = &pbDst[cbTargetOffset]; + + switch (cbTargetSize) { + case 1: + nOldOffset = *(signed 
char*&)pvTargetAddr; + break; + case 2: + nOldOffset = *(UNALIGNED SHORT*&)pvTargetAddr; + break; + case 4: + nOldOffset = *(UNALIGNED LONG*&)pvTargetAddr; + break; +#if defined(DETOURS_X64) + case 8: + nOldOffset = *(UNALIGNED LONGLONG*&)pvTargetAddr; + break; +#endif + default: + ASSERT(!"cbTargetSize is invalid."); + nOldOffset = 0; + break; + } + + pbTarget = pbSrc + cbOp + nOldOffset; + nNewOffset = nOldOffset - (T)(pbDst - pbSrc); + + switch (cbTargetSize) { + case 1: + *(CHAR*&)pvTargetAddr = (CHAR)nNewOffset; + if (nNewOffset < SCHAR_MIN || nNewOffset > SCHAR_MAX) { + *m_plExtra = sizeof(ULONG) - 1; + } + break; + case 2: + *(UNALIGNED SHORT*&)pvTargetAddr = (SHORT)nNewOffset; + if (nNewOffset < SHRT_MIN || nNewOffset > SHRT_MAX) { + *m_plExtra = sizeof(ULONG) - 2; + } + break; + case 4: + *(UNALIGNED LONG*&)pvTargetAddr = (LONG)nNewOffset; + if (nNewOffset < LONG_MIN || nNewOffset > LONG_MAX) { + *m_plExtra = sizeof(ULONG) - 4; + } + break; +#if defined(DETOURS_X64) + case 8: + *(UNALIGNED LONGLONG*&)pvTargetAddr = nNewOffset; + break; +#endif + } +#ifdef DETOURS_X64 + // When we are only computing size, source and dest can be + // far apart, distance not encodable in 32bits. Ok. + // At least still check the lower 32bits. + + if (pbDst >= m_rbScratchDst && pbDst < (sizeof(m_rbScratchDst) + m_rbScratchDst)) { + ASSERT((((size_t)pbDst + cbOp + nNewOffset) & 0xFFFFFFFF) == (((size_t)pbTarget) & 0xFFFFFFFF)); + } + else +#endif + { + ASSERT(pbDst + cbOp + nNewOffset == pbTarget); + } +#endif + return pbTarget; +} + +PBYTE CDetourDis::Invalid(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ + (void)pbDst; + (void)pEntry; + ASSERT(!"Invalid Instruction"); + return pbSrc + 1; +} + +////////////////////////////////////////////////////// Individual Bytes Codes. 
+// +PBYTE CDetourDis::Copy0F(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ + pbDst[0] = pbSrc[0]; + pEntry = &s_rceCopyTable0F[pbSrc[1]]; + return (this->*pEntry->pfCopy)(pEntry, pbDst + 1, pbSrc + 1); +} + +PBYTE CDetourDis::Copy0F78(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +{ + // vmread, 66/extrq, F2/insertq + + static const COPYENTRY vmread = /* 78 */ ENTRY_CopyBytes2Mod; + static const COPYENTRY extrq_insertq = /* 78 */ ENTRY_CopyBytes4; + + ASSERT(!(m_bF2 && m_bOperandOverride)); + + // For insertq and presumably despite documentation extrq, mode must be 11, not checked. + // insertq/extrq/78 are followed by two immediate bytes, and given mode == 11, mod/rm byte is always one byte, + // and the 0x78 makes 4 bytes (not counting the 66/F2/F which are accounted for elsewhere) + + REFCOPYENTRY const pEntry = ((m_bF2 || m_bOperandOverride) ? &extrq_insertq : &vmread); + + return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); +} + +PBYTE CDetourDis::Copy0F00(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +{ + // jmpe is 32bit x86 only + // Notice that the sizes are the same either way, but jmpe is marked as "dynamic". + + static const COPYENTRY other = /* B8 */ ENTRY_CopyBytes2Mod; // sldt/0 str/1 lldt/2 ltr/3 err/4 verw/5 jmpe/6 invalid/7 + static const COPYENTRY jmpe = /* B8 */ ENTRY_CopyBytes2ModDynamic; // jmpe/6 x86-on-IA64 syscalls + + REFCOPYENTRY const pEntry = (((6 << 3) == ((7 << 3) & pbSrc[1])) ? &jmpe : &other); + return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); +} + +PBYTE CDetourDis::Copy0FB8(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +{ + // jmpe is 32bit x86 only + + static const COPYENTRY popcnt = /* B8 */ ENTRY_CopyBytes2Mod; + static const COPYENTRY jmpe = /* B8 */ ENTRY_CopyBytes3Or5Dynamic; // jmpe x86-on-IA64 syscalls + REFCOPYENTRY const pEntry = m_bF3 ? 
&popcnt : &jmpe; + return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); +} + +PBYTE CDetourDis::Copy66(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ // Operand-size override prefix + m_bOperandOverride = TRUE; + return CopyBytesPrefix(pEntry, pbDst, pbSrc); +} + +PBYTE CDetourDis::Copy67(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ // Address size override prefix + m_bAddressOverride = TRUE; + return CopyBytesPrefix(pEntry, pbDst, pbSrc); +} + +PBYTE CDetourDis::CopyF2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ + m_bF2 = TRUE; + return CopyBytesPrefix(pEntry, pbDst, pbSrc); +} + +PBYTE CDetourDis::CopyF3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ // x86 only + m_bF3 = TRUE; + return CopyBytesPrefix(pEntry, pbDst, pbSrc); +} + +PBYTE CDetourDis::CopyF6(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ + (void)pEntry; + + // TEST BYTE /0 + if (0x00 == (0x38 & pbSrc[1])) { // reg(bits 543) of ModR/M == 0 + static const COPYENTRY ce = /* f6 */ ENTRY_CopyBytes2Mod1; + return (this->*ce.pfCopy)(&ce, pbDst, pbSrc); + } + // DIV /6 + // IDIV /7 + // IMUL /5 + // MUL /4 + // NEG /3 + // NOT /2 + + static const COPYENTRY ce = /* f6 */ ENTRY_CopyBytes2Mod; + return (this->*ce.pfCopy)(&ce, pbDst, pbSrc); +} + +PBYTE CDetourDis::CopyF7(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ + (void)pEntry; + + // TEST WORD /0 + if (0x00 == (0x38 & pbSrc[1])) { // reg(bits 543) of ModR/M == 0 + static const COPYENTRY ce = /* f7 */ ENTRY_CopyBytes2ModOperand; + return (this->*ce.pfCopy)(&ce, pbDst, pbSrc); + } + + // DIV /6 + // IDIV /7 + // IMUL /5 + // MUL /4 + // NEG /3 + // NOT /2 + static const COPYENTRY ce = /* f7 */ ENTRY_CopyBytes2Mod; + return (this->*ce.pfCopy)(&ce, pbDst, pbSrc); +} + +PBYTE CDetourDis::CopyFF(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) +{ // INC /0 + // DEC /1 + // CALL /2 + // CALL /3 + // JMP /4 + // JMP /5 + // PUSH /6 + // invalid/7 + (void)pEntry; + + static const COPYENTRY ce = /* ff */ ENTRY_CopyBytes2Mod; + PBYTE pbOut = 
(this->*ce.pfCopy)(&ce, pbDst, pbSrc); + + BYTE const b1 = pbSrc[1]; + + if (0x15 == b1 || 0x25 == b1) { // CALL [], JMP [] +#ifdef DETOURS_X64 + // All segments but FS and GS are equivalent. + if (m_nSegmentOverride != 0x64 && m_nSegmentOverride != 0x65) +#else + if (m_nSegmentOverride == 0 || m_nSegmentOverride == 0x2E) +#endif + { +#ifdef DETOURS_X64 + INT32 offset = *(UNALIGNED INT32*)&pbSrc[2]; + PBYTE *ppbTarget = (PBYTE *)(pbSrc + 6 + offset); +#else + PBYTE *ppbTarget = (PBYTE *)(SIZE_T)*(UNALIGNED ULONG*)&pbSrc[2]; +#endif + if (s_fLimitReferencesToModule && + (ppbTarget < (PVOID)s_pbModuleBeg || ppbTarget >= (PVOID)s_pbModuleEnd)) { + + *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + else { + // This can access violate on random bytes. Use DetourSetCodeModule. + *m_ppbTarget = *ppbTarget; + } + } + else { + *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + } + else if (0x10 == (0x30 & b1) || // CALL /2 or /3 --> reg(bits 543) of ModR/M == 010 or 011 + 0x20 == (0x30 & b1)) { // JMP /4 or /5 --> reg(bits 543) of ModR/M == 100 or 101 + *m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + return pbOut; +} + +PBYTE CDetourDis::CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p) +// m is first instead of last in the hopes of pbDst/pbSrc being +// passed along efficiently in the registers they were already in. 
+{ + static const COPYENTRY ceF38 = /* 38 */ ENTRY_CopyBytes2Mod; + static const COPYENTRY ceF3A = /* 3A */ ENTRY_CopyBytes2Mod1; + static const COPYENTRY ceInvalid = /* C4 */ ENTRY_Invalid; + + switch (p & 3) { + case 0: break; + case 1: m_bOperandOverride = TRUE; break; + case 2: m_bF3 = TRUE; break; + case 3: m_bF2 = TRUE; break; + } + + REFCOPYENTRY pEntry; + + switch (m) { + default: return Invalid(&ceInvalid, pbDst, pbSrc); + case 1: pEntry = &s_rceCopyTable0F[pbSrc[0]]; + return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); + case 2: return CopyBytes(&ceF38, pbDst, pbSrc); + case 3: return CopyBytes(&ceF3A, pbDst, pbSrc); + } +} + +PBYTE CDetourDis::CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc) +// m is first instead of last in the hopes of pbDst/pbSrc being +// passed along efficiently in the registers they were already in. +{ + m_bVex = TRUE; + BYTE const p = (BYTE)(pbSrc[-1] & 3); // p in last byte + return CopyVexEvexCommon(m, pbDst, pbSrc, p); +} + + +PBYTE CDetourDis::CopyVex3(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +// 3 byte VEX prefix 0xC4 +{ +#ifdef DETOURS_X86 + const static COPYENTRY ceLES = /* C4 */ ENTRY_CopyBytes2Mod; + if ((pbSrc[1] & 0xC0) != 0xC0) { + REFCOPYENTRY pEntry = &ceLES; + return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); + } +#endif + pbDst[0] = pbSrc[0]; + pbDst[1] = pbSrc[1]; + pbDst[2] = pbSrc[2]; +#ifdef DETOURS_X64 + m_bRaxOverride |= !!(pbSrc[2] & 0x80); // w in last byte, see CopyBytesRax +#else + // + // TODO + // + // Usually the VEX.W bit changes the size of a general purpose register and is ignored for 32bit. + // Sometimes it is an opcode extension. + // Look in the Intel manual, in the instruction-by-instruction reference, for ".W1", + // without nearby wording saying it is ignored for 32bit. + // For example: "VFMADD132PD/VFMADD213PD/VFMADD231PD Fused Multiply-Add of Packed Double-Precision Floating-Point Values". + // + // Then, go through each such case and determine if W0 vs. 
W1 affect the size of the instruction. Probably not. + // Look for the same encoding but with "W1" changed to "W0". + // Here is one such pairing: + // VFMADD132PD/VFMADD213PD/VFMADD231PD Fused Multiply-Add of Packed Double-Precision Floating-Point Values + // + // VEX.DDS.128.66.0F38.W1 98 /r A V/V FMA Multiply packed double-precision floating-point values + // from xmm0 and xmm2/mem, add to xmm1 and + // put result in xmm0. + // VFMADD132PD xmm0, xmm1, xmm2/m128 + // + // VFMADD132PS/VFMADD213PS/VFMADD231PS Fused Multiply-Add of Packed Single-Precision Floating-Point Values + // VEX.DDS.128.66.0F38.W0 98 /r A V/V FMA Multiply packed single-precision floating-point values + // from xmm0 and xmm2/mem, add to xmm1 and put + // result in xmm0. + // VFMADD132PS xmm0, xmm1, xmm2/m128 + // +#endif + return CopyVexCommon(pbSrc[1] & 0x1F, pbDst + 3, pbSrc + 3); +} + +PBYTE CDetourDis::CopyVex2(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +// 2 byte VEX prefix 0xC5 +{ +#ifdef DETOURS_X86 + const static COPYENTRY ceLDS = /* C5 */ ENTRY_CopyBytes2Mod; + if ((pbSrc[1] & 0xC0) != 0xC0) { + REFCOPYENTRY pEntry = &ceLDS; + return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); + } +#endif + pbDst[0] = pbSrc[0]; + pbDst[1] = pbSrc[1]; + return CopyVexCommon(1, pbDst + 2, pbSrc + 2); +} + +PBYTE CDetourDis::CopyEvex(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +// 62, 3 byte payload, x86 with implied prefixes like Vex +// for 32bit, mode 0xC0 else fallback to bound /r +{ + // NOTE: Intel and Wikipedia number these differently. + // Intel says 0-2, Wikipedia says 1-3. 
+ + BYTE const p0 = pbSrc[1]; + +#ifdef DETOURS_X86 + const static COPYENTRY ceBound = /* 62 */ ENTRY_CopyBytes2Mod; + if ((p0 & 0xC0) != 0xC0) { + return CopyBytes(&ceBound, pbDst, pbSrc); + } +#endif + + static const COPYENTRY ceInvalid = /* 62 */ ENTRY_Invalid; + + if ((p0 & 0x0C) != 0) + return Invalid(&ceInvalid, pbDst, pbSrc); + + BYTE const p1 = pbSrc[2]; + + if ((p1 & 0x04) != 0x04) + return Invalid(&ceInvalid, pbDst, pbSrc); + + // Copy 4 byte prefix. + *(UNALIGNED ULONG *)pbDst = *(UNALIGNED ULONG*)pbSrc; + + m_bEvex = TRUE; + +#ifdef DETOURS_X64 + m_bRaxOverride |= !!(p1 & 0x80); // w +#endif + + return CopyVexEvexCommon(p0 & 3u, pbDst + 4, pbSrc + 4, p1 & 3u); +} + +PBYTE CDetourDis::CopyXop(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +/* 3 byte AMD XOP prefix 0x8F +byte0: 0x8F +byte1: RXBmmmmm +byte2: WvvvvLpp +byte3: opcode +mmmmm >= 8, else pop +mmmmm only otherwise defined for 8, 9, A. +pp is like VEX but only instructions with 0 are defined +*/ +{ + const static COPYENTRY cePop = /* 8F */ ENTRY_CopyBytes2Mod; + const static COPYENTRY ceXop = /* 8F */ ENTRY_CopyBytesXop; + const static COPYENTRY ceXop1 = /* 8F */ ENTRY_CopyBytesXop1; + const static COPYENTRY ceXop4 = /* 8F */ ENTRY_CopyBytesXop4; + + BYTE const m = (BYTE)(pbSrc[1] & 0x1F); + ASSERT(m <= 10); + switch (m) + { + default: + return CopyBytes(&cePop, pbDst, pbSrc); + + case 8: // modrm with 8bit immediate + return CopyBytes(&ceXop1, pbDst, pbSrc); + + case 9: // modrm with no immediate + return CopyBytes(&ceXop, pbDst, pbSrc); + + case 10: // modrm with 32bit immediate + return CopyBytes(&ceXop4, pbDst, pbSrc); + } +} + +////////////////////////////////////////////////////////////////////////////// +// +PBYTE CDetourDis::s_pbModuleBeg = NULL; +PBYTE CDetourDis::s_pbModuleEnd = (PBYTE)~(ULONG_PTR)0; +BOOL CDetourDis::s_fLimitReferencesToModule = FALSE; + +BOOL CDetourDis::SetCodeModule(PBYTE pbBeg, PBYTE pbEnd, BOOL fLimitReferencesToModule) +{ + if (pbEnd < pbBeg) { + return FALSE; + } + + 
s_pbModuleBeg = pbBeg; + s_pbModuleEnd = pbEnd; + s_fLimitReferencesToModule = fLimitReferencesToModule; + + return TRUE; +} + +///////////////////////////////////////////////////////// Disassembler Tables. +// +const BYTE CDetourDis::s_rbModRm[256] = { + 0,0,0,0, SIB|1,RIP|4,0,0, 0,0,0,0, SIB|1,RIP|4,0,0, // 0x + 0,0,0,0, SIB|1,RIP|4,0,0, 0,0,0,0, SIB|1,RIP|4,0,0, // 1x + 0,0,0,0, SIB|1,RIP|4,0,0, 0,0,0,0, SIB|1,RIP|4,0,0, // 2x + 0,0,0,0, SIB|1,RIP|4,0,0, 0,0,0,0, SIB|1,RIP|4,0,0, // 3x + 1,1,1,1, 2,1,1,1, 1,1,1,1, 2,1,1,1, // 4x + 1,1,1,1, 2,1,1,1, 1,1,1,1, 2,1,1,1, // 5x + 1,1,1,1, 2,1,1,1, 1,1,1,1, 2,1,1,1, // 6x + 1,1,1,1, 2,1,1,1, 1,1,1,1, 2,1,1,1, // 7x + 4,4,4,4, 5,4,4,4, 4,4,4,4, 5,4,4,4, // 8x + 4,4,4,4, 5,4,4,4, 4,4,4,4, 5,4,4,4, // 9x + 4,4,4,4, 5,4,4,4, 4,4,4,4, 5,4,4,4, // Ax + 4,4,4,4, 5,4,4,4, 4,4,4,4, 5,4,4,4, // Bx + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, // Cx + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, // Dx + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, // Ex + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 // Fx +}; + +const CDetourDis::COPYENTRY CDetourDis::s_rceCopyTable[] = +{ + /* 00 */ ENTRY_CopyBytes2Mod, // ADD /r + /* 01 */ ENTRY_CopyBytes2Mod, // ADD /r + /* 02 */ ENTRY_CopyBytes2Mod, // ADD /r + /* 03 */ ENTRY_CopyBytes2Mod, // ADD /r + /* 04 */ ENTRY_CopyBytes2, // ADD ib + /* 05 */ ENTRY_CopyBytes3Or5, // ADD iw +#ifdef DETOURS_X64 + /* 06 */ ENTRY_Invalid, // Invalid + /* 07 */ ENTRY_Invalid, // Invalid +#else + /* 06 */ ENTRY_CopyBytes1, // PUSH + /* 07 */ ENTRY_CopyBytes1, // POP +#endif + /* 08 */ ENTRY_CopyBytes2Mod, // OR /r + /* 09 */ ENTRY_CopyBytes2Mod, // OR /r + /* 0A */ ENTRY_CopyBytes2Mod, // OR /r + /* 0B */ ENTRY_CopyBytes2Mod, // OR /r + /* 0C */ ENTRY_CopyBytes2, // OR ib + /* 0D */ ENTRY_CopyBytes3Or5, // OR iw +#ifdef DETOURS_X64 + /* 0E */ ENTRY_Invalid, // Invalid +#else + /* 0E */ ENTRY_CopyBytes1, // PUSH +#endif + /* 0F */ ENTRY_Copy0F, // Extension Ops + /* 10 */ ENTRY_CopyBytes2Mod, // ADC /r + /* 11 */ ENTRY_CopyBytes2Mod, // ADC /r 
+ /* 12 */ ENTRY_CopyBytes2Mod, // ADC /r + /* 13 */ ENTRY_CopyBytes2Mod, // ADC /r + /* 14 */ ENTRY_CopyBytes2, // ADC ib + /* 15 */ ENTRY_CopyBytes3Or5, // ADC id +#ifdef DETOURS_X64 + /* 16 */ ENTRY_Invalid, // Invalid + /* 17 */ ENTRY_Invalid, // Invalid +#else + /* 16 */ ENTRY_CopyBytes1, // PUSH + /* 17 */ ENTRY_CopyBytes1, // POP +#endif + /* 18 */ ENTRY_CopyBytes2Mod, // SBB /r + /* 19 */ ENTRY_CopyBytes2Mod, // SBB /r + /* 1A */ ENTRY_CopyBytes2Mod, // SBB /r + /* 1B */ ENTRY_CopyBytes2Mod, // SBB /r + /* 1C */ ENTRY_CopyBytes2, // SBB ib + /* 1D */ ENTRY_CopyBytes3Or5, // SBB id +#ifdef DETOURS_X64 + /* 1E */ ENTRY_Invalid, // Invalid + /* 1F */ ENTRY_Invalid, // Invalid +#else + /* 1E */ ENTRY_CopyBytes1, // PUSH + /* 1F */ ENTRY_CopyBytes1, // POP +#endif + /* 20 */ ENTRY_CopyBytes2Mod, // AND /r + /* 21 */ ENTRY_CopyBytes2Mod, // AND /r + /* 22 */ ENTRY_CopyBytes2Mod, // AND /r + /* 23 */ ENTRY_CopyBytes2Mod, // AND /r + /* 24 */ ENTRY_CopyBytes2, // AND ib + /* 25 */ ENTRY_CopyBytes3Or5, // AND id + /* 26 */ ENTRY_CopyBytesSegment, // ES prefix +#ifdef DETOURS_X64 + /* 27 */ ENTRY_Invalid, // Invalid +#else + /* 27 */ ENTRY_CopyBytes1, // DAA +#endif + /* 28 */ ENTRY_CopyBytes2Mod, // SUB /r + /* 29 */ ENTRY_CopyBytes2Mod, // SUB /r + /* 2A */ ENTRY_CopyBytes2Mod, // SUB /r + /* 2B */ ENTRY_CopyBytes2Mod, // SUB /r + /* 2C */ ENTRY_CopyBytes2, // SUB ib + /* 2D */ ENTRY_CopyBytes3Or5, // SUB id + /* 2E */ ENTRY_CopyBytesSegment, // CS prefix +#ifdef DETOURS_X64 + /* 2F */ ENTRY_Invalid, // Invalid +#else + /* 2F */ ENTRY_CopyBytes1, // DAS +#endif + /* 30 */ ENTRY_CopyBytes2Mod, // XOR /r + /* 31 */ ENTRY_CopyBytes2Mod, // XOR /r + /* 32 */ ENTRY_CopyBytes2Mod, // XOR /r + /* 33 */ ENTRY_CopyBytes2Mod, // XOR /r + /* 34 */ ENTRY_CopyBytes2, // XOR ib + /* 35 */ ENTRY_CopyBytes3Or5, // XOR id + /* 36 */ ENTRY_CopyBytesSegment, // SS prefix +#ifdef DETOURS_X64 + /* 37 */ ENTRY_Invalid, // Invalid +#else + /* 37 */ ENTRY_CopyBytes1, // AAA +#endif + /* 
38 */ ENTRY_CopyBytes2Mod, // CMP /r + /* 39 */ ENTRY_CopyBytes2Mod, // CMP /r + /* 3A */ ENTRY_CopyBytes2Mod, // CMP /r + /* 3B */ ENTRY_CopyBytes2Mod, // CMP /r + /* 3C */ ENTRY_CopyBytes2, // CMP ib + /* 3D */ ENTRY_CopyBytes3Or5, // CMP id + /* 3E */ ENTRY_CopyBytesSegment, // DS prefix +#ifdef DETOURS_X64 + /* 3F */ ENTRY_Invalid, // Invalid +#else + /* 3F */ ENTRY_CopyBytes1, // AAS +#endif +#ifdef DETOURS_X64 // For Rax Prefix + /* 40 */ ENTRY_CopyBytesRax, // Rax + /* 41 */ ENTRY_CopyBytesRax, // Rax + /* 42 */ ENTRY_CopyBytesRax, // Rax + /* 43 */ ENTRY_CopyBytesRax, // Rax + /* 44 */ ENTRY_CopyBytesRax, // Rax + /* 45 */ ENTRY_CopyBytesRax, // Rax + /* 46 */ ENTRY_CopyBytesRax, // Rax + /* 47 */ ENTRY_CopyBytesRax, // Rax + /* 48 */ ENTRY_CopyBytesRax, // Rax + /* 49 */ ENTRY_CopyBytesRax, // Rax + /* 4A */ ENTRY_CopyBytesRax, // Rax + /* 4B */ ENTRY_CopyBytesRax, // Rax + /* 4C */ ENTRY_CopyBytesRax, // Rax + /* 4D */ ENTRY_CopyBytesRax, // Rax + /* 4E */ ENTRY_CopyBytesRax, // Rax + /* 4F */ ENTRY_CopyBytesRax, // Rax +#else + /* 40 */ ENTRY_CopyBytes1, // INC + /* 41 */ ENTRY_CopyBytes1, // INC + /* 42 */ ENTRY_CopyBytes1, // INC + /* 43 */ ENTRY_CopyBytes1, // INC + /* 44 */ ENTRY_CopyBytes1, // INC + /* 45 */ ENTRY_CopyBytes1, // INC + /* 46 */ ENTRY_CopyBytes1, // INC + /* 47 */ ENTRY_CopyBytes1, // INC + /* 48 */ ENTRY_CopyBytes1, // DEC + /* 49 */ ENTRY_CopyBytes1, // DEC + /* 4A */ ENTRY_CopyBytes1, // DEC + /* 4B */ ENTRY_CopyBytes1, // DEC + /* 4C */ ENTRY_CopyBytes1, // DEC + /* 4D */ ENTRY_CopyBytes1, // DEC + /* 4E */ ENTRY_CopyBytes1, // DEC + /* 4F */ ENTRY_CopyBytes1, // DEC +#endif + /* 50 */ ENTRY_CopyBytes1, // PUSH + /* 51 */ ENTRY_CopyBytes1, // PUSH + /* 52 */ ENTRY_CopyBytes1, // PUSH + /* 53 */ ENTRY_CopyBytes1, // PUSH + /* 54 */ ENTRY_CopyBytes1, // PUSH + /* 55 */ ENTRY_CopyBytes1, // PUSH + /* 56 */ ENTRY_CopyBytes1, // PUSH + /* 57 */ ENTRY_CopyBytes1, // PUSH + /* 58 */ ENTRY_CopyBytes1, // POP + /* 59 */ ENTRY_CopyBytes1, 
// POP + /* 5A */ ENTRY_CopyBytes1, // POP + /* 5B */ ENTRY_CopyBytes1, // POP + /* 5C */ ENTRY_CopyBytes1, // POP + /* 5D */ ENTRY_CopyBytes1, // POP + /* 5E */ ENTRY_CopyBytes1, // POP + /* 5F */ ENTRY_CopyBytes1, // POP +#ifdef DETOURS_X64 + /* 60 */ ENTRY_Invalid, // Invalid + /* 61 */ ENTRY_Invalid, // Invalid + /* 62 */ ENTRY_CopyEvex, // EVEX / AVX512 +#else + /* 60 */ ENTRY_CopyBytes1, // PUSHAD + /* 61 */ ENTRY_CopyBytes1, // POPAD + /* 62 */ ENTRY_CopyEvex, // BOUND /r and EVEX / AVX512 +#endif + /* 63 */ ENTRY_CopyBytes2Mod, // 32bit ARPL /r, 64bit MOVSXD + /* 64 */ ENTRY_CopyBytesSegment, // FS prefix + /* 65 */ ENTRY_CopyBytesSegment, // GS prefix + /* 66 */ ENTRY_Copy66, // Operand Prefix + /* 67 */ ENTRY_Copy67, // Address Prefix + /* 68 */ ENTRY_CopyBytes3Or5, // PUSH + /* 69 */ ENTRY_CopyBytes2ModOperand, // IMUL /r iz + /* 6A */ ENTRY_CopyBytes2, // PUSH + /* 6B */ ENTRY_CopyBytes2Mod1, // IMUL /r ib + /* 6C */ ENTRY_CopyBytes1, // INS + /* 6D */ ENTRY_CopyBytes1, // INS + /* 6E */ ENTRY_CopyBytes1, // OUTS/OUTSB + /* 6F */ ENTRY_CopyBytes1, // OUTS/OUTSW + /* 70 */ ENTRY_CopyBytes2Jump, // JO // 0f80 + /* 71 */ ENTRY_CopyBytes2Jump, // JNO // 0f81 + /* 72 */ ENTRY_CopyBytes2Jump, // JB/JC/JNAE // 0f82 + /* 73 */ ENTRY_CopyBytes2Jump, // JAE/JNB/JNC // 0f83 + /* 74 */ ENTRY_CopyBytes2Jump, // JE/JZ // 0f84 + /* 75 */ ENTRY_CopyBytes2Jump, // JNE/JNZ // 0f85 + /* 76 */ ENTRY_CopyBytes2Jump, // JBE/JNA // 0f86 + /* 77 */ ENTRY_CopyBytes2Jump, // JA/JNBE // 0f87 + /* 78 */ ENTRY_CopyBytes2Jump, // JS // 0f88 + /* 79 */ ENTRY_CopyBytes2Jump, // JNS // 0f89 + /* 7A */ ENTRY_CopyBytes2Jump, // JP/JPE // 0f8a + /* 7B */ ENTRY_CopyBytes2Jump, // JNP/JPO // 0f8b + /* 7C */ ENTRY_CopyBytes2Jump, // JL/JNGE // 0f8c + /* 7D */ ENTRY_CopyBytes2Jump, // JGE/JNL // 0f8d + /* 7E */ ENTRY_CopyBytes2Jump, // JLE/JNG // 0f8e + /* 7F */ ENTRY_CopyBytes2Jump, // JG/JNLE // 0f8f + /* 80 */ ENTRY_CopyBytes2Mod1, // ADD/0 OR/1 ADC/2 SBB/3 AND/4 SUB/5 XOR/6 CMP/7 byte 
reg, immediate byte + /* 81 */ ENTRY_CopyBytes2ModOperand, // ADD/0 OR/1 ADC/2 SBB/3 AND/4 SUB/5 XOR/6 CMP/7 reg, immediate word or dword +#ifdef DETOURS_X64 + /* 82 */ ENTRY_Invalid, // Invalid +#else + /* 82 */ ENTRY_CopyBytes2Mod1, // Alias of 80: ADD/0 OR/1 ADC/2 SBB/3 AND/4 SUB/5 XOR/6 CMP/7 byte reg, immediate byte +#endif + /* 83 */ ENTRY_CopyBytes2Mod1, // ADD/0 OR/1 ADC/2 SBB/3 AND/4 SUB/5 XOR/6 CMP/7 reg, immediate byte + /* 84 */ ENTRY_CopyBytes2Mod, // TEST /r + /* 85 */ ENTRY_CopyBytes2Mod, // TEST /r + /* 86 */ ENTRY_CopyBytes2Mod, // XCHG /r @todo + /* 87 */ ENTRY_CopyBytes2Mod, // XCHG /r @todo + /* 88 */ ENTRY_CopyBytes2Mod, // MOV /r + /* 89 */ ENTRY_CopyBytes2Mod, // MOV /r + /* 8A */ ENTRY_CopyBytes2Mod, // MOV /r + /* 8B */ ENTRY_CopyBytes2Mod, // MOV /r + /* 8C */ ENTRY_CopyBytes2Mod, // MOV /r + /* 8D */ ENTRY_CopyBytes2Mod, // LEA /r + /* 8E */ ENTRY_CopyBytes2Mod, // MOV /r + /* 8F */ ENTRY_CopyXop, // POP /0 or AMD XOP + /* 90 */ ENTRY_CopyBytes1, // NOP + /* 91 */ ENTRY_CopyBytes1, // XCHG + /* 92 */ ENTRY_CopyBytes1, // XCHG + /* 93 */ ENTRY_CopyBytes1, // XCHG + /* 94 */ ENTRY_CopyBytes1, // XCHG + /* 95 */ ENTRY_CopyBytes1, // XCHG + /* 96 */ ENTRY_CopyBytes1, // XCHG + /* 97 */ ENTRY_CopyBytes1, // XCHG + /* 98 */ ENTRY_CopyBytes1, // CWDE + /* 99 */ ENTRY_CopyBytes1, // CDQ +#ifdef DETOURS_X64 + /* 9A */ ENTRY_Invalid, // Invalid +#else + /* 9A */ ENTRY_CopyBytes5Or7Dynamic, // CALL cp +#endif + /* 9B */ ENTRY_CopyBytes1, // WAIT/FWAIT + /* 9C */ ENTRY_CopyBytes1, // PUSHFD + /* 9D */ ENTRY_CopyBytes1, // POPFD + /* 9E */ ENTRY_CopyBytes1, // SAHF + /* 9F */ ENTRY_CopyBytes1, // LAHF + /* A0 */ ENTRY_CopyBytes1Address, // MOV + /* A1 */ ENTRY_CopyBytes1Address, // MOV + /* A2 */ ENTRY_CopyBytes1Address, // MOV + /* A3 */ ENTRY_CopyBytes1Address, // MOV + /* A4 */ ENTRY_CopyBytes1, // MOVS + /* A5 */ ENTRY_CopyBytes1, // MOVS/MOVSD + /* A6 */ ENTRY_CopyBytes1, // CMPS/CMPSB + /* A7 */ ENTRY_CopyBytes1, // CMPS/CMPSW + /* A8 */ ENTRY_CopyBytes2, // TEST + /* A9 */ ENTRY_CopyBytes3Or5, // TEST + /* AA */
ENTRY_CopyBytes1, // STOS/STOSB + /* AB */ ENTRY_CopyBytes1, // STOS/STOSW + /* AC */ ENTRY_CopyBytes1, // LODS/LODSB + /* AD */ ENTRY_CopyBytes1, // LODS/LODSW + /* AE */ ENTRY_CopyBytes1, // SCAS/SCASB + /* AF */ ENTRY_CopyBytes1, // SCAS/SCASD + /* B0 */ ENTRY_CopyBytes2, // MOV B0+rb + /* B1 */ ENTRY_CopyBytes2, // MOV B0+rb + /* B2 */ ENTRY_CopyBytes2, // MOV B0+rb + /* B3 */ ENTRY_CopyBytes2, // MOV B0+rb + /* B4 */ ENTRY_CopyBytes2, // MOV B0+rb + /* B5 */ ENTRY_CopyBytes2, // MOV B0+rb + /* B6 */ ENTRY_CopyBytes2, // MOV B0+rb + /* B7 */ ENTRY_CopyBytes2, // MOV B0+rb + /* B8 */ ENTRY_CopyBytes3Or5Rax, // MOV B8+rb + /* B9 */ ENTRY_CopyBytes3Or5Rax, // MOV B8+rb + /* BA */ ENTRY_CopyBytes3Or5Rax, // MOV B8+rb + /* BB */ ENTRY_CopyBytes3Or5Rax, // MOV B8+rb + /* BC */ ENTRY_CopyBytes3Or5Rax, // MOV B8+rb + /* BD */ ENTRY_CopyBytes3Or5Rax, // MOV B8+rb + /* BE */ ENTRY_CopyBytes3Or5Rax, // MOV B8+rb + /* BF */ ENTRY_CopyBytes3Or5Rax, // MOV B8+rb + /* C0 */ ENTRY_CopyBytes2Mod1, // RCL/2 ib, etc. + /* C1 */ ENTRY_CopyBytes2Mod1, // RCL/2 ib, etc. + /* C2 */ ENTRY_CopyBytes3, // RET + /* C3 */ ENTRY_CopyBytes1, // RET + /* C4 */ ENTRY_CopyVex3, // LES, VEX 3-byte opcodes. + /* C5 */ ENTRY_CopyVex2, // LDS, VEX 2-byte opcodes. + /* C6 */ ENTRY_CopyBytes2Mod1, // MOV + /* C7 */ ENTRY_CopyBytes2ModOperand, // MOV/0 XBEGIN/7 + /* C8 */ ENTRY_CopyBytes4, // ENTER + /* C9 */ ENTRY_CopyBytes1, // LEAVE + /* CA */ ENTRY_CopyBytes3Dynamic, // RET + /* CB */ ENTRY_CopyBytes1Dynamic, // RET + /* CC */ ENTRY_CopyBytes1Dynamic, // INT 3 + /* CD */ ENTRY_CopyBytes2Dynamic, // INT ib +#ifdef DETOURS_X64 + /* CE */ ENTRY_Invalid, // Invalid +#else + /* CE */ ENTRY_CopyBytes1Dynamic, // INTO +#endif + /* CF */ ENTRY_CopyBytes1Dynamic, // IRET + /* D0 */ ENTRY_CopyBytes2Mod, // RCL/2, etc. + /* D1 */ ENTRY_CopyBytes2Mod, // RCL/2, etc. + /* D2 */ ENTRY_CopyBytes2Mod, // RCL/2, etc. + /* D3 */ ENTRY_CopyBytes2Mod, // RCL/2, etc. 
+#ifdef DETOURS_X64 + /* D4 */ ENTRY_Invalid, // Invalid + /* D5 */ ENTRY_Invalid, // Invalid +#else + /* D4 */ ENTRY_CopyBytes2, // AAM + /* D5 */ ENTRY_CopyBytes2, // AAD +#endif + /* D6 */ ENTRY_Invalid, // Invalid + /* D7 */ ENTRY_CopyBytes1, // XLAT/XLATB + /* D8 */ ENTRY_CopyBytes2Mod, // FADD, etc. + /* D9 */ ENTRY_CopyBytes2Mod, // F2XM1, etc. + /* DA */ ENTRY_CopyBytes2Mod, // FIADD, etc. + /* DB */ ENTRY_CopyBytes2Mod, // FCLEX, etc. + /* DC */ ENTRY_CopyBytes2Mod, // FADD/0, etc. + /* DD */ ENTRY_CopyBytes2Mod, // FFREE, etc. + /* DE */ ENTRY_CopyBytes2Mod, // FADDP, etc. + /* DF */ ENTRY_CopyBytes2Mod, // FBLD/4, etc. + /* E0 */ ENTRY_CopyBytes2CantJump, // LOOPNE cb + /* E1 */ ENTRY_CopyBytes2CantJump, // LOOPE cb + /* E2 */ ENTRY_CopyBytes2CantJump, // LOOP cb + /* E3 */ ENTRY_CopyBytes2CantJump, // JCXZ/JECXZ + /* E4 */ ENTRY_CopyBytes2, // IN ib + /* E5 */ ENTRY_CopyBytes2, // IN ib + /* E6 */ ENTRY_CopyBytes2, // OUT ib + /* E7 */ ENTRY_CopyBytes2, // OUT ib + /* E8 */ ENTRY_CopyBytes3Or5Target, // CALL cd + /* E9 */ ENTRY_CopyBytes3Or5Target, // JMP cd +#ifdef DETOURS_X64 + /* EA */ ENTRY_Invalid, // Invalid +#else + /* EA */ ENTRY_CopyBytes5Or7Dynamic, // JMP cp +#endif + /* EB */ ENTRY_CopyBytes2Jump, // JMP cb + /* EC */ ENTRY_CopyBytes1, // IN (port in DX, no immediate) + /* ED */ ENTRY_CopyBytes1, // IN (port in DX, no immediate) + /* EE */ ENTRY_CopyBytes1, // OUT + /* EF */ ENTRY_CopyBytes1, // OUT + /* F0 */ ENTRY_CopyBytesPrefix, // LOCK prefix + /* F1 */ ENTRY_CopyBytes1Dynamic, // INT1 / ICEBP somewhat documented by AMD, not by Intel + /* F2 */ ENTRY_CopyF2, // REPNE prefix +//#ifdef DETOURS_X86 + /* F3 */ ENTRY_CopyF3, // REPE prefix +//#else +// This does presently suffice for AMD64 but it requires tracing +// through a bunch of code to verify and seems not worth maintaining.
+// /* F3 */ ENTRY_CopyBytesPrefix, // REPE prefix +//#endif + /* F4 */ ENTRY_CopyBytes1, // HLT + /* F5 */ ENTRY_CopyBytes1, // CMC + /* F6 */ ENTRY_CopyF6, // TEST/0, DIV/6 + /* F7 */ ENTRY_CopyF7, // TEST/0, DIV/6 + /* F8 */ ENTRY_CopyBytes1, // CLC + /* F9 */ ENTRY_CopyBytes1, // STC + /* FA */ ENTRY_CopyBytes1, // CLI + /* FB */ ENTRY_CopyBytes1, // STI + /* FC */ ENTRY_CopyBytes1, // CLD + /* FD */ ENTRY_CopyBytes1, // STD + /* FE */ ENTRY_CopyBytes2Mod, // DEC/1,INC/0 + /* FF */ ENTRY_CopyFF, // CALL/2 +}; + +const CDetourDis::COPYENTRY CDetourDis::s_rceCopyTable0F[] = +{ +#ifdef DETOURS_X86 + /* 00 */ ENTRY_Copy0F00, // sldt/0 str/1 lldt/2 ltr/3 verr/4 verw/5 jmpe/6/dynamic invalid/7 +#else + /* 00 */ ENTRY_CopyBytes2Mod, // sldt/0 str/1 lldt/2 ltr/3 verr/4 verw/5 jmpe/6/dynamic invalid/7 +#endif + /* 01 */ ENTRY_CopyBytes2Mod, // INVLPG/7, etc. + /* 02 */ ENTRY_CopyBytes2Mod, // LAR/r + /* 03 */ ENTRY_CopyBytes2Mod, // LSL/r + /* 04 */ ENTRY_Invalid, // _04 + /* 05 */ ENTRY_CopyBytes1, // SYSCALL + /* 06 */ ENTRY_CopyBytes1, // CLTS + /* 07 */ ENTRY_CopyBytes1, // SYSRET + /* 08 */ ENTRY_CopyBytes1, // INVD + /* 09 */ ENTRY_CopyBytes1, // WBINVD + /* 0A */ ENTRY_Invalid, // _0A + /* 0B */ ENTRY_CopyBytes1, // UD2 + /* 0C */ ENTRY_Invalid, // _0C + /* 0D */ ENTRY_CopyBytes2Mod, // PREFETCH + /* 0E */ ENTRY_CopyBytes1, // FEMMS (3DNow -- not in Intel documentation) + /* 0F */ ENTRY_CopyBytes2Mod1, // 3DNow Opcodes + /* 10 */ ENTRY_CopyBytes2Mod, // MOVSS MOVUPD MOVSD + /* 11 */ ENTRY_CopyBytes2Mod, // MOVSS MOVUPD MOVSD + /* 12 */ ENTRY_CopyBytes2Mod, // MOVLPD + /* 13 */ ENTRY_CopyBytes2Mod, // MOVLPD + /* 14 */ ENTRY_CopyBytes2Mod, // UNPCKLPD + /* 15 */ ENTRY_CopyBytes2Mod, // UNPCKHPD + /* 16 */ ENTRY_CopyBytes2Mod, // MOVHPD + /* 17 */ ENTRY_CopyBytes2Mod, // MOVHPD + /* 18 */ ENTRY_CopyBytes2Mod, // PREFETCHNTA...
+ /* 19 */ ENTRY_CopyBytes2Mod, // NOP/r multi byte nop, not documented by Intel, documented by AMD + /* 1A */ ENTRY_CopyBytes2Mod, // NOP/r multi byte nop, not documented by Intel, documented by AMD + /* 1B */ ENTRY_CopyBytes2Mod, // NOP/r multi byte nop, not documented by Intel, documented by AMD + /* 1C */ ENTRY_CopyBytes2Mod, // NOP/r multi byte nop, not documented by Intel, documented by AMD + /* 1D */ ENTRY_CopyBytes2Mod, // NOP/r multi byte nop, not documented by Intel, documented by AMD + /* 1E */ ENTRY_CopyBytes2Mod, // NOP/r multi byte nop, not documented by Intel, documented by AMD + /* 1F */ ENTRY_CopyBytes2Mod, // NOP/r multi byte nop + /* 20 */ ENTRY_CopyBytes2Mod, // MOV/r + /* 21 */ ENTRY_CopyBytes2Mod, // MOV/r + /* 22 */ ENTRY_CopyBytes2Mod, // MOV/r + /* 23 */ ENTRY_CopyBytes2Mod, // MOV/r +#ifdef DETOURS_X64 + /* 24 */ ENTRY_Invalid, // _24 +#else + /* 24 */ ENTRY_CopyBytes2Mod, // MOV/r,TR TR is test register on 80386 and 80486, removed in Pentium +#endif + /* 25 */ ENTRY_Invalid, // _25 +#ifdef DETOURS_X64 + /* 26 */ ENTRY_Invalid, // _26 +#else + /* 26 */ ENTRY_CopyBytes2Mod, // MOV TR/r TR is test register on 80386 and 80486, removed in Pentium +#endif + /* 27 */ ENTRY_Invalid, // _27 + /* 28 */ ENTRY_CopyBytes2Mod, // MOVAPS MOVAPD + /* 29 */ ENTRY_CopyBytes2Mod, // MOVAPS MOVAPD + /* 2A */ ENTRY_CopyBytes2Mod, // CVPI2PS & + /* 2B */ ENTRY_CopyBytes2Mod, // MOVNTPS MOVNTPD + /* 2C */ ENTRY_CopyBytes2Mod, // CVTTPS2PI & + /* 2D */ ENTRY_CopyBytes2Mod, // CVTPS2PI & + /* 2E */ ENTRY_CopyBytes2Mod, // UCOMISS UCOMISD + /* 2F */ ENTRY_CopyBytes2Mod, // COMISS COMISD + /* 30 */ ENTRY_CopyBytes1, // WRMSR + /* 31 */ ENTRY_CopyBytes1, // RDTSC + /* 32 */ ENTRY_CopyBytes1, // RDMSR + /* 33 */ ENTRY_CopyBytes1, // RDPMC + /* 34 */ ENTRY_CopyBytes1, // SYSENTER + /* 35 */ ENTRY_CopyBytes1, // SYSEXIT + /* 36 */ ENTRY_Invalid, // _36 + /* 37 */ ENTRY_CopyBytes1, // GETSEC + /* 38 */ ENTRY_CopyBytes3Mod, // SSE3 Opcodes + /* 39 */ ENTRY_Invalid, // 
_39 + /* 3A */ ENTRY_CopyBytes3Mod1, // SSE3 Opcodes + /* 3B */ ENTRY_Invalid, // _3B + /* 3C */ ENTRY_Invalid, // _3C + /* 3D */ ENTRY_Invalid, // _3D + /* 3E */ ENTRY_Invalid, // _3E + /* 3F */ ENTRY_Invalid, // _3F + /* 40 */ ENTRY_CopyBytes2Mod, // CMOVO (0F 40) + /* 41 */ ENTRY_CopyBytes2Mod, // CMOVNO (0F 41) + /* 42 */ ENTRY_CopyBytes2Mod, // CMOVB & CMOVC & CMOVNAE (0F 42) + /* 43 */ ENTRY_CopyBytes2Mod, // CMOVAE & CMOVNB (0F 43) + /* 44 */ ENTRY_CopyBytes2Mod, // CMOVE & CMOVZ (0F 44) + /* 45 */ ENTRY_CopyBytes2Mod, // CMOVNE & CMOVNZ (0F 45) + /* 46 */ ENTRY_CopyBytes2Mod, // CMOVBE & CMOVNA (0F 46) + /* 47 */ ENTRY_CopyBytes2Mod, // CMOVA & CMOVNBE (0F 47) + /* 48 */ ENTRY_CopyBytes2Mod, // CMOVS (0F 48) + /* 49 */ ENTRY_CopyBytes2Mod, // CMOVNS (0F 49) + /* 4A */ ENTRY_CopyBytes2Mod, // CMOVP & CMOVPE (0F 4A) + /* 4B */ ENTRY_CopyBytes2Mod, // CMOVNP & CMOVPO (0F 4B) + /* 4C */ ENTRY_CopyBytes2Mod, // CMOVL & CMOVNGE (0F 4C) + /* 4D */ ENTRY_CopyBytes2Mod, // CMOVGE & CMOVNL (0F 4D) + /* 4E */ ENTRY_CopyBytes2Mod, // CMOVLE & CMOVNG (0F 4E) + /* 4F */ ENTRY_CopyBytes2Mod, // CMOVG & CMOVNLE (0F 4F) + /* 50 */ ENTRY_CopyBytes2Mod, // MOVMSKPS MOVMSKPD + /* 51 */ ENTRY_CopyBytes2Mod, // SQRTPS & + /* 52 */ ENTRY_CopyBytes2Mod, // RSQRTSS RSQRTPS + /* 53 */ ENTRY_CopyBytes2Mod, // RCPPS RCPSS + /* 54 */ ENTRY_CopyBytes2Mod, // ANDPS ANDPD + /* 55 */ ENTRY_CopyBytes2Mod, // ANDNPS ANDNPD + /* 56 */ ENTRY_CopyBytes2Mod, // ORPS ORPD + /* 57 */ ENTRY_CopyBytes2Mod, // XORPS XORPD + /* 58 */ ENTRY_CopyBytes2Mod, // ADDPS & + /* 59 */ ENTRY_CopyBytes2Mod, // MULPS & + /* 5A */ ENTRY_CopyBytes2Mod, // CVTPS2PD & + /* 5B */ ENTRY_CopyBytes2Mod, // CVTDQ2PS & + /* 5C */ ENTRY_CopyBytes2Mod, // SUBPS & + /* 5D */ ENTRY_CopyBytes2Mod, // MINPS & + /* 5E */ ENTRY_CopyBytes2Mod, // DIVPS & + /* 5F */ ENTRY_CopyBytes2Mod, // MAXPS & + /* 60 */ ENTRY_CopyBytes2Mod, // PUNPCKLBW/r + /* 61 */ ENTRY_CopyBytes2Mod, // PUNPCKLWD/r + /* 62 */ ENTRY_CopyBytes2Mod, // PUNPCKLDQ/r + /*
63 */ ENTRY_CopyBytes2Mod, // PACKSSWB/r + /* 64 */ ENTRY_CopyBytes2Mod, // PCMPGTB/r + /* 65 */ ENTRY_CopyBytes2Mod, // PCMPGTW/r + /* 66 */ ENTRY_CopyBytes2Mod, // PCMPGTD/r + /* 67 */ ENTRY_CopyBytes2Mod, // PACKUSWB/r + /* 68 */ ENTRY_CopyBytes2Mod, // PUNPCKHBW/r + /* 69 */ ENTRY_CopyBytes2Mod, // PUNPCKHWD/r + /* 6A */ ENTRY_CopyBytes2Mod, // PUNPCKHDQ/r + /* 6B */ ENTRY_CopyBytes2Mod, // PACKSSDW/r + /* 6C */ ENTRY_CopyBytes2Mod, // PUNPCKLQDQ + /* 6D */ ENTRY_CopyBytes2Mod, // PUNPCKHQDQ + /* 6E */ ENTRY_CopyBytes2Mod, // MOVD/r + /* 6F */ ENTRY_CopyBytes2Mod, // MOV/r + /* 70 */ ENTRY_CopyBytes2Mod1, // PSHUFW/r ib + /* 71 */ ENTRY_CopyBytes2Mod1, // PSLLW/6 ib,PSRAW/4 ib,PSRLW/2 ib + /* 72 */ ENTRY_CopyBytes2Mod1, // PSLLD/6 ib,PSRAD/4 ib,PSRLD/2 ib + /* 73 */ ENTRY_CopyBytes2Mod1, // PSLLQ/6 ib,PSRLQ/2 ib + /* 74 */ ENTRY_CopyBytes2Mod, // PCMPEQB/r + /* 75 */ ENTRY_CopyBytes2Mod, // PCMPEQW/r + /* 76 */ ENTRY_CopyBytes2Mod, // PCMPEQD/r + /* 77 */ ENTRY_CopyBytes1, // EMMS + // extrq/insertq require mod=3 and are followed by two immediate bytes + /* 78 */ ENTRY_Copy0F78, // VMREAD/r, 66/EXTRQ/r/ib/ib, F2/INSERTQ/r/ib/ib + // extrq/insertq require mod=3, therefore ENTRY_CopyBytes2, but it ends up the same + /* 79 */ ENTRY_CopyBytes2Mod, // VMWRITE/r, 66/EXTRQ/r, F2/INSERTQ/r + /* 7A */ ENTRY_Invalid, // _7A + /* 7B */ ENTRY_Invalid, // _7B + /* 7C */ ENTRY_CopyBytes2Mod, // HADDPS + /* 7D */ ENTRY_CopyBytes2Mod, // HSUBPS + /* 7E */ ENTRY_CopyBytes2Mod, // MOVD/r + /* 7F */ ENTRY_CopyBytes2Mod, // MOV/r + /* 80 */ ENTRY_CopyBytes3Or5Target, // JO + /* 81 */ ENTRY_CopyBytes3Or5Target, // JNO + /* 82 */ ENTRY_CopyBytes3Or5Target, // JB,JC,JNAE + /* 83 */ ENTRY_CopyBytes3Or5Target, // JAE,JNB,JNC + /* 84 */ ENTRY_CopyBytes3Or5Target, // JE,JZ + /* 85 */ ENTRY_CopyBytes3Or5Target, // JNE,JNZ + /* 86 */ ENTRY_CopyBytes3Or5Target, // JBE,JNA + /* 87 */ ENTRY_CopyBytes3Or5Target, // JA,JNBE + /* 88 */ ENTRY_CopyBytes3Or5Target, // JS + /* 89 */
ENTRY_CopyBytes3Or5Target, // JNS + /* 8A */ ENTRY_CopyBytes3Or5Target, // JP,JPE + /* 8B */ ENTRY_CopyBytes3Or5Target, // JNP,JPO + /* 8C */ ENTRY_CopyBytes3Or5Target, // JL,JNGE + /* 8D */ ENTRY_CopyBytes3Or5Target, // JGE,JNL + /* 8E */ ENTRY_CopyBytes3Or5Target, // JLE,JNG + /* 8F */ ENTRY_CopyBytes3Or5Target, // JG,JNLE + /* 90 */ ENTRY_CopyBytes2Mod, // SETO (0F 90) + /* 91 */ ENTRY_CopyBytes2Mod, // SETNO (0F 91) + /* 92 */ ENTRY_CopyBytes2Mod, // SETB & SETC & SETNAE (0F 92) + /* 93 */ ENTRY_CopyBytes2Mod, // SETAE & SETNB & SETNC (0F 93) + /* 94 */ ENTRY_CopyBytes2Mod, // SETE & SETZ (0F 94) + /* 95 */ ENTRY_CopyBytes2Mod, // SETNE & SETNZ (0F 95) + /* 96 */ ENTRY_CopyBytes2Mod, // SETBE & SETNA (0F 96) + /* 97 */ ENTRY_CopyBytes2Mod, // SETA & SETNBE (0F 97) + /* 98 */ ENTRY_CopyBytes2Mod, // SETS (0F 98) + /* 99 */ ENTRY_CopyBytes2Mod, // SETNS (0F 99) + /* 9A */ ENTRY_CopyBytes2Mod, // SETP & SETPE (0F 9A) + /* 9B */ ENTRY_CopyBytes2Mod, // SETNP & SETPO (0F 9B) + /* 9C */ ENTRY_CopyBytes2Mod, // SETL & SETNGE (0F 9C) + /* 9D */ ENTRY_CopyBytes2Mod, // SETGE & SETNL (0F 9D) + /* 9E */ ENTRY_CopyBytes2Mod, // SETLE & SETNG (0F 9E) + /* 9F */ ENTRY_CopyBytes2Mod, // SETG & SETNLE (0F 9F) + /* A0 */ ENTRY_CopyBytes1, // PUSH + /* A1 */ ENTRY_CopyBytes1, // POP + /* A2 */ ENTRY_CopyBytes1, // CPUID + /* A3 */ ENTRY_CopyBytes2Mod, // BT (0F A3) + /* A4 */ ENTRY_CopyBytes2Mod1, // SHLD + /* A5 */ ENTRY_CopyBytes2Mod, // SHLD + /* A6 */ ENTRY_CopyBytes2Mod, // XBTS + /* A7 */ ENTRY_CopyBytes2Mod, // IBTS + /* A8 */ ENTRY_CopyBytes1, // PUSH + /* A9 */ ENTRY_CopyBytes1, // POP + /* AA */ ENTRY_CopyBytes1, // RSM + /* AB */ ENTRY_CopyBytes2Mod, // BTS (0F AB) + /* AC */ ENTRY_CopyBytes2Mod1, // SHRD + /* AD */ ENTRY_CopyBytes2Mod, // SHRD + + // 0F AE mod76=mem mod543=0 fxsave + // 0F AE mod76=mem mod543=1 fxrstor + // 0F AE mod76=mem mod543=2 ldmxcsr + // 0F AE mod76=mem mod543=3 stmxcsr + // 0F AE mod76=mem mod543=4 xsave + // 0F
AE mod76=mem mod543=5 xrstor + // 0F AE mod76=mem mod543=6 saveopt + // 0F AE mod76=mem mod543=7 clflush + // 0F AE mod76=11b mod543=5 lfence + // 0F AE mod76=11b mod543=6 mfence + // 0F AE mod76=11b mod543=7 sfence + // F3 0F AE mod76=11b mod543=0 rdfsbase + // F3 0F AE mod76=11b mod543=1 rdgsbase + // F3 0F AE mod76=11b mod543=2 wrfsbase + // F3 0F AE mod76=11b mod543=3 wrgsbase + /* AE */ ENTRY_CopyBytes2Mod, // fxsave fxrstor ldmxcsr stmxcsr xsave xrstor saveopt clflush lfence mfence sfence rdfsbase rdgsbase wrfsbase wrgsbase + /* AF */ ENTRY_CopyBytes2Mod, // IMUL (0F AF) + /* B0 */ ENTRY_CopyBytes2Mod, // CMPXCHG (0F B0) + /* B1 */ ENTRY_CopyBytes2Mod, // CMPXCHG (0F B1) + /* B2 */ ENTRY_CopyBytes2Mod, // LSS/r + /* B3 */ ENTRY_CopyBytes2Mod, // BTR (0F B3) + /* B4 */ ENTRY_CopyBytes2Mod, // LFS/r + /* B5 */ ENTRY_CopyBytes2Mod, // LGS/r + /* B6 */ ENTRY_CopyBytes2Mod, // MOVZX/r + /* B7 */ ENTRY_CopyBytes2Mod, // MOVZX/r +#ifdef DETOURS_X86 + /* B8 */ ENTRY_Copy0FB8, // jmpe f3/popcnt +#else + /* B8 */ ENTRY_CopyBytes2Mod, // f3/popcnt +#endif + /* B9 */ ENTRY_Invalid, // _B9 + /* BA */ ENTRY_CopyBytes2Mod1, // BT & BTC & BTR & BTS (0F BA) + /* BB */ ENTRY_CopyBytes2Mod, // BTC (0F BB) + /* BC */ ENTRY_CopyBytes2Mod, // BSF (0F BC) + /* BD */ ENTRY_CopyBytes2Mod, // BSR (0F BD) + /* BE */ ENTRY_CopyBytes2Mod, // MOVSX/r + /* BF */ ENTRY_CopyBytes2Mod, // MOVSX/r + /* C0 */ ENTRY_CopyBytes2Mod, // XADD/r + /* C1 */ ENTRY_CopyBytes2Mod, // XADD/r + /* C2 */ ENTRY_CopyBytes2Mod1, // CMPPS & + /* C3 */ ENTRY_CopyBytes2Mod, // MOVNTI + /* C4 */ ENTRY_CopyBytes2Mod1, // PINSRW /r ib + /* C5 */ ENTRY_CopyBytes2Mod1, // PEXTRW /r ib + /* C6 */ ENTRY_CopyBytes2Mod1, // SHUFPS & SHUFPD + /* C7 */ ENTRY_CopyBytes2Mod, // CMPXCHG8B (0F C7) + /* C8 */ ENTRY_CopyBytes1, // BSWAP 0F C8 + rd + /* C9 */ ENTRY_CopyBytes1, // BSWAP 0F C8 + rd + /* CA */ ENTRY_CopyBytes1, // BSWAP 0F C8 + rd + /* CB */ ENTRY_CopyBytes1, // CVTPD2PI BSWAP 0F C8 + rd + /* CC */ ENTRY_CopyBytes1, 
// BSWAP 0F C8 + rd + /* CD */ ENTRY_CopyBytes1, // BSWAP 0F C8 + rd + /* CE */ ENTRY_CopyBytes1, // BSWAP 0F C8 + rd + /* CF */ ENTRY_CopyBytes1, // BSWAP 0F C8 + rd + /* D0 */ ENTRY_CopyBytes2Mod, // ADDSUBPS (untested) + /* D1 */ ENTRY_CopyBytes2Mod, // PSRLW/r + /* D2 */ ENTRY_CopyBytes2Mod, // PSRLD/r + /* D3 */ ENTRY_CopyBytes2Mod, // PSRLQ/r + /* D4 */ ENTRY_CopyBytes2Mod, // PADDQ + /* D5 */ ENTRY_CopyBytes2Mod, // PMULLW/r + /* D6 */ ENTRY_CopyBytes2Mod, // MOVDQ2Q / MOVQ2DQ + /* D7 */ ENTRY_CopyBytes2Mod, // PMOVMSKB/r + /* D8 */ ENTRY_CopyBytes2Mod, // PSUBUSB/r + /* D9 */ ENTRY_CopyBytes2Mod, // PSUBUSW/r + /* DA */ ENTRY_CopyBytes2Mod, // PMINUB/r + /* DB */ ENTRY_CopyBytes2Mod, // PAND/r + /* DC */ ENTRY_CopyBytes2Mod, // PADDUSB/r + /* DD */ ENTRY_CopyBytes2Mod, // PADDUSW/r + /* DE */ ENTRY_CopyBytes2Mod, // PMAXUB/r + /* DF */ ENTRY_CopyBytes2Mod, // PANDN/r + /* E0 */ ENTRY_CopyBytes2Mod , // PAVGB + /* E1 */ ENTRY_CopyBytes2Mod, // PSRAW/r + /* E2 */ ENTRY_CopyBytes2Mod, // PSRAD/r + /* E3 */ ENTRY_CopyBytes2Mod, // PAVGW + /* E4 */ ENTRY_CopyBytes2Mod, // PMULHUW/r + /* E5 */ ENTRY_CopyBytes2Mod, // PMULHW/r + /* E6 */ ENTRY_CopyBytes2Mod, // CVTDQ2PD & + /* E7 */ ENTRY_CopyBytes2Mod, // MOVNTQ + /* E8 */ ENTRY_CopyBytes2Mod, // PSUBSB/r + /* E9 */ ENTRY_CopyBytes2Mod, // PSUBSW/r + /* EA */ ENTRY_CopyBytes2Mod, // PMINSW/r + /* EB */ ENTRY_CopyBytes2Mod, // POR/r + /* EC */ ENTRY_CopyBytes2Mod, // PADDSB/r + /* ED */ ENTRY_CopyBytes2Mod, // PADDSW/r + /* EE */ ENTRY_CopyBytes2Mod, // PMAXSW /r + /* EF */ ENTRY_CopyBytes2Mod, // PXOR/r + /* F0 */ ENTRY_CopyBytes2Mod, // LDDQU + /* F1 */ ENTRY_CopyBytes2Mod, // PSLLW/r + /* F2 */ ENTRY_CopyBytes2Mod, // PSLLD/r + /* F3 */ ENTRY_CopyBytes2Mod, // PSLLQ/r + /* F4 */ ENTRY_CopyBytes2Mod, // PMULUDQ/r + /* F5 */ ENTRY_CopyBytes2Mod, // PMADDWD/r + /* F6 */ ENTRY_CopyBytes2Mod, // PSADBW/r + /* F7 */ ENTRY_CopyBytes2Mod, // MASKMOVQ + /* F8 */ ENTRY_CopyBytes2Mod, // PSUBB/r + /* F9 */
ENTRY_CopyBytes2Mod, // PSUBW/r + /* FA */ ENTRY_CopyBytes2Mod, // PSUBD/r + /* FB */ ENTRY_CopyBytes2Mod, // PSUBQ/r + /* FC */ ENTRY_CopyBytes2Mod, // PADDB/r + /* FD */ ENTRY_CopyBytes2Mod, // PADDW/r + /* FE */ ENTRY_CopyBytes2Mod, // PADDD/r + /* FF */ ENTRY_Invalid, // _FF +}; + +BOOL CDetourDis::SanityCheckSystem() +{
+ C_ASSERT(ARRAYSIZE(CDetourDis::s_rceCopyTable) == 256);
+ C_ASSERT(ARRAYSIZE(CDetourDis::s_rceCopyTable0F) == 256); + return TRUE; +} +#endif // defined(DETOURS_X64) || defined(DETOURS_X86) + +/////////////////////////////////////////////////////////// IA64 Disassembler. +// +#ifdef DETOURS_IA64 + +#if defined(_IA64_) != defined(DETOURS_IA64_OFFLINE_LIBRARY) +// Compile DETOUR_IA64_BUNDLE for native IA64 or cross, but not both -- we get duplicates otherwise. +const DETOUR_IA64_BUNDLE::DETOUR_IA64_METADATA DETOUR_IA64_BUNDLE::s_rceCopyTable[33] = +{ + { 0x00, M_UNIT, I_UNIT, I_UNIT, }, + { 0x01, M_UNIT, I_UNIT, I_UNIT, }, + { 0x02, M_UNIT, I_UNIT, I_UNIT, }, + { 0x03, M_UNIT, I_UNIT, I_UNIT, }, + { 0x04, M_UNIT, L_UNIT, X_UNIT, }, + { 0x05, M_UNIT, L_UNIT, X_UNIT, }, + { 0x06, 0, 0, 0, }, + { 0x07, 0, 0, 0, }, + { 0x08, M_UNIT, M_UNIT, I_UNIT, }, + { 0x09, M_UNIT, M_UNIT, I_UNIT, }, + { 0x0a, M_UNIT, M_UNIT, I_UNIT, }, + { 0x0b, M_UNIT, M_UNIT, I_UNIT, }, + { 0x0c, M_UNIT, F_UNIT, I_UNIT, }, + { 0x0d, M_UNIT, F_UNIT, I_UNIT, }, + { 0x0e, M_UNIT, M_UNIT, F_UNIT, }, + { 0x0f, M_UNIT, M_UNIT, F_UNIT, }, + { 0x10, M_UNIT, I_UNIT, B_UNIT, }, + { 0x11, M_UNIT, I_UNIT, B_UNIT, }, + { 0x12, M_UNIT, B_UNIT, B_UNIT, }, + { 0x13, M_UNIT, B_UNIT, B_UNIT, }, + { 0x14, 0, 0, 0, }, + { 0x15, 0, 0, 0, }, + { 0x16, B_UNIT, B_UNIT, B_UNIT, }, + { 0x17, B_UNIT, B_UNIT, B_UNIT, }, + { 0x18, M_UNIT, M_UNIT, B_UNIT, }, + { 0x19, M_UNIT, M_UNIT, B_UNIT, }, + { 0x1a, 0, 0, 0, }, + { 0x1b, 0, 0, 0, }, + { 0x1c, M_UNIT, F_UNIT, B_UNIT, }, + { 0x1d, M_UNIT, F_UNIT, B_UNIT, }, + { 0x1e, 0, 0, 0, }, + { 0x1f, 0, 0, 0, }, + { 0x00, 0, 0, 0, }, +}; + +// 120 112 104 96 88 80 72 64 56 48 40 32 24 16 8 0 +// f. e. d. c. b. a. 9. 8. 7. 6. 5. 4. 3. 2. 1. 0. + +// 00 +// f.e. d.c. b.a. 9.8. 7.6. 5.4. 3.2. 1.0. +// 0000 0000 0000 0000 0000 0000 0000 001f : Template [4..0] +// 0000 0000 0000 0000 0000 03ff ffff ffe0 : Zero [ 41.. 5] +// 0000 0000 0000 0000 0000 3c00 0000 0000 : Zero [ 45.. 42] +// 0000 0000 0007 ffff ffff c000 0000 0000 : One [ 82.. 
46] +// 0000 0000 0078 0000 0000 0000 0000 0000 : One [ 86.. 83] +// 0fff ffff ff80 0000 0000 0000 0000 0000 : Two [123.. 87] +// f000 0000 0000 0000 0000 0000 0000 0000 : Two [127..124] +BYTE DETOUR_IA64_BUNDLE::GetTemplate() const +{ + return (data[0] & 0x1f); +} + +BYTE DETOUR_IA64_BUNDLE::GetInst0() const +{ + return ((data[5] & 0x3c) >> 2); +} + +BYTE DETOUR_IA64_BUNDLE::GetInst1() const +{ + return ((data[10] & 0x78) >> 3); +} + +BYTE DETOUR_IA64_BUNDLE::GetInst2() const +{ + return ((data[15] & 0xf0) >> 4); +} + +BYTE DETOUR_IA64_BUNDLE::GetUnit(BYTE slot) const +{ + switch (slot) { + case 0: return GetUnit0(); + case 1: return GetUnit1(); + case 2: return GetUnit2(); + } + __debugbreak(); + return 0; +} + +BYTE DETOUR_IA64_BUNDLE::GetUnit0() const +{ + return s_rceCopyTable[data[0] & 0x1f].nUnit0; +} + +BYTE DETOUR_IA64_BUNDLE::GetUnit1() const +{ + return s_rceCopyTable[data[0] & 0x1f].nUnit1; +} + +BYTE DETOUR_IA64_BUNDLE::GetUnit2() const +{ + return s_rceCopyTable[data[0] & 0x1f].nUnit2; +} + +UINT64 DETOUR_IA64_BUNDLE::GetData0() const +{ + return (((wide[0] & 0x000003ffffffffe0) >> 5)); +} + +UINT64 DETOUR_IA64_BUNDLE::GetData1() const +{ + return (((wide[0] & 0xffffc00000000000) >> 46) | + ((wide[1] & 0x000000000007ffff) << 18)); +} + +UINT64 DETOUR_IA64_BUNDLE::GetData2() const +{ + return (((wide[1] & 0x0fffffffff800000) >> 23)); +} + +VOID DETOUR_IA64_BUNDLE::SetInst(BYTE slot, BYTE nInst) +{ + switch (slot) + { + case 0: SetInst0(nInst); return; + case 1: SetInst1(nInst); return; + case 2: SetInst2(nInst); return; + } + __debugbreak(); +} + +VOID DETOUR_IA64_BUNDLE::SetInst0(BYTE nInst) +{ + data[5] = (data[5] & ~0x3c) | ((nInst << 2) & 0x3c); +} + +VOID DETOUR_IA64_BUNDLE::SetInst1(BYTE nInst) +{ + data[10] = (data[10] & ~0x78) | ((nInst << 3) & 0x78); +} + +VOID DETOUR_IA64_BUNDLE::SetInst2(BYTE nInst) +{ + data[15] = (data[15] & ~0xf0) | ((nInst << 4) & 0xf0); +} + +VOID DETOUR_IA64_BUNDLE::SetData(BYTE slot, UINT64 nData) +{ + switch (slot) + 
{ + case 0: SetData0(nData); return; + case 1: SetData1(nData); return; + case 2: SetData2(nData); return; + } + __debugbreak(); +} + +VOID DETOUR_IA64_BUNDLE::SetData0(UINT64 nData) +{ + wide[0] = (wide[0] & ~0x000003ffffffffe0) | (( nData << 5) & 0x000003ffffffffe0); +} + +VOID DETOUR_IA64_BUNDLE::SetData1(UINT64 nData) +{ + wide[0] = (wide[0] & ~0xffffc00000000000) | ((nData << 46) & 0xffffc00000000000); + wide[1] = (wide[1] & ~0x000000000007ffff) | ((nData >> 18) & 0x000000000007ffff); +} + +VOID DETOUR_IA64_BUNDLE::SetData2(UINT64 nData) +{ + wide[1] = (wide[1] & ~0x0fffffffff800000) | ((nData << 23) & 0x0fffffffff800000); +} + +UINT64 DETOUR_IA64_BUNDLE::GetInstruction(BYTE slot) const +{ + switch (slot) { + case 0: return GetInstruction0(); + case 1: return GetInstruction1(); + case 2: return GetInstruction2(); + } + __debugbreak(); + return 0; +} + +UINT64 DETOUR_IA64_BUNDLE::GetInstruction0() const +{ + // 41 bits from wide[0], skipping the 5 bit template. + return GetBits(wide[0], DETOUR_IA64_INSTRUCTION0_OFFSET, DETOUR_IA64_INSTRUCTION_SIZE); +} + +UINT64 DETOUR_IA64_BUNDLE::GetInstruction1() const +{ + // 64-46 bits from wide[0] and the rest from wide[1]. + const UINT count0 = 64 - DETOUR_IA64_INSTRUCTION1_OFFSET; + const UINT count1 = DETOUR_IA64_INSTRUCTION_SIZE - count0; + return GetBits(wide[0], DETOUR_IA64_INSTRUCTION1_OFFSET, count0) | (GetBits(wide[1], 0, count1) << count0); +} + +UINT64 DETOUR_IA64_BUNDLE::GetInstruction2() const +{ + // Upper 41 bits of wide[1]. 
+ return wide[1] >> (64 - DETOUR_IA64_INSTRUCTION_SIZE); +} + +void DETOUR_IA64_BUNDLE::SetInstruction(BYTE slot, UINT64 instruction) +{ + switch (slot) { + case 0: SetInstruction0(instruction); return; + case 1: SetInstruction1(instruction); return; + case 2: SetInstruction2(instruction); return; + } + __debugbreak(); +} + +void DETOUR_IA64_BUNDLE::SetInstruction0(UINT64 instruction) +{ + wide[0] = SetBits(wide[0], DETOUR_IA64_INSTRUCTION0_OFFSET, DETOUR_IA64_INSTRUCTION_SIZE, instruction); +} + +void DETOUR_IA64_BUNDLE::SetInstruction1(UINT64 instruction) +{ + UINT const count0 = 64 - DETOUR_IA64_INSTRUCTION1_OFFSET; + UINT const count1 = DETOUR_IA64_INSTRUCTION_SIZE - count0; + UINT64 const wide0 = SetBits(wide[0], DETOUR_IA64_INSTRUCTION1_OFFSET, count0, instruction); + UINT64 const wide1 = SetBits(wide[1], 0, count1, instruction >> count0); + wide[0] = wide0; + wide[1] = wide1; +} + +void DETOUR_IA64_BUNDLE::SetInstruction2(UINT64 instruction) +{ + // Set upper 41 bits of wide[1]. + wide[1] = SetBits(wide[1], 64 - DETOUR_IA64_INSTRUCTION_SIZE, DETOUR_IA64_INSTRUCTION_SIZE, instruction); +} + +UINT64 DETOUR_IA64_BUNDLE::SignExtend(UINT64 Value, UINT64 Offset) +// This definition is from the IA64 manual. +{ + if ((Value & (((UINT64)1) << (Offset - 1))) == 0) + return Value; + UINT64 const new_value = Value | ((~(UINT64)0) << Offset); + return new_value; +} + +UINT64 DETOUR_IA64_BUNDLE::GetBits(UINT64 Value, UINT64 Offset, UINT64 Count) +{ + UINT64 const new_value = (Value >> Offset) & ~(~((UINT64)0) << Count); + return new_value; +} + +UINT64 DETOUR_IA64_BUNDLE::SetBits(UINT64 Value, UINT64 Offset, UINT64 Count, UINT64 Field) +{ + UINT64 const mask = (~((~(UINT64)0) << Count)) << Offset; + UINT64 const new_value = (Value & ~mask) | ((Field << Offset) & mask); + return new_value; +} + +UINT64 DETOUR_IA64_BUNDLE::GetOpcode(UINT64 instruction) +// Get 4bit primary opcode. 
+{ + UINT64 const opcode = GetBits(instruction, DETOUR_IA64_INSTRUCTION_SIZE - 4, 4); + return opcode; +} + +UINT64 DETOUR_IA64_BUNDLE::GetX(UINT64 instruction) +// Get 1bit opcode extension. +{ + UINT64 const x = GetBits(instruction, 33, 1); + return x; +} + +UINT64 DETOUR_IA64_BUNDLE::GetX3(UINT64 instruction) +// Get 3bit opcode extension. +{ + UINT64 const x3 = GetBits(instruction, 33, 3); + return x3; +} + +UINT64 DETOUR_IA64_BUNDLE::GetX6(UINT64 instruction) +// Get 6bit opcode extension. +{ + UINT64 const x6 = GetBits(instruction, 27, 6); + return x6; +} + +UINT64 DETOUR_IA64_BUNDLE::GetImm7a(UINT64 instruction) +{ + UINT64 const imm7a = GetBits(instruction, 6, 7); + return imm7a; +} + +UINT64 DETOUR_IA64_BUNDLE::SetImm7a(UINT64 instruction, UINT64 imm7a) +{ + UINT64 const new_instruction = SetBits(instruction, 6, 7, imm7a); + return new_instruction; +} + +UINT64 DETOUR_IA64_BUNDLE::GetImm13c(UINT64 instruction) +{ + UINT64 const imm13c = GetBits(instruction, 20, 13); + return imm13c; +} + +UINT64 DETOUR_IA64_BUNDLE::SetImm13c(UINT64 instruction, UINT64 imm13c) +{ + UINT64 const new_instruction = SetBits(instruction, 20, 13, imm13c); + return new_instruction; +} + +UINT64 DETOUR_IA64_BUNDLE::GetSignBit(UINT64 instruction) +{ + UINT64 const signBit = GetBits(instruction, 36, 1); + return signBit; +} + +UINT64 DETOUR_IA64_BUNDLE::SetSignBit(UINT64 instruction, UINT64 signBit) +{ + UINT64 const new_instruction = SetBits(instruction, 36, 1, signBit); + return new_instruction; +} + +UINT64 DETOUR_IA64_BUNDLE::GetImm20a(UINT64 instruction) +{ + UINT64 const imm20a = GetBits(instruction, 6, 20); + return imm20a; +} + +UINT64 DETOUR_IA64_BUNDLE::SetImm20a(UINT64 instruction, UINT64 imm20a) +{ + UINT64 const new_instruction = SetBits(instruction, 6, 20, imm20a); + return new_instruction; +} + +UINT64 DETOUR_IA64_BUNDLE::GetImm20b(UINT64 instruction) +{ + UINT64 const imm20b = GetBits(instruction, 13, 20); + return imm20b; +} + +UINT64 
DETOUR_IA64_BUNDLE::SetImm20b(UINT64 instruction, UINT64 imm20b) +{ + UINT64 const new_instruction = SetBits(instruction, 13, 20, imm20b); + return new_instruction; +} + +bool DETOUR_IA64_BUNDLE::RelocateInstruction(_Inout_ DETOUR_IA64_BUNDLE* pDst, + _In_ BYTE slot, + _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra) const +/* + If pBundleExtra is provided and instruction is IP-relative, + this function relocates instruction to target pBundleExtra, + pBundleExtra is set to brl the original target, and return true. + + [Not used] If pBundleExtra is not provided and instruction is IP-relative, return true. + + Else return false. + + The following IP-relative forms are recognized: + br and br.call + chk.s.m integer and float + chk.a.nc integer and float + chk.a.clr integer and float + chk.s.i + fchkf + + Brl is handled elsewhere, because the code was previously written. + + Branch prediction hints are not relocated. +*/ +{ + UINT64 const instruction = GetInstruction(slot); + UINT64 const opcode = GetOpcode(instruction); + size_t const dest = (size_t)pDst; + size_t const extra = (size_t)pBundleExtra; + + switch (GetUnit(slot)) { + case F_UNIT: + // F14 fchkf + if (opcode == 0 && GetX(instruction) == 0 && GetX6(instruction) == 8) { + goto imm20a; + } + return false; + + case M_UNIT: + // M20 x3 == 1 integer chk.s.m + // M21 x3 == 3 floating point chk.s + if (opcode == 1) { + UINT64 const x3 = GetX3(instruction); + if (x3 == 1 || x3 == 3) { + goto imm13_7; + } + } + + // M22 x3 == 4 integer chk.a.nc + // M22 x3 == 5 integer chk.a.clr + // M23 x3 == 6 floating point chk.a.nc + // M23 x3 == 7 floating point chk.a.clr + if (opcode == 0) { + UINT64 const x3 = GetX3(instruction); + if (x3 == 4 || x3 == 5 || x3 == 6 || x3 == 7) { + goto imm20b; + } + } + return false; + case I_UNIT: + // I20 + if (opcode == 0 && GetX3(instruction) == 1) { // chk.s.i + goto imm13_7; + } + return false; + case B_UNIT: + // B1 B2 B3 + // 4 br + // 5 br.call + if (opcode == 4 || opcode == 5) { + goto 
imm20b; + } + return false; + } + return false; + + UINT64 imm; + UINT64 new_instruction; + +imm13_7: + imm = SignExtend((GetSignBit(instruction) << 20) | (GetImm13c(instruction) << 7) | GetImm7a(instruction), 21) << 4; + new_instruction = SetSignBit(SetImm13c(SetImm7a(instruction, (extra - dest) >> 4), (extra - dest) >> 11), extra < dest); + goto set_brl; + +imm20a: + imm = SignExtend((GetSignBit(instruction) << 20) | GetImm20a(instruction), 21) << 4; + new_instruction = SetSignBit(SetImm20a(instruction, (extra - dest) >> 4), extra < dest); + goto set_brl; + +imm20b: + imm = SignExtend((GetSignBit(instruction) << 20) | GetImm20b(instruction), 21) << 4; + new_instruction = SetSignBit(SetImm20b(instruction, (extra - dest) >> 4), extra < dest); + goto set_brl; + +set_brl: + if (pBundleExtra != NULL) { + pDst->SetInstruction(slot, new_instruction); + pBundleExtra->SetBrl((size_t)this + imm); + } + return true; +} + +UINT DETOUR_IA64_BUNDLE::RelocateBundle(_Inout_ DETOUR_IA64_BUNDLE* pDst, + _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra) const +/* + Having already copied the bundle unchanged, then relocate its instructions one at a time. + Return how many extra bytes are required to relocate the bundle. +*/ +{ + UINT nExtraBytes = 0; + for (BYTE slot = 0; slot < DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE; ++slot) { + if (!RelocateInstruction(pDst, slot, pBundleExtra)) { + continue; + } + pBundleExtra -= !!pBundleExtra; + nExtraBytes += sizeof(DETOUR_IA64_BUNDLE); + } + return nExtraBytes; +} + +BOOL DETOUR_IA64_BUNDLE::IsBrl() const +{ + // f.e. d.c. b.a. 9.8. 7.6. 5. 4. 3. 2. 1. 0. + // c000 0070 0000 0000 0000 00 01 00 00 00 05 : brl.sptk.few + // c8ff fff0 007f fff0 ffff 00 01 00 00 00 05 : brl.sptk.few + // c000 0048 0000 0000 0001 00 00 00 00 00 05 : brl.sptk.many + return ((wide[0] & 0x000000000000001e) == 0x0000000000000004 && // 4 or 5. + (wide[1] & 0xe000000000000000) == 0xc000000000000000); // c or d. 
+} + +VOID DETOUR_IA64_BUNDLE::SetBrl() +{ + wide[0] = 0x0000000100000005; // few + //wide[0] = 0x0000000180000005; // many + wide[1] = 0xc000000800000000; +} + +UINT64 DETOUR_IA64_BUNDLE::GetBrlImm() const +{ + return ( + // 0x0000000000fffff0 + ((wide[1] & 0x00fffff000000000) >> 32) | // all 20 bits of imm20b. + // 0x000000ffff000000 + ((wide[0] & 0xffff000000000000) >> 24) | // bottom 16 bits of imm39. + // 0x7fffff0000000000 + ((wide[1] & 0x00000000007fffff) << 40) | // top 23 bits of imm39. + // 0x8000000000000000 + ((wide[1] & 0x0800000000000000) << 4) // single bit of i. + ); +} + +VOID DETOUR_IA64_BUNDLE::SetBrlImm(UINT64 imm) +{ + wide[0] = ((wide[0] & ~0xffff000000000000) | + // 0xffff000000000000 + ((imm & 0x000000ffff000000) << 24) // bottom 16 bits of imm39. + ); + wide[1] = ((wide[1] & ~0x08fffff0007fffff) | + // 0x00fffff000000000 + ((imm & 0x0000000000fffff0) << 32) | // all 20 bits of imm20b. + // 0x00000000007fffff + ((imm & 0x7fffff0000000000) >> 40) | // top 23 bits of imm39. + // 0x0800000000000000 + ((imm & 0x8000000000000000) >> 4) // single bit of i. + ); +} + +UINT64 DETOUR_IA64_BUNDLE::GetBrlTarget() const +{ + return (UINT64)this + GetBrlImm(); +} + +VOID DETOUR_IA64_BUNDLE::SetBrl(UINT64 target) +{ + UINT64 imm = target - (UINT64)this; + SetBrl(); + SetBrlImm(imm); +} + +VOID DETOUR_IA64_BUNDLE::SetBrlTarget(UINT64 target) +{ + UINT64 imm = target - (UINT64)this; + SetBrlImm(imm); +} + +BOOL DETOUR_IA64_BUNDLE::IsMovlGp() const +{ + // f.e. d.c. b.a. 9.8. 7.6. 5.4. 3.2. 1.0. 
+ // 6fff f7f0 207f ffff ffff c001 0000 0004 + // 6000 0000 2000 0000 0000 0001 0000 0004 + return ((wide[0] & 0x00003ffffffffffe) == 0x0000000100000004 && + (wide[1] & 0xf000080fff800000) == 0x6000000020000000); +} + +UINT64 DETOUR_IA64_BUNDLE::GetMovlGp() const +{ + UINT64 raw = ( + // 0x0000000000000070 + ((wide[1] & 0x000007f000000000) >> 36) | + // 0x000000000000ff80 + ((wide[1] & 0x07fc000000000000) >> 43) | + // 0x00000000001f0000 + ((wide[1] & 0x0003e00000000000) >> 29) | + // 0x0000000000200000 + ((wide[1] & 0x0000100000000000) >> 23) | + // 0x000000ffffc00000 + ((wide[0] & 0xffffc00000000000) >> 24) | + // 0x7fffff0000000000 + ((wide[1] & 0x00000000007fffff) << 40) | + // 0x8000000000000000 + ((wide[1] & 0x0800000000000000) << 4) + ); + + return (INT64)raw; +} + +VOID DETOUR_IA64_BUNDLE::SetMovlGp(UINT64 gp) +{ + UINT64 raw = (UINT64)gp; + + wide[0] = (0x0000000100000005 | + // 0xffffc00000000000 + ((raw & 0x000000ffffc00000) << 24) + ); + wide[1] = ( + 0x6000000020000000 | + // 0x0000070000000000 + ((raw & 0x0000000000000070) << 36) | + // 0x07fc000000000000 + ((raw & 0x000000000000ff80) << 43) | + // 0x0003e00000000000 + ((raw & 0x00000000001f0000) << 29) | + // 0x0000100000000000 + ((raw & 0x0000000000200000) << 23) | + // 0x00000000007fffff + ((raw & 0x7fffff0000000000) >> 40) | + // 0x0800000000000000 + ((raw & 0x8000000000000000) >> 4) + ); +} + +UINT DETOUR_IA64_BUNDLE::Copy(_Out_ DETOUR_IA64_BUNDLE *pDst, + _Inout_opt_ DETOUR_IA64_BUNDLE* pBundleExtra) const +{ + // Copy the bytes unchanged. + +#pragma warning(suppress:6001) // using uninitialized *pDst + pDst->wide[0] = wide[0]; + pDst->wide[1] = wide[1]; + + // Relocate if necessary. 
+ + UINT nExtraBytes = RelocateBundle(pDst, pBundleExtra); + + if (GetUnit1() == L_UNIT && IsBrl()) { + pDst->SetBrlTarget(GetBrlTarget()); + } + + return nExtraBytes; +} + +BOOL DETOUR_IA64_BUNDLE::SetNop(BYTE slot) +{ + switch (GetUnit(slot)) { + case I_UNIT: + case M_UNIT: + case F_UNIT: + SetInst(slot, 0); + SetData(slot, 0x8000000); + return true; + case B_UNIT: + SetInst(slot, 2); + SetData(slot, 0); + return true; + } + DebugBreak(); + return false; +} + +BOOL DETOUR_IA64_BUNDLE::SetNop0() +{ + return SetNop(0); +} + +BOOL DETOUR_IA64_BUNDLE::SetNop1() +{ + return SetNop(1); +} + +BOOL DETOUR_IA64_BUNDLE::SetNop2() +{ + return SetNop(2); +} + +VOID DETOUR_IA64_BUNDLE::SetStop() +{ + data[0] |= 0x01; +} + +#endif // DETOURS_IA64 + +PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst, + _Inout_opt_ PVOID *ppDstPool, + _In_ PVOID pSrc, + _Out_opt_ PVOID *ppTarget, + _Out_opt_ LONG *plExtra) +{ + LONG nExtra; + DETOUR_IA64_BUNDLE bExtra; + DETOUR_IA64_BUNDLE *pbSrc = (DETOUR_IA64_BUNDLE *)pSrc; + DETOUR_IA64_BUNDLE *pbDst = pDst ? (DETOUR_IA64_BUNDLE *)pDst : &bExtra; + + plExtra = plExtra ? plExtra : &nExtra; + *plExtra = 0; + + if (ppTarget != NULL) { + if (pbSrc->IsBrl()) { + *ppTarget = (PVOID)pbSrc->GetBrlTarget(); + } + else { + *ppTarget = DETOUR_INSTRUCTION_TARGET_NONE; + } + } + *plExtra = (LONG)pbSrc->Copy(pbDst, ppDstPool ? ((DETOUR_IA64_BUNDLE*)*ppDstPool) - 1 : (DETOUR_IA64_BUNDLE*)NULL); + return pbSrc + 1; +} + +#endif // DETOURS_IA64 + +#ifdef DETOURS_ARM + +#define DETOURS_PFUNC_TO_PBYTE(p) ((PBYTE)(((ULONG_PTR)(p)) & ~(ULONG_PTR)1)) +#define DETOURS_PBYTE_TO_PFUNC(p) ((PBYTE)(((ULONG_PTR)(p)) | (ULONG_PTR)1)) + +#define c_PCAdjust 4 // The PC value of an instruction is the PC address plus 4. 
+#define c_PC 15 // The register number for the Program Counter +#define c_LR 14 // The register number for the Link Register +#define c_SP 13 // The register number for the Stack Pointer +#define c_NOP 0xbf00 // A nop instruction +#define c_BREAK 0xdefe // A breakpoint instruction + +class CDetourDis +{ + public: + CDetourDis(); + + PBYTE CopyInstruction(PBYTE pDst, + PBYTE *ppDstPool, + PBYTE pSrc, + PBYTE *ppTarget, + LONG *plExtra); + + public: + typedef BYTE (CDetourDis::* COPYFUNC)(PBYTE pbDst, PBYTE pbSrc); + + struct COPYENTRY { + USHORT nOpcode; + COPYFUNC pfCopy; + }; + + typedef const COPYENTRY * REFCOPYENTRY; + + struct Branch5 + { + DWORD Register : 3; + DWORD Imm5 : 5; + DWORD Padding : 1; + DWORD I : 1; + DWORD OpCode : 6; + }; + + struct Branch5Target + { + DWORD Padding : 1; + DWORD Imm5 : 5; + DWORD I : 1; + DWORD Padding2 : 25; + }; + + struct Branch8 + { + DWORD Imm8 : 8; + DWORD Condition : 4; + DWORD OpCode : 4; + }; + + struct Branch8Target + { + DWORD Padding : 1; + DWORD Imm8 : 8; + DWORD Padding2 : 23; + }; + + struct Branch11 + { + DWORD Imm11 : 11; + DWORD OpCode : 5; + }; + + struct Branch11Target + { + DWORD Padding : 1; + DWORD Imm11 : 11; + DWORD Padding2 : 20; + }; + + struct Branch20 + { + DWORD Imm11 : 11; + DWORD J2 : 1; + DWORD IT : 1; + DWORD J1 : 1; + DWORD Other : 2; + DWORD Imm6 : 6; + DWORD Condition : 4; + DWORD Sign : 1; + DWORD OpCode : 5; + }; + + struct Branch20Target + { + DWORD Padding : 1; + DWORD Imm11 : 11; + DWORD Imm6 : 6; + DWORD J1 : 1; + DWORD J2 : 1; + DWORD Sign : 1; + INT32 Padding2 : 11; + }; + + struct Branch24 + { + DWORD Imm11 : 11; + DWORD J2 : 1; + DWORD InstructionSet : 1; + DWORD J1 : 1; + DWORD Link : 1; + DWORD Branch : 1; + DWORD Imm10 : 10; + DWORD Sign : 1; + DWORD OpCode : 5; + }; + + struct Branch24Target + { + DWORD Padding : 1; + DWORD Imm11 : 11; + DWORD Imm10 : 10; + DWORD I2 : 1; + DWORD I1 : 1; + DWORD Sign : 1; + INT32 Padding2 : 7; + }; + + struct LiteralLoad8 + { + DWORD Imm8 : 8; + DWORD 
Register : 3; + DWORD OpCode : 5; + }; + + struct LiteralLoad8Target + { + DWORD Padding : 2; + DWORD Imm8 : 8; + DWORD Padding2 : 22; + }; + + struct LiteralLoad12 + { + DWORD Imm12 : 12; + DWORD Register : 4; + DWORD OpCodeSuffix : 7; + DWORD Add : 1; + DWORD OpCodePrefix : 8; + }; + + struct LiteralLoad12Target + { + DWORD Imm12 : 12; + DWORD Padding : 20; + }; + + struct ImmediateRegisterLoad32 + { + DWORD Imm12 : 12; + DWORD DestinationRegister : 4; + DWORD SourceRegister: 4; + DWORD OpCode : 12; + }; + + struct ImmediateRegisterLoad16 + { + DWORD DestinationRegister : 3; + DWORD SourceRegister: 3; + DWORD OpCode : 10; + }; + + struct TableBranch + { + DWORD IndexRegister : 4; + DWORD HalfWord : 1; + DWORD OpCodeSuffix : 11; + DWORD BaseRegister : 4; + DWORD OpCodePrefix : 12; + }; + + struct Shift + { + DWORD Imm2 : 2; + DWORD Imm3 : 3; + }; + + struct Add32 + { + DWORD SecondOperandRegister : 4; + DWORD Type : 2; + DWORD Imm2 : 2; + DWORD DestinationRegister : 4; + DWORD Imm3 : 3; + DWORD Padding : 1; + DWORD FirstOperandRegister : 4; + DWORD SetFlags : 1; + DWORD OpCode : 11; + }; + + struct LogicalShiftLeft32 + { + DWORD SourceRegister : 4; + DWORD Padding : 2; + DWORD Imm2 : 2; + DWORD DestinationRegister : 4; + DWORD Imm3 : 3; + DWORD Padding2 : 5; + DWORD SetFlags : 1; + DWORD OpCode : 11; + }; + + struct StoreImmediate12 + { + DWORD Imm12 : 12; + DWORD SourceRegister : 4; + DWORD BaseRegister : 4; + DWORD OpCode : 12; + }; + + protected: + BYTE PureCopy16(BYTE* pSource, BYTE* pDest); + BYTE PureCopy32(BYTE* pSource, BYTE* pDest); + BYTE CopyMiscellaneous16(BYTE* pSource, BYTE* pDest); + BYTE CopyConditionalBranchOrOther16(BYTE* pSource, BYTE* pDest); + BYTE CopyUnConditionalBranch16(BYTE* pSource, BYTE* pDest); + BYTE CopyLiteralLoad16(BYTE* pSource, BYTE* pDest); + BYTE CopyBranchExchangeOrDataProcessing16(BYTE* pSource, BYTE* pDest); + BYTE CopyBranch24(BYTE* pSource, BYTE* pDest); + BYTE CopyBranchOrMiscellaneous32(BYTE* pSource, BYTE* pDest); + 
BYTE CopyLiteralLoad32(BYTE* pSource, BYTE* pDest); + BYTE CopyLoadAndStoreSingle(BYTE* pSource, BYTE* pDest); + BYTE CopyLoadAndStoreMultipleAndSRS(BYTE* pSource, BYTE* pDest); + BYTE CopyTableBranch(BYTE* pSource, BYTE* pDest); + BYTE BeginCopy32(BYTE* pSource, BYTE* pDest); + + LONG DecodeBranch5(ULONG opcode); + USHORT EncodeBranch5(ULONG originalOpCode, LONG delta); + LONG DecodeBranch8(ULONG opcode); + USHORT EncodeBranch8(ULONG originalOpCode, LONG delta); + LONG DecodeBranch11(ULONG opcode); + USHORT EncodeBranch11(ULONG originalOpCode, LONG delta); + BYTE EmitBranch11(PUSHORT& pDest, LONG relativeAddress); + LONG DecodeBranch20(ULONG opcode); + ULONG EncodeBranch20(ULONG originalOpCode, LONG delta); + LONG DecodeBranch24(ULONG opcode, BOOL& fLink); + ULONG EncodeBranch24(ULONG originalOpCode, LONG delta, BOOL fLink); + LONG DecodeLiteralLoad8(ULONG instruction); + LONG DecodeLiteralLoad12(ULONG instruction); + BYTE EmitLiteralLoad8(PUSHORT& pDest, BYTE targetRegister, PBYTE pLiteral); + BYTE EmitLiteralLoad12(PUSHORT& pDest, BYTE targetRegister, PBYTE pLiteral); + BYTE EmitImmediateRegisterLoad32(PUSHORT& pDest, BYTE reg); + BYTE EmitImmediateRegisterLoad16(PUSHORT& pDest, BYTE reg); + BYTE EmitLongLiteralLoad(PUSHORT& pDest, BYTE reg, PVOID pTarget); + BYTE EmitLongBranch(PUSHORT& pDest, PVOID pTarget); + USHORT CalculateExtra(BYTE sourceLength, BYTE* pDestStart, BYTE* pDestEnd); + + protected: + ULONG GetLongInstruction(BYTE* pSource) // Reads two consecutive halfwords; the first becomes the upper 16 bits of the result. + { + return (((ULONG)((PUSHORT)pSource)[0]) << 16) | (((PUSHORT)pSource)[1]); // Widen to ULONG before shifting so bit 15 is never shifted into a promoted int's sign bit. + } + + BYTE EmitLongInstruction(PUSHORT& pDstInst, ULONG instruction) // Writes the upper halfword first, then the lower; advances pDstInst by two halfwords. 
+ { + *pDstInst++ = (USHORT)(instruction >> 16); + *pDstInst++ = (USHORT)instruction; + return sizeof(ULONG); + } + + BYTE EmitShortInstruction(PUSHORT& pDstInst, USHORT instruction) // Writes one halfword and advances pDstInst past it. + { + *pDstInst++ = instruction; + return sizeof(USHORT); + } + + PBYTE Align4(PBYTE pValue) // Rounds the address down to a multiple of 4. + { + return (PBYTE)(((size_t)pValue) & ~(size_t)3u); // Mask is built at size_t width; a ULONG-width mask would be zero-extended and clear the upper address bits wherever size_t is wider than ULONG. + } + + PBYTE CalculateTarget(PBYTE pSource, LONG delta) + 
{ + return (pSource + delta + c_PCAdjust); + } + + LONG CalculateNewDelta(PBYTE pTarget, BYTE* pDest) + { + return (LONG)(pTarget - (pDest + c_PCAdjust)); + } + + BYTE EmitAdd32(PUSHORT& pDstInst, BYTE op1Reg, BYTE op2Reg, BYTE dstReg, BYTE shiftAmount) + { + Shift& shift = (Shift&)(shiftAmount); + const BYTE shiftType = 0x00; // LSL + Add32 add = { op2Reg, shiftType, shift.Imm2, dstReg, shift.Imm3, + 0x0, op1Reg, 0x0, 0x758 }; + return EmitLongInstruction(pDstInst, (ULONG&)add); + } + + BYTE EmitLogicalShiftLeft32(PUSHORT& pDstInst, BYTE srcReg, BYTE dstReg, BYTE shiftAmount) + { + Shift& shift = (Shift&)(shiftAmount); + LogicalShiftLeft32 shiftLeft = { srcReg, 0x00, shift.Imm2, dstReg, shift.Imm3, 0x1E, + 0x00, 0x752 }; + return EmitLongInstruction(pDstInst, (ULONG&)shiftLeft); + } + + BYTE EmitStoreImmediate12(PUSHORT& pDstInst, BYTE srcReg, BYTE baseReg, USHORT offset) + { + StoreImmediate12 store = { offset, srcReg, baseReg, 0xF8C }; + return EmitLongInstruction(pDstInst, (ULONG&)store); + } + + protected: + PBYTE m_pbTarget; + PBYTE m_pbPool; + LONG m_lExtra; + + BYTE m_rbScratchDst[64]; // matches or exceeds rbCode + + static const COPYENTRY s_rceCopyTable[33]; +}; + +LONG CDetourDis::DecodeBranch5(ULONG opcode) +{ + Branch5& branch = (Branch5&)(opcode); + + Branch5Target target; + ZeroMemory(&target, sizeof(target)); + target.Imm5 = branch.Imm5; + target.I = branch.I; + + // Return zero-extended value + return (LONG&)target; +} + +USHORT CDetourDis::EncodeBranch5(ULONG originalOpCode, LONG delta) +{ + // Too large for a 5 bit branch (5 bit branches can be up to 7 bits due to I and the trailing 0) + if (delta < 0 || delta > 0x7F) { + return 0; + } + + Branch5& branch = (Branch5&)(originalOpCode); + Branch5Target& target = (Branch5Target&)(delta); + + branch.Imm5 = target.Imm5; + branch.I = target.I; + + return (USHORT&)branch; +} + +LONG CDetourDis::DecodeBranch8(ULONG opcode) +{ + Branch8& branch = (Branch8&)(opcode); + + Branch8Target target; + 
ZeroMemory(&target, sizeof(target)); + target.Imm8 = branch.Imm8; + + // Return sign extended value + return (((LONG&)target) << 23) >> 23; +} + +USHORT CDetourDis::EncodeBranch8(ULONG originalOpCode, LONG delta) +{ + // Too large for 8 bit branch (8 bit branches can be up to 9 bits due to the trailing 0) + if (delta < (-(int)0x100) || delta > 0xFF) { + return 0; + } + + Branch8& branch = (Branch8&)(originalOpCode); + Branch8Target& target = (Branch8Target&)(delta); + + branch.Imm8 = target.Imm8; + + return (USHORT&)branch; +} + +LONG CDetourDis::DecodeBranch11(ULONG opcode) +{ + Branch11& branch = (Branch11&)(opcode); + + Branch11Target target; + ZeroMemory(&target, sizeof(target)); + target.Imm11 = branch.Imm11; + + // Return sign extended value + return (((LONG&)target) << 20) >> 20; +} + +USHORT CDetourDis::EncodeBranch11(ULONG originalOpCode, LONG delta) +{ + // Too large for an 11 bit branch (11 bit branches can be up to 12 bits due to the trailing 0) + if (delta < (-(int)0x800) || delta > 0x7FF) { + return 0; + } + + Branch11& branch = (Branch11&)(originalOpCode); + Branch11Target& target = (Branch11Target&)(delta); + + branch.Imm11 = target.Imm11; + + return (USHORT&)branch; +} + +BYTE CDetourDis::EmitBranch11(PUSHORT& pDest, LONG relativeAddress) +{ + Branch11Target& target = (Branch11Target&)(relativeAddress); + Branch11 branch11 = { target.Imm11, 0x1C }; + + *pDest++ = (USHORT&)branch11; + return sizeof(USHORT); +} + +LONG CDetourDis::DecodeBranch20(ULONG opcode) +{ + Branch20& branch = (Branch20&)(opcode); + + Branch20Target target; + ZeroMemory(&target, sizeof(target)); + target.Imm11 = branch.Imm11; + target.Imm6 = branch.Imm6; + target.Sign = branch.Sign; + target.J1 = branch.J1; + target.J2 = branch.J2; + + // Sign extend + if (target.Sign) { + target.Padding2 = -1; + } + + return (LONG&)target; +} + +ULONG CDetourDis::EncodeBranch20(ULONG originalOpCode, LONG delta) +{ + // Too large for 20 bit branch (20 bit branches can be up to 21 bits due to 
the trailing 0) + if (delta < (-(int)0x100000) || delta > 0xFFFFF) { + return 0; + } + + Branch20& branch = (Branch20&)(originalOpCode); + Branch20Target& target = (Branch20Target&)(delta); + + branch.Imm11 = target.Imm11; + branch.Imm6 = target.Imm6; + branch.Sign = target.Sign; + branch.J1 = target.J1; + branch.J2 = target.J2; + + return (ULONG&)branch; +} + +LONG CDetourDis::DecodeBranch24(ULONG opcode, BOOL& fLink) +{ + Branch24& branch = (Branch24&)(opcode); + + Branch24Target target; + ZeroMemory(&target, sizeof(target)); + target.Imm11 = branch.Imm11; + target.Imm10 = branch.Imm10; + target.Sign = branch.Sign; + target.I1 = ~(branch.J1 ^ target.Sign); + target.I2 = ~(branch.J2 ^ target.Sign); + fLink = branch.Link; + + // Sign extend + if (target.Sign) { + target.Padding2 = -1; + } + + return (LONG&)target; +} + +ULONG CDetourDis::EncodeBranch24(ULONG originalOpCode, LONG delta, BOOL fLink) +{ + // Too large for 24 bit branch (24 bit branches can be up to 25 bits due to the trailing 0) + if (delta < static_cast<int>(0xFF000000) || delta > static_cast<int>(0xFFFFFF)) { + return 0; + } + + Branch24& branch = (Branch24&)(originalOpCode); + Branch24Target& target = (Branch24Target&)(delta); + + branch.Imm11 = target.Imm11; + branch.Imm10 = target.Imm10; + branch.Link = fLink; + branch.Sign = target.Sign; + branch.J1 = ~(target.I1 ^ branch.Sign); + branch.J2 = ~(target.I2 ^ branch.Sign); + + return (ULONG&)branch; +} + +LONG CDetourDis::DecodeLiteralLoad8(ULONG instruction) +{ + LiteralLoad8& load = (LiteralLoad8&)(instruction); + + LiteralLoad8Target target; + ZeroMemory(&target, sizeof(target)); + target.Imm8 = load.Imm8; + + return (LONG&)target; +} + +BYTE CDetourDis::EmitLiteralLoad8(PUSHORT& pDest, BYTE targetRegister, PBYTE pLiteral) +{ + // Note: We add 2 (which gets rounded down) because literals must be 32-bit + // aligned, but the ldr can be 16-bit aligned. 
+ LONG newDelta = CalculateNewDelta((PBYTE)pLiteral + 2, (PBYTE)pDest); + LONG relative = ((newDelta > 0 ? newDelta : -newDelta) & 0x3FF); + + LiteralLoad8Target& target = (LiteralLoad8Target&)(relative); + LiteralLoad8 load = { target.Imm8, targetRegister, 0x9 }; + + return EmitShortInstruction(pDest, (USHORT&)load); +} + +LONG CDetourDis::DecodeLiteralLoad12(ULONG instruction) +{ + LiteralLoad12& load = (LiteralLoad12&)(instruction); + + LiteralLoad12Target target; + ZeroMemory(&target, sizeof(target)); + target.Imm12 = load.Imm12; + + return (LONG&)target; +} + +BYTE CDetourDis::EmitLiteralLoad12(PUSHORT& pDest, BYTE targetRegister, PBYTE pLiteral) +{ + // Note: We add 2 (which gets rounded down) because literals must be 32-bit + // aligned, but the ldr can be 16-bit aligned. + LONG newDelta = CalculateNewDelta((PBYTE)pLiteral + 2, (PBYTE)pDest); + LONG relative = ((newDelta > 0 ? newDelta : -newDelta) & 0xFFF); + + LiteralLoad12Target& target = (LiteralLoad12Target&)(relative); + target.Imm12 -= target.Imm12 & 3; + LiteralLoad12 load = { target.Imm12, targetRegister, 0x5F, (DWORD)(newDelta > 0), 0xF8 }; + + return EmitLongInstruction(pDest, (ULONG&)load); +} + +BYTE CDetourDis::EmitImmediateRegisterLoad32(PUSHORT& pDest, BYTE reg) +{ + ImmediateRegisterLoad32 load = { 0, reg, reg, 0xF8D }; + return EmitLongInstruction(pDest, (ULONG&)load); +} + +BYTE CDetourDis::EmitImmediateRegisterLoad16(PUSHORT& pDest, BYTE reg) +{ + ImmediateRegisterLoad16 load = { reg, reg, 0x680 >> 2 }; + return EmitShortInstruction(pDest, (USHORT&)load); +} + +BYTE CDetourDis::EmitLongLiteralLoad(PUSHORT& pDest, BYTE targetRegister, PVOID pTarget) +{ + *--((PULONG&)m_pbPool) = (ULONG)(size_t)pTarget; + + // ldr rn, target. 
+ BYTE size = EmitLiteralLoad12(pDest, targetRegister, m_pbPool); + + // This only makes sense if targetRegister != PC; + // otherwise, we would have branched with the previous instruction anyway + if (targetRegister != c_PC) { + // ldr rn, [rn] + if (targetRegister <= 7) { + size = (BYTE)(size + EmitImmediateRegisterLoad16(pDest, targetRegister)); + } + else { + size = (BYTE)(size + EmitImmediateRegisterLoad32(pDest, targetRegister)); + } + } + + return size; +} + +BYTE CDetourDis::EmitLongBranch(PUSHORT& pDest, PVOID pTarget) +{ + // Emit a long literal load into PC + BYTE size = EmitLongLiteralLoad(pDest, c_PC, DETOURS_PBYTE_TO_PFUNC(pTarget)); + return size; +} + +BYTE CDetourDis::PureCopy16(BYTE* pSource, BYTE* pDest) +{ + *(USHORT *)pDest = *(USHORT *)pSource; + return sizeof(USHORT); +} + +BYTE CDetourDis::PureCopy32(BYTE* pSource, BYTE* pDest) +{ + *(UNALIGNED ULONG *)pDest = *(UNALIGNED ULONG*)pSource; + return sizeof(DWORD); +} + +USHORT CDetourDis::CalculateExtra(BYTE sourceLength, BYTE* pDestStart, BYTE* pDestEnd) +{ + ULONG destinationLength = (ULONG)(pDestEnd - pDestStart); + return static_cast<USHORT>((destinationLength > sourceLength) ? 
(destinationLength - sourceLength) : 0); +} + +BYTE CDetourDis::CopyMiscellaneous16(BYTE* pSource, BYTE* pDest) +{ + USHORT instruction = *(PUSHORT)(pSource); + + // Compare and branch imm5 (CBZ, CBNZ) + if ((instruction & 0x100) && !(instruction & 0x400)) { // (1011x0x1xxxxxxxx) + LONG oldDelta = DecodeBranch5(instruction); + PBYTE pTarget = CalculateTarget(pSource, oldDelta); + m_pbTarget = pTarget; + + LONG newDelta = CalculateNewDelta(pTarget, pDest); + instruction = EncodeBranch5(instruction, newDelta); + + if (instruction) { + // Copy the 16 bit instruction over + *(PUSHORT)(pDest) = instruction; + return sizeof(USHORT); // The source instruction was 16 bits + } + + // If that fails, re-encode with 'conditional branch' logic, without using the condition flags + // For example, cbz r2,+0x56 (0x90432) becomes: + // + // 001df73a b92a cbnz r2,001df748 + // 001df73c e002 b 001df744 + // 001df73e bf00 nop + // 001df740 0432 dc.h 0432 + // 001df742 0009 dc.h 0009 + // 001df744 f85ff008 ldr pc,=0x90432 + // + + // Store where we will be writing our conditional branch, and move past it so we can emit a long branch + PUSHORT pDstInst = (PUSHORT)(pDest); + PUSHORT pConditionalBranchInstruction = pDstInst++; + + // Emit the long branch instruction + BYTE longBranchSize = EmitLongBranch(pDstInst, pTarget); + + // Invert the CBZ/CBNZ instruction to move past our 'long branch' if the inverse comparison succeeds + // Write the CBZ/CBNZ instruction *before* the long branch we emitted above + // This had to be done out of order, since the size of a long branch can vary due to alignment restrictions + instruction = EncodeBranch5(*(PUSHORT)(pSource), longBranchSize - c_PCAdjust + sizeof(USHORT)); + Branch5& branch = (Branch5&)(instruction); + branch.OpCode = (branch.OpCode & 0x02) ? 
0x2C : 0x2E; // Invert the CBZ/CBNZ comparison + *pConditionalBranchInstruction = instruction; + + // Compute the extra space needed for the branch sequence + m_lExtra = CalculateExtra(sizeof(USHORT), pDest, (BYTE*)(pDstInst)); + return sizeof(USHORT); // The source instruction was 16 bits + } + + // If-Then Instruction (IT) + if ((instruction >> 8 == 0xBF) && (instruction & 0xF)) { //(10111111xxxx(mask != 0b0000)) + // ToDo: Implement IT handler + ASSERT(false); + return sizeof(USHORT); + } + + // ADD/SUB, SXTH, SXTB, UXTH, UXTB, CBZ, CBNZ, PUSH, POP, REV, REV15, REVSH, NOP, YIELD, WFE, WFI, SEV, etc. + return PureCopy16(pSource, pDest); +} + +BYTE CDetourDis::CopyConditionalBranchOrOther16(BYTE* pSource, BYTE* pDest) +{ + USHORT instruction = *(PUSHORT)(pSource); + + // Could be a conditional branch, an Undefined instruction or a Service System Call + // Only the former needs special logic + if ((instruction & 0xE00) != 0xE00) { // 1101(!=111x)xxxxxxxx + LONG oldDelta = DecodeBranch8(instruction); + PBYTE pTarget = CalculateTarget(pSource, oldDelta); + m_pbTarget = pTarget; + + LONG newDelta = CalculateNewDelta(pTarget, pDest); + instruction = EncodeBranch8(instruction, newDelta); + if (instruction) { + // Copy the 16 bit instruction over + *(PUSHORT)(pDest) = instruction; + return sizeof(USHORT); // The source instruction was 16 bits + } + + // If that fails, re-encode as a sequence of branches + // For example, bne +0x6E (0x90452) becomes: + // + // 001df758 d100 bne 001df75c + // 001df75a e005 b 001df768 + // 001df75c e002 b 001df764 + // 001df75e bf00 nop + // 001df760 0452 dc.h 0452 + // 001df762 0009 dc.h 0009 + // 001df764 f85ff008 ldr pc,=0x90452 + // + + // First, reuse the existing conditional branch to, if successful, branch down to a 'long branch' that we will emit below + USHORT newInstruction = EncodeBranch8(*(PUSHORT)(pSource), 0); // Due to the size of c_PCAdjust a zero-length branch moves 4 bytes forward, past the following unconditional branch + 
ASSERT(newInstruction); + PUSHORT pDstInst = (PUSHORT)(pDest); + *pDstInst++ = newInstruction; + + // Next, prepare to insert an unconditional branch that will be hit if the condition above is not met. This branch will branch over the following 'long branch' + // We can't actually encode this branch yet though, because 'long branches' can vary in size + PUSHORT pUnconditionalBranchInstruction = pDstInst++; + + // Then, emit a 'long branch' that will be hit if the original condition is met + BYTE longBranchSize = EmitLongBranch(pDstInst, pTarget); + + // Finally, encode and emit the unconditional branch that will be used to branch past the 'long branch' if the initial condition was not met + Branch11 branch11 = { 0x00, 0x1C }; + newInstruction = EncodeBranch11(*(DWORD*)(&branch11), longBranchSize - c_PCAdjust + sizeof(USHORT)); + ASSERT(newInstruction); + *pUnconditionalBranchInstruction = newInstruction; + + // Compute the extra space needed for the branch sequence + m_lExtra = CalculateExtra(sizeof(USHORT), pDest, (BYTE*)(pDstInst)); + return sizeof(USHORT); // The source instruction was 16 bits + } + + return PureCopy16(pSource, pDest); +} + +BYTE CDetourDis::CopyUnConditionalBranch16(BYTE* pSource, BYTE* pDest) +{ + ULONG instruction = *(PUSHORT)(pSource); + + LONG oldDelta = DecodeBranch11(instruction); + PBYTE pTarget = CalculateTarget(pSource, oldDelta); + m_pbTarget = pTarget; + + LONG newDelta = CalculateNewDelta(pTarget, pDest); + instruction = EncodeBranch11(instruction, newDelta); + if (instruction) { + // Copy the 16 bit instruction over + *(PUSHORT)(pDest) = (USHORT)instruction; + return sizeof(USHORT); // The source instruction was 16 bits + } + + // If that fails, re-encode as 32-bit + PUSHORT pDstInst = (PUSHORT)(pDest); + instruction = EncodeBranch24(0xf0009000, newDelta, FALSE); + if (instruction) { + // Copy both bytes of the instruction + EmitLongInstruction(pDstInst, instruction); + + m_lExtra = sizeof(DWORD) - sizeof(USHORT); // The 
destination instruction was 32 bits + return sizeof(USHORT); // The source instruction was 16 bits + } + + // If that fails, emit as a 'long branch' + if (!instruction) { + // For example, b +0x7FE (00090be6) becomes: + // 003f6d02 e001 b 003f6d08 + // 003f6d04 0be6 dc.h 0be6 + // 003f6d06 0009 dc.h 0009 + // 003f6d08 f85ff008 ldr pc,=0x90BE6 + EmitLongBranch(pDstInst, pTarget); + + // Compute the extra space needed for the branch sequence + m_lExtra = CalculateExtra(sizeof(USHORT), pDest, (BYTE*)(pDstInst)); + return sizeof(USHORT); // The source instruction was 16 bits + } + + return sizeof(USHORT); // The source instruction was 16 bits +} + +BYTE CDetourDis::CopyLiteralLoad16(BYTE* pSource, BYTE* pDest) +{ + PBYTE pStart = pDest; + USHORT instruction = *(PUSHORT)(pSource); + + LONG oldDelta = DecodeLiteralLoad8(instruction); + PBYTE pTarget = CalculateTarget(Align4(pSource), oldDelta); + + // Re-encode as a 'long literal load' + // For example, ldr r0, [PC + 1E0] (0x905B4) becomes: + // + // 001df72c f85f0008 ldr r0,=0x905B4 + // 001df730 f8d00000 ldr.w r0,[r0] + LiteralLoad8& load8 = (LiteralLoad8&)(instruction); + EmitLongLiteralLoad((PUSHORT&)pDest, load8.Register, pTarget); + + m_lExtra = (LONG)(pDest - pStart - sizeof(USHORT)); + return sizeof(USHORT); // The source instruction was 16 bits +} + +BYTE CDetourDis::CopyBranchExchangeOrDataProcessing16(BYTE* pSource, BYTE* pDest) +{ + ULONG instruction = *(PUSHORT)(pSource); + + // BX + if ((instruction & 0xff80) == 0x4700) { + // The target is stored in a register + m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + + // AND, LSR, TST, ADD, CMP, MOV + return PureCopy16(pSource, pDest); +} + +const CDetourDis::COPYENTRY CDetourDis::s_rceCopyTable[33] = +{ + // Shift by immediate, move register + // ToDo: Not handling moves from PC + /* 0b00000 */ { 0x00, &CDetourDis::PureCopy16 }, + /* 0b00001 */ { 0x01, &CDetourDis::PureCopy16 }, + /* 0b00010 */ { 0x02, &CDetourDis::PureCopy16 }, + + // Add/subtract 
register + // Add/subtract immediate + /* 0b00011 */ { 0x03, &CDetourDis::PureCopy16}, + + // Add/subtract/compare/move immediate + /* 0b00100 */ { 0x04, &CDetourDis::PureCopy16 }, + /* 0b00101 */ { 0x05, &CDetourDis::PureCopy16 }, + /* 0b00110 */ { 0x06, &CDetourDis::PureCopy16 }, + /* 0b00111 */ { 0x07, &CDetourDis::PureCopy16 }, + + // Data-processing register + // Special data processing + // Branch/exchange instruction set + /* 0b01000 */ { 0x08, &CDetourDis::CopyBranchExchangeOrDataProcessing16 }, + + // Load from literal pool + /* 0b01001 */ { 0x09, &CDetourDis::CopyLiteralLoad16 }, + + // Load/store register offset + /* 0b01010 */ { 0x0a, &CDetourDis::PureCopy16 }, + /* 0b01011 */ { 0x0b, &CDetourDis::PureCopy16 }, + + // Load/store word/byte immediate offset. + /* 0b01100 */ { 0x0c, &CDetourDis::PureCopy16 }, + /* 0b01101 */ { 0x0d, &CDetourDis::PureCopy16 }, + /* 0b01110 */ { 0x0e, &CDetourDis::PureCopy16 }, + /* 0b01111 */ { 0x0f, &CDetourDis::PureCopy16 }, + + // Load/store halfword immediate offset. + /* 0b10000 */ { 0x10, &CDetourDis::PureCopy16 }, + /* 0b10001 */ { 0x11, &CDetourDis::PureCopy16 }, + + // Load from or store to stack + /* 0b10010 */ { 0x12, &CDetourDis::PureCopy16 }, + /* 0b10011 */ { 0x13, &CDetourDis::PureCopy16 }, + + // Add to SP or PC + /* 0b10100 */ { 0x14, &CDetourDis::PureCopy16 }, + // ToDo: Is ADR (T1) blitt-able? + // It adds a value to PC and stores the result in a register. + // Does this count as a 'target' for detours? + /* 0b10101 */ { 0x15, &CDetourDis::PureCopy16 }, + + // Miscellaneous + /* 0b10110 */ { 0x16, &CDetourDis::CopyMiscellaneous16 }, + /* 0b10111 */ { 0x17, &CDetourDis::CopyMiscellaneous16 }, + + // Load/store multiple + /* 0b11000 */ { 0x18, &CDetourDis::PureCopy16 }, + /* 0b11001 */ { 0x19, &CDetourDis::PureCopy16 }, + // ToDo: Are we sure these are all safe? + // LDMIA, for example, can include an 'embedded' branch. + // Does this count as a 'target' for detours? 
+ + // Conditional branch + /* 0b11010 */ { 0x1a, &CDetourDis::CopyConditionalBranchOrOther16 }, + + // Conditional branch + // Undefined instruction + // Service (system) call + /* 0b11011 */ { 0x1b, &CDetourDis::CopyConditionalBranchOrOther16 }, + + // Unconditional branch + /* 0b11100 */ { 0x1c, &CDetourDis::CopyUnConditionalBranch16 }, + + // 32-bit instruction + /* 0b11101 */ { 0x1d, &CDetourDis::BeginCopy32 }, + /* 0b11110 */ { 0x1e, &CDetourDis::BeginCopy32 }, + /* 0b11111 */ { 0x1f, &CDetourDis::BeginCopy32 }, + { 0, NULL } +}; + +BYTE CDetourDis::CopyBranch24(BYTE* pSource, BYTE* pDest) +{ + ULONG instruction = GetLongInstruction(pSource); + BOOL fLink; + LONG oldDelta = DecodeBranch24(instruction, fLink); + PBYTE pTarget = CalculateTarget(pSource, oldDelta); + m_pbTarget = pTarget; + + // Re-encode as 32-bit + PUSHORT pDstInst = (PUSHORT)(pDest); + LONG newDelta = CalculateNewDelta(pTarget, pDest); + instruction = EncodeBranch24(instruction, newDelta, fLink); + if (instruction) { + // Copy both bytes of the instruction + EmitLongInstruction(pDstInst, instruction); + return sizeof(DWORD); + } + + // If that fails, re-encode as a 'long branch' + EmitLongBranch(pDstInst, pTarget); + + // Compute the extra space needed for the instruction + m_lExtra = CalculateExtra(sizeof(DWORD), pDest, (BYTE*)(pDstInst)); + return sizeof(DWORD); // The source instruction was 32 bits +} + +BYTE CDetourDis::CopyBranchOrMiscellaneous32(BYTE* pSource, BYTE* pDest) +{ + ULONG instruction = GetLongInstruction(pSource); + if ((instruction & 0xf800d000) == 0xf0008000) { // B<c>.W <label> + LONG oldDelta = DecodeBranch20(instruction); + PBYTE pTarget = CalculateTarget(pSource, oldDelta); + m_pbTarget = pTarget; + + // Re-encode as 32-bit + PUSHORT pDstInst = (PUSHORT)(pDest); + LONG newDelta = CalculateNewDelta(pTarget, pDest); + instruction = EncodeBranch20(instruction, newDelta); + if (instruction) { + // Copy both bytes of the instruction + EmitLongInstruction(pDstInst, 
instruction); + return sizeof(DWORD); + } + + // If that fails, re-encode as a sequence of branches + // For example, bls.w +0x86 (00090480)| becomes: + // + // 001df788 f2408001 bls.w 001df78e + // 001df78c e004 b 001df798 + // 001df78e e001 b 001df794 + // 001df790 0480 dc.h 0480 + // 001df792 0009 dc.h 0009 + // 001df794 f85ff008 ldr pc,=0x90480 + // + + // First, reuse the existing conditional branch to, if successful, + // branch down to a 'long branch' that we will emit below + instruction = EncodeBranch20(GetLongInstruction(pSource), 2); + // Due to the size of c_PCAdjust a two-length branch moves 6 bytes forward, + // past the following unconditional branch + ASSERT(instruction); + EmitLongInstruction(pDstInst, instruction); + + // Next, prepare to insert an unconditional branch that will be hit + // if the condition above is not met. This branch will branch over + // the following 'long branch' + // We can't actually encode this branch yet though, because + // 'long branches' can vary in size + PUSHORT pUnconditionalBranchInstruction = pDstInst++; + + // Then, emit a 'long branch' that will be hit if the original condition is met + BYTE longBranchSize = EmitLongBranch(pDstInst, pTarget); + + // Finally, encode and emit the unconditional branch that will be used + // to branch past the 'long branch' if the initial condition was not met + Branch11 branch11 = { 0x00, 0x1C }; + instruction = EncodeBranch11(*(DWORD*)(&branch11), longBranchSize - c_PCAdjust + sizeof(USHORT)); + ASSERT(instruction); + *pUnconditionalBranchInstruction = static_cast<USHORT>(instruction); + + // Compute the extra space needed for the instruction + m_lExtra = CalculateExtra(sizeof(DWORD), pDest, (BYTE*)(pDstInst)); + return sizeof(DWORD); // The source instruction was 32 bits + } + + if ((instruction & 0xf800d000) == 0xf0009000) { // B.W <label> + // B <label> 11110xxxxxxxxxxx10xxxxxxxxxxxxxx + return CopyBranch24(pSource, pDest); + } + + if ((instruction & 0xf800d000) == 0xf000d000) 
{ // BL.W <label> + // B <label> 11110xxxxxxxxxxx10xxxxxxxxxxxxxx + + PUSHORT pDstInst = (PUSHORT)(pDest); + BOOL fLink; + LONG oldDelta = DecodeBranch24(instruction, fLink); + PBYTE pTarget = CalculateTarget(pSource, oldDelta); + m_pbTarget = pTarget; + + *--((PULONG&)m_pbPool) = (ULONG)(size_t)DETOURS_PBYTE_TO_PFUNC(pTarget); + + // ldr lr, target. + EmitLiteralLoad12(pDstInst, c_LR, m_pbPool); + // blx lr + EmitShortInstruction(pDstInst, 0x47f0); + + // Compute the extra space needed for the instruction + m_lExtra = CalculateExtra(sizeof(DWORD), pDest, (BYTE*)(pDstInst)); + return sizeof(DWORD); // The source instruction was 32 bits + } + + if ((instruction & 0xFFF0FFFF) == 0xF3C08F00) { + // BXJ 111100111100xxxx1000111100000000 + // BXJ switches to Jazelle mode, which is not supported + ASSERT(false); + } + + if ((instruction & 0xFFFFFF00) == 0xF3DE8F00) { + // SUBS PC, LR 111100111101111010001111xxxxxxxx + m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + + // Everything else should be blitt-able + return PureCopy32(pSource, pDest); +} + +BYTE CDetourDis::CopyLiteralLoad32(BYTE* pSource, BYTE* pDest) +{ + BYTE* pStart = pDest; + ULONG instruction = GetLongInstruction(pSource); + + LONG oldDelta = DecodeLiteralLoad12(instruction); + PBYTE pTarget = CalculateTarget(Align4(pSource), oldDelta); + + LiteralLoad12& load = (LiteralLoad12&)(instruction); + + EmitLongLiteralLoad((PUSHORT&)pDest, load.Register, pTarget); + + m_lExtra = (LONG)(pDest - pStart - sizeof(DWORD)); + + return sizeof(DWORD); // The source instruction was 32 bits +} + +BYTE CDetourDis::CopyLoadAndStoreSingle(BYTE* pSource, BYTE* pDest) +{ + ULONG instruction = GetLongInstruction(pSource); + + // Note: The following masks only look at the interesting bits + // (not the opCode prefix, since that check was performed in + // order to get to this function) + if (!(instruction & 0x100000)) { + // 1111 100x xxx0 xxxxxxxxxxxxxxxxxxxx : STR, STRB, STRH, etc. 
+ return PureCopy32(pSource, pDest); + } + + if ((instruction & 0xF81F0000) == 0xF81F0000) { + // 1111100xxxx11111xxxxxxxxxxxxxxxx : PC +/- Imm12 + return CopyLiteralLoad32(pSource, pDest); + } + + if ((instruction & 0xFE70F000) == 0xF81FF000) { + // 1111100xx001xxxx1111xxxxxxxxxxxx : PLD, PLI + // Convert PC-Relative PLD/PLI instructions to noops (1111100Xx00111111111xxxxxxxxxxxx) + if ((instruction & 0xFE7FF000) == 0xF81FF000) { + PUSHORT pDstInst = (PUSHORT)(pDest); + *pDstInst++ = c_NOP; + *pDstInst++ = c_NOP; + return sizeof(DWORD); // The source instruction was 32 bits + } + + // All other PLD/PLI instructions are blitt-able + return PureCopy32(pSource, pDest); + } + + // If the load is writing to PC + if ((instruction & 0xF950F000) == 0xF850F000) { + m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + + // All other loads LDR (immediate), etc. + return PureCopy32(pSource, pDest); +} + +BYTE CDetourDis::CopyLoadAndStoreMultipleAndSRS(BYTE* pSource, BYTE* pDest) +{ + // Probably all blitt-able, although not positive since some of these can result in a branch (LDMIA, POP, etc.) 
+ return PureCopy32(pSource, pDest); +} + +BYTE CDetourDis::CopyTableBranch(BYTE* pSource, BYTE* pDest) +{ + m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + ULONG instruction = GetLongInstruction(pSource); + TableBranch& tableBranch = (TableBranch&)(instruction); + + // If the base register is anything other than PC, we can simply copy the instruction + if (tableBranch.BaseRegister != c_PC) { + return PureCopy32(pSource, pDest); + } + + __debugbreak(); + + // If the base register is PC, we need to manually perform the table lookup + // For example, this: + // + // 7ef40000 e8dff002 tbb [pc,r2] + // + // becomes this: + // + // 7ef40404 b401 push {r0} ; pushed as a placeholder for the target address + // 7ef40406 e92d0005 push.w {r0,r2} ; scratch register and another register are pushed; there's a minimum of two registers in the list for push.w + // 7ef40410 4820 ldr r0,=0x7EF40004 ; load the table address from the literal pool + // 7ef40414 eb000042 add r0,r0,r2,lsl #1 ; add the index value to the address of the table to get the table entry; lsl only used if it's a TBH instruction + // 7ef40418 f8d00000 ldr.w r0,[r0] ; dereference the table entry to get the value of the target + // 7ef4041c ea4f0040 lsl r0,r0,#1 ; multiply the offset by 2 (per the spec) + // 7ef40420 eb00000f add.w r0,r0,pc ; Add the offset to pc to get the target address + // 7ef40424 f8cd000c str.w r0,[sp,#0xC] ; store the target address on the stack (into the first push) + // 7ef40428 e8bd0005 pop.w {r0,r2} ; scratch register and another register are popped; there's a minimum of two registers in the list for pop.w + // 7ef4042c bd00 pop {pc} ; pop the address into pc + // + + // Push r0 to make room for our jump address on the stack + PUSHORT pDstInst = (PUSHORT)(pDest); + *pDstInst++ = 0xb401; + + // Locate a scratch register + BYTE scrReg = 0; + while (scrReg == tableBranch.IndexRegister) { + ++scrReg; + } + + // Push scrReg and tableBranch.IndexRegister (push.w doesn't support 
pushing just 1 register) + DWORD pushInstruction = 0xe92d0000; + pushInstruction |= 1 << scrReg; + pushInstruction |= 1 << tableBranch.IndexRegister; + EmitLongInstruction(pDstInst, pushInstruction); + + // Write the target address out to the 'literal pool'; + // when the base register of a TBB/TBH is PC, + // the branch table immediately follows the instruction + BYTE* pTarget = CalculateTarget(pSource, 0); + *--((PUSHORT&)m_pbPool) = (USHORT)((size_t)pTarget & 0xffff); + *--((PUSHORT&)m_pbPool) = (USHORT)((size_t)pTarget >> 16); + + // Load the literal pool value into our scratch register (this contains the address of the branch table) + // ldr rn, target + EmitLiteralLoad8(pDstInst, scrReg, m_pbPool); + + // Add the index offset to the address of the branch table; the result will be the value within the table that contains the branch offset + // We need to multiply the index by two if we are using halfword indexing + // Will shift tableBranch.IndexRegister by 1 (multiply by 2) if using a TBH + EmitAdd32(pDstInst, scrReg, tableBranch.IndexRegister, scrReg, tableBranch.HalfWord); + + // Dereference rn into rn, to load the value within the table + // ldr rn, [rn] + if (scrReg < 0x7) { + EmitImmediateRegisterLoad16(pDstInst, scrReg); + } + else { + EmitImmediateRegisterLoad32(pDstInst, scrReg); + } + + // Multiply the offset by two to get the true offset value (as per the spec) + EmitLogicalShiftLeft32(pDstInst, scrReg, scrReg, 1); + + // Add the offset to PC to get the target + EmitAdd32(pDstInst, scrReg, c_PC, scrReg, 0); + + // Now write the contents of scrReg to the stack, so we can pop it into PC + // Write the address of the branch table entry to the stack, so we can pop it into PC + EmitStoreImmediate12(pDstInst, scrReg, c_SP, sizeof(DWORD) * 3); + + // Pop scrReg and tableBranch.IndexRegister (pop.w doesn't support popping just 1 register) + DWORD popInstruction = 0xe8bd0000; + popInstruction |= 1 << scrReg; + popInstruction |= 1 << 
tableBranch.IndexRegister; + EmitLongInstruction(pDstInst, popInstruction); + + // Pop PC + *pDstInst++ = 0xbd00; + + // Compute the extra space needed for the branch sequence + m_lExtra = CalculateExtra(sizeof(USHORT), pDest, (BYTE*)(pDstInst)); + return sizeof(DWORD); +} + +BYTE CDetourDis::BeginCopy32(BYTE* pSource, BYTE* pDest) +{ + ULONG instruction = GetLongInstruction(pSource); + + // Immediate data processing instructions; ADD, SUB, MOV, MOVN, ADR, MOVT, BFC, SSAT16, etc. + if ((instruction & 0xF8008000) == 0xF0000000) { // 11110xxxxxxxxxxx0xxxxxxxxxxxxxxx + // Should all be blitt-able + // ToDo: What about ADR? Is it safe to do a straight-copy? + // ToDo: Not handling moves to or from PC + return PureCopy32(pSource, pDest); + } + + // Non-Immediate data processing instructions; ADD, EOR, TST, etc. + if ((instruction & 0xEE000000) == 0xEA000000) { // 111x101xxxxxxxxxxxxxxxxxxxxxxx + // Should all be blitt-able + return PureCopy32(pSource, pDest); + } + + // Load and store single data item, memory hints + if ((instruction & 0xFE000000) == 0xF8000000) { // 1111100xxxxxxxxxxxxxxxxxxxxxxxxx + return CopyLoadAndStoreSingle(pSource, pDest); + } + + // Load and store, double and exclusive, and table branch + if ((instruction & 0xFE400000) == 0xE8400000) { // 1110100xx1xxxxxxxxxxxxxxxxxxxxxx + // Load and store double + if (instruction & 0x1200000) { + // LDRD, STRD (immediate) : xxxxxxxPxxWxxxxxxxxxxxxxxxxxxxxx where PW != 0b00 + // The source register is PC + if ((instruction & 0xF0000) == 0xF0000) { + // ToDo: If the source register is PC, what should we do? 
+ ASSERT(false); + } + + // If either target registers are PC + if (((instruction & 0xF000) == 0xF000) || + ((instruction & 0xF00) == 0xF00)) { + m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + + return PureCopy32(pSource, pDest); + } + + // Load and store exclusive + if (!(instruction & 0x800000)) { // LDREX, STREX : xxxxxxxx0xxxxxxxxxxxxxxxxxxxxxxx + if ((instruction & 0xF000) == 0xF000) { // xxxxxxxxxxxx1111xxxxxxxxxxxx + m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC; + } + return PureCopy32(pSource, pDest); + } + + // Table branch + if ((instruction & 0x1000F0) == 0x100000 || // TBB : xxxxxxxxxxx1xxxxxxxxxxxx0000xxxx + (instruction & 0x1000F0) == 0x100010) { // TBH : xxxxxxxxxxx1xxxxxxxxxxxx0001xxxx + return CopyTableBranch(pSource, pDest); + } + + // Load and store exclusive byte, halfword, doubleword (LDREXB, LDREXH, LDREXD, STREXB, STREXH, STREXD, etc.) + return PureCopy32(pSource, pDest); + } + + // Load and store multiple, RFE and SRS + if ((instruction & 0xFE400000) == 0xE8000000) { // 1110100xx0xxxxxxxxxxxxxxxxxxxxxx + // Return from exception (RFE) + if ((instruction & 0xE9900000) == 0xE9900000 || // 1110100110x1xxxxxxxxxxxxxxxxxxxx + (instruction & 0xE8100000) == 0xE8100000) { // 1110100000x1xxxxxxxxxxxxxxxxxxxx + return PureCopy32(pSource, pDest); + } + + return CopyLoadAndStoreMultipleAndSRS(pSource, pDest); + } + + // Branches, miscellaneous control + if ((instruction & 0xF8008000) == 0xF0008000) { // 11110xxxxxxxxxxx0xxxxxxxxxxxxxxx + // Branches, miscellaneous control + return CopyBranchOrMiscellaneous32(pSource, pDest); + } + + // Coprocessor instructions + if ((instruction & 0xEC000000) == 0xEC000000) { // 111x11xxxxxxxxxxxxxxxxxxxxxxxxxx + return PureCopy32(pSource, pDest); + } + + // Unhandled instruction; should never make it this far + ASSERT(false); + return PureCopy32(pSource, pDest); +} + +/////////////////////////////////////////////////////////// Disassembler Code. 
+// +CDetourDis::CDetourDis() : + m_pbTarget((PBYTE)DETOUR_INSTRUCTION_TARGET_NONE), + m_pbPool(NULL), + m_lExtra(0) +{ +} + +PBYTE CDetourDis::CopyInstruction(PBYTE pDst, + PBYTE *ppDstPool, + PBYTE pSrc, + PBYTE *ppTarget, + LONG *plExtra) +{ + if (pDst && ppDstPool && ppDstPool != NULL) { + m_pbPool = (PBYTE)*ppDstPool; + } + else { + pDst = m_rbScratchDst; + m_pbPool = m_rbScratchDst + sizeof(m_rbScratchDst); + } + // Make sure the constant pool is 32-bit aligned. + m_pbPool -= ((ULONG_PTR)m_pbPool) & 3; + + REFCOPYENTRY pEntry = &s_rceCopyTable[pSrc[1] >> 3]; + ULONG size = (this->*pEntry->pfCopy)(pSrc, pDst); + + pSrc += size; + + // If the target is needed, store our target + if (ppTarget) { + *ppTarget = m_pbTarget; + } + if (plExtra) { + *plExtra = m_lExtra; + } + if (ppDstPool) { + *ppDstPool = m_pbPool; + } + + return pSrc; +} + + +PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst, + _Inout_opt_ PVOID *ppDstPool, + _In_ PVOID pSrc, + _Out_opt_ PVOID *ppTarget, + _Out_opt_ LONG *plExtra) +{ + CDetourDis state; + return (PVOID)state.CopyInstruction((PBYTE)pDst, + (PBYTE*)ppDstPool, + (PBYTE)pSrc, + (PBYTE*)ppTarget, + plExtra); +} + +#endif // DETOURS_ARM + +#ifdef DETOURS_ARM64 + +#define c_LR 30 // The register number for the Link Register +#define c_SP 31 // The register number for the Stack Pointer +#define c_NOP 0xd503201f // A nop instruction +#define c_BREAK (0xd4200000 | (0xf000 << 5)) // A break instruction + +// +// Problematic instructions: +// +// ADR 0ll10000 hhhhhhhh hhhhhhhh hhhddddd & 0x9f000000 == 0x10000000 (l = low, h = high, d = Rd) +// ADRP 1ll10000 hhhhhhhh hhhhhhhh hhhddddd & 0x9f000000 == 0x90000000 (l = low, h = high, d = Rd) +// +// B.cond 01010100 iiiiiiii iiiiiiii iii0cccc & 0xff000010 == 0x54000000 (i = delta = SignExtend(imm19:00, 64), c = cond) +// +// B 000101ii iiiiiiii iiiiiiii iiiiiiii & 0xfc000000 == 0x14000000 (i = delta = SignExtend(imm26:00, 64)) +// BL 100101ii iiiiiiii iiiiiiii iiiiiiii & 0xfc000000 == 
0x94000000 (i = delta = SignExtend(imm26:00, 64)) +// +// CBNZ z0110101 iiiiiiii iiiiiiii iiittttt & 0x7f000000 == 0x35000000 (z = size, i = delta = SignExtend(imm19:00, 64), t = Rt) +// CBZ z0110100 iiiiiiii iiiiiiii iiittttt & 0x7f000000 == 0x34000000 (z = size, i = delta = SignExtend(imm19:00, 64), t = Rt) +// +// LDR Wt 00011000 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x18000000 (i = SignExtend(imm19:00, 64), t = Rt) +// LDR Xt 01011000 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x58000000 (i = SignExtend(imm19:00, 64), t = Rt) +// LDRSW 10011000 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x98000000 (i = SignExtend(imm19:00, 64), t = Rt) +// PRFM 11011000 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0xd8000000 (i = SignExtend(imm19:00, 64), t = Rt) +// LDR St 00011100 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x1c000000 (i = SignExtend(imm19:00, 64), t = Rt) +// LDR Dt 01011100 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x5c000000 (i = SignExtend(imm19:00, 64), t = Rt) +// LDR Qt 10011100 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x9c000000 (i = SignExtend(imm19:00, 64), t = Rt) +// LDR inv 11011100 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0xdc000000 (i = SignExtend(imm19:00, 64), t = Rt) +// +// TBNZ z0110111 bbbbbiii iiiiiiii iiittttt & 0x7f000000 == 0x37000000 (z = size, b = bitnum, i = SignExtend(imm14:00, 64), t = Rt) +// TBZ z0110110 bbbbbiii iiiiiiii iiittttt & 0x7f000000 == 0x36000000 (z = size, b = bitnum, i = SignExtend(imm14:00, 64), t = Rt) +// + +class CDetourDis +{ + public: + CDetourDis(); + + PBYTE CopyInstruction(PBYTE pDst, + PBYTE pSrc, + PBYTE *ppTarget, + LONG *plExtra); + + public: + typedef BYTE (CDetourDis::* COPYFUNC)(PBYTE pbDst, PBYTE pbSrc); + + union AddImm12 + { + DWORD Assembled; + struct + { + DWORD Rd : 5; // Destination register + DWORD Rn : 5; // Source register + DWORD Imm12 : 12; // 12-bit immediate + DWORD Shift : 2; // shift (must be 0 or 1) + DWORD Opcode1 : 7; // Must be 0010001 == 0x11 + DWORD Size : 1; // 0 = 
32-bit, 1 = 64-bit + } s; + static DWORD Assemble(DWORD size, DWORD rd, DWORD rn, ULONG imm, DWORD shift) + { + AddImm12 temp; + temp.s.Rd = rd; + temp.s.Rn = rn; + temp.s.Imm12 = imm & 0xfff; + temp.s.Shift = shift; + temp.s.Opcode1 = 0x11; + temp.s.Size = size; + return temp.Assembled; + } + static DWORD AssembleAdd32(DWORD rd, DWORD rn, ULONG imm, DWORD shift) { return Assemble(0, rd, rn, imm, shift); } + static DWORD AssembleAdd64(DWORD rd, DWORD rn, ULONG imm, DWORD shift) { return Assemble(1, rd, rn, imm, shift); } + }; + + union Adr19 + { + DWORD Assembled; + struct + { + DWORD Rd : 5; // Destination register + DWORD Imm19 : 19; // 19-bit upper immediate + DWORD Opcode1 : 5; // Must be 10000 == 0x10 + DWORD Imm2 : 2; // 2-bit lower immediate + DWORD Type : 1; // 0 = ADR, 1 = ADRP + } s; + inline LONG Imm() const { DWORD Imm = (s.Imm19 << 2) | s.Imm2; return (LONG)(Imm << 11) >> 11; } + static DWORD Assemble(DWORD type, DWORD rd, LONG delta) + { + Adr19 temp; + temp.s.Rd = rd; + temp.s.Imm19 = (delta >> 2) & 0x7ffff; + temp.s.Opcode1 = 0x10; + temp.s.Imm2 = delta & 3; + temp.s.Type = type; + return temp.Assembled; + } + static DWORD AssembleAdr(DWORD rd, LONG delta) { return Assemble(0, rd, delta); } + static DWORD AssembleAdrp(DWORD rd, LONG delta) { return Assemble(1, rd, delta); } + }; + + union Bcc19 + { + DWORD Assembled; + struct + { + DWORD Condition : 4; // Condition + DWORD Opcode1 : 1; // Must be 0 + DWORD Imm19 : 19; // 19-bit immediate + DWORD Opcode2 : 8; // Must be 01010100 == 0x54 + } s; + inline LONG Imm() const { return (LONG)(s.Imm19 << 13) >> 11; } + static DWORD AssembleBcc(DWORD condition, LONG delta) + { + Bcc19 temp; + temp.s.Condition = condition; + temp.s.Opcode1 = 0; + temp.s.Imm19 = delta >> 2; + temp.s.Opcode2 = 0x54; + return temp.Assembled; + } + }; + + union Branch26 + { + DWORD Assembled; + struct + { + DWORD Imm26 : 26; // 26-bit immediate + DWORD Opcode1 : 5; // Must be 00101 == 0x5 + DWORD Link : 1; // 0 = B, 1 = BL + } s; + 
inline LONG Imm() const { return (LONG)(s.Imm26 << 6) >> 4; } + static DWORD Assemble(DWORD link, LONG delta) + { + Branch26 temp; + temp.s.Imm26 = delta >> 2; + temp.s.Opcode1 = 0x5; + temp.s.Link = link; + return temp.Assembled; + } + static DWORD AssembleB(LONG delta) { return Assemble(0, delta); } + static DWORD AssembleBl(LONG delta) { return Assemble(1, delta); } + }; + + union Br + { + DWORD Assembled; + struct + { + DWORD Opcode1 : 5; // Must be 00000 == 0 + DWORD Rn : 5; // Register number + DWORD Opcode2 : 22; // Must be 1101011000011111000000 == 0x3587c0 for Br + // 0x358fc0 for Brl + } s; + static DWORD Assemble(DWORD rn, bool link) + { + Br temp; + temp.s.Opcode1 = 0; + temp.s.Rn = rn; + temp.s.Opcode2 = 0x3587c0; + if (link) + temp.Assembled |= 0x00200000; + return temp.Assembled; + } + static DWORD AssembleBr(DWORD rn) + { + return Assemble(rn, false); + } + static DWORD AssembleBrl(DWORD rn) + { + return Assemble(rn, true); + } + }; + + union Cbz19 + { + DWORD Assembled; + struct + { + DWORD Rt : 5; // Register to test + DWORD Imm19 : 19; // 19-bit immediate + DWORD Nz : 1; // 0 = CBZ, 1 = CBNZ + DWORD Opcode1 : 6; // Must be 011010 == 0x1a + DWORD Size : 1; // 0 = 32-bit, 1 = 64-bit + } s; + inline LONG Imm() const { return (LONG)(s.Imm19 << 13) >> 11; } + static DWORD Assemble(DWORD size, DWORD nz, DWORD rt, LONG delta) + { + Cbz19 temp; + temp.s.Rt = rt; + temp.s.Imm19 = delta >> 2; + temp.s.Nz = nz; + temp.s.Opcode1 = 0x1a; + temp.s.Size = size; + return temp.Assembled; + } + }; + + union LdrLit19 + { + DWORD Assembled; + struct + { + DWORD Rt : 5; // Destination register + DWORD Imm19 : 19; // 19-bit immediate + DWORD Opcode1 : 2; // Must be 0 + DWORD FpNeon : 1; // 0 = LDR Wt/LDR Xt/LDRSW/PRFM, 1 = LDR St/LDR Dt/LDR Qt + DWORD Opcode2 : 3; // Must be 011 = 3 + DWORD Size : 2; // 00 = LDR Wt/LDR St, 01 = LDR Xt/LDR Dt, 10 = LDRSW/LDR Qt, 11 = PRFM/invalid + } s; + inline LONG Imm() const { return (LONG)(s.Imm19 << 13) >> 11; } + static DWORD 
Assemble(DWORD size, DWORD fpneon, DWORD rt, LONG delta) + { + LdrLit19 temp; + temp.s.Rt = rt; + temp.s.Imm19 = delta >> 2; + temp.s.Opcode1 = 0; + temp.s.FpNeon = fpneon; + temp.s.Opcode2 = 3; + temp.s.Size = size; + return temp.Assembled; + } + }; + + union LdrFpNeonImm9 + { + DWORD Assembled; + struct + { + DWORD Rt : 5; // Destination register + DWORD Rn : 5; // Base register + DWORD Imm12 : 12; // 12-bit immediate + DWORD Opcode1 : 1; // Must be 1 == 1 + DWORD Opc : 1; // Part of size + DWORD Opcode2 : 6; // Must be 111101 == 0x3d + DWORD Size : 2; // Size (0=8-bit, 1=16-bit, 2=32-bit, 3=64-bit, 4=128-bit) + } s; + static DWORD Assemble(DWORD size, DWORD rt, DWORD rn, ULONG imm) + { + LdrFpNeonImm9 temp; + temp.s.Rt = rt; + temp.s.Rn = rn; + temp.s.Imm12 = imm; + temp.s.Opcode1 = 1; + temp.s.Opc = size >> 2; + temp.s.Opcode2 = 0x3d; + temp.s.Size = size & 3; + return temp.Assembled; + } + }; + + union Mov16 + { + DWORD Assembled; + struct + { + DWORD Rd : 5; // Destination register + DWORD Imm16 : 16; // Immediate + DWORD Shift : 2; // Shift amount (0=0, 1=16, 2=32, 3=48) + DWORD Opcode : 6; // Must be 100101 == 0x25 + DWORD Type : 2; // 0 = MOVN, 1 = reserved, 2 = MOVZ, 3 = MOVK + DWORD Size : 1; // 0 = 32-bit, 1 = 64-bit + } s; + static DWORD Assemble(DWORD size, DWORD type, DWORD rd, DWORD imm, DWORD shift) + { + Mov16 temp; + temp.s.Rd = rd; + temp.s.Imm16 = imm; + temp.s.Shift = shift; + temp.s.Opcode = 0x25; + temp.s.Type = type; + temp.s.Size = size; + return temp.Assembled; + } + static DWORD AssembleMovn32(DWORD rd, DWORD imm, DWORD shift) { return Assemble(0, 0, rd, imm, shift); } + static DWORD AssembleMovn64(DWORD rd, DWORD imm, DWORD shift) { return Assemble(1, 0, rd, imm, shift); } + static DWORD AssembleMovz32(DWORD rd, DWORD imm, DWORD shift) { return Assemble(0, 2, rd, imm, shift); } + static DWORD AssembleMovz64(DWORD rd, DWORD imm, DWORD shift) { return Assemble(1, 2, rd, imm, shift); } + static DWORD AssembleMovk32(DWORD rd, DWORD imm, 
DWORD shift) { return Assemble(0, 3, rd, imm, shift); } + static DWORD AssembleMovk64(DWORD rd, DWORD imm, DWORD shift) { return Assemble(1, 3, rd, imm, shift); } + }; + + union Tbz14 + { + DWORD Assembled; + struct + { + DWORD Rt : 5; // Register to test + DWORD Imm14 : 14; // 14-bit immediate + DWORD Bit : 5; // 5-bit index + DWORD Nz : 1; // 0 = TBZ, 1 = TBNZ + DWORD Opcode1 : 6; // Must be 011011 == 0x1b + DWORD Size : 1; // 0 = 32-bit, 1 = 64-bit + } s; + inline LONG Imm() const { return (LONG)(s.Imm14 << 18) >> 16; } + static DWORD Assemble(DWORD size, DWORD nz, DWORD rt, DWORD bit, LONG delta) + { + Tbz14 temp; + temp.s.Rt = rt; + temp.s.Imm14 = delta >> 2; + temp.s.Bit = bit; + temp.s.Nz = nz; + temp.s.Opcode1 = 0x1b; + temp.s.Size = size; + return temp.Assembled; + } + }; + + + protected: + BYTE PureCopy32(BYTE* pSource, BYTE* pDest); + BYTE EmitMovImmediate(PULONG& pDstInst, BYTE rd, UINT64 immediate); + BYTE CopyAdr(BYTE* pSource, BYTE* pDest, ULONG instruction); + BYTE CopyBcc(BYTE* pSource, BYTE* pDest, ULONG instruction); + BYTE CopyB(BYTE* pSource, BYTE* pDest, ULONG instruction); + BYTE CopyBl(BYTE* pSource, BYTE* pDest, ULONG instruction); + BYTE CopyB_or_Bl(BYTE* pSource, BYTE* pDest, ULONG instruction, bool link); + BYTE CopyCbz(BYTE* pSource, BYTE* pDest, ULONG instruction); + BYTE CopyTbz(BYTE* pSource, BYTE* pDest, ULONG instruction); + BYTE CopyLdrLiteral(BYTE* pSource, BYTE* pDest, ULONG instruction); + + protected: + ULONG GetInstruction(BYTE* pSource) + { + return ((PULONG)pSource)[0]; + } + + BYTE EmitInstruction(PULONG& pDstInst, ULONG instruction) + { + *pDstInst++ = instruction; + return sizeof(ULONG); + } + + protected: + PBYTE m_pbTarget; + BYTE m_rbScratchDst[128]; // matches or exceeds rbCode +}; + +BYTE CDetourDis::PureCopy32(BYTE* pSource, BYTE* pDest) +{ + *(ULONG *)pDest = *(ULONG*)pSource; + return sizeof(DWORD); +} + +/////////////////////////////////////////////////////////// Disassembler Code. 
+// +CDetourDis::CDetourDis() : + m_pbTarget((PBYTE)DETOUR_INSTRUCTION_TARGET_NONE) +{ +} + +PBYTE CDetourDis::CopyInstruction(PBYTE pDst, + PBYTE pSrc, + PBYTE *ppTarget, + LONG *plExtra) +{ + if (pDst == NULL) { + pDst = m_rbScratchDst; + } + + DWORD Instruction = GetInstruction(pSrc); + + ULONG CopiedSize; + if ((Instruction & 0x1f000000) == 0x10000000) { + CopiedSize = CopyAdr(pSrc, pDst, Instruction); + } else if ((Instruction & 0xff000010) == 0x54000000) { + CopiedSize = CopyBcc(pSrc, pDst, Instruction); + } else if ((Instruction & 0x7c000000) == 0x14000000) { + CopiedSize = CopyB_or_Bl(pSrc, pDst, Instruction, (Instruction & 0x80000000) != 0); + } else if ((Instruction & 0x7e000000) == 0x34000000) { + CopiedSize = CopyCbz(pSrc, pDst, Instruction); + } else if ((Instruction & 0x7e000000) == 0x36000000) { + CopiedSize = CopyTbz(pSrc, pDst, Instruction); + } else if ((Instruction & 0x3b000000) == 0x18000000) { + CopiedSize = CopyLdrLiteral(pSrc, pDst, Instruction); + } else { + CopiedSize = PureCopy32(pSrc, pDst); + } + + // If the target is needed, store our target + if (ppTarget) { + *ppTarget = m_pbTarget; + } + if (plExtra) { + *plExtra = CopiedSize - sizeof(DWORD); + } + + return pSrc + 4; +} + +BYTE CDetourDis::EmitMovImmediate(PULONG& pDstInst, BYTE rd, UINT64 immediate) +{ + DWORD piece[4]; + piece[3] = (DWORD)((immediate >> 48) & 0xffff); + piece[2] = (DWORD)((immediate >> 32) & 0xffff); + piece[1] = (DWORD)((immediate >> 16) & 0xffff); + piece[0] = (DWORD)((immediate >> 0) & 0xffff); + int count = 0; + + // special case: MOVN with 32-bit dest + if (piece[3] == 0 && piece[2] == 0 && piece[1] == 0xffff) + { + EmitInstruction(pDstInst, Mov16::AssembleMovn32(rd, piece[0] ^ 0xffff, 0)); + count++; + } + + // MOVN/MOVZ with 64-bit dest + else + { + int zero_pieces = (piece[3] == 0x0000) + (piece[2] == 0x0000) + (piece[1] == 0x0000) + (piece[0] == 0x0000); + int ffff_pieces = (piece[3] == 0xffff) + (piece[2] == 0xffff) + (piece[1] == 0xffff) + (piece[0] == 
0xffff); + DWORD defaultPiece = (ffff_pieces > zero_pieces) ? 0xffff : 0x0000; + bool first = true; + for (int pieceNum = 3; pieceNum >= 0; pieceNum--) + { + DWORD curPiece = piece[pieceNum]; + if (curPiece != defaultPiece || (pieceNum == 0 && first)) + { + count++; + if (first) + { + if (defaultPiece == 0xffff) + { + EmitInstruction(pDstInst, Mov16::AssembleMovn64(rd, curPiece ^ 0xffff, pieceNum)); + } + else + { + EmitInstruction(pDstInst, Mov16::AssembleMovz64(rd, curPiece, pieceNum)); + } + first = false; + } + else + { + EmitInstruction(pDstInst, Mov16::AssembleMovk64(rd, curPiece, pieceNum)); + } + } + } + } + return (BYTE)(count * sizeof(DWORD)); +} + +BYTE CDetourDis::CopyAdr(BYTE* pSource, BYTE* pDest, ULONG instruction) +{ + Adr19& decoded = (Adr19&)(instruction); + PULONG pDstInst = (PULONG)(pDest); + + // ADR case + if (decoded.s.Type == 0) + { + BYTE* pTarget = pSource + decoded.Imm(); + LONG64 delta = pTarget - pDest; + LONG64 deltaPage = ((ULONG_PTR)pTarget >> 12) - ((ULONG_PTR)pDest >> 12); + + // output as ADR + if (delta >= -(1 << 20) && delta < (1 << 20)) + { + EmitInstruction(pDstInst, Adr19::AssembleAdr(decoded.s.Rd, (LONG)delta)); + } + + // output as ADRP; ADD + else if (deltaPage >= -(1 << 20) && (deltaPage < (1 << 20))) + { + EmitInstruction(pDstInst, Adr19::AssembleAdrp(decoded.s.Rd, (LONG)deltaPage)); + EmitInstruction(pDstInst, AddImm12::AssembleAdd32(decoded.s.Rd, decoded.s.Rd, ((ULONG)(ULONG_PTR)pTarget) & 0xfff, 0)); + } + + // output as immediate move + else + { + EmitMovImmediate(pDstInst, decoded.s.Rd, (ULONG_PTR)pTarget); + } + } + + // ADRP case + else + { + BYTE* pTarget = (BYTE*)((((ULONG_PTR)pSource >> 12) + decoded.Imm()) << 12); + LONG64 deltaPage = ((ULONG_PTR)pTarget >> 12) - ((ULONG_PTR)pDest >> 12); + + // output as ADRP + if (deltaPage >= -(1 << 20) && (deltaPage < (1 << 20))) + { + EmitInstruction(pDstInst, Adr19::AssembleAdrp(decoded.s.Rd, (LONG)deltaPage)); + } + + // output as immediate move + else + { + 
EmitMovImmediate(pDstInst, decoded.s.Rd, (ULONG_PTR)pTarget); + } + } + + return (BYTE)((BYTE*)pDstInst - pDest); +} + +BYTE CDetourDis::CopyBcc(BYTE* pSource, BYTE* pDest, ULONG instruction) +{ + Bcc19& decoded = (Bcc19&)(instruction); + PULONG pDstInst = (PULONG)(pDest); + + BYTE* pTarget = pSource + decoded.Imm(); + m_pbTarget = pTarget; + LONG64 delta = pTarget - pDest; + LONG64 delta4 = pTarget - (pDest + 4); + + // output as BCC + if (delta >= -(1 << 20) && delta < (1 << 20)) + { + EmitInstruction(pDstInst, Bcc19::AssembleBcc(decoded.s.Condition, (LONG)delta)); + } + + // output as BCC <skip>; B + else if (delta4 >= -(1 << 27) && (delta4 < (1 << 27))) + { + EmitInstruction(pDstInst, Bcc19::AssembleBcc(decoded.s.Condition ^ 1, 8)); + EmitInstruction(pDstInst, Branch26::AssembleB((LONG)delta4)); + } + + // output as MOV x17, Target; BCC <skip>; BR x17 (BIG assumption that x17 isn't being used for anything!!) + else + { + EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget); + EmitInstruction(pDstInst, Bcc19::AssembleBcc(decoded.s.Condition ^ 1, 8)); + EmitInstruction(pDstInst, Br::AssembleBr(17)); + } + + return (BYTE)((BYTE*)pDstInst - pDest); +} + +BYTE CDetourDis::CopyB_or_Bl(BYTE* pSource, BYTE* pDest, ULONG instruction, bool link) +{ + Branch26& decoded = (Branch26&)(instruction); + PULONG pDstInst = (PULONG)(pDest); + + BYTE* pTarget = pSource + decoded.Imm(); + m_pbTarget = pTarget; + LONG64 delta = pTarget - pDest; + + // output as B or BRL + if (delta >= -(1 << 27) && (delta < (1 << 27))) + { + EmitInstruction(pDstInst, Branch26::Assemble(link, (LONG)delta)); + } + + // output as MOV x17, Target; BR or BRL x17 (BIG assumption that x17 isn't being used for anything!!) 
+ else + { + EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget); + EmitInstruction(pDstInst, Br::Assemble(17, link)); + } + + return (BYTE)((BYTE*)pDstInst - pDest); +} + +BYTE CDetourDis::CopyB(BYTE* pSource, BYTE* pDest, ULONG instruction) +{ + return CopyB_or_Bl(pSource, pDest, instruction, false); +} + +BYTE CDetourDis::CopyBl(BYTE* pSource, BYTE* pDest, ULONG instruction) +{ + return CopyB_or_Bl(pSource, pDest, instruction, true); +} + +BYTE CDetourDis::CopyCbz(BYTE* pSource, BYTE* pDest, ULONG instruction) +{ + Cbz19& decoded = (Cbz19&)(instruction); + PULONG pDstInst = (PULONG)(pDest); + + BYTE* pTarget = pSource + decoded.Imm(); + m_pbTarget = pTarget; + LONG64 delta = pTarget - pDest; + LONG64 delta4 = pTarget - (pDest + 4); + + // output as CBZ/NZ + if (delta >= -(1 << 20) && delta < (1 << 20)) + { + EmitInstruction(pDstInst, Cbz19::Assemble(decoded.s.Size, decoded.s.Nz, decoded.s.Rt, (LONG)delta)); + } + + // output as CBNZ/Z <skip>; B + else if (delta4 >= -(1 << 27) && (delta4 < (1 << 27))) + { + EmitInstruction(pDstInst, Cbz19::Assemble(decoded.s.Size, decoded.s.Nz ^ 1, decoded.s.Rt, 8)); + EmitInstruction(pDstInst, Branch26::AssembleB((LONG)delta4)); + } + + // output as MOV x17, Target; CBNZ/Z <skip>; BR x17 (BIG assumption that x17 isn't being used for anything!!) 
+ else + { + EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget); + EmitInstruction(pDstInst, Cbz19::Assemble(decoded.s.Size, decoded.s.Nz ^ 1, decoded.s.Rt, 8)); + EmitInstruction(pDstInst, Br::AssembleBr(17)); + } + + return (BYTE)((BYTE*)pDstInst - pDest); +} + +BYTE CDetourDis::CopyTbz(BYTE* pSource, BYTE* pDest, ULONG instruction) +{ + Tbz14& decoded = (Tbz14&)(instruction); + PULONG pDstInst = (PULONG)(pDest); + + BYTE* pTarget = pSource + decoded.Imm(); + m_pbTarget = pTarget; + LONG64 delta = pTarget - pDest; + LONG64 delta4 = pTarget - (pDest + 4); + + // output as TBZ/NZ + if (delta >= -(1 << 13) && delta < (1 << 13)) + { + EmitInstruction(pDstInst, Tbz14::Assemble(decoded.s.Size, decoded.s.Nz, decoded.s.Rt, decoded.s.Bit, (LONG)delta)); + } + + // output as TBNZ/Z <skip>; B + else if (delta4 >= -(1 << 27) && (delta4 < (1 << 27))) + { + EmitInstruction(pDstInst, Tbz14::Assemble(decoded.s.Size, decoded.s.Nz ^ 1, decoded.s.Rt, decoded.s.Bit, 8)); + EmitInstruction(pDstInst, Branch26::AssembleB((LONG)delta4)); + } + + // output as MOV x17, Target; TBNZ/Z <skip>; BR x17 (BIG assumption that x17 isn't being used for anything!!) 
+ else + { + EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget); + EmitInstruction(pDstInst, Tbz14::Assemble(decoded.s.Size, decoded.s.Nz ^ 1, decoded.s.Rt, decoded.s.Bit, 8)); + EmitInstruction(pDstInst, Br::AssembleBr(17)); + } + + return (BYTE)((BYTE*)pDstInst - pDest); +} + +BYTE CDetourDis::CopyLdrLiteral(BYTE* pSource, BYTE* pDest, ULONG instruction) +{ + LdrLit19& decoded = (LdrLit19&)(instruction); + PULONG pDstInst = (PULONG)(pDest); + + BYTE* pTarget = pSource + decoded.Imm(); + LONG64 delta = pTarget - pDest; + + // output as LDR + if (delta >= -(1 << 21) && delta < (1 << 21)) + { + EmitInstruction(pDstInst, LdrLit19::Assemble(decoded.s.Size, decoded.s.FpNeon, decoded.s.Rt, (LONG)delta)); + } + + // output as move immediate + else if (decoded.s.FpNeon == 0) + { + UINT64 value = 0; + switch (decoded.s.Size) + { + case 0: value = *(ULONG*)pTarget; break; + case 1: value = *(UINT64*)pTarget; break; + case 2: value = *(LONG*)pTarget; break; + } + EmitMovImmediate(pDstInst, decoded.s.Rt, value); + } + + // FP/NEON register: compute address in x17 and load from there (BIG assumption that x17 isn't being used for anything!!) 
+ else + { + EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget); + EmitInstruction(pDstInst, LdrFpNeonImm9::Assemble(2 + decoded.s.Size, decoded.s.Rt, 17, 0)); + } + + return (BYTE)((BYTE*)pDstInst - pDest); +} + + +PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst, + _Inout_opt_ PVOID *ppDstPool, + _In_ PVOID pSrc, + _Out_opt_ PVOID *ppTarget, + _Out_opt_ LONG *plExtra) +{ + UNREFERENCED_PARAMETER(ppDstPool); + + CDetourDis state; + return (PVOID)state.CopyInstruction((PBYTE)pDst, + (PBYTE)pSrc, + (PBYTE*)ppTarget, + plExtra); +} + +#endif // DETOURS_ARM64 + +BOOL WINAPI DetourSetCodeModule(_In_ HMODULE hModule, + _In_ BOOL fLimitReferencesToModule) +{ +#if defined(DETOURS_X64) || defined(DETOURS_X86) + PBYTE pbBeg = NULL; + PBYTE pbEnd = (PBYTE)~(ULONG_PTR)0; + + if (hModule != NULL) { + ULONG cbModule = DetourGetModuleSize(hModule); + + pbBeg = (PBYTE)hModule; + pbEnd = (PBYTE)hModule + cbModule; + } + + return CDetourDis::SetCodeModule(pbBeg, pbEnd, fLimitReferencesToModule); +#elif defined(DETOURS_ARM) || defined(DETOURS_ARM64) || defined(DETOURS_IA64) + (void)hModule; + (void)fLimitReferencesToModule; + return TRUE; +#else +#error unknown architecture (x86, x64, arm, arm64, ia64) +#endif +} + +// +///////////////////////////////////////////////////////////////// End of File. |