#include #include #include #include #include #include #include #include using namespace Xbyak; CYBOZU_TEST_AUTO(setSize) { struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(4096) { setSize(4095); db(1); size_t size = getSize(); CYBOZU_TEST_EQUAL(size, 4096u); CYBOZU_TEST_NO_EXCEPTION(setSize(size)); CYBOZU_TEST_EXCEPTION(db(1), Xbyak::Error); } } code; } #ifdef XBYAK64 CYBOZU_TEST_AUTO(badSSE) { struct Code : Xbyak::CodeGenerator { Code() { CYBOZU_TEST_EXCEPTION(paddd(xm16, xm1), Xbyak::Error); CYBOZU_TEST_EXCEPTION(pslld(xm16, 1), Xbyak::Error); CYBOZU_TEST_EXCEPTION(movapd(xm16, xm1), Xbyak::Error); CYBOZU_TEST_EXCEPTION(movhpd(xm16, ptr[eax]), Xbyak::Error); CYBOZU_TEST_EXCEPTION(pextrb(eax, xm16, 1), Xbyak::Error); } } code; } #endif CYBOZU_TEST_AUTO(compOperand) { using namespace Xbyak::util; CYBOZU_TEST_ASSERT(eax == eax); CYBOZU_TEST_ASSERT(ecx != xmm0); CYBOZU_TEST_ASSERT(ptr[eax] == ptr[eax]); CYBOZU_TEST_ASSERT(dword[eax] != ptr[eax]); CYBOZU_TEST_ASSERT(ptr[eax] != ptr[eax+3]); } CYBOZU_TEST_AUTO(mov_const) { struct Code : Xbyak::CodeGenerator { Code() { const struct { uint64_t v; int bit; bool error; } tbl[] = { { uint64_t(-1), 8, false }, { 0x12, 8, false }, { 0x80, 8, false }, { 0xff, 8, false }, { 0x100, 8, true }, { 1, 16, false }, { uint64_t(-1), 16, false }, { 0x7fff, 16, false }, { 0xffff, 16, false }, { 0x10000, 16, true }, { uint64_t(-1), 32, false }, { 0x7fffffff, 32, false }, { uint64_t(-0x7fffffff), 32, false }, { 0xffffffff, 32, false }, { 0x100000000ull, 32, true }, #ifdef XBYAK64 { uint64_t(-1), 64, false }, { 0x7fffffff, 64, false }, { 0xffffffffffffffffull, 64, false }, { 0x80000000, 64, true }, { 0xffffffff, 64, true }, #endif }; for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { const int bit = tbl[i].bit; const uint64_t v = tbl[i].v; const Xbyak::AddressFrame& af = bit == 8 ? byte : bit == 16 ? word : bit == 32 ? 
dword : qword; if (tbl[i].error) { CYBOZU_TEST_EXCEPTION(mov(af[eax], v), Xbyak::Error); } else { CYBOZU_TEST_NO_EXCEPTION(mov(af[eax], v)); } } #ifdef XBYAK64 CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x7fffffff])); if (sizeof(void*) != 4) { // sizeof(void*) == 4 on x32 CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x17fffffff]), Xbyak::Error); } #ifdef XBYAK_OLD_DISP_CHECK CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x80000000])); CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0xffffffff])); #else if (sizeof(void*) != 4) { // sizeof(void*) == 4 on x32 CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x80000000ull]), Xbyak::Error); CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0xffffffffull]), Xbyak::Error); } #endif #endif } } code; } CYBOZU_TEST_AUTO(align) { struct Code : Xbyak::CodeGenerator { Code() { const size_t alignSize = 16; for (int padding = 0; padding < 20; padding++) { for (int i = 0; i < padding; i++) { db(1); } align(alignSize); CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u); } align(alignSize); const uint8_t *p = getCurr(); // do nothing if aligned align(alignSize); CYBOZU_TEST_EQUAL(p, getCurr()); } } c; } CYBOZU_TEST_AUTO(kmask) { struct Code : Xbyak::CodeGenerator { Code() { CYBOZU_TEST_EXCEPTION(kmovb(k1, ax), std::exception); CYBOZU_TEST_EXCEPTION(kmovw(k1, ax), std::exception); CYBOZU_TEST_EXCEPTION(kmovd(k1, ax), std::exception); CYBOZU_TEST_EXCEPTION(kmovq(k1, eax), std::exception); #ifdef XBYAK64 CYBOZU_TEST_EXCEPTION(kmovb(k1, rax), std::exception); CYBOZU_TEST_EXCEPTION(kmovw(k1, rax), std::exception); CYBOZU_TEST_EXCEPTION(kmovd(k1, rax), std::exception); CYBOZU_TEST_NO_EXCEPTION(kmovq(k1, rax)); #endif CYBOZU_TEST_NO_EXCEPTION(vmovaps(xm0|k0, ptr[eax])); checkT_z(); } void checkT_z() { const uint8_t *p1 = getCurr(); vmovaps(zm0, ptr[eax]); const uint8_t *p2 = getCurr(); vmovaps(zm0|T_z, ptr[eax]); const uint8_t *end = getCurr(); CYBOZU_TEST_EQUAL(p2 - p1, end - p2); CYBOZU_TEST_EQUAL_ARRAY(p1, p2, end - p2); } } c; } CYBOZU_TEST_AUTO(gather) 
{
	// Operand validation for gather/scatter. Only the throw / no-throw
	// behaviour is checked here; the emitted bytes are not compared.
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			// three-register form: accepted with three distinct xmm registers;
			// each of the next three calls reuses one register in two
			// positions and is expected to throw
			CYBOZU_TEST_NO_EXCEPTION(vgatherdpd(xmm1, ptr[eax+xmm2], xmm3));
			CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm1, ptr[eax+xmm1], xmm2), std::exception);
			CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm1, ptr[eax+xmm2], xmm1), std::exception);
			CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm2, ptr[eax+xmm1], xmm1), std::exception);
			// two-operand form: accepted with |k2 on the destination, rejected
			// without an opmask or when destination and index are both xmm1
			CYBOZU_TEST_NO_EXCEPTION(vgatherdpd(xmm1|k2, ptr[eax+xmm2]));
			CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm1, ptr[eax+xmm2]), std::exception);
			CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm1|k2, ptr[eax+xmm1]), std::exception);
			// scatter: the opmask may be attached to either operand, and
			// index == source (xmm2) is accepted; no opmask at all must throw
			CYBOZU_TEST_NO_EXCEPTION(vpscatterdd(ptr[eax+xmm2]|k2, xmm1));
			CYBOZU_TEST_NO_EXCEPTION(vpscatterdd(ptr[eax+xmm2], xmm1|k2));
			CYBOZU_TEST_NO_EXCEPTION(vpscatterdd(ptr[eax+xmm2]|k3, xmm2));
			CYBOZU_TEST_EXCEPTION(vpscatterdd(ptr[eax+xmm2], xmm1), std::exception);
		}
	} c;
}

#ifdef XBYAK64
// Encoding test: emits v4fmaddps/v4fmaddss/v4fnmaddps/v4fnmaddss and
// vp4dpwssd/vp4dpwssds, then requires the generated code to match tbl
// byte for byte (tbl is listed in emission order).
CYBOZU_TEST_AUTO(vfmaddps)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);

			v4fmaddss(xmm15, xmm8, ptr [rax + 64]);

			v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);

			v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);

			vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);

			vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0xf2, 0x3f, 0x48, 0x9a, 0x4a, 0x04,
		0x62, 0x72, 0x3f, 0x08, 0x9b, 0x78, 0x04,
		0x62, 0xf2, 0x6f, 0x4d, 0xaa, 0x69, 0x08,
		0x62, 0x62, 0x6f, 0x08, 0xab, 0x7c, 0x24, 0x08,
		0x62, 0xe2, 0x77, 0xcf, 0x52, 0x78, 0x04,
		0x62, 0x72, 0x67, 0x4c, 0x53, 0x54, 0x84, 0x04,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vaesdec/vaesdeclast/vaesenc/vaesenclast with xmm, ymm and
// zmm operands; the generated code must equal tbl.
CYBOZU_TEST_AUTO(vaes)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vaesdec(xmm20, xmm30, ptr [rcx + 64]);
			vaesdec(ymm1, ymm2, ptr [rcx + 64]);
			vaesdec(zmm1, zmm2, ptr [rcx + 64]);

			vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
			vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
			vaesdeclast(zmm20, zmm30, ptr [rax + 64]);

			vaesenc(xmm20, xmm30, ptr [rcx + 64]);
			vaesenc(ymm1, ymm2, ptr [rcx + 64]);
			vaesenc(zmm1, zmm2, ptr [rcx + 64]);

			vaesenclast(xmm20, xmm30, ptr [rax + 64]);
			vaesenclast(ymm20, ymm30, ptr [rax + 64]);
			vaesenclast(zmm20, zmm30, ptr [rax + 64]);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04,
		0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40,
		0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01,

		0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04,
		0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02,
		0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01,

		0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04,
		0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40,
		0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01,

		0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04,
		0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02,
		0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vpclmulqdq with xmm/ymm/zmm destinations numbered 2 and
// 20; the generated code must equal tbl.
CYBOZU_TEST_AUTO(vpclmulqdq)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
			vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
			vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);

			vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
			vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
			vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
		0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
		0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,

		0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
		0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
		0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vpcompressb / vpcompressw, each with a memory destination
// and with a masked register destination; the generated code must equal tbl.
CYBOZU_TEST_AUTO(vpcompressb_w)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vpcompressb(ptr[rax + 64], xmm1);
			vpcompressb(xmm30 | k5, xmm1);
			vpcompressb(ptr[rax + 64], ymm1);
			vpcompressb(ymm30 | k3 |T_z, ymm1);
			vpcompressb(ptr[rax + 64], zmm1);
			vpcompressb(zmm30 | k2 |T_z, zmm1);

			vpcompressw(ptr[rax + 64], xmm1);
			vpcompressw(xmm30 | k5, xmm1);
			vpcompressw(ptr[rax + 64], ymm1);
			vpcompressw(ymm30 | k3 |T_z, ymm1);
			vpcompressw(ptr[rax + 64], zmm1);
			vpcompressw(zmm30 | k2 |T_z, zmm1);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
		0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
		0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
		0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
		0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
		0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,

		0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
		0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
		0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
		0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
		0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
		0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vpshld{w,d,q} (immediate count 5) and vpshldv{w,d,q},
// all with opmask k3 and T_z on xmm/ymm/zmm destinations; the generated code
// must equal tbl.
CYBOZU_TEST_AUTO(shld)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0xf3, 0xed, 0x8b, 0x70, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0xed, 0xab, 0x70, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0xed, 0xcb, 0x70, 0x68, 0x01, 0x05,

		0x62, 0xf3, 0x6d, 0x8b, 0x71, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0x6d, 0xab, 0x71, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0x6d, 0xcb, 0x71, 0x68, 0x01, 0x05,

		0x62, 0xf3, 0xed, 0x8b, 0x71, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0xed, 0xab, 0x71, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0xed, 0xcb, 0x71, 0x68, 0x01, 0x05,

		0x62, 0xf2, 0xed, 0x8b, 0x70, 0x68, 0x04,
		0x62, 0xf2, 0xed, 0xab, 0x70, 0x68, 0x02,
		0x62, 0xf2, 0xed, 0xcb, 0x70, 0x68, 0x01,

		0x62, 0xf2, 0x6d, 0x8b, 0x71, 0x68, 0x04,
		0x62, 0xf2, 0x6d, 0xab, 0x71, 0x68, 0x02,
		0x62, 0xf2, 0x6d, 0xcb, 0x71, 0x68, 0x01,

		0x62, 0xf2, 0xed, 0x8b, 0x71, 0x68, 0x04,
		0x62, 0xf2, 0xed, 0xab, 0x71, 0x68, 0x02,
		0x62, 0xf2, 0xed, 0xcb, 0x71, 0x68, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vpshrd{w,d,q} (immediate count 5) and vpshrdv{w,d,q} with
// ptr operands, followed by the d/q variants again with ptr_b operands; the
// generated code must equal tbl.
CYBOZU_TEST_AUTO(shrd)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			// same d/q instructions with ptr_b memory operands
			vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
			vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
			vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);

			vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
			vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
			vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);

			vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
			vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
			vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);

			vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
			vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
			vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05,

		0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05,

		0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05,

		0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04,
		0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02,
		0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01,

		0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04,
		0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02,
		0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01,

		0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
		0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
		0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,

		0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
		0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
		0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,

		0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
		0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
		0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,

		0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
		0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
		0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,

		0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
		0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
		0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vpopcnt{b,w,d,q}; the d and q variants are additionally
// emitted with ptr_b operands. The generated code must equal tbl.
CYBOZU_TEST_AUTO(vpopcnt)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
			vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
			vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);

			vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
			vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
			vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
		0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
		0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,

		0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
		0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
		0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,

		0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
		0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
		0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,

		0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
		0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
		0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,

		0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
		0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
		0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,

		0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
		0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
		0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vpdpbusd, vpdpbusds, vpdpwssd and vpdpwssds, each with
// ptr and ptr_b operands on xmm/ymm/zmm; the generated code must equal tbl.
CYBOZU_TEST_AUTO(vpdpbus)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
			vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
			vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);

			vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
			vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
			vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);

			vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
			vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
			vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);

			vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
			vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
			vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);

			vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
			vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
			vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);

			vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
			vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
			vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);

			vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
			vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
			vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);

			vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
			vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
			vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0xf2, 0x5d, 0x83, 0x50, 0x68, 0x04,
		0x62, 0xf2, 0x5d, 0xa3, 0x50, 0x68, 0x02,
		0x62, 0xf2, 0x5d, 0xc3, 0x50, 0x68, 0x01,

		0x62, 0xf2, 0x5d, 0x93, 0x50, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xb3, 0x50, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xd3, 0x50, 0x68, 0x10,

		0x62, 0xf2, 0x5d, 0x83, 0x51, 0x68, 0x04,
		0x62, 0xf2, 0x5d, 0xa3, 0x51, 0x68, 0x02,
		0x62, 0xf2, 0x5d, 0xc3, 0x51, 0x68, 0x01,

		0x62, 0xf2, 0x5d, 0x93, 0x51, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xb3, 0x51, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xd3, 0x51, 0x68, 0x10,

		0x62, 0xf2, 0x5d, 0x83, 0x52, 0x68, 0x04,
		0x62, 0xf2, 0x5d, 0xa3, 0x52, 0x68, 0x02,
		0x62, 0xf2, 0x5d, 0xc3, 0x52, 0x68, 0x01,

		0x62, 0xf2, 0x5d, 0x93, 0x52, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xb3, 0x52, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xd3, 0x52, 0x68, 0x10,

		0x62, 0xf2, 0x5d, 0x83, 0x53, 0x68, 0x04,
		0x62, 0xf2, 0x5d, 0xa3, 0x53, 0x68, 0x02,
		0x62, 0xf2, 0x5d, 0xc3, 0x53, 0x68, 0x01,

		0x62, 0xf2, 0x5d, 0x93, 0x53, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xb3, 0x53, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xd3, 0x53, 0x68, 0x10,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vpexpandb / vpexpandw (register and memory sources) and
// vpshufbitqmb (opmask destination k1 with opmask k2); the generated code
// must equal tbl.
CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vpexpandb(xmm5|k3|T_z, xmm30);
			vpexpandb(ymm5|k3|T_z, ymm30);
			vpexpandb(zmm5|k3|T_z, zmm30);
			vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpexpandw(xmm5|k3|T_z, xmm30);
			vpexpandw(ymm5|k3|T_z, ymm30);
			vpexpandw(zmm5|k3|T_z, zmm30);
			vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
			vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
			vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee,
		0x62, 0x92, 0x7d, 0xab, 0x62, 0xee,
		0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee,
		0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40,
		0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40,
		0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40,

		0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee,
		0x62, 0x92, 0xfd, 0xab, 0x62, 0xee,
		0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee,
		0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20,
		0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20,
		0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20,

		0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04,
		0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02,
		0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for the gf2p8affineinvqb / gf2p8affineqb / gf2p8mulb family:
// the unprefixed two-operand forms followed by the v-prefixed forms with
// xmm/ymm/zmm, opmask and (for the affine variants) ptr_b operands. The
// generated code must equal tbl.
CYBOZU_TEST_AUTO(gf2)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			///
			gf2p8affineinvqb(xmm1, xmm2, 3);
			gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);

			vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
			vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
			vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
			vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);

			vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
			vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
			vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);

			vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
			vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
			vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);

			vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
			vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
			vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
			///
			gf2p8affineqb(xmm1, xmm2, 3);
			gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);

			vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
			vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
			vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
			vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);

			vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
			vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
			vgf2p8affineqb(zmm30, zmm31, zmm4, 5);

			vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
			vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
			vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);

			vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
			vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
			vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
			///
			gf2p8mulb(xmm1, xmm2);
			gf2p8mulb(xmm1, ptr [rax + 0x40]);

			vgf2p8mulb(xmm1, xmm5, xmm2);
			vgf2p8mulb(ymm1, ymm5, ymm2);
			vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
			vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);

			vgf2p8mulb(xmm30, xmm31, xmm4);
			vgf2p8mulb(ymm30, ymm31, ymm4);
			vgf2p8mulb(zmm30, zmm31, zmm4);

			vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
			vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
			vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
		}
	} c;
	// expected machine code, grouped like the three sections above
	const uint8_t tbl[] = {
		0x66, 0x0f, 0x3a, 0xcf, 0xca, 0x03,
		0x66, 0x0f, 0x3a, 0xcf, 0x48, 0x40, 0x03,
		0xc4, 0xe3, 0xd1, 0xcf, 0xca, 0x03,
		0xc4, 0xe3, 0xd5, 0xcf, 0xca, 0x03,
		0xc4, 0xe3, 0xd1, 0xcf, 0x48, 0x40, 0x03,
		0xc4, 0xe3, 0xd5, 0xcf, 0x48, 0x40, 0x03,
		0x62, 0x63, 0x85, 0x00, 0xcf, 0xf4, 0x05,
		0x62, 0x63, 0x85, 0x20, 0xcf, 0xf4, 0x05,
		0x62, 0x63, 0x85, 0x40, 0xcf, 0xf4, 0x05,
		0x62, 0x63, 0xd5, 0x89, 0xcf, 0x70, 0x04, 0x05,
		0x62, 0x63, 0xd5, 0xa9, 0xcf, 0x70, 0x02, 0x05,
		0x62, 0x63, 0xd5, 0xc9, 0xcf, 0x70, 0x01, 0x05,
		0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
		0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
		0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,

		0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
		0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
		0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
		0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
		0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
		0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
		0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
		0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
		0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
		0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
		0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
		0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
		0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
		0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
		0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,

		0x66, 0x0f, 0x38, 0xcf, 0xca,
		0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40,
		0xc4, 0xe2, 0x51, 0xcf, 0xca,
		0xc4, 0xe2, 0x55, 0xcf, 0xca,
		0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40,
		0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40,
		0x62, 0x62, 0x05, 0x00, 0xcf, 0xf4,
		0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4,
		0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4,
		0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04,
		0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02,
		0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for vcvtne2ps2bf16, vcvtneps2bf16 (with explicit
// xword/yword/zword and plain ptr sources) and vdpbf16ps; the generated code
// must equal tbl.
CYBOZU_TEST_AUTO(bf16)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]);
			vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]);
			vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]);

			vcvtneps2bf16(xmm0, xword [rax + 64]);
			vcvtneps2bf16(xmm0 | k1, yword [rax + 64]);
			vcvtneps2bf16(ymm0 | k1, zword [rax + 64]);
			// the expected bytes for this ptr form equal those of the zword form above
			vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]);

			vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]);
			vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]);
			vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]);
		}
	} c;
	// expected machine code
	const uint8_t tbl[] = {
		0x62, 0xf2, 0x77, 0x09, 0x72, 0x40, 0x04,
		0x62, 0xf2, 0x7f, 0xa9, 0x72, 0x40, 0x02,
		0x62, 0xf2, 0x77, 0x49, 0x72, 0x40, 0x01,

		0x62, 0xf2, 0x7e, 0x08, 0x72, 0x40, 0x04,
		0x62, 0xf2, 0x7e, 0x29, 0x72, 0x40, 0x02,
		0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01,
		0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01,

		0x62, 0xf2, 0x76, 0x09, 0x52, 0x40, 0x04,
		0x62, 0xf2, 0x76, 0x29, 0x52, 0x40, 0x02,
		0x62, 0xf2, 0x76, 0x49, 0x52, 0x40, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// Encoding test for the AMX tile instructions (tile config load/store, tile
// load/store/zero/release and the tdp* dot products); the generated code must
// equal tbl.
CYBOZU_TEST_AUTO(AMX)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			ldtilecfg(ptr[rax + rcx * 4 + 64]);
			sttilecfg(ptr[rsp + rax * 8 + 128]);
			tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
			tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
			tilerelease();
			tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
			tilezero(tmm7);
			tdpbssd(tmm1, tmm2, tmm3);
			tdpbsud(tmm2, tmm3, tmm4);
			tdpbusd(tmm3, tmm4, tmm5);
			tdpbuud(tmm4, tmm5, tmm6);
			tdpbf16ps(tmm5, tmm6, tmm7);
		}
	} c;
	// generated code by patch
	const uint8_t tbl[] = {
		0xc4, 0xe2, 0x78, 0x49, 0x44, 0x88, 0x40,
		0xc4, 0xe2, 0x79, 0x49, 0x84, 0xc4, 0x80, 0x00, 0x00, 0x00,
		0xc4, 0xe2, 0x7b, 0x4b, 0x5c, 0x57, 0x08,
		0xc4, 0x82, 0x79, 0x4b, 0x64, 0x08, 0x20,
		0xc4, 0xe2, 0x78, 0x49, 0xc0,
		0xc4, 0x82, 0x7a, 0x4b, 0x54, 0x5a, 0x20,
		0xc4, 0xe2, 0x7b, 0x49, 0xf8,
		0xc4, 0xe2, 0x63, 0x5e, 0xca,
		0xc4, 0xe2, 0x5a, 0x5e, 0xd3,
		0xc4, 0xe2, 0x51, 0x5e, 0xdc,
		0xc4, 0xe2, 0x48, 0x5e, 0xe5,
		0xc4, 0xe2, 0x42, 0x5c, 0xee,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

// tileloadd address forms: the constructor emits three base+index addresses
// whose bytes are compared with tbl; notSupported() (base register only) and
// notSupported2() (scaled index only) are expected to throw.
CYBOZU_TEST_AUTO(tileloadd)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			tileloadd(tmm1, ptr[r8+r8]);
			tileloadd(tmm1, ptr[rax+rcx*4]);
			tileloadd(tmm1, ptr[r8+r9*1+0x40]);
		}
		// address with a base register only
		void notSupported()
		{
			tileloadd(tmm1, ptr[r8]);
		}
		// address with a scaled index only
		void notSupported2()
		{
			tileloadd(tmm1, ptr[r8*2]);
		}
	} c;
	// expected machine code for the three constructor calls
	const uint8_t tbl[] = {
		0xC4, 0x82, 0x7B, 0x4B, 0x0C, 0x00,
		0xC4, 0xE2, 0x7B, 0x4B, 0x0C, 0x88,
		0xC4, 0x82, 0x7B, 0x4B, 0x4C, 0x08, 0x40,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);

	// current version does not support this sibmem format
	CYBOZU_TEST_EXCEPTION(c.notSupported(), std::exception);
	CYBOZU_TEST_EXCEPTION(c.notSupported2(), std::exception);
}

// Encoding selection for vpdpbusd: the default, an explicit per-call
// EvexEncoding / VexEncoding argument, and setDefaultEncoding(). The five
// emitted instructions must equal tbl; badVex() (xm31 with VexEncoding) is
// expected to throw.
CYBOZU_TEST_AUTO(vnni)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			// default encoding is EVEX
			vpdpbusd(xm0, xm1, xm2);
			vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX
			vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX
			setDefaultEncoding(VexEncoding);
			vpdpbusd(xm0, xm1, xm2); // VEX
			setDefaultEncoding(EvexEncoding);
			vpdpbusd(xm0, xm1, xm2); // EVEX
		}
		// VexEncoding requested together with xm31
		void badVex()
		{
			vpdpbusd(xm0, xm1, xm31, VexEncoding);
		}
	} c;
	// expected machine code: one row per vpdpbusd call in the constructor
	const uint8_t tbl[] = {
		0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
		0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
		0xC4, 0xE2, 0x71, 0x50, 0xC2,
		0xC4, 0xE2, 0x71, 0x50, 0xC2,
		0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
	};
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);

	CYBOZU_TEST_EXCEPTION(c.badVex(), std::exception);
}

CYBOZU_TEST_AUTO(vaddph) { struct Code : Xbyak::CodeGenerator { Code() { vaddph(zmm0, zmm1, ptr[rax+64]); vaddph(ymm0, ymm1, ptr[rax+64]); vaddph(xmm0, xmm1, ptr[rax+64]); vaddph(zmm0, zmm1, ptr_b[rax+64]); vaddph(ymm0, ymm1, ptr_b[rax+64]); vaddph(xmm0, xmm1, ptr_b[rax+64]); vaddsh(xmm0, xmm15, ptr[rax+64]); vaddsh(xmm0|k5|T_z|T_rd_sae, xmm15, xmm3); vcmpph(k1, xm15, ptr[rax+64], 1); vcmpph(k2, ym15, ptr[rax+64], 2); vcmpph(k3, zm15, ptr[rax+64], 3); vcmpph(k1, xm15, ptr_b[rax+64], 1); vcmpph(k2, ym15, ptr_b[rax+64], 2); vcmpph(k3, zm15, ptr_b[rax+64], 3); vcmpsh(k1, xm15, ptr[rax+64], 1); vcmpsh(k3|k5, xmm1, xmm25|T_sae, 4); vcomish(xmm1, ptr[rax+64]); vcomish(xmm1|T_sae, xmm15); vucomish(xmm1, ptr [rax+0x40]); vucomish(xmm1|T_sae, xmm15); vfmaddsub213ph(xmm1, xmm2, ptr [rax+0x40]); vfmaddsub213ph(xmm1, xmm2, ptr_b [rax+0x40]); vfmaddsub213ph(xmm1|k3, xmm2, xmm5); vfmaddsub213ph(ymm1, ymm2, ptr [rax+0x40]); vfmaddsub213ph(ymm1, ymm2, ptr_b[rax+0x40]); vfmaddsub213ph(ymm1|k3, ymm2, ymm5); vfmaddsub213ph(zmm1, zmm2, ptr [rax+0x40]); 
vfmaddsub213ph(zmm1, zmm2, ptr_b [rax+0x40]); vfmaddsub213ph(zmm1|T_ru_sae, zmm2, zmm5); vfmsubadd132ph(xmm1, xmm2, ptr [rax+0x40]); vfmsubadd132ph(xmm1, xmm2, ptr_b [rax+0x40]); vfmsubadd132ph(ymm1, ymm2, ptr [rax+0x40]); vfmsubadd132ph(ymm1, ymm2, ptr_b [rax+0x40]); vfmsubadd132ph(zmm1, zmm2, ptr [rax+0x40]); vfmsubadd132ph(zmm1, zmm2, ptr_b [rax+0x40]); vfmsubadd132ph(zmm1|T_ru_sae, zmm2, zmm5); vfmadd132ph(xmm1, xmm2, ptr [rax+0x40]); vfmadd132ph(xmm1, xmm2, ptr_b [rax+0x40]); vfmadd132ph(ymm1, ymm2, ptr [rax+0x40]); vfmadd132ph(ymm1, ymm2, ptr_b [rax+0x40]); vfmadd132ph(zmm1, zmm2, ptr [rax+0x40]); vfmadd132ph(zmm1, zmm2, ptr_b [rax+0x40]); vfmadd132ph(zmm1|T_rd_sae, zmm2, zmm5); vfmsub231ph(xmm1, xmm2, ptr [rax+0x40]); vfmsub231ph(xmm1, xmm2, ptr_b [rax+0x40]); vfmsub231ph(ymm1, ymm2, ptr [rax+0x40]); vfmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]); vfmsub231ph(zmm1, zmm2, ptr [rax+0x40]); vfmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]); vfmsub231ph(zmm1|T_rd_sae, zmm2, zmm5); vfnmsub231ph(xmm1, xmm2, ptr [rax+0x40]); vfnmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]); vfnmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]); vfnmsub231ph(zmm1|T_rd_sae, zmm2, zmm5); vfmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3); vfmadd132sh(xmm1, xmm2, ptr [rax+0x40]); vfnmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3); vfnmadd132sh(xmm1, xmm2, ptr [rax+0x40]); vfmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3); vfmsub132sh(xmm1, xmm2, ptr [rax+0x40]); vfnmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3); vfnmsub132sh(xmm1, xmm2, ptr [rax+0x40]); vfcmaddcph(xmm1|k1|T_z, xmm2, ptr [rax+0x40]); vfcmaddcph(ymm1|k1|T_z, ymm2, ptr [rax+0x40]); vfcmaddcph(zmm1|k1, zmm2, ptr [rax+0x40]); vfcmaddcph(zmm1|k1|T_rd_sae, zmm2, zmm5); vfcmaddcph(xmm1|k1|T_z, xmm2, ptr_b [rax+0x40]); vfcmaddcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]); vfcmaddcph(zmm1|k1|T_z, zmm2, ptr_b [rax+0x40]); vfmaddcph(xm1, xm2, ptr[rax+0x40]); vfmaddcph(ym1|k1|T_z, ym2, ptr_b[rax+0x40]); vfmaddcph(zm1, zm2, ptr_b[rax+0x40]); vfcmulcph(xmm1, xmm2, ptr [rax+0x40]); 
vfcmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]); vfcmulcph(zmm1, zmm2, ptr_b [rax+0x40]); vfmulcph(xmm1, xmm2, ptr [rax+0x40]); vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]); vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]); vrcpph(xmm1, ptr [rax+0x40]); vrcpph(xmm1, ptr_b [rax+0x40]); vrcpph(ymm1, ptr [rax+0x40]); vrcpph(ymm1, ptr_b [rax+0x40]); vrcpph(zmm1, ptr [rax+0x40]); vrcpph(zmm1, ptr_b [rax+0x40]); vrcpsh(xmm1, xmm3, ptr [rax+0x40]); vrsqrtph(xmm1, ptr [rax+0x40]); vrsqrtph(xmm1, ptr_b [rax+0x40]); vrsqrtph(ymm2, ptr [rax+0x40]); vrsqrtph(ymm2, ptr_b [rax+0x40]); vrsqrtph(zmm2, ptr [rax+0x40]); vrsqrtph(zmm2, ptr_b [rax+0x40]); vrsqrtsh(xmm1|k5|T_z, xmm7, ptr [rax+0x40]); vsqrtph(xmm1|k4|T_z, ptr [rax+0x40]); vsqrtph(xmm1|k4|T_z, ptr_b [rax+0x40]); vsqrtph(ymm1|k4|T_z, ptr_b [rax+0x40]); vsqrtph(zmm1|k4|T_z, ptr [rax+0x40]); vsqrtph(zmm1|k4|T_z, ptr_b [rax+0x40]); vsqrtsh(xmm1|k4|T_z, xmm5, ptr [rax+0x40]); vsqrtsh(xmm1|k4|T_z|T_rd_sae, xmm5, xmm7); vscalefph(xmm1, xmm5, ptr [rax+0x40]); vscalefph(xmm1, xmm5, ptr_b [rax+0x40]); vscalefph(ymm1, ymm5, ptr [rax+0x40]); vscalefph(ymm1, ymm5, ptr_b [rax+0x40]); vscalefph(zmm1, zmm5, ptr [rax+0x40]); vscalefph(zmm1, zmm5, ptr_b [rax+0x40]); vscalefph(zmm1|k1|T_z|T_rd_sae, zmm5, zmm7); vscalefsh(xmm1, xmm5, ptr [rax+0x40]); vscalefsh(xmm1|k1|T_z|T_rd_sae, xmm5, xmm7); vreduceph(xmm1, ptr [rax+0x40], 0x1); vreduceph(xmm1, ptr_b [rax+0x40], 0x2); vreduceph(ymm1, ptr [rax+0x40], 0x3); vreduceph(ymm1, ptr_b [rax+0x40], 0x4); vreduceph(zmm1, ptr [rax+0x40], 0x5); vreduceph(zmm1, ptr_b [rax+0x40], 0x6); vreduceph(zmm1|k1|T_z|T_sae, zmm5, 0x7); vreducesh(xmm1, xmm3, ptr [rax+0x40], 0x1); vreducesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2); vrndscaleph(xmm1, ptr [rax+0x40], 0x1); vrndscaleph(xmm1, ptr_b [rax+0x40], 0x2); vrndscaleph(ymm1, ptr [rax+0x40], 0x3); vrndscaleph(ymm1, ptr_b [rax+0x40], 0x4); vrndscaleph(zmm1, ptr [rax+0x40], 0x5); vrndscaleph(zmm1, ptr_b [rax+0x40], 0x6); vrndscaleph(zmm1|k1|T_z|T_sae, zmm5, 0x7); 
vrndscalesh(xmm1, xmm3, ptr [rax+0x40], 0x1); vrndscalesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2); vfpclassph(k1, xword [rax+0x40], 0x1); vfpclassph(k1, xword_b[rax+0x40], 0x2); vfpclassph(k1, yword [rax+0x40], 0x3); vfpclassph(k1, yword_b[rax+0x40], 0x4); vfpclassph(k1, zword [rax+0x40], 0x5); vfpclassph(k1, zword_b[rax+0x40], 0x6); vfpclasssh(k1|k2, xmm3, 0x5); vfpclasssh(k1|k2, ptr [rax+0x40], 0x5); vgetexpph(xmm1, ptr [rax+0x40]); vgetexpph(ymm1, ptr_b [rax+0x40]); vgetexpph(zmm1, ptr [rax+0x40]); vgetexpph(zmm1|k1|T_z|T_sae, zmm5); vgetexpsh(xmm1, xmm5, ptr [rax+0x40]); vgetexpsh(xmm1|k1|T_z|T_sae, xmm3, xmm5); vgetmantph(xmm1, ptr [rax+0x40], 0x1); vgetmantph(ymm1, ptr_b [rax+0x40], 0x2); vgetmantph(zmm1, ptr [rax+0x40], 0x3); vgetmantph(zmm1|k1|T_z|T_sae, zmm5, 0x4); vgetmantsh(xmm1, xmm5, ptr [rax+0x40], 0x5); vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6); vmovsh(xmm1|k1|T_z, ptr [rax+0x40]); vmovsh(ptr [rax+0x40]|k1, xmm1); vmovsh(xmm1|k2|T_z, xmm3, xmm5); vmovw(xmm1, r13d); vmovw(xmm3, ptr [rax+0x40]); vmovw(r9d, xmm1); vmovw(ptr [rax+0x40], xmm7); vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3); vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]); vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3); vcvtsh2sd(xmm1, xmm2, ptr [rax+0x40]); vcvtsh2ss(xmm1|k1|T_z|T_sae, xmm2, xmm3); vcvtsh2ss(xmm1, xmm2, ptr [rax+0x40]); vcvtss2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3); vcvtss2sh(xmm1, xmm2, ptr [rax+0x40]); vcvtsh2si(edx|T_rd_sae, xmm1); vcvtsh2si(edx, ptr [rax+0x40]); vcvtsh2si(rdx|T_rd_sae, xmm1); vcvtsh2si(r8, ptr [rax+0x40]); vcvtph2dq(xmm1, xmm5); vcvtph2dq(xmm1, ptr [rax+0x40]); vcvtph2dq(xmm1, ptr_b [rax+0x40]); vcvtph2dq(ymm1|k2|T_z, xmm5); vcvtph2dq(ymm1, ptr [rax+0x40]); vcvtph2dq(ymm1, ptr_b [rax+0x40]); vcvtph2dq(zmm1|k5|T_z|T_rd_sae, ymm3); vcvtph2dq(zmm1|k5|T_z, ptr [rax+0x40]); vcvtph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvtph2psx(xmm1, xmm5); vcvtph2psx(xmm1, ptr [rax+0x40]); vcvtph2psx(xmm1, ptr_b [rax+0x40]); vcvtph2psx(ymm1|k2|T_z, xmm5); vcvtph2psx(ymm1, ptr [rax+0x40]); 
vcvtph2psx(ymm1, ptr_b [rax+0x40]); vcvtph2psx(zmm1|k5|T_z|T_sae, ymm3); vcvtph2psx(zmm1|k5|T_z, ptr [rax+0x40]); vcvtph2psx(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvtph2udq(xmm1, xmm5); vcvtph2udq(xmm1, ptr [rax+0x40]); vcvtph2udq(xmm1, ptr_b [rax+0x40]); vcvtph2udq(ymm1|k2|T_z, xmm5); vcvtph2udq(ymm1, ptr [rax+0x40]); vcvtph2udq(ymm1, ptr_b [rax+0x40]); vcvtph2udq(zmm1|k5|T_z|T_rd_sae, ymm3); vcvtph2udq(zmm1|k5|T_z, ptr [rax+0x40]); vcvtph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvttph2dq(xmm1, xmm5); vcvttph2dq(xmm1, ptr [rax+0x40]); vcvttph2dq(xmm1, ptr_b [rax+0x40]); vcvttph2dq(ymm1|k2|T_z, xmm5); vcvttph2dq(ymm1, ptr [rax+0x40]); vcvttph2dq(ymm1, ptr_b [rax+0x40]); vcvttph2dq(zmm1|k5|T_z|T_sae, ymm3); vcvttph2dq(zmm1|k5|T_z, ptr [rax+0x40]); vcvttph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvttph2udq(xmm1, xmm5); vcvttph2udq(xmm1, ptr [rax+0x40]); vcvttph2udq(xmm1, ptr_b [rax+0x40]); vcvttph2udq(ymm1|k2|T_z, xmm5); vcvttph2udq(ymm1, ptr [rax+0x40]); vcvttph2udq(ymm1, ptr_b [rax+0x40]); vcvttph2udq(zmm1|k5|T_z|T_sae, ymm3); vcvttph2udq(zmm1|k5|T_z, ptr [rax+0x40]); vcvttph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvtph2pd(xmm1, xmm5); vcvtph2pd(xmm1, ptr [rax+0x40]); vcvtph2pd(xmm1, ptr_b [rax+0x40]); vcvtph2pd(ymm1|k2|T_z, xmm5); vcvtph2pd(ymm1, ptr [rax+0x40]); vcvtph2pd(ymm1, ptr_b [rax+0x40]); vcvtph2pd(zmm1|k5|T_z|T_sae, xmm3); vcvtph2pd(zmm1|k5|T_z, ptr [rax+0x40]); vcvtph2pd(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvtph2qq(xmm1, xmm5); vcvtph2qq(xmm1, ptr [rax+0x40]); vcvtph2qq(xmm1, ptr_b [rax+0x40]); vcvtph2qq(ymm1|k2|T_z, xmm5); vcvtph2qq(ymm1, ptr [rax+0x40]); vcvtph2qq(ymm1, ptr_b [rax+0x40]); vcvtph2qq(zmm1|k5|T_z|T_rd_sae, xmm3); vcvtph2qq(zmm1|k5|T_z, ptr [rax+0x40]); vcvtph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvtph2uqq(xmm1, xmm5); vcvtph2uqq(xmm1, ptr [rax+0x40]); vcvtph2uqq(xmm1, ptr_b [rax+0x40]); vcvtph2uqq(ymm1|k2|T_z, xmm5); vcvtph2uqq(ymm1, ptr [rax+0x40]); vcvtph2uqq(ymm1, ptr_b [rax+0x40]); vcvtph2uqq(zmm1|k5|T_z|T_rd_sae, xmm3); vcvtph2uqq(zmm1|k5|T_z, ptr 
[rax+0x40]); vcvtph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvttph2uqq(xmm1, xmm5); vcvttph2uqq(xmm1, ptr [rax+0x40]); vcvttph2uqq(xmm1, ptr_b [rax+0x40]); vcvttph2uqq(ymm1|k2|T_z, xmm5); vcvttph2uqq(ymm1, ptr [rax+0x40]); vcvttph2uqq(ymm1, ptr_b [rax+0x40]); vcvttph2uqq(zmm1|k5|T_z|T_sae, xmm3); vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]); vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvtdq2ph(xmm1, xmm5); vcvtdq2ph(xmm1, xword [rax+0x40]); vcvtdq2ph(xmm1, xword_b [rax+0x40]); vcvtdq2ph(xmm1, yword [rax+0x40]); vcvtdq2ph(xmm1, yword_b [rax+0x40]); vcvtdq2ph(ymm1|k2|T_z|T_rd_sae, zmm5); vcvtdq2ph(ymm1, ptr [rax+0x40]); vcvtdq2ph(ymm1, ptr_b [rax+0x40]); vcvtps2phx(xmm1, xmm5); vcvtps2phx(xmm1, xword [rax+0x40]); vcvtps2phx(xmm1, xword_b [rax+0x40]); vcvtps2phx(xmm1, yword [rax+0x40]); vcvtps2phx(xmm1, yword_b [rax+0x40]); vcvtps2phx(ymm1|k2|T_z|T_rd_sae, zmm5); vcvtps2phx(ymm1, ptr [rax+0x40]); vcvtps2phx(ymm1, ptr_b [rax+0x40]); vcvtudq2ph(xmm1, xmm5); vcvtudq2ph(xmm1, xword [rax+0x40]); vcvtudq2ph(xmm1, xword_b [rax+0x40]); vcvtudq2ph(xmm1, yword [rax+0x40]); vcvtudq2ph(xmm1, yword_b [rax+0x40]); vcvtudq2ph(ymm1|k2|T_z|T_rd_sae, zmm5); vcvtudq2ph(ymm1, ptr [rax+0x40]); vcvtudq2ph(ymm1, ptr_b [rax+0x40]); vcvtpd2ph(xmm1, xmm5); vcvtpd2ph(xmm1, ymm5); vcvtpd2ph(xmm1|k2|T_z|T_rd_sae, zmm5); vcvtpd2ph(xmm1, xword [rax+0x40]); vcvtpd2ph(xmm1, xword_b [rax+0x40]); vcvtpd2ph(xmm1, yword [rax+0x40]); vcvtpd2ph(xmm1, yword_b [rax+0x40]); vcvtpd2ph(xmm1, zword [rax+0x40]); vcvtpd2ph(xmm1, zword_b [rax+0x40]); vcvtqq2ph(xmm1, xmm5); vcvtqq2ph(xmm1, ymm5); vcvtqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5); vcvtqq2ph(xmm1, xword [rax+0x40]); vcvtqq2ph(xmm1, xword_b [rax+0x40]); vcvtqq2ph(xmm1, yword [rax+0x40]); vcvtqq2ph(xmm1, yword_b [rax+0x40]); vcvtqq2ph(xmm1, zword [rax+0x40]); vcvtqq2ph(xmm1, zword_b [rax+0x40]); vcvtuqq2ph(xmm1, xmm5); vcvtuqq2ph(xmm1, ymm5); vcvtuqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5); vcvtuqq2ph(xmm1, xword [rax+0x40]); vcvtuqq2ph(xmm1, xword_b [rax+0x40]); vcvtuqq2ph(xmm1, 
yword [rax+0x40]); vcvtuqq2ph(xmm1, yword_b [rax+0x40]); vcvtuqq2ph(xmm1, zword [rax+0x40]); vcvtuqq2ph(xmm1, zword_b [rax+0x40]); vcvtph2uw(xmm1, xmm5); vcvtph2uw(xmm1, ptr [rax+0x40]); vcvtph2uw(xmm1, ptr_b [rax+0x40]); vcvtph2uw(ymm1, ptr [rax+0x40]); vcvtph2uw(ymm1, ptr_b [rax+0x40]); vcvtph2uw(zmm1|k2|T_z|T_rd_sae, zmm5); vcvtph2uw(zmm1, ptr [rax+0x40]); vcvtph2uw(zmm1, ptr_b [rax+0x40]); vcvtph2w(xmm1, xmm5); vcvtph2w(xmm1, ptr [rax+0x40]); vcvtph2w(xmm1, ptr_b [rax+0x40]); vcvtph2w(ymm1, ptr [rax+0x40]); vcvtph2w(ymm1, ptr_b [rax+0x40]); vcvtph2w(zmm1|k2|T_z|T_rd_sae, zmm5); vcvtph2w(zmm1, ptr [rax+0x40]); vcvtph2w(zmm1, ptr_b [rax+0x40]); vcvttph2uw(xmm1, xmm5); vcvttph2uw(xmm1, ptr [rax+0x40]); vcvttph2uw(xmm1, ptr_b [rax+0x40]); vcvttph2uw(ymm1, ptr [rax+0x40]); vcvttph2uw(ymm1, ptr_b [rax+0x40]); vcvttph2uw(zmm1|k2|T_z|T_sae, zmm5); vcvttph2uw(zmm1, ptr [rax+0x40]); vcvttph2uw(zmm1, ptr_b [rax+0x40]); vcvttph2w(xmm1, xmm5); vcvttph2w(xmm1, ptr [rax+0x40]); vcvttph2w(xmm1, ptr_b [rax+0x40]); vcvttph2w(ymm1, ptr [rax+0x40]); vcvttph2w(ymm1, ptr_b [rax+0x40]); vcvttph2w(zmm1|k2|T_z|T_sae, zmm5); vcvttph2w(zmm1, ptr [rax+0x40]); vcvttph2w(zmm1, ptr_b [rax+0x40]); vcvtuw2ph(xmm1, xmm5); vcvtuw2ph(xmm1, ptr [rax+0x40]); vcvtuw2ph(xmm1, ptr_b [rax+0x40]); vcvtuw2ph(ymm1, ptr [rax+0x40]); vcvtuw2ph(ymm1, ptr_b [rax+0x40]); vcvtuw2ph(zmm1|k2|T_z|T_rd_sae, zmm5); vcvtuw2ph(zmm1, ptr [rax+0x40]); vcvtuw2ph(zmm1, ptr_b [rax+0x40]); vcvtw2ph(xmm1, xmm5); vcvtw2ph(xmm1, ptr [rax+0x40]); vcvtw2ph(xmm1, ptr_b [rax+0x40]); vcvtw2ph(ymm1, ptr [rax+0x40]); vcvtw2ph(ymm1, ptr_b [rax+0x40]); vcvtw2ph(zmm1|k2|T_z|T_rd_sae, zmm5); vcvtw2ph(zmm1, ptr [rax+0x40]); vcvtw2ph(zmm1, ptr_b [rax+0x40]); vcvtps2ph(xmm1, xmm2, 0x1); vcvtps2ph(ptr [rax+0x40], xmm2, 0x2); vcvtps2ph(xmm1, ymm2, 0x3); vcvtps2ph(ptr [rax+0x40], ymm2, 0x4); vcvtps2ph(xmm1|k1|T_z, xmm2, 0x5); vcvtps2ph(ptr [rax+0x40]|k1, xmm3, 0x6); vcvtps2ph(xmm1|k2, ymm4, 0x7); vcvtps2ph(ptr [rax+0x40]|k2, ymm5, 0x8); 
vcvtps2ph(ymm1|k2|T_sae, zmm5, 0x9); vcvtps2ph(ptr [rax+0x40]|k5, zmm4, 0xa); vcvtsh2usi(ecx|T_rd_sae, xmm1); vcvtsh2usi(eax, ptr [rax+0x40]); vcvtsh2usi(r9|T_rd_sae, xmm1); vcvtsh2usi(r13, ptr [rax+0x40]); vcvttsh2si(ecx|T_sae, xmm1); vcvttsh2si(eax, ptr [rax+0x40]); vcvttsh2si(r9|T_sae, xmm1); vcvttsh2si(r13, ptr [rax+0x40]); vcvttsh2usi(ecx|T_sae, xmm1); vcvttsh2usi(eax, ptr [rax+0x40]); vcvttsh2usi(r9|T_sae, xmm1); vcvttsh2usi(r13, ptr [rax+0x40]); vcvttph2qq(xmm1, xmm5); vcvttph2qq(xmm1, ptr [rax+0x40]); vcvttph2qq(xmm1, ptr_b [rax+0x40]); vcvttph2qq(ymm1|k2|T_z, xmm5); vcvttph2qq(ymm1, ptr [rax+0x40]); vcvttph2qq(ymm1, ptr_b [rax+0x40]); vcvttph2qq(zmm1|k5|T_z|T_sae, xmm3); vcvttph2qq(zmm1|k5|T_z, ptr [rax+0x40]); vcvttph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]); vcvtsi2sh(xmm1|T_rd_sae, xmm2, eax); vcvtsi2sh(xmm1, xmm2, dword [rax+0x40]); vcvtsi2sh(xmm1|T_rd_sae, xmm2, r9); vcvtsi2sh(xmm1, xmm2, qword [rax+0x40]); vcvtusi2sh(xmm1|T_rd_sae, xmm2, eax); vcvtusi2sh(xmm1, xmm2, dword [rax+0x40]); vcvtusi2sh(xmm1|T_rd_sae, xmm2, r9); vcvtusi2sh(xmm1, xmm2, qword [rax+0x40]); } } c; const uint8_t tbl[] = { // vaddph 0x62, 0xF5, 0x74, 0x48, 0x58, 0x40, 0x01, 0x62, 0xF5, 0x74, 0x28, 0x58, 0x40, 0x02, 0x62, 0xF5, 0x74, 0x08, 0x58, 0x40, 0x04, 0x62, 0xF5, 0x74, 0x58, 0x58, 0x40, 0x20, 0x62, 0xF5, 0x74, 0x38, 0x58, 0x40, 0x20, 0x62, 0xF5, 0x74, 0x18, 0x58, 0x40, 0x20, // vaddsh 0x62, 0xF5, 0x06, 0x08, 0x58, 0x40, 0x20, 0x62, 0xF5, 0x06, 0xBD, 0x58, 0xC3, // vcmpph 0x62, 0xf3, 0x04, 0x08, 0xc2, 0x48, 0x04, 0x01, 0x62, 0xf3, 0x04, 0x28, 0xc2, 0x50, 0x02, 0x02, 0x62, 0xf3, 0x04, 0x48, 0xc2, 0x58, 0x01, 0x03, 0x62, 0xf3, 0x04, 0x18, 0xc2, 0x48, 0x20, 0x01, 0x62, 0xf3, 0x04, 0x38, 0xc2, 0x50, 0x20, 0x02, 0x62, 0xf3, 0x04, 0x58, 0xc2, 0x58, 0x20, 0x03, // vcmpsh 0x62, 0xf3, 0x06, 0x08, 0xc2, 0x48, 0x20, 0x01, 0x62, 0x93, 0x76, 0x1d, 0xc2, 0xd9, 0x04, // vcomish 0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x48, 0x20, 0x62, 0xd5, 0x7c, 0x18, 0x2f, 0xcf, // vucomish 0x62, 0xf5, 0x7c, 0x08, 0x2e, 
0x48, 0x20, 0x62, 0xd5, 0x7c, 0x18, 0x2e, 0xcf, // vfmaddsub213ph 0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x48, 0x04, 0x62, 0xf6, 0x6d, 0x18, 0xa6, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x0b, 0xa6, 0xcd, 0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x48, 0x02, 0x62, 0xf6, 0x6d, 0x38, 0xa6, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x2b, 0xa6, 0xcd, 0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x48, 0x01, 0x62, 0xf6, 0x6d, 0x58, 0xa6, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x58, 0xa6, 0xcd, // vfmsubadd132ph 0x62, 0xf6, 0x6d, 0x08, 0x97, 0x48, 0x04, 0x62, 0xf6, 0x6d, 0x18, 0x97, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x28, 0x97, 0x48, 0x02, 0x62, 0xf6, 0x6d, 0x38, 0x97, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x48, 0x97, 0x48, 0x01, 0x62, 0xf6, 0x6d, 0x58, 0x97, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x58, 0x97, 0xcd, // vfmadd132ph 0x62, 0xf6, 0x6d, 0x08, 0x98, 0x48, 0x04, 0x62, 0xf6, 0x6d, 0x18, 0x98, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x28, 0x98, 0x48, 0x02, 0x62, 0xf6, 0x6d, 0x38, 0x98, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x48, 0x98, 0x48, 0x01, 0x62, 0xf6, 0x6d, 0x58, 0x98, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x38, 0x98, 0xcd, // vfmsub231ph 0x62, 0xf6, 0x6d, 0x08, 0xba, 0x48, 0x04, 0x62, 0xf6, 0x6d, 0x18, 0xba, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x28, 0xba, 0x48, 0x02, 0x62, 0xf6, 0x6d, 0x38, 0xba, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x48, 0xba, 0x48, 0x01, 0x62, 0xf6, 0x6d, 0x58, 0xba, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x38, 0xba, 0xcd, // vfnmsub231ph 0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x48, 0x04, 0x62, 0xf6, 0x6d, 0x38, 0xbe, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x58, 0xbe, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x38, 0xbe, 0xcd, // vfmadd132sh 0x62, 0xf6, 0x6d, 0xb9, 0x99, 0xcb, 0x62, 0xf6, 0x6d, 0x08, 0x99, 0x48, 0x20, // vfnmadd132sh 0x62, 0xf6, 0x6d, 0xb9, 0x9d, 0xcb, 0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x48, 0x20, // vfmsub132sh 0x62, 0xf6, 0x6d, 0xb9, 0x9b, 0xcb, 0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x48, 0x20, // vfnmsub132sh 0x62, 0xf6, 0x6d, 0xb9, 0x9f, 0xcb, 0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x48, 0x20, // vfcmaddcph 0x62, 0xf6, 0x6f, 0x89, 0x56, 0x48, 0x04, 0x62, 0xf6, 0x6f, 0xa9, 0x56, 0x48, 0x02, 0x62, 0xf6, 
0x6f, 0x49, 0x56, 0x48, 0x01, 0x62, 0xf6, 0x6f, 0x39, 0x56, 0xcd, 0x62, 0xf6, 0x6f, 0x99, 0x56, 0x48, 0x10, 0x62, 0xf6, 0x6f, 0xb9, 0x56, 0x48, 0x10, 0x62, 0xf6, 0x6f, 0xd9, 0x56, 0x48, 0x10, // vfmaddcph 0x62, 0xf6, 0x6e, 0x08, 0x56, 0x48, 0x04, 0x62, 0xf6, 0x6e, 0xb9, 0x56, 0x48, 0x10, 0x62, 0xf6, 0x6e, 0x58, 0x56, 0x48, 0x10, // vfcmulcph 0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x48, 0x04, 0x62, 0xf6, 0x6f, 0xb9, 0xd6, 0x48, 0x10, 0x62, 0xf6, 0x6f, 0x58, 0xd6, 0x48, 0x10, // vfmulcph 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x48, 0x04, 0x62, 0xf6, 0x6e, 0xb9, 0xd6, 0x48, 0x10, 0x62, 0xf6, 0x6e, 0x58, 0xd6, 0x48, 0x10, // vrcpph 0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x48, 0x04, 0x62, 0xf6, 0x7d, 0x18, 0x4c, 0x48, 0x20, 0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x48, 0x02, 0x62, 0xf6, 0x7d, 0x38, 0x4c, 0x48, 0x20, 0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x48, 0x01, 0x62, 0xf6, 0x7d, 0x58, 0x4c, 0x48, 0x20, // vrcpsh 0x62, 0xf6, 0x65, 0x08, 0x4d, 0x48, 0x20, // vrsqrtph 0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x48, 0x04, 0x62, 0xf6, 0x7d, 0x18, 0x4e, 0x48, 0x20, 0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x50, 0x02, 0x62, 0xf6, 0x7d, 0x38, 0x4e, 0x50, 0x20, 0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x50, 0x01, 0x62, 0xf6, 0x7d, 0x58, 0x4e, 0x50, 0x20, // vrsqrtsh 0x62, 0xf6, 0x45, 0x8d, 0x4f, 0x48, 0x20, // vsqrtph 0x62, 0xf5, 0x7c, 0x8c, 0x51, 0x48, 0x04, 0x62, 0xf5, 0x7c, 0x9c, 0x51, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0xbc, 0x51, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0xcc, 0x51, 0x48, 0x01, 0x62, 0xf5, 0x7c, 0xdc, 0x51, 0x48, 0x20, // vsqrtsh 0x62, 0xf5, 0x56, 0x8c, 0x51, 0x48, 0x20, 0x62, 0xf5, 0x56, 0xbc, 0x51, 0xcf, // vscalefph 0x62, 0xf6, 0x55, 0x08, 0x2c, 0x48, 0x04, 0x62, 0xf6, 0x55, 0x18, 0x2c, 0x48, 0x20, 0x62, 0xf6, 0x55, 0x28, 0x2c, 0x48, 0x02, 0x62, 0xf6, 0x55, 0x38, 0x2c, 0x48, 0x20, 0x62, 0xf6, 0x55, 0x48, 0x2c, 0x48, 0x01, 0x62, 0xf6, 0x55, 0x58, 0x2c, 0x48, 0x20, 0x62, 0xf6, 0x55, 0xb9, 0x2c, 0xcf, // vscalefsh 0x62, 0xf6, 0x55, 0x08, 0x2d, 0x48, 0x20, 0x62, 0xf6, 0x55, 0xb9, 0x2d, 0xcf, // vreduceph 0x62, 0xf3, 0x7c, 0x08, 0x56, 0x48, 0x04, 
0x01, 0x62, 0xf3, 0x7c, 0x18, 0x56, 0x48, 0x20, 0x02, 0x62, 0xf3, 0x7c, 0x28, 0x56, 0x48, 0x02, 0x03, 0x62, 0xf3, 0x7c, 0x38, 0x56, 0x48, 0x20, 0x04, 0x62, 0xf3, 0x7c, 0x48, 0x56, 0x48, 0x01, 0x05, 0x62, 0xf3, 0x7c, 0x58, 0x56, 0x48, 0x20, 0x06, 0x62, 0xf3, 0x7c, 0x99, 0x56, 0xcd, 0x07, // vreducesh 0x62, 0xf3, 0x64, 0x08, 0x57, 0x48, 0x20, 0x01, 0x62, 0xf3, 0x54, 0x99, 0x57, 0xcc, 0x02, // vrndscaleph 0x62, 0xf3, 0x7c, 0x08, 0x08, 0x48, 0x04, 0x01, 0x62, 0xf3, 0x7c, 0x18, 0x08, 0x48, 0x20, 0x02, 0x62, 0xf3, 0x7c, 0x28, 0x08, 0x48, 0x02, 0x03, 0x62, 0xf3, 0x7c, 0x38, 0x08, 0x48, 0x20, 0x04, 0x62, 0xf3, 0x7c, 0x48, 0x08, 0x48, 0x01, 0x05, 0x62, 0xf3, 0x7c, 0x58, 0x08, 0x48, 0x20, 0x06, 0x62, 0xf3, 0x7c, 0x99, 0x08, 0xcd, 0x07, // vrndscalesh 0x62, 0xf3, 0x64, 0x08, 0x0a, 0x48, 0x20, 0x01, 0x62, 0xf3, 0x54, 0x99, 0x0a, 0xcc, 0x02, // vfpclassph 0x62, 0xf3, 0x7c, 0x08, 0x66, 0x48, 0x04, 0x01, 0x62, 0xf3, 0x7c, 0x18, 0x66, 0x48, 0x20, 0x02, 0x62, 0xf3, 0x7c, 0x28, 0x66, 0x48, 0x02, 0x03, 0x62, 0xf3, 0x7c, 0x38, 0x66, 0x48, 0x20, 0x04, 0x62, 0xf3, 0x7c, 0x48, 0x66, 0x48, 0x01, 0x05, 0x62, 0xf3, 0x7c, 0x58, 0x66, 0x48, 0x20, 0x06, // vfpclasssh 0x62, 0xf3, 0x7c, 0x0a, 0x67, 0xcb, 0x05, 0x62, 0xf3, 0x7c, 0x0a, 0x67, 0x48, 0x20, 0x05, // vgetexpph 0x62, 0xf6, 0x7d, 0x08, 0x42, 0x48, 0x04, 0x62, 0xf6, 0x7d, 0x38, 0x42, 0x48, 0x20, 0x62, 0xf6, 0x7d, 0x48, 0x42, 0x48, 0x01, 0x62, 0xf6, 0x7d, 0x99, 0x42, 0xcd, // vgetexpsh 0x62, 0xf6, 0x55, 0x08, 0x43, 0x48, 0x20, 0x62, 0xf6, 0x65, 0x99, 0x43, 0xcd, // vgetmantph 0x62, 0xf3, 0x7c, 0x08, 0x26, 0x48, 0x04, 0x01, 0x62, 0xf3, 0x7c, 0x38, 0x26, 0x48, 0x20, 0x02, 0x62, 0xf3, 0x7c, 0x48, 0x26, 0x48, 0x01, 0x03, 0x62, 0xf3, 0x7c, 0x99, 0x26, 0xcd, 0x04, // vgetmantsh 0x62, 0xf3, 0x54, 0x08, 0x27, 0x48, 0x20, 0x05, 0x62, 0xf3, 0x64, 0x99, 0x27, 0xcd, 0x06, // vmovsh 0x62, 0xf5, 0x7e, 0x89, 0x10, 0x48, 0x20, 0x62, 0xf5, 0x7e, 0x09, 0x11, 0x48, 0x20, 0x62, 0xf5, 0x66, 0x8a, 0x10, 0xcd, // vmovw 0x62, 0xd5, 0x7d, 0x08, 0x6e, 0xcd, 0x62, 
0xf5, 0x7d, 0x08, 0x6e, 0x58, 0x20, 0x62, 0xd5, 0x7d, 0x08, 0x7e, 0xc9, 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x78, 0x20, // vcvtsd2sh 0x62, 0xf5, 0xef, 0xb9, 0x5a, 0xcb, 0x62, 0xf5, 0xef, 0x08, 0x5a, 0x48, 0x08, // vcvtsh2sd 0x62, 0xf5, 0x6e, 0x99, 0x5a, 0xcb, 0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x48, 0x20, // vcvtsh2ss 0x62, 0xf6, 0x6c, 0x99, 0x13, 0xcb, 0x62, 0xf6, 0x6c, 0x08, 0x13, 0x48, 0x20, // vcvtss2sh 0x62, 0xf5, 0x6c, 0xb9, 0x1d, 0xcb, 0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x48, 0x10, // vcvtsh2si 0x62, 0xf5, 0x7e, 0x38, 0x2d, 0xd1, 0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x50, 0x20, 0x62, 0xf5, 0xfe, 0x38, 0x2d, 0xd1, 0x62, 0x75, 0xfe, 0x08, 0x2d, 0x40, 0x20, // vcvtph2dq 0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xcd, 0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x48, 0x08, 0x62, 0xf5, 0x7d, 0x18, 0x5b, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xaa, 0x5b, 0xcd, 0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0x38, 0x5b, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xbd, 0x5b, 0xcb, 0x62, 0xf5, 0x7d, 0xcd, 0x5b, 0x48, 0x02, 0x62, 0xf5, 0x7d, 0xdd, 0x5b, 0x48, 0x20, // vcvtph2psx 0x62, 0xf6, 0x7d, 0x08, 0x13, 0xcd, 0x62, 0xf6, 0x7d, 0x08, 0x13, 0x48, 0x08, 0x62, 0xf6, 0x7d, 0x18, 0x13, 0x48, 0x20, 0x62, 0xf6, 0x7d, 0xaa, 0x13, 0xcd, 0x62, 0xf6, 0x7d, 0x28, 0x13, 0x48, 0x04, 0x62, 0xf6, 0x7d, 0x38, 0x13, 0x48, 0x20, 0x62, 0xf6, 0x7d, 0x9d, 0x13, 0xcb, 0x62, 0xf6, 0x7d, 0xcd, 0x13, 0x48, 0x02, 0x62, 0xf6, 0x7d, 0xdd, 0x13, 0x48, 0x20, // vcvtph2udq 0x62, 0xf5, 0x7c, 0x08, 0x79, 0xcd, 0x62, 0xf5, 0x7c, 0x08, 0x79, 0x48, 0x08, 0x62, 0xf5, 0x7c, 0x18, 0x79, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0xaa, 0x79, 0xcd, 0x62, 0xf5, 0x7c, 0x28, 0x79, 0x48, 0x04, 0x62, 0xf5, 0x7c, 0x38, 0x79, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0xbd, 0x79, 0xcb, 0x62, 0xf5, 0x7c, 0xcd, 0x79, 0x48, 0x02, 0x62, 0xf5, 0x7c, 0xdd, 0x79, 0x48, 0x20, // vcvttph2dq 0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xcd, 0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x48, 0x08, 0x62, 0xf5, 0x7e, 0x18, 0x5b, 0x48, 0x20, 0x62, 0xf5, 0x7e, 0xaa, 0x5b, 0xcd, 0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x48, 0x04, 0x62, 0xf5, 0x7e, 
0x38, 0x5b, 0x48, 0x20, 0x62, 0xf5, 0x7e, 0x9d, 0x5b, 0xcb, 0x62, 0xf5, 0x7e, 0xcd, 0x5b, 0x48, 0x02, 0x62, 0xf5, 0x7e, 0xdd, 0x5b, 0x48, 0x20, // vcvttph2udq 0x62, 0xf5, 0x7c, 0x08, 0x78, 0xcd, 0x62, 0xf5, 0x7c, 0x08, 0x78, 0x48, 0x08, 0x62, 0xf5, 0x7c, 0x18, 0x78, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0xaa, 0x78, 0xcd, 0x62, 0xf5, 0x7c, 0x28, 0x78, 0x48, 0x04, 0x62, 0xf5, 0x7c, 0x38, 0x78, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0x9d, 0x78, 0xcb, 0x62, 0xf5, 0x7c, 0xcd, 0x78, 0x48, 0x02, 0x62, 0xf5, 0x7c, 0xdd, 0x78, 0x48, 0x20, // vcvtph2pd 0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xcd, 0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x48, 0x10, 0x62, 0xf5, 0x7c, 0x18, 0x5a, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0xaa, 0x5a, 0xcd, 0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x48, 0x08, 0x62, 0xf5, 0x7c, 0x38, 0x5a, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0x9d, 0x5a, 0xcb, 0x62, 0xf5, 0x7c, 0xcd, 0x5a, 0x48, 0x04, 0x62, 0xf5, 0x7c, 0xdd, 0x5a, 0x48, 0x20, // vcvtph2qq 0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xcd, 0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x48, 0x10, 0x62, 0xf5, 0x7d, 0x18, 0x7b, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xaa, 0x7b, 0xcd, 0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x48, 0x08, 0x62, 0xf5, 0x7d, 0x38, 0x7b, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xbd, 0x7b, 0xcb, 0x62, 0xf5, 0x7d, 0xcd, 0x7b, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0xdd, 0x7b, 0x48, 0x20, // vcvtph2uqq 0x62, 0xf5, 0x7d, 0x08, 0x79, 0xcd, 0x62, 0xf5, 0x7d, 0x08, 0x79, 0x48, 0x10, 0x62, 0xf5, 0x7d, 0x18, 0x79, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xaa, 0x79, 0xcd, 0x62, 0xf5, 0x7d, 0x28, 0x79, 0x48, 0x08, 0x62, 0xf5, 0x7d, 0x38, 0x79, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xbd, 0x79, 0xcb, 0x62, 0xf5, 0x7d, 0xcd, 0x79, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0xdd, 0x79, 0x48, 0x20, // vcvttph2uqq 0x62, 0xf5, 0x7d, 0x08, 0x78, 0xcd, 0x62, 0xf5, 0x7d, 0x08, 0x78, 0x48, 0x10, 0x62, 0xf5, 0x7d, 0x18, 0x78, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xaa, 0x78, 0xcd, 0x62, 0xf5, 0x7d, 0x28, 0x78, 0x48, 0x08, 0x62, 0xf5, 0x7d, 0x38, 0x78, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0x9d, 0x78, 0xcb, 0x62, 0xf5, 0x7d, 0xcd, 0x78, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0xdd, 
0x78, 0x48, 0x20, // vcvtdq2ph 0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xcd, 0x62, 0xf5, 0x7c, 0x08, 0x5b, 0x48, 0x04, 0x62, 0xf5, 0x7c, 0x18, 0x5b, 0x48, 0x10, 0x62, 0xf5, 0x7c, 0x28, 0x5b, 0x48, 0x02, 0x62, 0xf5, 0x7c, 0x38, 0x5b, 0x48, 0x10, 0x62, 0xf5, 0x7c, 0xba, 0x5b, 0xcd, 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x48, 0x01, 0x62, 0xf5, 0x7c, 0x58, 0x5b, 0x48, 0x10, // vcvtps2phx 0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xcd, 0x62, 0xf5, 0x7d, 0x08, 0x1d, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0x18, 0x1d, 0x48, 0x10, 0x62, 0xf5, 0x7d, 0x28, 0x1d, 0x48, 0x02, 0x62, 0xf5, 0x7d, 0x38, 0x1d, 0x48, 0x10, 0x62, 0xf5, 0x7d, 0xba, 0x1d, 0xcd, 0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x48, 0x01, 0x62, 0xf5, 0x7d, 0x58, 0x1d, 0x48, 0x10, // vcvtudq2ph 0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xcd, 0x62, 0xf5, 0x7f, 0x08, 0x7a, 0x48, 0x04, 0x62, 0xf5, 0x7f, 0x18, 0x7a, 0x48, 0x10, 0x62, 0xf5, 0x7f, 0x28, 0x7a, 0x48, 0x02, 0x62, 0xf5, 0x7f, 0x38, 0x7a, 0x48, 0x10, 0x62, 0xf5, 0x7f, 0xba, 0x7a, 0xcd, 0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x48, 0x01, 0x62, 0xf5, 0x7f, 0x58, 0x7a, 0x48, 0x10, // vcvtpd2ph 0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xcd, 0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xcd, 0x62, 0xf5, 0xfd, 0xba, 0x5a, 0xcd, 0x62, 0xf5, 0xfd, 0x08, 0x5a, 0x48, 0x04, 0x62, 0xf5, 0xfd, 0x18, 0x5a, 0x48, 0x08, 0x62, 0xf5, 0xfd, 0x28, 0x5a, 0x48, 0x02, 0x62, 0xf5, 0xfd, 0x38, 0x5a, 0x48, 0x08, 0x62, 0xf5, 0xfd, 0x48, 0x5a, 0x48, 0x01, 0x62, 0xf5, 0xfd, 0x58, 0x5a, 0x48, 0x08, // vcvtqq2ph 0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xcd, 0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xcd, 0x62, 0xf5, 0xfc, 0xba, 0x5b, 0xcd, 0x62, 0xf5, 0xfc, 0x08, 0x5b, 0x48, 0x04, 0x62, 0xf5, 0xfc, 0x18, 0x5b, 0x48, 0x08, 0x62, 0xf5, 0xfc, 0x28, 0x5b, 0x48, 0x02, 0x62, 0xf5, 0xfc, 0x38, 0x5b, 0x48, 0x08, 0x62, 0xf5, 0xfc, 0x48, 0x5b, 0x48, 0x01, 0x62, 0xf5, 0xfc, 0x58, 0x5b, 0x48, 0x08, // vcvtuqq2ph 0x62, 0xf5, 0xff, 0x08, 0x7a, 0xcd, 0x62, 0xf5, 0xff, 0x28, 0x7a, 0xcd, 0x62, 0xf5, 0xff, 0xba, 0x7a, 0xcd, 0x62, 0xf5, 0xff, 0x08, 0x7a, 0x48, 0x04, 0x62, 0xf5, 0xff, 0x18, 0x7a, 0x48, 0x08, 0x62, 0xf5, 
0xff, 0x28, 0x7a, 0x48, 0x02, 0x62, 0xf5, 0xff, 0x38, 0x7a, 0x48, 0x08, 0x62, 0xf5, 0xff, 0x48, 0x7a, 0x48, 0x01, 0x62, 0xf5, 0xff, 0x58, 0x7a, 0x48, 0x08, // vcvtph2uw 0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xcd, 0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x48, 0x04, 0x62, 0xf5, 0x7c, 0x18, 0x7d, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x48, 0x02, 0x62, 0xf5, 0x7c, 0x38, 0x7d, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0xba, 0x7d, 0xcd, 0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x48, 0x01, 0x62, 0xf5, 0x7c, 0x58, 0x7d, 0x48, 0x20, // vcvtph2w 0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xcd, 0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0x18, 0x7d, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x48, 0x02, 0x62, 0xf5, 0x7d, 0x38, 0x7d, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xba, 0x7d, 0xcd, 0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x48, 0x01, 0x62, 0xf5, 0x7d, 0x58, 0x7d, 0x48, 0x20, // vcvttph2uw 0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xcd, 0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x48, 0x04, 0x62, 0xf5, 0x7c, 0x18, 0x7c, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x48, 0x02, 0x62, 0xf5, 0x7c, 0x38, 0x7c, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0x9a, 0x7c, 0xcd, 0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x48, 0x01, 0x62, 0xf5, 0x7c, 0x58, 0x7c, 0x48, 0x20, // vcvttph2w 0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xcd, 0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0x18, 0x7c, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x48, 0x02, 0x62, 0xf5, 0x7d, 0x38, 0x7c, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0x9a, 0x7c, 0xcd, 0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x48, 0x01, 0x62, 0xf5, 0x7d, 0x58, 0x7c, 0x48, 0x20, // vcvtuw2ph 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xcd, 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x48, 0x04, 0x62, 0xf5, 0x7f, 0x18, 0x7d, 0x48, 0x20, 0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x48, 0x02, 0x62, 0xf5, 0x7f, 0x38, 0x7d, 0x48, 0x20, 0x62, 0xf5, 0x7f, 0xba, 0x7d, 0xcd, 0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x48, 0x01, 0x62, 0xf5, 0x7f, 0x58, 0x7d, 0x48, 0x20, // vcvtw2ph 0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xcd, 0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x48, 0x04, 0x62, 0xf5, 0x7e, 0x18, 0x7d, 0x48, 0x20, 0x62, 0xf5, 0x7e, 0x28, 
0x7d, 0x48, 0x02, 0x62, 0xf5, 0x7e, 0x38, 0x7d, 0x48, 0x20, 0x62, 0xf5, 0x7e, 0xba, 0x7d, 0xcd, 0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x48, 0x01, 0x62, 0xf5, 0x7e, 0x58, 0x7d, 0x48, 0x20, // vcvtps2ph 0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x01, 0xc4, 0xe3, 0x79, 0x1d, 0x50, 0x40, 0x02, 0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x03, 0xc4, 0xe3, 0x7d, 0x1d, 0x50, 0x40, 0x04, 0x62, 0xf3, 0x7d, 0x89, 0x1d, 0xd1, 0x05, 0x62, 0xf3, 0x7d, 0x09, 0x1d, 0x58, 0x08, 0x06, 0x62, 0xf3, 0x7d, 0x2a, 0x1d, 0xe1, 0x07, 0x62, 0xf3, 0x7d, 0x2a, 0x1d, 0x68, 0x04, 0x08, 0x62, 0xf3, 0x7d, 0x1a, 0x1d, 0xe9, 0x09, 0x62, 0xf3, 0x7d, 0x4d, 0x1d, 0x60, 0x02, 0x0a, // vcvtsh2usi 0x62, 0xf5, 0x7e, 0x38, 0x79, 0xc9, 0x62, 0xf5, 0x7e, 0x08, 0x79, 0x40, 0x20, 0x62, 0x75, 0xfe, 0x38, 0x79, 0xc9, 0x62, 0x75, 0xfe, 0x08, 0x79, 0x68, 0x20, // vcvttsh2si 0x62, 0xf5, 0x7e, 0x18, 0x2c, 0xc9, 0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x40, 0x20, 0x62, 0x75, 0xfe, 0x18, 0x2c, 0xc9, 0x62, 0x75, 0xfe, 0x08, 0x2c, 0x68, 0x20, // vcvttsh2usi 0x62, 0xf5, 0x7e, 0x18, 0x78, 0xc9, 0x62, 0xf5, 0x7e, 0x08, 0x78, 0x40, 0x20, 0x62, 0x75, 0xfe, 0x18, 0x78, 0xc9, 0x62, 0x75, 0xfe, 0x08, 0x78, 0x68, 0x20, // vcvttph2qq 0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xcd, 0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x48, 0x10, 0x62, 0xf5, 0x7d, 0x18, 0x7a, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0xaa, 0x7a, 0xcd, 0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x48, 0x08, 0x62, 0xf5, 0x7d, 0x38, 0x7a, 0x48, 0x20, 0x62, 0xf5, 0x7d, 0x9d, 0x7a, 0xcb, 0x62, 0xf5, 0x7d, 0xcd, 0x7a, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0xdd, 0x7a, 0x48, 0x20, // vcvtsi2sh 0x62, 0xf5, 0x6e, 0x38, 0x2a, 0xc8, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x48, 0x10, 0x62, 0xd5, 0xee, 0x38, 0x2a, 0xc9, 0x62, 0xf5, 0xee, 0x08, 0x2a, 0x48, 0x08, // vcvtusi2sh 0x62, 0xf5, 0x6e, 0x38, 0x7b, 0xc8, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x48, 0x10, 0x62, 0xd5, 0xee, 0x38, 0x7b, 0xc9, 0x62, 0xf5, 0xee, 0x08, 0x7b, 0x48, 0x08, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } #endif 
CYBOZU_TEST_AUTO(waitpkg) { struct Code : Xbyak::CodeGenerator { Code() { tpause(eax); tpause(ebx); #ifdef XBYAK32 umonitor(cx); umonitor(ecx); #else umonitor(ecx); umonitor(rcx); #endif umwait(eax); umwait(ebx); } } c; const uint8_t tbl[] = { // tpause 0x66, 0x0f, 0xae, 0xf0, 0x66, 0x0f, 0xae, 0xf3, // umonitor 0x67, 0xf3, 0x0f, 0xae, 0xf1, 0xf3, 0x0f, 0xae, 0xf1, // tpause 0xf2, 0x0f, 0xae, 0xf0, 0xf2, 0x0f, 0xae, 0xf3, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } CYBOZU_TEST_AUTO(misc) { struct Code : Xbyak::CodeGenerator { Code() { cldemote(ptr[eax+esi*4+0x12]); movdiri(ptr[edx+esi*2+4], eax); movdir64b(eax, ptr[edx]); xresldtrk(); xsusldtrk(); #ifdef XBYAK64 cldemote(ptr[rax+rdi*8+0x123]); movdiri(ptr[rax+r12], r9); movdiri(ptr[rax+r12*2+4], r9d); movdir64b(r10, ptr[r8]); clui(); senduipi(rax); senduipi(r10); stui(); testui(); uiret(); #endif } } c; const uint8_t tbl[] = { #ifdef XBYAK64 0x67, #endif 0x0f, 0x1c, 0x44, 0xb0, 0x12, // cldemote #ifdef XBYAK64 0x67, #endif 0x0f, 0x38, 0xf9, 0x44, 0x72, 0x04, // movdiri 0x66, #ifdef XBYAK64 0x67, #endif 0x0f, 0x38, 0xf8, 0x02, // movdir64b 0xf2, 0x0f, 0x01, 0xe9, // xresldtrk 0xf2, 0x0f, 0x01, 0xe8, // xsusldtrk #ifdef XBYAK64 0x0f, 0x1c, 0x84, 0xf8, 0x23, 0x01, 0x00, 0x00, // cldemote 0x4e, 0x0f, 0x38, 0xf9, 0x0c, 0x20, // movdiri 0x46, 0x0f, 0x38, 0xf9, 0x4c, 0x60, 0x04, // movdiri 0x66, 0x45, 0x0f, 0x38, 0xf8, 0x10, // movdir64b 0xf3, 0x0f, 0x01, 0xee, // clui 0xf3, 0x0f, 0xc7, 0xf0, // senduipi rax 0xf3, 0x41, 0x0f, 0xc7, 0xf2, // senduipi r10 0xf3, 0x0f, 0x01, 0xef, // stui 0xf3, 0x0f, 0x01, 0xed, // testui 0xf3, 0x0f, 0x01, 0xec, // uiret #endif }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } CYBOZU_TEST_AUTO(cpu) { // https://github.com/herumi/xbyak/issues/148 using namespace Xbyak::util; Cpu cpu; CYBOZU_TEST_EQUAL(cpu.has(Cpu::tINTEL) && 
cpu.has(Cpu::tAMD), cpu.has(Cpu::tINTEL | Cpu::tAMD)); } CYBOZU_TEST_AUTO(minmax) { using namespace Xbyak::util; CYBOZU_TEST_EQUAL((std::min)(3, 4), local::min_(3, 4)); CYBOZU_TEST_EQUAL((std::max)(3, 4), local::max_(3, 4)); } CYBOZU_TEST_AUTO(rao_int) { struct Code : Xbyak::CodeGenerator { Code() { #ifdef XBYAK64 aadd(ptr[rax], ecx); aadd(ptr[eax], ecx); aadd(ptr[rax], r10); aand(ptr[rax], ecx); aand(ptr[eax], ecx); aand(ptr[rax], r10); aor(ptr[rax], ecx); aor(ptr[eax], ecx); aor(ptr[rax], r10); axor(ptr[rax], ecx); axor(ptr[eax], ecx); axor(ptr[rax], r10); #else aadd(ptr[eax], ecx); aand(ptr[eax], ecx); aor(ptr[eax], ecx); axor(ptr[eax], ecx); #endif } } c; const uint8_t tbl[] = { #ifdef XBYAK64 // aadd 0x0f, 0x38, 0xfc, 0x08, 0x67, 0x0f, 0x38, 0xfc, 0x08, 0x4c, 0x0f, 0x38, 0xfc, 0x10, // aand 0x66, 0x0f, 0x38, 0xfc, 0x08, 0x66, 0x67, 0x0f, 0x38, 0xfc, 0x08, 0x66, 0x4c, 0x0f, 0x38, 0xfc, 0x10, // aor 0xf2, 0x0f, 0x38, 0xfc, 0x08, 0xf2, 0x67, 0x0f, 0x38, 0xfc, 0x08, 0xf2, 0x4c, 0x0f, 0x38, 0xfc, 0x10, // axor 0xf3, 0x0f, 0x38, 0xfc, 0x08, 0xf3, 0x67, 0x0f, 0x38, 0xfc, 0x08, 0xf3, 0x4c, 0x0f, 0x38, 0xfc, 0x10, #else // aadd 0x0f, 0x38, 0xfc, 0x08, // aand 0x66, 0x0f, 0x38, 0xfc, 0x08, // aor 0xf2, 0x0f, 0x38, 0xfc, 0x08, // axor 0xf3, 0x0f, 0x38, 0xfc, 0x08, #endif }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } #ifdef XBYAK64 CYBOZU_TEST_AUTO(CMPccXADD) { struct Code : Xbyak::CodeGenerator { Code() { // 32bit reg cmpbexadd(ptr[rax+r10*4], ecx, edx); cmpbxadd(ptr[rax+r10*4], ecx, edx); cmplexadd(ptr[rax+r10*4], ecx, edx); cmplxadd(ptr[rax+r10*4], ecx, edx); cmpnbexadd(ptr[rax+r10*4], ecx, edx); cmpnbxadd(ptr[rax+r10*4], ecx, edx); cmpnlexadd(ptr[rax+r10*4], ecx, edx); cmpnlxadd(ptr[rax+r10*4], ecx, edx); cmpnoxadd(ptr[rax+r10*4], ecx, edx); cmpnpxadd(ptr[rax+r10*4], ecx, edx); cmpnsxadd(ptr[rax+r10*4], ecx, edx); cmpnzxadd(ptr[rax+r10*4], ecx, edx); cmpoxadd(ptr[rax+r10*4], ecx, 
edx); cmppxadd(ptr[rax+r10*4], ecx, edx); cmpsxadd(ptr[rax+r10*4], ecx, edx); cmpzxadd(ptr[rax+r10*4], ecx, edx); // 64bit reg cmpbexadd(ptr[rax+r10*4], rcx, rdx); cmpbxadd(ptr[rax+r10*4], rcx, rdx); cmplexadd(ptr[rax+r10*4], rcx, rdx); cmplxadd(ptr[rax+r10*4], rcx, rdx); cmpnbexadd(ptr[rax+r10*4], rcx, rdx); cmpnbxadd(ptr[rax+r10*4], rcx, rdx); cmpnlexadd(ptr[rax+r10*4], rcx, rdx); cmpnlxadd(ptr[rax+r10*4], rcx, rdx); cmpnoxadd(ptr[rax+r10*4], rcx, rdx); cmpnpxadd(ptr[rax+r10*4], rcx, rdx); cmpnsxadd(ptr[rax+r10*4], rcx, rdx); cmpnzxadd(ptr[rax+r10*4], rcx, rdx); cmpoxadd(ptr[rax+r10*4], rcx, rdx); cmppxadd(ptr[rax+r10*4], rcx, rdx); cmpsxadd(ptr[rax+r10*4], rcx, rdx); cmpzxadd(ptr[rax+r10*4], rcx, rdx); } } c; const uint8_t tbl[] = { // 32bit reg 0xc4, 0xa2, 0x69, 0xe6, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe2, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xee, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xec, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe7, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe3, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xef, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xed, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe1, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xeb, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe9, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe5, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe0, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xea, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe8, 0x0c, 0x90, 0xc4, 0xa2, 0x69, 0xe4, 0x0c, 0x90, // 64bit reg 0xc4, 0xa2, 0xe9, 0xe6, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe2, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xee, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xec, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe7, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe3, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xef, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xed, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe1, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xeb, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe9, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe5, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe0, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xea, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe8, 0x0c, 0x90, 0xc4, 0xa2, 0xe9, 0xe4, 0x0c, 0x90, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); 
CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } CYBOZU_TEST_AUTO(prefetchiti) { struct Code : Xbyak::CodeGenerator { Code() { prefetchit0(ptr[rax]); prefetchit1(ptr[rax]); } } c; const uint8_t tbl[] = { 0x0f, 0x18, 0x38, 0x0f, 0x18, 0x30 }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } CYBOZU_TEST_AUTO(crypto) { struct Code : Xbyak::CodeGenerator { Code() { vsha512msg1(ymm3, xmm5); vsha512msg2(ymm9, ymm10); vsha512rnds2(ymm1, ymm3, xmm2); vsm3msg1(xmm1, xmm2, xmm3); vsm3msg1(xmm1, xmm2, ptr [rax]); vsm3msg2(xmm5, xmm7, xmm3); vsm3msg2(xmm5, xmm6, ptr [rax]); vsm3rnds2(xmm5, xmm7, xmm3, 0x12); vsm3rnds2(xmm5, xmm7, ptr [rcx], 0x34); vsm4key4(xmm1, xmm2, xmm3); vsm4key4(xmm1, xmm2, ptr [rdx]); vsm4rnds4(xmm1, xmm2, xmm3); vsm4rnds4(xmm5, xmm6, ptr [rcx+rax*4]); } } c; const uint8_t tbl[] = { // sha512 0xc4, 0xe2, 0x7f, 0xcc, 0xdd, 0xc4, 0x42, 0x7f, 0xcd, 0xca, 0xc4, 0xe2, 0x67, 0xcb, 0xca, // sm3 0xC4, 0xE2, 0x68, 0xDA, 0xCB, 0xC4, 0xE2, 0x68, 0xDA, 0x08, 0xC4, 0xE2, 0x41, 0xDA, 0xEB, 0xC4, 0xE2, 0x49, 0xDA, 0x28, 0xC4, 0xE3, 0x41, 0xDE, 0xEB, 0x12, 0xC4, 0xE3, 0x41, 0xDE, 0x29, 0x34, // sm4 0xc4, 0xe2, 0x6a, 0xda, 0xcb, 0xc4, 0xe2, 0x6a, 0xda, 0x0a, 0xc4, 0xe2, 0x6b, 0xda, 0xcb, 0xc4, 0xe2, 0x4b, 0xda, 0x2c, 0x81, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } CYBOZU_TEST_AUTO(avx_vnni_int) { struct Code : Xbyak::CodeGenerator { Code() { vpdpbssd(xmm1, xmm2, xmm3); vpdpbssd(ymm1, ymm2, ptr [rax]); vpdpbssds(xmm1, xmm2, xmm3); vpdpbssds(ymm1, ymm2, ptr [rax]); vpdpbsud(xmm1, xmm2, xmm3); vpdpbsud(ymm1, ymm2, ptr [rax]); vpdpbsuds(xmm1, xmm2, xmm3); vpdpbsuds(ymm1, ymm2, ptr [rax]); vpdpbuud(xmm1, xmm2, xmm3); vpdpbuud(ymm1, ymm2, ptr [rax]); vpdpbuuds(xmm1, xmm2, xmm3); vpdpbuuds(ymm1, ymm2, ptr [rax]); vpdpwsud(xmm1, xmm2, xmm3); vpdpwsud(ymm1, ymm2, ptr [rax]); 
vpdpwsuds(xmm1, xmm2, xmm3); vpdpwsuds(ymm1, ymm2, ptr [rax]); vpdpwusd(xmm1, xmm2, xmm3); vpdpwusd(ymm1, ymm2, ptr [rax]); vpdpwusds(xmm1, xmm2, xmm3); vpdpwusds(ymm1, ymm2, ptr [rax]); vpdpwuud(xmm1, xmm2, xmm3); vpdpwuud(ymm1, ymm2, ptr [rax]); vpdpwuuds(xmm1, xmm2, xmm3); vpdpwuuds(ymm1, ymm2, ptr [rax]); } } c; const uint8_t tbl[] = { 0xc4, 0xe2, 0x6b, 0x50, 0xcb, 0xc4, 0xe2, 0x6f, 0x50, 0x08, 0xc4, 0xe2, 0x6b, 0x51, 0xcb, 0xc4, 0xe2, 0x6f, 0x51, 0x08, 0xc4, 0xe2, 0x6a, 0x50, 0xcb, 0xc4, 0xe2, 0x6e, 0x50, 0x08, 0xc4, 0xe2, 0x6a, 0x51, 0xcb, 0xc4, 0xe2, 0x6e, 0x51, 0x08, 0xc4, 0xe2, 0x68, 0x50, 0xcb, 0xc4, 0xe2, 0x6c, 0x50, 0x08, 0xc4, 0xe2, 0x68, 0x51, 0xcb, 0xc4, 0xe2, 0x6c, 0x51, 0x08, 0xc4, 0xe2, 0x6a, 0xd2, 0xcb, 0xc4, 0xe2, 0x6e, 0xd2, 0x08, 0xc4, 0xe2, 0x6a, 0xd3, 0xcb, 0xc4, 0xe2, 0x6e, 0xd3, 0x08, 0xc4, 0xe2, 0x69, 0xd2, 0xcb, 0xc4, 0xe2, 0x6d, 0xd2, 0x08, 0xc4, 0xe2, 0x69, 0xd3, 0xcb, 0xc4, 0xe2, 0x6d, 0xd3, 0x08, 0xc4, 0xe2, 0x68, 0xd2, 0xcb, 0xc4, 0xe2, 0x6c, 0xd2, 0x08, 0xc4, 0xe2, 0x68, 0xd3, 0xcb, 0xc4, 0xe2, 0x6c, 0xd3, 0x08, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } CYBOZU_TEST_AUTO(vmovd) { struct Code : Xbyak::CodeGenerator { Code() { setDefaultEncodingAVX10(PreAVX10v2Encoding); vmovd(eax, xm1); // always AVX10.1 vmovd(xm3, xm1); // always AVX10.2 // AVX-512 (AVX10.1) vmovd(ptr[rax+128], xm1); vmovd(xm1, ptr[rax+128]); vmovd(ptr[rax+128], xm30); vmovd(xm30, ptr[rax+128]); setDefaultEncodingAVX10(AVX10v2Encoding); vmovd(eax, xm1); // always AVX10.1 vmovd(xm3, xm1); // always AVX10.2 // AVX10.2 vmovd(ptr[rax+128], xm1); vmovd(xm1, ptr[rax+128]); vmovd(ptr[rax+128], xm30); vmovd(xm30, ptr[rax+128]); } } c; const uint8_t tbl[] = { 0xc5, 0xf9, 0x7e, 0xc8, // avx10.1 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // avx10.2 0xc5, 0xf9, 0x7e, 0x88, 0x80, 0x00, 0x00, 0x00, // avx 0xc5, 0xf9, 0x6e, 0x88, 0x80, 0x00, 0x00, 0x00, // avx 0x62, 0x61, 0x7d, 0x08, 0x7e, 0x70, 
0x20, // avx10.1 0x62, 0x61, 0x7d, 0x08, 0x6e, 0x70, 0x20, // avx10.1 0xc5, 0xf9, 0x7e, 0xc8, // avx10.1 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // avx10.2 0x62, 0xf1, 0x7d, 0x08, 0xd6, 0x48, 0x20, // avx10.2 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0x48, 0x20, // avx10.2 0x62, 0x61, 0x7d, 0x08, 0xd6, 0x70, 0x20, // avx10.2 0x62, 0x61, 0x7e, 0x08, 0x7e, 0x70, 0x20, // avx10.2 }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } #endif