diff options
author | MITSUNARI Shigeo <[email protected]> | 2018-01-05 14:38:20 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2018-01-05 14:38:20 +0900 |
commit | 457f4fd060c0deb314caf2b7ada0b0326afee025 (patch) | |
tree | ec0417a956f28a17cf1d6828569ec4d2010bf9b9 | |
parent | 5af0ba39771468d1438e8d768cda65e8eda75f5f (diff) | |
download | xbyak-457f4fd060c0deb314caf2b7ada0b0326afee025.tar.gz xbyak-457f4fd060c0deb314caf2b7ada0b0326afee025.zip |
add vpshufbitqmb
-rw-r--r-- | gen/gen_avx512.cpp | 2 | ||||
-rw-r--r-- | test/misc.cpp | 10 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 1 |
3 files changed, 12 insertions, 1 deletion
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 6f3ea78..5e0591e 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -694,6 +694,8 @@ void putMisc() puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); puts("void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); + + puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }"); } void putV4FMA() diff --git a/test/misc.cpp b/test/misc.cpp index 231ff32..745db2b 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -522,7 +522,7 @@ CYBOZU_TEST_AUTO(vpdpbus) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } -CYBOZU_TEST_AUTO(vexpand) +CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb) { struct Code : Xbyak::CodeGenerator { Code() @@ -540,6 +540,10 @@ CYBOZU_TEST_AUTO(vexpand) vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]); vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]); vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]); + + vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]); + vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]); + vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]); } } c; const uint8_t tbl[] = { @@ -556,6 +560,10 @@ CYBOZU_TEST_AUTO(vexpand) 0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20, 0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20, 0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20, + + 0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04, + 0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02, + 0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01, }; 
const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index b3c9398..3028088 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1879,6 +1879,7 @@ void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1 void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73); } void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72); } void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); } +void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); } void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); } void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX, 0xE2); } |