diff options
author | MITSUNARI Shigeo <[email protected]> | 2018-01-05 11:45:41 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2018-01-05 11:45:41 +0900 |
commit | 9acfc1323dc3716b2c676a379f737de81ba3694c (patch) | |
tree | eee3cdf84de2fc6323d59587281eebe1c6c16c58 | |
parent | ac8de850ca352228a41768e2a48025de90ed1ab2 (diff) | |
download | xbyak-9acfc1323dc3716b2c676a379f737de81ba3694c.tar.gz xbyak-9acfc1323dc3716b2c676a379f737de81ba3694c.zip |
add vpshrd(v){w,d,q} (i.e. vpshrdw, vpshrdd, vpshrdq, vpshrdvw, vpshrdvd, vpshrdvq)
-rw-r--r-- | gen/gen_avx512.cpp | 8 | ||||
-rw-r--r-- | test/misc.cpp | 59 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 6 |
3 files changed, 73 insertions, 0 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 76d5f7d..0c11584 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -354,6 +354,14 @@ void putX_X_XM_IMM() { 0x70, "vpshldvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, { 0x71, "vpshldvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false }, { 0x71, "vpshldvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, + + { 0x72, "vpshrdw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true }, + { 0x73, "vpshrdd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, true }, + { 0x73, "vpshrdq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true }, + + { 0x72, "vpshrdvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, + { 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false }, + { 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/test/misc.cpp b/test/misc.cpp index 953e787..62b23e2 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -297,4 +297,63 @@ CYBOZU_TEST_AUTO(shld) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } +CYBOZU_TEST_AUTO(shrd) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5); + vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5); + vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5); + + vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5); + vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5); + vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5); + + vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5); + vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5); + vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5); + + vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]); + vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]); + vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]); + + vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]); 
+ vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]); + vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]); + + vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]); + vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]); + vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]); + } + } c; + const uint8_t tbl[] = { + 0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05, + 0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05, + 0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05, + + 0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05, + 0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05, + 0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05, + + 0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05, + 0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05, + 0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05, + + 0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04, + 0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02, + 0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01, + + 0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04, + 0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02, + 0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01, + + 0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04, + 0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02, + 0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} #endif diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 00ca540..06da38e 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1863,6 +1863,12 @@ void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1 void vpshldvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x71); } void vpshldvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70); } void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | 
T_MUST_EVEX, 0x70, imm); } +void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x73, imm); } +void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x73, imm); } +void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x73); } +void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x73); } +void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72); } +void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); } void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); } void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX, 0xE2); } |