diff options
author | MITSUNARI Shigeo <[email protected]> | 2018-01-05 23:13:52 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2018-01-05 23:13:52 +0900 |
commit | e6354f8be279e19f42f2cee72099cc392788d666 (patch) | |
tree | b789074645241be79c1e29d65469fd59e573f992 | |
parent | 09a12642b2cca32cf34235461139fa027714fe0e (diff) | |
download | xbyak-e6354f8be279e19f42f2cee72099cc392788d666.tar.gz xbyak-e6354f8be279e19f42f2cee72099cc392788d666.zip |
add vgf2p8mulb
-rw-r--r-- | gen/gen_code.cpp | 2 |
-rw-r--r-- | test/misc.cpp | 29 |
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 2 |
3 files changed, 33 insertions, 0 deletions
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 5f11ea6..fe0b59a 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -202,8 +202,10 @@ void putX_X_XM(bool omitOnly) { 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, { 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, + { 0xCF, "gf2p8affineinvqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 }, { 0xCE, "gf2p8affineqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 }, + { 0xCF, "gf2p8mulb", T_66 | T_0F38 | T_W0 | T_EVEX | T_YMM | T_EW0 | T_SAE_Z, false, false, 3 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/test/misc.cpp b/test/misc.cpp index 0e811f3..701111c 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -614,6 +614,22 @@ CYBOZU_TEST_AUTO(gf2) vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5); vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5); vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5); + /// + gf2p8mulb(xmm1, xmm2); + gf2p8mulb(xmm1, ptr [rax + 0x40]); + + vgf2p8mulb(xmm1, xmm5, xmm2); + vgf2p8mulb(ymm1, ymm5, ymm2); + vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]); + vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]); + + vgf2p8mulb(xmm30, xmm31, xmm4); + vgf2p8mulb(ymm30, ymm31, ymm4); + vgf2p8mulb(zmm30, zmm31, zmm4); + + vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]); + vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]); + vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]); } } c; const uint8_t tbl[] = { @@ -648,6 +664,19 @@ CYBOZU_TEST_AUTO(gf2) 0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05, 0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05, 0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05, + + 0x66, 0x0f, 0x38, 0xcf, 0xca, + 0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40, + 0xc4, 0xe2, 0x51, 0xcf, 0xca, + 0xc4, 0xe2, 0x55, 0xcf, 0xca, + 0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40, + 0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40, + 0x62, 0x62, 0x05, 
0x00, 0xcf, 0xf4, + 0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4, + 0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4, + 0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04, + 0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02, + 0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 9c50aba..d7954af 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -296,6 +296,7 @@ void fyl2x() { db(0xD9); db(0xF1); } void fyl2xp1() { db(0xD9); db(0xF9); } void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); } void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); } void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); } @@ -1006,6 +1007,7 @@ void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1 void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); } void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); } void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); } +void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); 
} void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); } void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C); } void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D); } |