aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <[email protected]>2024-10-15 03:50:27 +0900
committerMITSUNARI Shigeo <[email protected]>2024-10-15 03:50:27 +0900
commit0c2f7fc6dbd713b2d690a5859f562746b4dd568d (patch)
tree7bc295be6e954f652808077f504d122301957c03
parent46238d9845ff1226b029152d7c787aa661324620 (diff)
downloadxbyak-0c2f7fc6dbd713b2d690a5859f562746b4dd568d.tar.gz
xbyak-0c2f7fc6dbd713b2d690a5859f562746b4dd568d.zip
vmovw supports avx10.2
-rw-r--r--doc/usage.md7
-rw-r--r--gen/gen_avx512.cpp7
-rw-r--r--test/Makefile2
-rw-r--r--test/avx10/misc.txt7
-rw-r--r--test/avx10/old.txt4
-rw-r--r--test/test_by_xed.cpp2
-rw-r--r--xbyak/xbyak.h47
-rw-r--r--xbyak/xbyak_mnemonic.h3
8 files changed, 44 insertions, 35 deletions
diff --git a/doc/usage.md b/doc/usage.md
index 9636613..ef38d63 100644
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -124,7 +124,7 @@ vpdpbusd(xm0, xm1, xm2); // VEX
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI)
vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above
vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2)
-setDefaultEncoding(VexEncoding, AVX10p2Encoding); // use 2nd argument.
+setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument.
vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2)
```
@@ -133,9 +133,10 @@ Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param.
param|vnniEnc|avx10Enc
-|-|-
-VexEncoding|AVX-VNNI|AVX-VNNI-INT8
+VexEncoding|AVX-VNNI|-
EvexEncoding|AVX512-VNNI|-
-AVX10p2Encoding|-|AVX10.2
+PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16
+AVX10v2Encoding|-|AVX10.2
default|EvexEncoding|PreAVX10v2Encoding
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 07e68b4..e4d319e 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -264,7 +264,6 @@ void putM_X()
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K },
- { 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@@ -1079,12 +1078,6 @@ void putFP16_2()
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str());
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str());
}
- {
- uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
- std::string s = type2String(type);
- printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str());
- printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str());
- }
}
void putFP16()
diff --git a/test/Makefile b/test/Makefile
index 8313a6c..cf5c716 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -60,7 +60,7 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
-TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt minmax.txt saturation.txt
+TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
xed_test:
@set -e; \
for target in $(addprefix avx10/, $(TEST_FILES)); do \
diff --git a/test/avx10/misc.txt b/test/avx10/misc.txt
index 7c969bf..6f5c156 100644
--- a/test/avx10/misc.txt
+++ b/test/avx10/misc.txt
@@ -1,3 +1,4 @@
+// AVX10 integer and FP16 VNNI, media and zero-extending
vdpphps(xm1, xm2, xm3);
vdpphps(xm1, xm2, ptr[rax+128]);
vdpphps(xm1, xm2, ptr_b[rax+128]);
@@ -168,5 +169,11 @@ vpdpwuuds(zm1, zm2, ptr_b[rax+128]);
//
vmovd(xm10, xm20);
+vmovd(xm1, xm2);
vmovd(xm10, ptr[rax+128]);
vmovd(ptr[rax+128], xm30);
+//
+vmovw(xm1, xm20);
+vmovw(xm1, xm2);
+vmovw(xm3, ptr [rax+0x40]);
+vmovw(ptr [rax+0x40], xm7);
diff --git a/test/avx10/old.txt b/test/avx10/old.txt
index 9e4f097..f5a143c 100644
--- a/test/avx10/old.txt
+++ b/test/avx10/old.txt
@@ -355,10 +355,6 @@ vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6);
vmovsh(xmm1|k1|T_z, ptr [rax+0x40]);
vmovsh(ptr [rax+0x40]|k1, xmm1);
vmovsh(xmm1|k2|T_z, xmm3, xmm5);
-vmovw(xmm1, r13d);
-vmovw(xmm3, ptr [rax+0x40]);
-vmovw(r9d, xmm1);
-vmovw(ptr [rax+0x40], xmm7);
vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]);
vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3);
diff --git a/test/test_by_xed.cpp b/test/test_by_xed.cpp
index 71b5137..af39296 100644
--- a/test/test_by_xed.cpp
+++ b/test/test_by_xed.cpp
@@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096*8)
{
- setDefaultEncoding(EvexEncoding, AVX10p2Encoding);
+ setDefaultEncoding(EvexEncoding, AVX10v2Encoding);
#include "tmp.cpp"
}
};
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index a3d1fca..5367d83 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -1674,8 +1674,8 @@ typedef enum {
DefaultEncoding,
VexEncoding,
EvexEncoding,
- AVX512Encoding = EvexEncoding,
- AVX10p2Encoding
+ PreAVX10v2Encoding = EvexEncoding,
+ AVX10v2Encoding
} PreferredEncoding;
class CodeGenerator : public CodeArray {
@@ -3177,9 +3177,9 @@ public:
#endif
// set default encoding
- // vnniEnc : control AVX512_VNNI (evex:default) or AVX-VNNI (vex)
- // avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (AVX10p2Encoding)
- void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)
+ // vnniEnc : selects AVX512_VNNI (EvexEncoding, the default) or AVX-VNNI (VexEncoding)
+ // avx10Enc : for vmpsadbw etc., selects AVX-VNNI-INT8/AVX512-FP16 (PreAVX10v2Encoding, the default) or AVX10.2 (AVX10v2Encoding)
+ void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding)
{ defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; } // slot 0 holds vnniEnc, slot 1 holds avx10Enc
void bswap(const Reg32e& r)
@@ -3194,7 +3194,8 @@ public:
}
db(0xC8 + (idx & 7));
}
- void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
+ // AVX10 zero-extending for vmovd, vmovw
+ void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding encoding, int bit)
{
const Operand *p1 = &op1;
const Operand *p2 = &op2;
@@ -3208,18 +3209,32 @@ public:
std::swap(p1, p2);
rev = !rev;
}
- if (getEncoding(encoding, 1) == AVX10p2Encoding) {
- if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) {
- opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|(rev ? T_F3 : T_66)|T_MUST_EVEX|T_0F|T_EW0|T_N4, rev ? 0x7E : 0xD6);
- return;
- }
+ int sel = -1;
+ if (getEncoding(encoding, 1) == AVX10v2Encoding) {
+ if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
} else {
- if ((p1->isREG(32) || p1->isMEM()) && p2->isXMM()) {
- opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|T_66|T_0F|T_W0|T_N4, rev ? 0x6E : 0x7E);
- return;
- }
+ if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
}
- XBYAK_THROW(ERR_BAD_COMBINATION)
+ if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
+ opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
+ }
+ void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding) // forwards to opAVX10ZeroExt with the vmovd type/opcode tables
+ {
+ const uint64_t typeTbl[] = { // indexed by sel in opAVX10ZeroExt: [0],[1] = non-AVX10.2 path (rev = false, true); [2],[3] = AVX10v2Encoding path (rev = false, true)
+ T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512 (identical flags for both values of rev; only the opcode differs)
+ T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2 (T_66 when rev is false, T_F3 when rev is true)
+ };
+ const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E }; // opcode bytes, in the same index order as typeTbl
+ opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 32); // 32: register width tested with isREG(bit) on the non-AVX10.2 path
+ }
+ void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding) // forwards to opAVX10ZeroExt with the vmovw type/opcode tables
+ {
+ const uint64_t typeTbl[] = { // indexed by sel in opAVX10ZeroExt: [0],[1] = non-AVX10.2 path (rev = false, true); [2],[3] = AVX10v2Encoding path (rev = false, true)
+ T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16 (identical flags for both values of rev)
+ T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2 (identical flags for both values of rev)
+ };
+ const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E }; // opcode bytes, in the same index order as typeTbl; rev alone selects 0x7E vs 0x6E
+ opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 16|32|64); // 16|32|64: register widths tested with isREG(bit) on the non-AVX10.2 path
}
/*
use single byte nop if useMultiByteNop = false
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index cea4e61..314bb13 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -2422,9 +2422,6 @@ void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F
void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_M_K, 0x11); }
void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
-void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
-void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
-void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A|T_YMM, 0x42, encoding, imm, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1); }
void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); }
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }