about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <[email protected]> 2023-12-19 17:03:19 +0900
committer: MITSUNARI Shigeo <[email protected]> 2023-12-19 17:03:19 +0900
commit: d9c7c992f1fb4d68453f5d508e5a287c85075518 (patch)
tree: 08ddba74f04e98efae054e6ac7149aaa84d66807
parent: cd5231de096408cb82c747761e55d80188ac639b (diff)
download: xbyak-d9c7c992f1fb4d68453f5d508e5a287c85075518.tar.gz
xbyak-d9c7c992f1fb4d68453f5d508e5a287c85075518.zip
add aesdecwide{128,256}kl
-rw-r--r-- gen/gen_code.cpp 16
-rw-r--r-- test/apx.cpp 14
-rw-r--r-- xbyak/xbyak_mnemonic.h 2
3 files changed, 23 insertions, 9 deletions
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index cdf015c..52baff9 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -2020,21 +2020,21 @@ void put64()
uint64_t type1;
uint64_t type2;
uint8_t code;
- int n;
+ int idx;
} tbl[] = {
- { "aesdec128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDD, 2 },
- { "aesdec256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDF, 2 },
+ { "aesdec128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDD, 8 },
+ { "aesdec256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDF, 8 },
+ { "aesdecwide128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 1 },
+ { "aesdecwide256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 3 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string s1 = type2String(p->type1);
std::string s2 = type2String(p->type2);
- switch (p->n) {
- case 1:
- break;
- case 2:
+ if (p->idx == 8) {
printf("void %s(const Xmm& x, const Address& addr) { opAESKL(&x, addr, %s, %s, 0x%02X); }\n", p->name, s1.c_str(), s2.c_str(), p->code);
- break;
+ } else {
+ printf("void %s(const Address& addr) { opAESKL(&xmm%d, addr, %s, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), s2.c_str(), p->code);
}
}
}
diff --git a/test/apx.cpp b/test/apx.cpp
index fc29653..ab3ca0a 100644
--- a/test/apx.cpp
+++ b/test/apx.cpp
@@ -1783,9 +1783,15 @@ CYBOZU_TEST_AUTO(aeskl)
aesdec128kl(xmm15, ptr[rax+rcx*4+0x12]);
aesdec128kl(xmm15, ptr[r30+r29*8+0x34]);
-
aesdec256kl(xmm15, ptr[rax+rcx*4+0x12]);
aesdec256kl(xmm15, ptr[r30+r29*8+0x34]);
+
+ aesdecwide128kl(ptr[rax+rcx*4+0x12]);
+ aesdecwide128kl(ptr[r30+r29*8+0x34]);
+
+ aesdecwide256kl(ptr[rax+rcx*4+0x12]);
+ aesdecwide256kl(ptr[r30+r29*8+0x34]);
+
}
} c;
const uint8_t tbl[] = {
@@ -1795,6 +1801,12 @@ CYBOZU_TEST_AUTO(aeskl)
// aesdec256kl
0xf3, 0x44, 0x0f, 0x38, 0xdf, 0x7c, 0x88, 0x12,
0x62, 0x1c, 0x7a, 0x08, 0xdf, 0x7c, 0xee, 0x34,
+ // aesdecwide128kl
+ 0xf3, 0x0f, 0x38, 0xd8, 0x4c, 0x88, 0x12,
+ 0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x4c, 0xee, 0x34,
+ // aesdecwide256kl
+ 0xf3, 0x0f, 0x38, 0xd8, 0x5c, 0x88, 0x12,
+ 0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x5c, 0xee, 0x34,
};
const size_t n = sizeof(tbl);
CYBOZU_TEST_EQUAL(c.getSize(), n);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 2ea7699..915f938 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1928,6 +1928,8 @@ void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); }
void aesdec128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDD); }
void aesdec256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDF); }
+void aesdecwide128kl(const Address& addr) { opAESKL(&xmm1, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
+void aesdecwide256kl(const Address& addr) { opAESKL(&xmm3, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }
void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }