// path: root/test/avx10_test.cpp
// blob: 5f742fe7e890c18e13aa404ce00b0ee45166841e
#include <stdio.h>
#include <string.h>
#include <string>
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include <cybozu/inttype.hpp>
#include <cybozu/test.hpp>
#include <algorithm>

using namespace Xbyak;

/*
	Encoding test for instructions whose last ymm/xmm register operand is
	tagged with T_rn_sae or T_sae.

	The constructor of Code emits every instruction once; the generated
	machine code is then compared, byte for byte, with the reference table
	tbl. The table is one flat byte stream, so it must list the encodings in
	exactly the same order as the emitter calls below; adding, removing or
	reordering a call requires the matching change in tbl.
	NOTE(review): judging by the file/test names this presumably covers the
	AVX10.2 256-bit embedded-rounding/SAE forms - confirm against the Xbyak docs.
*/
CYBOZU_TEST_AUTO(ymm_with_sae)
{
	// All code generation happens in the constructor; the instance c below
	// only serves as the buffer that is inspected afterwards.
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vaddph(ymm1, ymm2, ymm3 |T_rn_sae);
			vaddps(ymm1, ymm2, ymm3 |T_rn_sae);
			vcmppd(k1, ymm2, ymm3 |T_sae, 3);
			vcmpph(k1, ymm2, ymm3 |T_sae, 3);
			vcmpps(k1, ymm2, ymm3 |T_sae, 3);
			vcvtdq2ph(xmm1, ymm2 |T_rn_sae);
			vcvtdq2ps(ymm1, ymm2 |T_rn_sae);
			vcvtpd2dq(xmm1, ymm2 |T_rn_sae);
			vcvtpd2ph(xmm1, ymm2 |T_rn_sae);
			vcvtpd2ps(xmm1, ymm2 |T_rn_sae);
			vcvtpd2qq(ymm1, ymm2 |T_rn_sae);
			vcvtpd2udq(xmm1, ymm2 |T_rn_sae);
			vcvtpd2uqq(ymm1, ymm2 |T_rn_sae);
			vcvtph2dq(ymm1, xmm2 |T_rn_sae);
			vcvtph2pd(ymm1, xmm2 |T_sae);
			vcvtph2ps(ymm1, xmm2 |T_sae);
			vcvtph2psx(ymm1, xmm2 |T_sae);
			vcvtph2qq(ymm1, xmm2 |T_rn_sae);
			vcvtph2udq(ymm1, xmm2 |T_rn_sae);
			vcvtph2uqq(ymm1, xmm2 |T_rn_sae);
			vcvtph2uw(ymm1, ymm2 |T_rn_sae);
			vcvtph2w(ymm1, ymm2 |T_rn_sae);
			vcvtps2dq(ymm1, ymm2 |T_rn_sae);
			vcvtps2pd(ymm1, xmm2 |T_sae);
			vcvtps2ph(xmm1, ymm2 |T_sae, 3);
			vcvtps2phx(xmm1, ymm2 |T_rn_sae);
			vcvtps2qq(ymm1, xmm2 |T_rn_sae);
			vcvtps2udq(ymm1, ymm2 |T_rn_sae);
			vcvtps2uqq(ymm1, xmm2 |T_rn_sae);
			vcvtqq2pd(ymm1, ymm2 |T_rn_sae);
			vcvtqq2ph(xmm1, ymm2 |T_rn_sae);
			vcvtqq2ps(xmm1, ymm2 |T_rn_sae);
			vcvttpd2dq(xmm1, ymm2 |T_sae);
			vcvttpd2qq(ymm1, ymm2 |T_sae);
			vcvttpd2udq(xmm1, ymm2 |T_sae);
			vcvttpd2uqq(ymm1, ymm2 |T_sae);
			vcvttph2dq(ymm1, xmm2 |T_sae);
			vcvttph2qq(ymm1, xmm2 |T_sae);
			vcvttph2udq(ymm1, xmm2 |T_sae);
			vcvttph2uqq(ymm1, xmm2 |T_sae);
			vcvttph2uw(ymm1, ymm2 |T_sae);
			vcvttph2w(ymm1, ymm2 |T_sae);
			vcvttps2dq(ymm1, ymm2 |T_sae);
			vcvttps2qq(ymm1, xmm2 |T_sae);
			vcvttps2udq(ymm1, ymm2 |T_sae);
			vcvttps2uqq(ymm1, xmm2 |T_sae);
			vcvtudq2ph(xmm1, ymm2 |T_rn_sae);
			vcvtudq2ps(ymm1, ymm2 |T_rn_sae);
			vcvtuqq2pd(ymm1, ymm2 |T_rn_sae);
			vcvtuqq2ph(xmm1, ymm2 |T_rn_sae);
			vcvtuqq2ps(xmm1, ymm2 |T_rn_sae);
			vcvtuw2ph(ymm1, ymm2 |T_rn_sae);
			vcvtw2ph(ymm1, ymm2 |T_rn_sae);
			vdivpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vdivph(ymm1, ymm2, ymm3 |T_rn_sae);
			vdivps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfcmaddcph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfcmulcph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfixupimmpd(ymm1, ymm2, ymm3 |T_sae, 3);
			vfixupimmps(ymm1, ymm2, ymm3 |T_sae, 3);
			vfmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddcph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmaddsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmsubadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfmulcph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
			vfnmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
			vgetexppd(ymm1, ymm2 |T_sae);
			vgetexpph(ymm1, ymm2 |T_sae);
			vgetexpps(ymm1, ymm2 |T_sae);
			vgetmantpd(ymm1, ymm2 |T_sae, 3);
			vgetmantph(ymm1, ymm2 |T_sae, 3);
			vgetmantps(ymm1, ymm2 |T_sae, 3);
			vmaxpd(ymm1, ymm2, ymm3 |T_sae);
			vmaxph(ymm1, ymm2, ymm3 |T_sae);
			vmaxps(ymm1, ymm2, ymm3 |T_sae);
			vminpd(ymm1, ymm2, ymm3 |T_sae);
			vminph(ymm1, ymm2, ymm3 |T_sae);
			vminps(ymm1, ymm2, ymm3 |T_sae);
			vmulpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vmulph(ymm1, ymm2, ymm3 |T_rn_sae);
			vmulps(ymm1, ymm2, ymm3 |T_rn_sae);
			vrangepd(ymm1, ymm2, ymm3 |T_sae, 3);
			vrangeps(ymm1, ymm2, ymm3 |T_sae, 3);
			vreducepd(ymm1, ymm2 |T_sae, 3);
			vreduceph(ymm1, ymm2 |T_sae, 3);
			vreduceps(ymm1, ymm2 |T_sae, 3);
			vrndscalepd(ymm1, ymm2 |T_sae, 3);
			vrndscaleph(ymm1, ymm2 |T_sae, 3);
			vrndscaleps(ymm1, ymm2 |T_sae, 3);
			vscalefpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vscalefph(ymm1, ymm2, ymm3 |T_rn_sae);
			vscalefps(ymm1, ymm2, ymm3 |T_rn_sae);
			vsqrtpd(ymm1, ymm2 |T_rn_sae);
			vsqrtph(ymm1, ymm2 |T_rn_sae);
			vsqrtps(ymm1, ymm2 |T_rn_sae);
			vsubpd(ymm1, ymm2, ymm3 |T_rn_sae);
			vsubph(ymm1, ymm2, ymm3 |T_rn_sae);
			vsubps(ymm1, ymm2, ymm3 |T_rn_sae);
		}
	} c;
	// Reference machine code: the expected encodings of the calls above,
	// concatenated in call order (not aligned to instruction boundaries).
	const uint8_t tbl[] = {
		0x62, 0xf1, 0xe9, 0x18, 0x58, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x58, 0xcb, 0x62, 0xf1, 0x68, 0x18,
		0x58, 0xcb, 0x62, 0xf1, 0xe9, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf3, 0x68, 0x18, 0xc2, 0xcb, 0x03,
		0x62, 0xf1, 0x68, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf5, 0x78, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x78,
		0x18, 0x5b, 0xca, 0x62, 0xf1, 0xfb, 0x18, 0xe6, 0xca, 0x62, 0xf5, 0xf9, 0x18, 0x5a, 0xca, 0x62,
		0xf1, 0xf9, 0x18, 0x5a, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x7b, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x79,
		0xca, 0x62, 0xf1, 0xf9, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x5b, 0xca, 0x62, 0xf5, 0x78,
		0x18, 0x5a, 0xca, 0x62, 0xf2, 0x79, 0x18, 0x13, 0xca, 0x62, 0xf6, 0x79, 0x18, 0x13, 0xca, 0x62,
		0xf5, 0x79, 0x18, 0x7b, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x79,
		0xca, 0x62, 0xf5, 0x78, 0x18, 0x7d, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7d, 0xca, 0x62, 0xf1, 0x79,
		0x18, 0x5b, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x5a, 0xca, 0x62, 0xf3, 0x79, 0x18, 0x1d, 0xd1, 0x03,
		0x62, 0xf5, 0x79, 0x18, 0x1d, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x7b, 0xca, 0x62, 0xf1, 0x78, 0x18,
		0x79, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x79, 0xca, 0x62, 0xf1, 0xfa, 0x18, 0xe6, 0xca, 0x62, 0xf5,
		0xf8, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0xe6, 0xca,
		0x62, 0xf1, 0xf9, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x78, 0xca, 0x62, 0xf1, 0xf9, 0x18,
		0x78, 0xca, 0x62, 0xf5, 0x7a, 0x18, 0x5b, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7a, 0xca, 0x62, 0xf5,
		0x78, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x7c, 0xca,
		0x62, 0xf5, 0x79, 0x18, 0x7c, 0xca, 0x62, 0xf1, 0x7a, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x79, 0x18,
		0x7a, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x78, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x78, 0xca, 0x62, 0xf5,
		0x7b, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0x7b, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xfa, 0x18, 0x7a, 0xca,
		0x62, 0xf5, 0xfb, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xfb, 0x18, 0x7a, 0xca, 0x62, 0xf5, 0x7b, 0x18,
		0x7d, 0xca, 0x62, 0xf5, 0x7a, 0x18, 0x7d, 0xca, 0x62, 0xf1, 0xe9, 0x18, 0x5e, 0xcb, 0x62, 0xf5,
		0x68, 0x18, 0x5e, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5e, 0xcb, 0x62, 0xf6, 0x6b, 0x18, 0x56, 0xcb,
		0x62, 0xf6, 0x6b, 0x18, 0xd6, 0xcb, 0x62, 0xf3, 0xe9, 0x18, 0x54, 0xcb, 0x03, 0x62, 0xf3, 0x69,
		0x18, 0x54, 0xcb, 0x03, 0x62, 0xf2, 0xe9, 0x18, 0x98, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x98, 0xcb,
		0x62, 0xf2, 0x69, 0x18, 0x98, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa8, 0xcb, 0x62, 0xf6, 0x69, 0x18,
		0xa8, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa8, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb8, 0xcb, 0x62, 0xf6,
		0x69, 0x18, 0xb8, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xb8, 0xcb, 0x62, 0xf6, 0x6a, 0x18, 0x56, 0xcb,
		0x62, 0xf2, 0xe9, 0x18, 0x96, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x96, 0xcb, 0x62, 0xf2, 0x69, 0x18,
		0x96, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa6, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa6, 0xcb, 0x62, 0xf2,
		0x69, 0x18, 0xa6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb6, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb6, 0xcb,
		0x62, 0xf2, 0x69, 0x18, 0xb6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9a, 0xcb, 0x62, 0xf6, 0x69, 0x18,
		0x9a, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9a, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xaa, 0xcb, 0x62, 0xf6,
		0x69, 0x18, 0xaa, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xaa, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xba, 0xcb,
		0x62, 0xf6, 0x69, 0x18, 0xba, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xba, 0xcb, 0x62, 0xf2, 0xe9, 0x18,
		0x97, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x97, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x97, 0xcb, 0x62, 0xf2,
		0xe9, 0x18, 0xa7, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa7, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa7, 0xcb,
		0x62, 0xf2, 0xe9, 0x18, 0xb7, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb7, 0xcb, 0x62, 0xf2, 0x69, 0x18,
		0xb7, 0xcb, 0x62, 0xf6, 0x6a, 0x18, 0xd6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9c, 0xcb, 0x62, 0xf6,
		0x69, 0x18, 0x9c, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9c, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xac, 0xcb,
		0x62, 0xf6, 0x69, 0x18, 0xac, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xac, 0xcb, 0x62, 0xf2, 0xe9, 0x18,
		0xbc, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xbc, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xbc, 0xcb, 0x62, 0xf2,
		0xe9, 0x18, 0x9e, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x9e, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9e, 0xcb,
		0x62, 0xf2, 0xe9, 0x18, 0xae, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xae, 0xcb, 0x62, 0xf2, 0x69, 0x18,
		0xae, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xbe, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xbe, 0xcb, 0x62, 0xf2,
		0x69, 0x18, 0xbe, 0xcb, 0x62, 0xf2, 0xf9, 0x18, 0x42, 0xca, 0x62, 0xf6, 0x79, 0x18, 0x42, 0xca,
		0x62, 0xf2, 0x79, 0x18, 0x42, 0xca, 0x62, 0xf3, 0xf9, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf3, 0x78,
		0x18, 0x26, 0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf1, 0xe9, 0x18, 0x5f,
		0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5f, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5f, 0xcb, 0x62, 0xf1, 0xe9,
		0x18, 0x5d, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5d, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5d, 0xcb, 0x62,
		0xf1, 0xe9, 0x18, 0x59, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x59, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x59,
		0xcb, 0x62, 0xf3, 0xe9, 0x18, 0x50, 0xcb, 0x03, 0x62, 0xf3, 0x69, 0x18, 0x50, 0xcb, 0x03, 0x62,
		0xf3, 0xf9, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0x79,
		0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0xf9, 0x18, 0x09, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x08,
		0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x08, 0xca, 0x03, 0x62, 0xf2, 0xe9, 0x18, 0x2c, 0xcb, 0x62,
		0xf6, 0x69, 0x18, 0x2c, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x2c, 0xcb, 0x62, 0xf1, 0xf9, 0x18, 0x51,
		0xca, 0x62, 0xf5, 0x78, 0x18, 0x51, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x51, 0xca, 0x62, 0xf1, 0xe9,
		0x18, 0x5c, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5c, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5c, 0xcb,
	};
	// Number of reference bytes.
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	// Check the total length first, then the content byte by byte.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

/*
	Encoding test for vmpsadbw under two setDefaultEncoding() configurations.

	The first two calls are emitted after setDefaultEncoding() with its default
	arguments, the last two after setDefaultEncoding(VexEncoding, EvexEncoding).
	The generated bytes are compared with tbl, which has one row per call.
	NOTE(review): presumably the second argument of setDefaultEncoding() is the
	one that selects the encoding of vmpsadbw - confirm against xbyak.h.
*/
CYBOZU_TEST_AUTO(vmpsadbw)
{
	// All code generation happens in the constructor; c is inspected afterwards.
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			// default settings: the expected bytes below use the VEX form
			setDefaultEncoding();
			vmpsadbw(xm1, xm3, xm15, 3); // vex(avx)
			vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2)
			// switch settings: the expected bytes below use the EVEX form
			setDefaultEncoding(VexEncoding, EvexEncoding);
			vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2)
			vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2)
		}
	} c;
	// Expected machine code, one row per vmpsadbw call above (same order).
	const uint8_t tbl[] = {
		// leading 0xc4: VEX-prefixed, register operands
		0xc4, 0xc3, 0x61, 0x42, 0xcf, 0x03,
		// leading 0xc4: VEX-prefixed, rax+128 stored as a 32-bit displacement (0x00000080)
		0xc4, 0xe3, 0x65, 0x42, 0x88, 0x80, 0x00, 0x00, 0x00, 0x03,
		// leading 0x62: EVEX-prefixed, register operands
		0x62, 0xd3, 0x66, 0x28, 0x42, 0xcf, 0x03,
		// leading 0x62: EVEX-prefixed, rax+128 stored as the single byte 0x04
		// (compressed 8-bit displacement, 128 / 32)
		0x62, 0xf3, 0x66, 0x28, 0x42, 0x48, 0x04, 0x03,
	};
	// Number of reference bytes.
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	// Check the total length first, then the content byte by byte.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}