#include #include #include #include #include #include #include #include using namespace Xbyak; CYBOZU_TEST_AUTO(ymm_with_sae) { struct Code : Xbyak::CodeGenerator { Code() { vaddpd(ymm1, ymm2, ymm3 |T_rn_sae); vaddph(ymm1, ymm2, ymm3 |T_rn_sae); vaddps(ymm1, ymm2, ymm3 |T_rn_sae); vcmppd(k1, ymm2, ymm3 |T_sae, 3); vcmpph(k1, ymm2, ymm3 |T_sae, 3); vcmpps(k1, ymm2, ymm3 |T_sae, 3); vcvtdq2ph(xmm1, ymm2 |T_rn_sae); vcvtdq2ps(ymm1, ymm2 |T_rn_sae); vcvtpd2dq(xmm1, ymm2 |T_rn_sae); vcvtpd2ph(xmm1, ymm2 |T_rn_sae); vcvtpd2ps(xmm1, ymm2 |T_rn_sae); vcvtpd2qq(ymm1, ymm2 |T_rn_sae); vcvtpd2udq(xmm1, ymm2 |T_rn_sae); vcvtpd2uqq(ymm1, ymm2 |T_rn_sae); vcvtph2dq(ymm1, xmm2 |T_rn_sae); vcvtph2pd(ymm1, xmm2 |T_sae); vcvtph2ps(ymm1, xmm2 |T_sae); vcvtph2psx(ymm1, xmm2 |T_sae); vcvtph2qq(ymm1, xmm2 |T_rn_sae); vcvtph2udq(ymm1, xmm2 |T_rn_sae); vcvtph2uqq(ymm1, xmm2 |T_rn_sae); vcvtph2uw(ymm1, ymm2 |T_rn_sae); vcvtph2w(ymm1, ymm2 |T_rn_sae); vcvtps2dq(ymm1, ymm2 |T_rn_sae); vcvtps2pd(ymm1, xmm2 |T_sae); vcvtps2ph(xmm1, ymm2 |T_sae, 3); vcvtps2phx(xmm1, ymm2 |T_rn_sae); vcvtps2qq(ymm1, xmm2 |T_rn_sae); vcvtps2udq(ymm1, ymm2 |T_rn_sae); vcvtps2uqq(ymm1, xmm2 |T_rn_sae); vcvtqq2pd(ymm1, ymm2 |T_rn_sae); vcvtqq2ph(xmm1, ymm2 |T_rn_sae); vcvtqq2ps(xmm1, ymm2 |T_rn_sae); vcvttpd2dq(xmm1, ymm2 |T_sae); vcvttpd2qq(ymm1, ymm2 |T_sae); vcvttpd2udq(xmm1, ymm2 |T_sae); vcvttpd2uqq(ymm1, ymm2 |T_sae); vcvttph2dq(ymm1, xmm2 |T_sae); vcvttph2qq(ymm1, xmm2 |T_sae); vcvttph2udq(ymm1, xmm2 |T_sae); vcvttph2uqq(ymm1, xmm2 |T_sae); vcvttph2uw(ymm1, ymm2 |T_sae); vcvttph2w(ymm1, ymm2 |T_sae); vcvttps2dq(ymm1, ymm2 |T_sae); vcvttps2qq(ymm1, xmm2 |T_sae); vcvttps2udq(ymm1, ymm2 |T_sae); vcvttps2uqq(ymm1, xmm2 |T_sae); vcvtudq2ph(xmm1, ymm2 |T_rn_sae); vcvtudq2ps(ymm1, ymm2 |T_rn_sae); vcvtuqq2pd(ymm1, ymm2 |T_rn_sae); vcvtuqq2ph(xmm1, ymm2 |T_rn_sae); vcvtuqq2ps(xmm1, ymm2 |T_rn_sae); vcvtuw2ph(ymm1, ymm2 |T_rn_sae); vcvtw2ph(ymm1, ymm2 |T_rn_sae); vdivpd(ymm1, ymm2, ymm3 |T_rn_sae); vdivph(ymm1, ymm2, ymm3 |T_rn_sae); vdivps(ymm1, ymm2, ymm3 |T_rn_sae); vfcmaddcph(ymm1, ymm2, ymm3 |T_rn_sae); vfcmulcph(ymm1, ymm2, ymm3 |T_rn_sae); vfixupimmpd(ymm1, ymm2, ymm3 |T_sae, 3); vfixupimmps(ymm1, ymm2, ymm3 |T_sae, 3); vfmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddcph(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmaddsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); vfmsubadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); vfmulcph(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae); vfnmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae); vfnmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae); vgetexppd(ymm1, ymm2 |T_sae); vgetexpph(ymm1, ymm2 |T_sae); vgetexpps(ymm1, ymm2 |T_sae); vgetmantpd(ymm1, ymm2 |T_sae, 3); vgetmantph(ymm1, ymm2 |T_sae, 3); vgetmantps(ymm1, ymm2 |T_sae, 3); vmaxpd(ymm1, ymm2, ymm3 |T_sae); vmaxph(ymm1, ymm2, ymm3 |T_sae); vmaxps(ymm1, ymm2, ymm3 |T_sae); vminpd(ymm1, ymm2, ymm3 |T_sae); vminph(ymm1, ymm2, ymm3 |T_sae); vminps(ymm1, ymm2, ymm3 |T_sae); vmulpd(ymm1, ymm2, ymm3 |T_rn_sae); vmulph(ymm1, ymm2, ymm3 |T_rn_sae); vmulps(ymm1, ymm2, ymm3 |T_rn_sae); vrangepd(ymm1, ymm2, ymm3 |T_sae, 3); vrangeps(ymm1, ymm2, ymm3 |T_sae, 3); vreducepd(ymm1, ymm2 |T_sae, 3); vreduceph(ymm1, ymm2 |T_sae, 3); vreduceps(ymm1, ymm2 |T_sae, 3); vrndscalepd(ymm1, ymm2 |T_sae, 3); vrndscaleph(ymm1, ymm2 |T_sae, 3); vrndscaleps(ymm1, ymm2 |T_sae, 3); vscalefpd(ymm1, ymm2, ymm3 |T_rn_sae); vscalefph(ymm1, ymm2, ymm3 |T_rn_sae); vscalefps(ymm1, ymm2, ymm3 |T_rn_sae); vsqrtpd(ymm1, ymm2 |T_rn_sae); vsqrtph(ymm1, ymm2 |T_rn_sae); vsqrtps(ymm1, ymm2 |T_rn_sae); vsubpd(ymm1, ymm2, ymm3 |T_rn_sae); vsubph(ymm1, ymm2, ymm3 |T_rn_sae); vsubps(ymm1, ymm2, ymm3 |T_rn_sae); } } c; const uint8_t tbl[] = { 0x62, 0xf1, 0xe9, 0x18, 0x58, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x58, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x58, 0xcb, 0x62, 0xf1, 0xe9, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf3, 0x68, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf1, 0x68, 0x18, 0xc2, 0xcb, 0x03, 0x62, 0xf5, 0x78, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xfb, 0x18, 0xe6, 0xca, 0x62, 0xf5, 0xf9, 0x18, 0x5a, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x5a, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x7b, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x79, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x5b, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x5a, 0xca, 0x62, 0xf2, 0x79, 0x18, 0x13, 0xca, 0x62, 0xf6, 0x79, 0x18, 0x13, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7b, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x79, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x7d, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7d, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x5a, 0xca, 0x62, 0xf3, 0x79, 0x18, 0x1d, 0xd1, 0x03, 0x62, 0xf5, 0x79, 0x18, 0x1d, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x7b, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x79, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x79, 0xca, 0x62, 0xf1, 0xfa, 0x18, 0xe6, 0xca, 0x62, 0xf5, 0xf8, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0xe6, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xf8, 0x18, 0x78, 0xca, 0x62, 0xf1, 0xf9, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x7a, 0x18, 0x5b, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7a, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x7c, 0xca, 0x62, 0xf5, 0x79, 0x18, 0x7c, 0xca, 0x62, 0xf1, 0x7a, 0x18, 0x5b, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x78, 0xca, 0x62, 0xf1, 0x79, 0x18, 0x78, 0xca, 0x62, 0xf5, 0x7b, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0x7b, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xfa, 0x18, 0x7a, 0xca, 0x62, 0xf5, 0xfb, 0x18, 0x7a, 0xca, 0x62, 0xf1, 0xfb, 0x18, 0x7a, 0xca, 0x62, 0xf5, 0x7b, 0x18, 0x7d, 0xca, 0x62, 0xf5, 0x7a, 0x18, 0x7d, 0xca, 0x62, 0xf1, 0xe9, 0x18, 0x5e, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5e, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5e, 0xcb, 0x62, 0xf6, 0x6b, 0x18, 0x56, 0xcb, 0x62, 0xf6, 0x6b, 0x18, 0xd6, 0xcb, 0x62, 0xf3, 0xe9, 0x18, 0x54, 0xcb, 0x03, 0x62, 0xf3, 0x69, 0x18, 0x54, 0xcb, 0x03, 0x62, 0xf2, 0xe9, 0x18, 0x98, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x98, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x98, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa8, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa8, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa8, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb8, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb8, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xb8, 0xcb, 0x62, 0xf6, 0x6a, 0x18, 0x56, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x96, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x96, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x96, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa6, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa6, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb6, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb6, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xb6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9a, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x9a, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9a, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xaa, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xaa, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xaa, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xba, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xba, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xba, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x97, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x97, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x97, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xa7, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xa7, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xa7, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xb7, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xb7, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xb7, 0xcb, 0x62, 0xf6, 0x6a, 0x18, 0xd6, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9c, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x9c, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9c, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xac, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xac, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xac, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xbc, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xbc, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xbc, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0x9e, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x9e, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x9e, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xae, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xae, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xae, 0xcb, 0x62, 0xf2, 0xe9, 0x18, 0xbe, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0xbe, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0xbe, 0xcb, 0x62, 0xf2, 0xf9, 0x18, 0x42, 0xca, 0x62, 0xf6, 0x79, 0x18, 0x42, 0xca, 0x62, 0xf2, 0x79, 0x18, 0x42, 0xca, 0x62, 0xf3, 0xf9, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x26, 0xca, 0x03, 0x62, 0xf1, 0xe9, 0x18, 0x5f, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5f, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5f, 0xcb, 0x62, 0xf1, 0xe9, 0x18, 0x5d, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5d, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5d, 0xcb, 0x62, 0xf1, 0xe9, 0x18, 0x59, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x59, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x59, 0xcb, 0x62, 0xf3, 0xe9, 0x18, 0x50, 0xcb, 0x03, 0x62, 0xf3, 0x69, 0x18, 0x50, 0xcb, 0x03, 0x62, 0xf3, 0xf9, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x56, 0xca, 0x03, 0x62, 0xf3, 0xf9, 0x18, 0x09, 0xca, 0x03, 0x62, 0xf3, 0x78, 0x18, 0x08, 0xca, 0x03, 0x62, 0xf3, 0x79, 0x18, 0x08, 0xca, 0x03, 0x62, 0xf2, 0xe9, 0x18, 0x2c, 0xcb, 0x62, 0xf6, 0x69, 0x18, 0x2c, 0xcb, 0x62, 0xf2, 0x69, 0x18, 0x2c, 0xcb, 0x62, 0xf1, 0xf9, 0x18, 0x51, 0xca, 0x62, 0xf5, 0x78, 0x18, 0x51, 0xca, 0x62, 0xf1, 0x78, 0x18, 0x51, 0xca, 0x62, 0xf1, 0xe9, 0x18, 0x5c, 0xcb, 0x62, 0xf5, 0x68, 0x18, 0x5c, 0xcb, 0x62, 0xf1, 0x68, 0x18, 0x5c, 0xcb, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } CYBOZU_TEST_AUTO(vmpsadbw) { struct Code : Xbyak::CodeGenerator { Code() { setDefaultEncoding(); vmpsadbw(xm1, xm3, xm15, 3); // vex(avx) vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2) setDefaultEncoding(VexEncoding, EvexEncoding); vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2) vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2) } } c; const uint8_t tbl[] = { 0xc4, 0xc3, 0x61, 0x42, 0xcf, 0x03, 0xc4, 0xe3, 0x65, 0x42, 0x88, 0x80, 0x00, 0x00, 0x00, 0x03, 0x62, 0xd3, 0x66, 0x28, 0x42, 0xcf, 0x03, 0x62, 0xf3, 0x66, 0x28, 0x42, 0x48, 0x04, 0x03, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); }