/**
 * XMM opcodes
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) ?-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/xmm.d, backend/_xmm.d)
 */

module dmd.backend.xmm;

// Online documentation: https://dlang.org/phobos/dmd_backend_xmm.html

@safe:

/*
 * Opcode constants.
 *
 * Each constant packs the prefix and opcode bytes of an instruction into one
 * integer, most significant byte first, so the value reads the same as the
 * byte sequence quoted in its trailing comment (F3 0F 58 <-> 0xF3_0F_58).
 * The underscores in the literals only mark byte boundaries; they do not
 * affect the value.
 *
 * PSLLDQ and PSRLDQ additionally carry their ModRM /digit (7 and 3) in the
 * top byte of the constant.
 *
 * Some names share a value (LODHPS/MOVLHPS, LODLPS/MOVHLPS, PEXTRD/PEXTRQ,
 * PINSRD/PINSRQ).
 * NOTE(review): presumably these are told apart by operand form or REX.W at
 * the point of use - confirm against the code generator.
 */
enum
{
    // Add
    ADDSS = 0xF3_0F_58,             // ADDSS xmm1, xmm2/mem32    F3 0F 58 /r
    ADDSD = 0xF2_0F_58,             // ADDSD xmm1, xmm2/mem64    F2 0F 58 /r
    ADDPS = 0x00_0F_58,             // ADDPS xmm1, xmm2/mem128      0F 58 /r
    ADDPD = 0x66_0F_58,             // ADDPD xmm1, xmm2/mem128   66 0F 58 /r
    PADDB = 0x66_0F_FC,             // PADDB xmm1, xmm2/mem128   66 0F FC /r
    PADDW = 0x66_0F_FD,             // PADDW xmm1, xmm2/mem128   66 0F FD /r
    PADDD = 0x66_0F_FE,             // PADDD xmm1, xmm2/mem128   66 0F FE /r
    PADDQ = 0x66_0F_D4,             // PADDQ xmm1, xmm2/mem128   66 0F D4 /r

    // Subtract
    SUBSS = 0xF3_0F_5C,             // SUBSS xmm1, xmm2/mem32    F3 0F 5C /r
    SUBSD = 0xF2_0F_5C,             // SUBSD xmm1, xmm2/mem64    F2 0F 5C /r
    SUBPS = 0x00_0F_5C,             // SUBPS xmm1, xmm2/mem128      0F 5C /r
    SUBPD = 0x66_0F_5C,             // SUBPD xmm1, xmm2/mem128   66 0F 5C /r
    PSUBB = 0x66_0F_F8,             // PSUBB xmm1, xmm2/mem128   66 0F F8 /r
    PSUBW = 0x66_0F_F9,             // PSUBW xmm1, xmm2/mem128   66 0F F9 /r
    PSUBD = 0x66_0F_FA,             // PSUBD xmm1, xmm2/mem128   66 0F FA /r
    PSUBQ = 0x66_0F_FB,             // PSUBQ xmm1, xmm2/mem128   66 0F FB /r

    // Multiply
    MULSS  = 0xF3_0F_59,            // MULSS xmm1, xmm2/mem32    F3 0F 59 /r
    MULSD  = 0xF2_0F_59,            // MULSD xmm1, xmm2/mem64    F2 0F 59 /r
    MULPS  = 0x00_0F_59,            // MULPS xmm1, xmm2/mem128      0F 59 /r
    MULPD  = 0x66_0F_59,            // MULPD xmm1, xmm2/mem128   66 0F 59 /r
    PMULLW = 0x66_0F_D5,            // PMULLW xmm1, xmm2/mem128  66 0F D5 /r

    // Divide
    DIVSS = 0xF3_0F_5E,             // DIVSS xmm1, xmm2/mem32    F3 0F 5E /r
    DIVSD = 0xF2_0F_5E,             // DIVSD xmm1, xmm2/mem64    F2 0F 5E /r
    DIVPS = 0x00_0F_5E,             // DIVPS xmm1, xmm2/mem128      0F 5E /r
    DIVPD = 0x66_0F_5E,             // DIVPD xmm1, xmm2/mem128   66 0F 5E /r

    // Integer bitwise
    PAND = 0x66_0F_DB,              // PAND xmm1, xmm2/mem128    66 0F DB /r
    POR  = 0x66_0F_EB,              // POR  xmm1, xmm2/mem128    66 0F EB /r

    // Unordered compare
    UCOMISS = 0x00_0F_2E,           // UCOMISS xmm1, xmm2/mem32     0F 2E /r
    UCOMISD = 0x66_0F_2E,           // UCOMISD xmm1, xmm2/mem64  66 0F 2E /r

    // Floating point XOR
    XORPS = 0x00_0F_57,             // XORPS xmm1, xmm2/mem128      0F 57 /r
    XORPD = 0x66_0F_57,             // XORPD xmm1, xmm2/mem128   66 0F 57 /r

    // Use STO and LOD instead of MOV to distinguish the direction
    STOSS  = 0xF3_0F_11,            // MOVSS xmm1/mem32, xmm2    F3 0F 11 /r
    STOSD  = 0xF2_0F_11,            // MOVSD xmm1/mem64, xmm2    F2 0F 11 /r
    STOAPS = 0x00_0F_29,            // MOVAPS xmm1/mem128, xmm2     0F 29 /r
    STOAPD = 0x66_0F_29,            // MOVAPD xmm1/mem128, xmm2  66 0F 29 /r
    STODQA = 0x66_0F_7F,            // MOVDQA xmm1/mem128, xmm2  66 0F 7F /r
    STOD   = 0x66_0F_7E,            // MOVD reg/mem64, xmm       66 0F 7E /r
    STOQ   = 0x66_0F_D6,            // MOVQ xmm1/mem64, xmm2     66 0F D6 /r

    LODSS  = 0xF3_0F_10,            // MOVSS xmm1, xmm2/mem32    F3 0F 10 /r
    LODSD  = 0xF2_0F_10,            // MOVSD xmm1, xmm2/mem64    F2 0F 10 /r
    LODAPS = 0x00_0F_28,            // MOVAPS xmm1, xmm2/mem128     0F 28 /r
    LODAPD = 0x66_0F_28,            // MOVAPD xmm1, xmm2/mem128  66 0F 28 /r
    LODDQA = 0x66_0F_6F,            // MOVDQA xmm1, xmm2/mem128  66 0F 6F /r
    LODD   = 0x66_0F_6E,            // MOVD xmm, reg/mem64       66 0F 6E /r
    LODQ   = 0xF3_0F_7E,            // MOVQ xmm1, xmm2/mem64     F3 0F 7E /r

    LODDQU   = 0xF3_0F_6F,          // MOVDQU xmm1, xmm2/mem128  F3 0F 6F /r
    STODQU   = 0xF3_0F_7F,          // MOVDQU xmm1/mem128, xmm2  F3 0F 7F /r
    MOVDQ2Q  = 0xF2_0F_D6,          // MOVDQ2Q mmx, xmm          F2 0F D6 /r
    LODHPD   = 0x66_0F_16,          // MOVHPD xmm, mem64         66 0F 16 /r
    STOHPD   = 0x66_0F_17,          // MOVHPD mem64, xmm         66 0F 17 /r
    LODHPS   = 0x0F_16,             // MOVHPS xmm, mem64            0F 16 /r
    STOHPS   = 0x0F_17,             // MOVHPS mem64, xmm            0F 17 /r
    MOVLHPS  = 0x0F_16,             // MOVLHPS xmm1, xmm2           0F 16 /r
    LODLPD   = 0x66_0F_12,          // MOVLPD xmm, mem64         66 0F 12 /r
    STOLPD   = 0x66_0F_13,          // MOVLPD mem64, xmm         66 0F 13 /r
    MOVHLPS  = 0x0F_12,             // MOVHLPS xmm1, xmm2           0F 12 /r
    LODLPS   = 0x0F_12,             // MOVLPS xmm, mem64            0F 12 /r
    STOLPS   = 0x0F_13,             // MOVLPS mem64, xmm            0F 13 /r
    MOVMSKPD = 0x66_0F_50,          // MOVMSKPD reg32, xmm       66 0F 50 /r
    MOVMSKPS = 0x0F_50,             // MOVMSKPS reg32, xmm          0F 50 /r
    MOVNTDQ  = 0x66_0F_E7,          // MOVNTDQ mem128, xmm       66 0F E7 /r
    MOVNTI   = 0x0F_C3,             // MOVNTI m32,r32               0F C3 /r
                                    // MOVNTI m64,r64               0F C3 /r
    MOVNTPD  = 0x66_0F_2B,          // MOVNTPD mem128, xmm       66 0F 2B /r
    MOVNTPS  = 0x0F_2B,             // MOVNTPS mem128, xmm          0F 2B /r
    MOVNTQ   = 0x0F_E7,             // MOVNTQ m64, mmx              0F E7 /r
    MOVQ2DQ  = 0xF3_0F_D6,          // MOVQ2DQ xmm, mmx          F3 0F D6 /r
    LODUPD   = 0x66_0F_10,          // MOVUPD xmm1, xmm2/mem128  66 0F 10 /r
    STOUPD   = 0x66_0F_11,          // MOVUPD xmm1/mem128, xmm2  66 0F 11 /r
    LODUPS   = 0x0F_10,             // MOVUPS xmm1, xmm2/mem128     0F 10 /r
    STOUPS   = 0x0F_11,             // MOVUPS xmm1/mem128, xmm2     0F 11 /r

    PACKSSDW   = 0x66_0F_6B,        // PACKSSDW xmm1, xmm2/mem128        66 0F 6B /r
    PACKSSWB   = 0x66_0F_63,        // PACKSSWB xmm1, xmm2/mem128        66 0F 63 /r
    PACKUSWB   = 0x66_0F_67,        // PACKUSWB xmm1, xmm2/mem128        66 0F 67 /r
    PADDSB     = 0x66_0F_EC,        // PADDSB xmm1, xmm2/mem128          66 0F EC /r
    PADDSW     = 0x66_0F_ED,        // PADDSW xmm1, xmm2/mem128          66 0F ED /r
    PADDUSB    = 0x66_0F_DC,        // PADDUSB xmm1, xmm2/mem128         66 0F DC /r
    PADDUSW    = 0x66_0F_DD,        // PADDUSW xmm1, xmm2/mem128         66 0F DD /r
    PANDN      = 0x66_0F_DF,        // PANDN xmm1, xmm2/mem128           66 0F DF /r
    PCMPEQB    = 0x66_0F_74,        // PCMPEQB xmm1, xmm2/mem128         66 0F 74 /r
    PCMPEQD    = 0x66_0F_76,        // PCMPEQD xmm1, xmm2/mem128         66 0F 76 /r
    PCMPEQW    = 0x66_0F_75,        // PCMPEQW xmm1, xmm2/mem128         66 0F 75 /r
    PCMPGTB    = 0x66_0F_64,        // PCMPGTB xmm1, xmm2/mem128         66 0F 64 /r
    PCMPGTD    = 0x66_0F_66,        // PCMPGTD xmm1, xmm2/mem128         66 0F 66 /r
    PCMPGTW    = 0x66_0F_65,        // PCMPGTW xmm1, xmm2/mem128         66 0F 65 /r
    PMADDWD    = 0x66_0F_F5,        // PMADDWD xmm1, xmm2/mem128         66 0F F5 /r
    PSLLW      = 0x66_0F_F1,        // PSLLW xmm1, xmm2/mem128           66 0F F1 /r
                                    // PSLLW xmm, imm8                   66 0F 71 /6 ib
    PSLLD      = 0x66_0F_F2,        // PSLLD xmm1, xmm2/mem128           66 0F F2 /r
                                    // PSLLD xmm, imm8                   66 0F 72 /6 ib
    PSLLQ      = 0x66_0F_F3,        // PSLLQ xmm1, xmm2/mem128           66 0F F3 /r
                                    // PSLLQ xmm, imm8                   66 0F 73 /6 ib
    PSRAW      = 0x66_0F_E1,        // PSRAW xmm1, xmm2/mem128           66 0F E1 /r
                                    // PSRAW xmm, imm8                   66 0F 71 /4 ib
    PSRAD      = 0x66_0F_E2,        // PSRAD xmm1, xmm2/mem128           66 0F E2 /r
                                    // PSRAD xmm, imm8                   66 0F 72 /4 ib
    PSRLW      = 0x66_0F_D1,        // PSRLW xmm1, xmm2/mem128           66 0F D1 /r
                                    // PSRLW xmm, imm8                   66 0F 71 /2 ib
    PSRLD      = 0x66_0F_D2,        // PSRLD xmm1, xmm2/mem128           66 0F D2 /r
                                    // PSRLD xmm, imm8                   66 0F 72 /2 ib
    PSRLQ      = 0x66_0F_D3,        // PSRLQ xmm1, xmm2/mem128           66 0F D3 /r
                                    // PSRLQ xmm, imm8                   66 0F 73 /2 ib
    PSUBSB     = 0x66_0F_E8,        // PSUBSB xmm1, xmm2/mem128          66 0F E8 /r
    PSUBSW     = 0x66_0F_E9,        // PSUBSW xmm1, xmm2/mem128          66 0F E9 /r
    PSUBUSB    = 0x66_0F_D8,        // PSUBUSB xmm1, xmm2/mem128         66 0F D8 /r
    PSUBUSW    = 0x66_0F_D9,        // PSUBUSW xmm1, xmm2/mem128         66 0F D9 /r
    PUNPCKHBW  = 0x66_0F_68,        // PUNPCKHBW xmm1, xmm2/mem128       66 0F 68 /r
    PUNPCKHDQ  = 0x66_0F_6A,        // PUNPCKHDQ xmm1, xmm2/mem128       66 0F 6A /r
    PUNPCKHWD  = 0x66_0F_69,        // PUNPCKHWD xmm1, xmm2/mem128       66 0F 69 /r
    PUNPCKLBW  = 0x66_0F_60,        // PUNPCKLBW xmm1, xmm2/mem128       66 0F 60 /r
    PUNPCKLDQ  = 0x66_0F_62,        // PUNPCKLDQ xmm1, xmm2/mem128       66 0F 62 /r
    PUNPCKLWD  = 0x66_0F_61,        // PUNPCKLWD xmm1, xmm2/mem128       66 0F 61 /r
    PXOR       = 0x66_0F_EF,        // PXOR xmm1, xmm2/mem128            66 0F EF /r
    ANDPD      = 0x66_0F_54,        // ANDPD xmm1, xmm2/mem128           66 0F 54 /r
    ANDPS      = 0x0F_54,           // ANDPS xmm1, xmm2/mem128              0F 54 /r
    ANDNPD     = 0x66_0F_55,        // ANDNPD xmm1, xmm2/mem128          66 0F 55 /r
    ANDNPS     = 0x0F_55,           // ANDNPS xmm1, xmm2/mem128             0F 55 /r
    CMPPS      = 0x0F_C2,           // CMPPS xmm1, xmm2/mem128, imm8        0F C2 /r ib
    CMPPD      = 0x66_0F_C2,        // CMPPD xmm1, xmm2/mem128, imm8     66 0F C2 /r ib
    CMPSD      = 0xF2_0F_C2,        // CMPSD xmm1, xmm2/mem64, imm8      F2 0F C2 /r ib
    CMPSS      = 0xF3_0F_C2,        // CMPSS xmm1, xmm2/mem32, imm8      F3 0F C2 /r ib
    COMISD     = 0x66_0F_2F,        // COMISD xmm1, xmm2/mem64           66 0F 2F /r
    COMISS     = 0x0F_2F,           // COMISS xmm1, xmm2/mem32              0F 2F /r
    CVTDQ2PD   = 0xF3_0F_E6,        // CVTDQ2PD xmm1, xmm2/mem64         F3 0F E6 /r
    CVTDQ2PS   = 0x0F_5B,           // CVTDQ2PS xmm1, xmm2/mem128           0F 5B /r
    CVTPD2DQ   = 0xF2_0F_E6,        // CVTPD2DQ xmm1, xmm2/mem128        F2 0F E6 /r
    CVTPD2PI   = 0x66_0F_2D,        // CVTPD2PI mmx, xmm2/mem128         66 0F 2D /r
    CVTPD2PS   = 0x66_0F_5A,        // CVTPD2PS xmm1, xmm2/mem128        66 0F 5A /r
    CVTPI2PD   = 0x66_0F_2A,        // CVTPI2PD xmm, mmx/mem64           66 0F 2A /r
    CVTPI2PS   = 0x0F_2A,           // CVTPI2PS xmm, mmx/mem64              0F 2A /r
    CVTPS2DQ   = 0x66_0F_5B,        // CVTPS2DQ xmm1, xmm2/mem128        66 0F 5B /r
    CVTPS2PD   = 0x0F_5A,           // CVTPS2PD xmm1, xmm2/mem64            0F 5A /r
    CVTPS2PI   = 0x0F_2D,           // CVTPS2PI mmx, xmm/mem64              0F 2D /r
    CVTSD2SI   = 0xF2_0F_2D,        // CVTSD2SI reg32, xmm/mem64         F2 0F 2D /r
                                    // CVTSD2SI reg64, xmm/mem64         F2 0F 2D /r
    CVTSD2SS   = 0xF2_0F_5A,        // CVTSD2SS xmm1, xmm2/mem64         F2 0F 5A /r
    CVTSI2SD   = 0xF2_0F_2A,        // CVTSI2SD xmm, reg/mem32           F2 0F 2A /r
                                    // CVTSI2SD xmm, reg/mem64           F2 0F 2A /r
    CVTSI2SS   = 0xF3_0F_2A,        // CVTSI2SS xmm, reg/mem32           F3 0F 2A /r
                                    // CVTSI2SS xmm, reg/mem64           F3 0F 2A /r
    CVTSS2SD   = 0xF3_0F_5A,        // CVTSS2SD xmm1, xmm2/mem32         F3 0F 5A /r
    CVTSS2SI   = 0xF3_0F_2D,        // CVTSS2SI reg32, xmm2/mem32        F3 0F 2D /r
                                    // CVTSS2SI reg64, xmm2/mem32        F3 0F 2D /r
    CVTTPD2PI  = 0x66_0F_2C,        // CVTTPD2PI mmx, xmm/mem128         66 0F 2C /r
    CVTTPD2DQ  = 0x66_0F_E6,        // CVTTPD2DQ xmm1, xmm2/mem128       66 0F E6 /r
    CVTTPS2DQ  = 0xF3_0F_5B,        // CVTTPS2DQ xmm1, xmm2/mem128       F3 0F 5B /r
    CVTTPS2PI  = 0x0F_2C,           // CVTTPS2PI mmx, xmm/mem64             0F 2C /r
    CVTTSD2SI  = 0xF2_0F_2C,        // CVTTSD2SI reg32, xmm/mem64        F2 0F 2C /r
                                    // CVTTSD2SI reg64, xmm/mem64        F2 0F 2C /r
    CVTTSS2SI  = 0xF3_0F_2C,        // CVTTSS2SI reg32, xmm/mem32        F3 0F 2C /r
                                    // CVTTSS2SI reg64, xmm/mem32        F3 0F 2C /r
    MASKMOVDQU = 0x66_0F_F7,        // MASKMOVDQU xmm1, xmm2             66 0F F7 /r
    MASKMOVQ   = 0x0F_F7,           // MASKMOVQ mm1,mm2                     0F F7 /r
    MAXPD      = 0x66_0F_5F,        // MAXPD xmm1, xmm2/mem128           66 0F 5F /r
    MAXPS      = 0x0F_5F,           // MAXPS xmm1, xmm2/mem128              0F 5F /r
    MAXSD      = 0xF2_0F_5F,        // MAXSD xmm1, xmm2/mem64            F2 0F 5F /r
    MAXSS      = 0xF3_0F_5F,        // MAXSS xmm1, xmm2/mem32            F3 0F 5F /r
    MINPD      = 0x66_0F_5D,        // MINPD xmm1, xmm2/mem128           66 0F 5D /r
    MINPS      = 0x0F_5D,           // MINPS xmm1, xmm2/mem128              0F 5D /r
    MINSD      = 0xF2_0F_5D,        // MINSD xmm1, xmm2/mem64            F2 0F 5D /r
    MINSS      = 0xF3_0F_5D,        // MINSS xmm1, xmm2/mem32            F3 0F 5D /r
    ORPD       = 0x66_0F_56,        // ORPD xmm1, xmm2/mem128            66 0F 56 /r
    ORPS       = 0x0F_56,           // ORPS xmm1, xmm2/mem128               0F 56 /r
    PAVGB      = 0x66_0F_E0,        // PAVGB xmm1, xmm2/mem128           66 0F E0 /r
    PAVGW      = 0x66_0F_E3,        // PAVGW xmm1, xmm2/mem128           66 0F E3 /r
    PMAXSW     = 0x66_0F_EE,        // PMAXSW xmm1, xmm2/mem128          66 0F EE /r
    PINSRW     = 0x66_0F_C4,        // PINSRW xmm, reg32/mem16, imm8     66 0F C4 /r ib
    PMAXUB     = 0x66_0F_DE,        // PMAXUB xmm1, xmm2/mem128          66 0F DE /r
    PMINSW     = 0x66_0F_EA,        // PMINSW xmm1, xmm2/mem128          66 0F EA /r
    PMINUB     = 0x66_0F_DA,        // PMINUB xmm1, xmm2/mem128          66 0F DA /r
    PMOVMSKB   = 0x66_0F_D7,        // PMOVMSKB reg32, xmm               66 0F D7 /r
    PMULHUW    = 0x66_0F_E4,        // PMULHUW xmm1, xmm2/mem128         66 0F E4 /r
    PMULHW     = 0x66_0F_E5,        // PMULHW xmm1, xmm2/mem128          66 0F E5 /r
    PMULUDQ    = 0x66_0F_F4,        // PMULUDQ xmm1, xmm2/mem128         66 0F F4 /r
    PSADBW     = 0x66_0F_F6,        // PSADBW xmm1, xmm2/mem128          66 0F F6 /r
    PUNPCKHQDQ = 0x66_0F_6D,        // PUNPCKHQDQ xmm1, xmm2/mem128      66 0F 6D /r
    PUNPCKLQDQ = 0x66_0F_6C,        // PUNPCKLQDQ xmm1, xmm2/mem128      66 0F 6C /r
    RCPPS      = 0x0F_53,           // RCPPS xmm1, xmm2/mem128              0F 53 /r
    RCPSS      = 0xF3_0F_53,        // RCPSS xmm1, xmm2/mem32            F3 0F 53 /r
    RSQRTPS    = 0x0F_52,           // RSQRTPS xmm1, xmm2/mem128            0F 52 /r
    RSQRTSS    = 0xF3_0F_52,        // RSQRTSS xmm1, xmm2/mem32          F3 0F 52 /r
    SQRTPD     = 0x66_0F_51,        // SQRTPD xmm1, xmm2/mem128          66 0F 51 /r
    SHUFPD     = 0x66_0F_C6,        // SHUFPD xmm1, xmm2/mem128, imm8    66 0F C6 /r ib
    SHUFPS     = 0x0F_C6,           // SHUFPS xmm1, xmm2/mem128, imm8       0F C6 /r ib
    SQRTPS     = 0x0F_51,           // SQRTPS xmm1, xmm2/mem128             0F 51 /r
    SQRTSD     = 0xF2_0F_51,        // SQRTSD xmm1, xmm2/mem64           F2 0F 51 /r
    SQRTSS     = 0xF3_0F_51,        // SQRTSS xmm1, xmm2/mem32           F3 0F 51 /r
    UNPCKHPD   = 0x66_0F_15,        // UNPCKHPD xmm1, xmm2/mem128        66 0F 15 /r
    UNPCKHPS   = 0x0F_15,           // UNPCKHPS xmm1, xmm2/mem128           0F 15 /r
    UNPCKLPD   = 0x66_0F_14,        // UNPCKLPD xmm1, xmm2/mem128        66 0F 14 /r
    UNPCKLPS   = 0x0F_14,           // UNPCKLPS xmm1, xmm2/mem128           0F 14 /r

    PSHUFD  = 0x66_0F_70,           // PSHUFD xmm1, xmm2/mem128, imm8    66 0F 70 /r ib
    PSHUFHW = 0xF3_0F_70,           // PSHUFHW xmm1, xmm2/mem128, imm8   F3 0F 70 /r ib
    PSHUFLW = 0xF2_0F_70,           // PSHUFLW xmm1, xmm2/mem128, imm8   F2 0F 70 /r ib
    PSHUFW  = 0x0F_70,              // PSHUFW mm1, mm2/mem64, imm8          0F 70 /r ib
    PSLLDQ  = 0x07_66_0F_73,        // PSLLDQ xmm, imm8                  66 0F 73 /7 ib
    PSRLDQ  = 0x03_66_0F_73,        // PSRLDQ xmm, imm8                  66 0F 73 /3 ib

    PREFETCH = 0x0F_18,

    PEXTRW  = 0x66_0F_C5,           // PEXTRW reg32, xmm, imm8           66 0F C5 /r ib
    STMXCSR = 0x0F_AE,              // STMXCSR mem32                        0F AE /3

    // SSE3 Pentium 4 (Prescott)

    ADDSUBPD = 0x66_0F_D0,          // ADDSUBPD xmm1, xmm2/m128
    ADDSUBPS = 0xF2_0F_D0,
    HADDPD   = 0x66_0F_7C,
    HADDPS   = 0xF2_0F_7C,
    HSUBPD   = 0x66_0F_7D,
    HSUBPS   = 0xF2_0F_7D,
    MOVDDUP  = 0xF2_0F_12,
    MOVSHDUP = 0xF3_0F_16,
    MOVSLDUP = 0xF3_0F_12,
    LDDQU    = 0xF2_0F_F0,
    MONITOR  = 0x0F_01_C8,
    MWAIT    = 0x0F_01_C9,

    // SSSE3
    PALIGNR   = 0x66_0F_3A_0F,
    PHADDD    = 0x66_0F_38_02,
    PHADDW    = 0x66_0F_38_01,
    PHADDSW   = 0x66_0F_38_03,
    PABSB     = 0x66_0F_38_1C,
    PABSD     = 0x66_0F_38_1E,
    PABSW     = 0x66_0F_38_1D,
    PSIGNB    = 0x66_0F_38_08,
    PSIGND    = 0x66_0F_38_0A,
    PSIGNW    = 0x66_0F_38_09,
    PSHUFB    = 0x66_0F_38_00,
    PMADDUBSW = 0x66_0F_38_04,
    PMULHRSW  = 0x66_0F_38_0B,
    PHSUBD    = 0x66_0F_38_06,
    PHSUBW    = 0x66_0F_38_05,
    PHSUBSW   = 0x66_0F_38_07,

    // SSE4.1
    // See Intel SSE4 Programming Reference

    BLENDPD   = 0x66_0F_3A_0D,      // 66 0F 3A 0D /r ib  BLENDPD xmm1, xmm2/m128, imm8
    BLENDPS   = 0x66_0F_3A_0C,      // 66 0F 3A 0C /r ib  BLENDPS xmm1, xmm2/m128, imm8
    BLENDVPD  = 0x66_0F_38_15,      // 66 0F 38 15 /r     BLENDVPD xmm1, xmm2/m128, <XMM0>
    BLENDVPS  = 0x66_0F_38_14,      // 66 0F 38 14 /r     BLENDVPS xmm1, xmm2/m128, <XMM0>
    DPPD      = 0x66_0F_3A_41,
    DPPS      = 0x66_0F_3A_40,
    EXTRACTPS = 0x66_0F_3A_17,
    INSERTPS  = 0x66_0F_3A_21,
    MPSADBW   = 0x66_0F_3A_42,
    PBLENDVB  = 0x66_0F_38_10,
    PBLENDW   = 0x66_0F_3A_0E,
    PEXTRD    = 0x66_0F_3A_16,
    PEXTRQ    = 0x66_0F_3A_16,
    PINSRB    = 0x66_0F_3A_20,      // 66 0F 3A 20 /r ib  PINSRB xmm1, r32/m8, imm8
    PINSRD    = 0x66_0F_3A_22,
    PINSRQ    = 0x66_0F_3A_22,

    MOVNTDQA   = 0x66_0F_38_2A,
    PACKUSDW   = 0x66_0F_38_2B,
    PCMPEQQ    = 0x66_0F_38_29,
    PEXTRB     = 0x66_0F_3A_14,     // 66 0F 3A 14 /r ib        PEXTRB r32/m8, xmm2, imm8
                                    // 66 REX.W 0F 3A 14 /r ib  PEXTRB r64/m8, xmm2, imm8
    PHMINPOSUW = 0x66_0F_38_41,     // 66 0F 38 41 /r           PHMINPOSUW xmm1, xmm2/m128
    PMAXSB     = 0x66_0F_38_3C,
    PMAXSD     = 0x66_0F_38_3D,
    PMAXUD     = 0x66_0F_38_3F,
    PMAXUW     = 0x66_0F_38_3E,
    PMINSB     = 0x66_0F_38_38,
    PMINSD     = 0x66_0F_38_39,
    PMINUD     = 0x66_0F_38_3B,
    PMINUW     = 0x66_0F_38_3A,
    PMOVSXBW   = 0x66_0F_38_20,
    PMOVSXBD   = 0x66_0F_38_21,
    PMOVSXBQ   = 0x66_0F_38_22,
    PMOVSXWD   = 0x66_0F_38_23,
    PMOVSXWQ   = 0x66_0F_38_24,
    PMOVSXDQ   = 0x66_0F_38_25,
    PMOVZXBW   = 0x66_0F_38_30,
    PMOVZXBD   = 0x66_0F_38_31,
    PMOVZXBQ   = 0x66_0F_38_32,
    PMOVZXWD   = 0x66_0F_38_33,
    PMOVZXWQ   = 0x66_0F_38_34,
    PMOVZXDQ   = 0x66_0F_38_35,
    PMULDQ     = 0x66_0F_38_28,
    PMULLD     = 0x66_0F_38_40,
    PTEST      = 0x66_0F_38_17,     // 66 0F 38 17 /r           PTEST xmm1, xmm2/m128

    ROUNDPD = 0x66_0F_3A_09,        // 66 0F 3A 09 /r ib  ROUNDPD xmm1, xmm2/m128, imm8
    ROUNDPS = 0x66_0F_3A_08,
    ROUNDSD = 0x66_0F_3A_0B,
    ROUNDSS = 0x66_0F_3A_0A,

    // SSE4.2
    PCMPESTRI = 0x66_0F_3A_61,
    PCMPESTRM = 0x66_0F_3A_60,
    PCMPISTRI = 0x66_0F_3A_63,
    PCMPISTRM = 0x66_0F_3A_62,
    PCMPGTQ   = 0x66_0F_38_37,
    // CRC32

    // SSE4a (AMD only)
    // EXTRQ,INSERTQ,MOVNTSD,MOVNTSS

    // POPCNT and LZCNT (have their own CPUID bits)
    POPCNT = 0xF3_0F_B8,
    // LZCNT

    // AVX
    XGETBV         = 0x0F_01_D0,
    XSETBV         = 0x0F_01_D1,
    VBROADCASTSS   = 0x66_0F_38_18,
    VBROADCASTSD   = 0x66_0F_38_19,
    VBROADCASTF128 = 0x66_0F_38_1A,
    VINSERTF128    = 0x66_0F_3A_18,

    // AVX2
    VPBROADCASTB   = 0x66_0F_38_78,
    VPBROADCASTW   = 0x66_0F_38_79,
    VPBROADCASTD   = 0x66_0F_38_58,
    VPBROADCASTQ   = 0x66_0F_38_59,
    VBROADCASTI128 = 0x66_0F_38_5A,
    VINSERTI128    = 0x66_0F_3A_38,

    // AES
    AESENC          = 0x66_0F_38_DC,
    AESENCLAST      = 0x66_0F_38_DD,
    AESDEC          = 0x66_0F_38_DE,
    AESDECLAST      = 0x66_0F_38_DF,
    AESIMC          = 0x66_0F_38_DB,
    AESKEYGENASSIST = 0x66_0F_3A_DF,
}