From 8a53207f2557fb7854cf6fc4da910fb00c1222ed Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Thu, 25 Jun 2026 14:29:40 -0700
Subject: [PATCH 1/6] Fix x64 instruction size estimates

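Correct xarch size accounting for VEX/K encodings and add an opt-in debug check
(JitAssertOnEmitSizeMismatch, off by default) that asserts when the number of bytes
emitted for an instruction differs from its estimated size.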

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/jit/emitxarch.cpp     | 70 ++++++++++++++++---------------
 src/coreclr/jit/jitconfigvalues.h |  2 +
 2 files changed, 39 insertions(+), 33 deletions(-)
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 90eee967367021..4d4834cd479fcd 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -3811,32 +3811,13 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const
 
         assert(prefixAdjustedSize != 0);
 
-        // In this case, opcode will contains escape prefix at least one byte,
-        // prefixAdjustedSize should be minus one.
+        // VEX/EVEX encodes the escape prefix and optional SIMD prefix, so remove them from the legacy opcode size.
         prefixAdjustedSize -= 1;
 
-        // Get the fourth byte in Opcode.
-        // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
-        BYTE check = (code >> 24) & 0xFF;
-
-        if (check != 0)
+        BYTE sizePrefix = (code >> 16) & 0xFF;
+        if (sizePrefix != 0 && isPrefix(sizePrefix))
         {
-            // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
-            // 4-byte opcode: with the bytes ordered as 0x22114433
-            // Simd prefix is at the first byte.
-            BYTE sizePrefix = (code >> 16) & 0xFF;
-
-            if (sizePrefix != 0 && isPrefix(sizePrefix))
-            {
-                prefixAdjustedSize -= 1;
-            }
-
-            // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
-            // But in this case the opcode has not counted R\M part.
-            // opcodeSize + prefixAdjustedSize - extraEscapePrefixSize + modRMSize
-            //  = opcodeSize + prefixAdjustedSize -1 + 1
-            //  = opcodeSize + prefixAdjustedSize
-            // So although we may have second byte escape prefix, we won't decrease prefixAdjustedSize.
+            prefixAdjustedSize -= 1;
         }
 
         adjustedSize = prefixAdjustedSize;
@@ -5240,14 +5221,24 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id)
 
     if ((code & 0xFF00) != 0)
     {
-        sz += (IsSimdInstruction(ins) || TakesApxExtendedEvexPrefix(id)) ? emitInsSize(id, code, includeRexPrefixSize)
-                                                                         : 5;
+        sz += emitInsSize(id, code, includeRexPrefixSize);
+        if (!IsSimdInstruction(ins) && !TakesApxExtendedEvexPrefix(id) && ((code & 0xFF00) != 0xC000))
+        {
+            // Non-SIMD two-byte opcodes generally emit a separate ModRM byte; 0xC000 already includes it.
+            sz++;
+        }
     }
     else
     {
         sz += emitInsSize(id, insEncodeRMreg(id, code), includeRexPrefixSize);
     }
 
+    if (IsKInstruction(ins))
+    {
+        // K instructions add VEX before this helper; avoid counting the prefix once here and once in the adjustment.
+        sz -= emitGetVexPrefixSize(id);
+    }
+
     return sz;
 }
 
@@ -11503,7 +11494,7 @@ void emitter::emitIns_Call(const EmitCallParams& params)
                 // An absolute indir address that doesn't need reloc should fit within 32-bits
                 // to be encoded as offset relative to zero.  This addr mode requires an extra
                 // SIB byte
-                noway_assert((size_t) static_cast<int>(reinterpret_cast<intptr_t>(params.addr)) == (size_t)params.addr);
+                noway_assert((size_t)static_cast<int>(reinterpret_cast<intptr_t>(params.addr)) == (size_t)params.addr);
                 sz++;
             }
 #endif // TARGET_AMD64
@@ -11537,7 +11528,7 @@ void emitter::emitIns_Call(const EmitCallParams& params)
             // An absolute indir address that doesn't need reloc should fit within 32-bits
             // to be encoded as offset relative to zero.  This addr mode requires an extra
             // SIB byte
-            noway_assert((size_t) static_cast<int>(reinterpret_cast<intptr_t>(params.addr)) == (size_t)params.addr);
+            noway_assert((size_t)static_cast<int>(reinterpret_cast<intptr_t>(params.addr)) == (size_t)params.addr);
             sz++;
         }
 #endif // TARGET_AMD64
@@ -15747,14 +15738,11 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
             assert(isCompressed && dspInByte);
             dsp = (int)compressedDsp;
         }
-        else if (TakesEvexPrefix(id) && !IsBMIInstruction(ins))
+        else if (TakesEvexPrefix(id) && hasTupleTypeInfo(ins))
         {
 #if FEATURE_FIXED_OUT_ARGS
-            // TODO-AMD64-CQ: We should be able to accurately predict this when FEATURE_FIXED_OUT_ARGS
-            // is available. However, there's some nuance in how emitInsSizeSVCalcDisp does things
-            // compared to emitOutputSV here, so we will miss a few cases today.
-            //
-            // assert(!TryEvexCompressDisp8Byte(id, dsp, &compressedDsp, &dspInByte));
+            // The estimator should mark all encodable compressed displacements before we reach output.
+            assert(!TryEvexCompressDisp8Byte(id, dsp, &compressedDsp, &dspInByte));
 #endif
             dspInByte = false;
         }
@@ -20196,6 +20184,22 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
     }
 #endif
 
+#ifdef DEBUG
+    if (JitConfig.JitAssertOnEmitSizeMismatch() != 0)
+    {
+        // Labels and align pseudo-instructions can intentionally shrink after initial size estimation.
+        const bool skipSizeCheck =
+            (id->idIns() == INS_align) || (insFmt == IF_LABEL) || (insFmt == IF_RWR_LABEL) || (insFmt == IF_SWR_LABEL);
+        const UNATIVE_OFFSET actualCodeSize = static_cast<UNATIVE_OFFSET>(dst - *dp);
+        if (!skipSizeCheck && (id->idCodeSize() != actualCodeSize))
+        {
+            printf("Instruction size mismatch: estimated=%u actual=%u\n", id->idCodeSize(), actualCodeSize);
+            emitDispIns(id, false, 0, true, emitCurCodeOffs(*dp), *dp, actualCodeSize);
+            assert(!"Instruction size mismatch");
+        }
+    }
+#endif
+
 #if FEATURE_LOOP_ALIGN
     // Only compensate over-estimated instructions if emitCurIG is before the last IG that needs alignment.
     if ((emitLastAlignedIG != nullptr) && emitCurIG->IsBeforeOrEqual(emitLastAlignedIG))
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 1cc7e8aec86497..f62d5c68cbcaec 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -55,6 +55,8 @@ CONFIG_INTEGER(EnablePCRelAddr, "JitEnablePCRelAddr", 1) // Whether absolute add
                                                          // RyuJIT where possible
 CONFIG_INTEGER(JitAssertOnMaxRAPasses, "JitAssertOnMaxRAPasses", 0)
 CONFIG_INTEGER(JitBreakEmitOutputInstr, "JitBreakEmitOutputInstr", -1)
+// Assert if the xarch encoder emits a different number of bytes than its estimator predicted.
+CONFIG_INTEGER(JitAssertOnEmitSizeMismatch, "JitAssertOnEmitSizeMismatch", 0)
 CONFIG_INTEGER(JitBreakMorphTree, "JitBreakMorphTree", 0xffffffff)
 CONFIG_INTEGER(JitBreakOnBadCode, "JitBreakOnBadCode", 0)
 CONFIG_INTEGER(JitBreakOnMinOpts, "JITBreakOnMinOpts", 0) // Halt if jit switches to MinOpts

From c822a8148f20266c1dfb9bace66c80cbac10dcea Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Thu, 25 Jun 2026 16:54:47 -0700
Subject: [PATCH 2/6] Finish x64 instruction size fixes

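Enable the emit-size mismatch check by default (JitAssertOnEmitSizeMismatch now defaults to 1;
set it to 0 to disable) and cover the remaining xarch estimator gaps. Stack-frame formats are
skipped by the check when FEATURE_FIXED_OUT_ARGS is not enabled.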

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/jit/emitxarch.cpp     | 75 ++++++++++++++++++++++++-------
 src/coreclr/jit/jitconfigvalues.h |  4 +-
 2 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 4d4834cd479fcd..a2497e7fd669f8 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -2092,6 +2092,15 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
 #endif
 }
 
+static bool HasRewrittenBuiltInRexPrefix(instruction ins)
+{
+#ifdef TARGET_AMD64
+    return (ins >= INS_imul_08) && (ins <= INS_imul_15);
+#else
+    return false;
+#endif
+}
+
 //------------------------------------------------------------------------
 // TakesApxExtendedEvexPrefix: Checks if the instruction should be legacy-promoted-EVEX encoded.
 //
@@ -3872,7 +3881,7 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const
 
         emitAttr attr = id->idOpSize();
 
-        if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx))
+        if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx) && (ins != INS_cmpxchg))
         {
             // Most 16-bit operand instructions will need a 0x66 prefix.
             adjustedSize++;
@@ -5222,9 +5231,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id)
     if ((code & 0xFF00) != 0)
     {
         sz += emitInsSize(id, code, includeRexPrefixSize);
-        if (!IsSimdInstruction(ins) && !TakesApxExtendedEvexPrefix(id) && ((code & 0xFF00) != 0xC000))
+        if (!IsSimdInstruction(ins) && !TakesApxExtendedEvexPrefix(id) && (ins != INS_crc32) &&
+            ((code & 0xFF00) != 0xC000))
         {
             // Non-SIMD two-byte opcodes generally emit a separate ModRM byte; 0xC000 already includes it.
+            // CRC32 accounts for its ModRM byte in emitGetAdjustedSize.
             sz++;
         }
     }
@@ -5239,6 +5250,12 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id)
         sz -= emitGetVexPrefixSize(id);
     }
 
+    if (HasRewrittenBuiltInRexPrefix(ins) && TakesRexWPrefix(id) && !TakesRex2Prefix(id))
+    {
+        // Legacy 3-op IMUL opcodes carry a built-in REX byte that output rewrites from operand state.
+        sz--;
+    }
+
     return sz;
 }
 
@@ -5259,6 +5276,14 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code,
     instruction    ins  = id->idIns();
     UNATIVE_OFFSET size = emitInsSize(id, code, /* includeRexPrefixSize */ true);
 
+#ifdef TARGET_AMD64
+    if (HasRewrittenBuiltInRexPrefix(ins) && TakesRexWPrefix(id) && !TakesRex2Prefix(id))
+    {
+        // Legacy 3-op IMUL opcodes carry a built-in REX byte that output rewrites from operand state.
+        size--;
+    }
+#endif
+
     int  adr;
     bool EBPbased;
     bool dspInByte;
@@ -5510,6 +5535,15 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
         size = 2;
     }
 
+#ifdef TARGET_AMD64
+    if (HasRewrittenBuiltInRexPrefix(ins) && TakesRexWPrefix(id) && !TakesRex2Prefix(id) &&
+        ((reg != REG_NA) || (rgx != REG_NA)))
+    {
+        // The legacy 3-op IMUL opcodes carry a built-in REX byte that output rewrites from operand state.
+        size--;
+    }
+#endif
+
     size += emitGetAdjustedSize(id, code);
 
     if (hasRexPrefix(code))
@@ -5523,7 +5557,8 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
         size += emitGetRexPrefixSize(id, ins);
     }
     else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) ||
-             ((ins != INS_call) && (IsExtendedReg(id->idReg1(), attrSize) || IsExtendedReg(id->idReg2(), attrSize))))
+             ((ins != INS_call) && ((id->idHasReg1() && IsExtendedReg(id->idReg1(), attrSize)) ||
+                                    (id->idHasReg2() && IsExtendedReg(id->idReg2(), attrSize)))))
     {
         // Should have a REX byte
         size += emitGetRexPrefixSize(id, ins);
@@ -7453,12 +7488,6 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld
         sz = emitInsSizeCV(id, insCodeMR(ins));
     }
 
-    if (TakesRexWPrefix(id))
-    {
-        // REX.W prefix
-        sz += emitGetRexPrefixSize(id, ins);
-    }
-
     id->idAddr()->iiaFieldHnd = fldHnd;
 
     id->idCodeSize(sz);
@@ -7487,6 +7516,11 @@ void emitter::emitIns_A(instruction ins, emitAttr attr, GenTreeIndir* indir)
     emitHandleMemOp(indir, id, fmt, ins);
 
     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
+    if (ins == INS_idiv)
+    {
+        printf("emitIns_A idiv sz=%u attr=%x code=%llx fmt=%u reloc=%u reg1=%u reg2=%u\n", sz, attr, insCodeMR(ins),
+               id->idInsFmt(), id->idIsDspReloc(), id->idReg1(), id->idReg2());
+    }
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -8781,7 +8815,8 @@ void emitter::emitIns_R_R_A_R(instruction   ins,
     SetEvexBroadcastIfNeeded(id, instOptions);
     SetEvexEmbMaskIfNeeded(id, instOptions);
 
-    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
+    // AVX512 blendv encodes mask registers in EVEX.aaa, not as an immediate byte.
+    UNATIVE_OFFSET sz = isMaskReg(op3Reg) ? emitInsSizeAM(id, insCodeRM(ins)) : emitInsSizeAM(id, insCodeRM(ins), ival);
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -8836,7 +8871,8 @@ void emitter::emitIns_R_R_C_R(instruction          ins,
     SetEvexBroadcastIfNeeded(id, instOptions);
     SetEvexEmbMaskIfNeeded(id, instOptions);
 
-    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
+    // AVX512 blendv encodes mask registers in EVEX.aaa, not as an immediate byte.
+    UNATIVE_OFFSET sz = isMaskReg(op3Reg) ? emitInsSizeCV(id, insCodeRM(ins)) : emitInsSizeCV(id, insCodeRM(ins), ival);
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -8885,7 +8921,9 @@ void emitter::emitIns_R_R_S_R(instruction ins,
     SetEvexBroadcastIfNeeded(id, instOptions);
     SetEvexEmbMaskIfNeeded(id, instOptions);
 
-    UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
+    // AVX512 blendv encodes mask registers in EVEX.aaa, not as an immediate byte.
+    UNATIVE_OFFSET sz = isMaskReg(op3Reg) ? emitInsSizeSV(id, insCodeRM(ins), varx, offs)
+                                          : emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -20188,12 +20226,19 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
     if (JitConfig.JitAssertOnEmitSizeMismatch() != 0)
     {
         // Labels and align pseudo-instructions can intentionally shrink after initial size estimation.
-        const bool skipSizeCheck =
-            (id->idIns() == INS_align) || (insFmt == IF_LABEL) || (insFmt == IF_RWR_LABEL) || (insFmt == IF_SWR_LABEL);
+        const IS_INFO insFmtInfo = emitGetSchedInfo(insFmt);
+#if !FEATURE_FIXED_OUT_ARGS
+        const bool skipStackSizeCheck = ((insFmtInfo & (IS_SF_RD | IS_SF_RW | IS_SF_WR)) != 0);
+#else
+        const bool skipStackSizeCheck = false;
+#endif
+        const bool skipSizeCheck = (id->idIns() == INS_align) || (insFmt == IF_LABEL) || (insFmt == IF_RWR_LABEL) ||
+                                   (insFmt == IF_SWR_LABEL) || skipStackSizeCheck;
         const UNATIVE_OFFSET actualCodeSize = static_cast<UNATIVE_OFFSET>(dst - *dp);
         if (!skipSizeCheck && (id->idCodeSize() != actualCodeSize))
         {
-            printf("Instruction size mismatch: estimated=%u actual=%u\n", id->idCodeSize(), actualCodeSize);
+            printf("Instruction size mismatch: estimated=%u actual=%u fmt=%u attr=%x reg1=%u reg2=%u\n",
+                   id->idCodeSize(), actualCodeSize, insFmt, id->idOpSize(), id->idReg1(), id->idReg2());
             emitDispIns(id, false, 0, true, emitCurCodeOffs(*dp), *dp, actualCodeSize);
             assert(!"Instruction size mismatch");
         }
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index f62d5c68cbcaec..aab4008884fda8 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -55,8 +55,8 @@ CONFIG_INTEGER(EnablePCRelAddr, "JitEnablePCRelAddr", 1) // Whether absolute add
                                                          // RyuJIT where possible
 CONFIG_INTEGER(JitAssertOnMaxRAPasses, "JitAssertOnMaxRAPasses", 0)
 CONFIG_INTEGER(JitBreakEmitOutputInstr, "JitBreakEmitOutputInstr", -1)
-// Assert if the xarch encoder emits a different number of bytes than its estimator predicted.
-CONFIG_INTEGER(JitAssertOnEmitSizeMismatch, "JitAssertOnEmitSizeMismatch", 0)
+// Assert if the xarch encoder emits a different number of bytes than its estimator predicted; set to 0 to disable.
+CONFIG_INTEGER(JitAssertOnEmitSizeMismatch, "JitAssertOnEmitSizeMismatch", 1)
 CONFIG_INTEGER(JitBreakMorphTree, "JitBreakMorphTree", 0xffffffff)
 CONFIG_INTEGER(JitBreakOnBadCode, "JitBreakOnBadCode", 0)
 CONFIG_INTEGER(JitBreakOnMinOpts, "JITBreakOnMinOpts", 0) // Halt if jit switches to MinOpts

From 283781a443ec4f743a100640250b3d57a6ed82c3 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Thu, 25 Jun 2026 18:59:56 -0700
Subject: [PATCH 3/6] Remove leftover JIT debug print

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/jit/emitxarch.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index a2497e7fd669f8..cf6e61d0646c43 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -7516,11 +7516,6 @@ void emitter::emitIns_A(instruction ins, emitAttr attr, GenTreeIndir* indir)
     emitHandleMemOp(indir, id, fmt, ins);
 
     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
-    if (ins == INS_idiv)
-    {
-        printf("emitIns_A idiv sz=%u attr=%x code=%llx fmt=%u reloc=%u reg1=%u reg2=%u\n", sz, attr, insCodeMR(ins),
-               id->idInsFmt(), id->idIsDspReloc(), id->idReg1(), id->idReg2());
-    }
     id->idCodeSize(sz);
 
     dispIns(id);

From 3ee4a3f79ceecc8b8e0b3a94eedaa83f82e8ab4e Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Fri, 26 Jun 2026 07:38:22 -0700
Subject: [PATCH 4/6] Handle EVEX K-instruction size estimates

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/jit/emitxarch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index cf6e61d0646c43..acc269779044c8 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -5244,7 +5244,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id)
         sz += emitInsSize(id, insEncodeRMreg(id, code), includeRexPrefixSize);
     }
 
-    if (IsKInstruction(ins))
+    if (IsKInstruction(ins) && !TakesEvexPrefix(id))
     {
         // K instructions add VEX before this helper; avoid counting the prefix once here and once in the adjustment.
         sz -= emitGetVexPrefixSize(id);

From b006801ce795694adfdb6e5430b857b17bb52204 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Fri, 26 Jun 2026 11:24:46 -0700
Subject: [PATCH 5/6] Fix K instruction size estimates under EVEX stress

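Use the actual opcode prefix state when adjusting K-instruction size estimates so EVEX stress and VEX output stay consistent.

Also restore the original spacing of the noway_assert casts in emitIns_Call, undoing an unrelated formatting change from earlier in this series.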

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/jit/emitxarch.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index acc269779044c8..6d8e507800005f 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -5244,7 +5244,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id)
         sz += emitInsSize(id, insEncodeRMreg(id, code), includeRexPrefixSize);
     }
 
-    if (IsKInstruction(ins) && !TakesEvexPrefix(id))
+    if (IsKInstruction(ins) && hasVexPrefix(code))
     {
         // K instructions add VEX before this helper; avoid counting the prefix once here and once in the adjustment.
         sz -= emitGetVexPrefixSize(id);
@@ -11527,7 +11527,7 @@ void emitter::emitIns_Call(const EmitCallParams& params)
                 // An absolute indir address that doesn't need reloc should fit within 32-bits
                 // to be encoded as offset relative to zero.  This addr mode requires an extra
                 // SIB byte
-                noway_assert((size_t)static_cast<int>(reinterpret_cast<intptr_t>(params.addr)) == (size_t)params.addr);
+                noway_assert((size_t) static_cast<int>(reinterpret_cast<intptr_t>(params.addr)) == (size_t)params.addr);
                 sz++;
             }
 #endif // TARGET_AMD64
@@ -11561,7 +11561,7 @@ void emitter::emitIns_Call(const EmitCallParams& params)
             // An absolute indir address that doesn't need reloc should fit within 32-bits
             // to be encoded as offset relative to zero.  This addr mode requires an extra
             // SIB byte
-            noway_assert((size_t)static_cast<int>(reinterpret_cast<intptr_t>(params.addr)) == (size_t)params.addr);
+            noway_assert((size_t) static_cast<int>(reinterpret_cast<intptr_t>(params.addr)) == (size_t)params.addr);
             sz++;
         }
 #endif // TARGET_AMD64

From da3ef0469c694b9c356b5c4199bface64f3bbd9f Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Fri, 26 Jun 2026 18:30:39 -0700
Subject: [PATCH 6/6] Fix remaining xarch size estimates

Correct x86 K-instruction prefix accounting and memory-source IMUL estimates exposed by checked CI runs.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/coreclr/jit/emitxarch.cpp | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 6d8e507800005f..9f20207a10dee1 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -5244,11 +5244,13 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id)
         sz += emitInsSize(id, insEncodeRMreg(id, code), includeRexPrefixSize);
     }
 
+#ifdef TARGET_AMD64
     if (IsKInstruction(ins) && hasVexPrefix(code))
     {
         // K instructions add VEX before this helper; avoid counting the prefix once here and once in the adjustment.
         sz -= emitGetVexPrefixSize(id);
     }
+#endif // TARGET_AMD64
 
     if (HasRewrittenBuiltInRexPrefix(ins) && TakesRexWPrefix(id) && !TakesRex2Prefix(id))
     {
@@ -5535,15 +5537,6 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
         size = 2;
     }
 
-#ifdef TARGET_AMD64
-    if (HasRewrittenBuiltInRexPrefix(ins) && TakesRexWPrefix(id) && !TakesRex2Prefix(id) &&
-        ((reg != REG_NA) || (rgx != REG_NA)))
-    {
-        // The legacy 3-op IMUL opcodes carry a built-in REX byte that output rewrites from operand state.
-        size--;
-    }
-#endif
-
     size += emitGetAdjustedSize(id, code);
 
     if (hasRexPrefix(code))