Skip to content

Commit 1162b6c

Browse files
committed
cpu/drcbex64.cpp: Avoid slow microcoded pushfq/popfq altogether.
Also avoid some false dependencies and reduce near cache usage by almost 4KiB.
1 parent 09b00ba commit 1162b6c

File tree

1 file changed

+45
-36
lines changed

1 file changed

+45
-36
lines changed

src/devices/cpu/drcbex64.cpp

Lines changed: 45 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -469,8 +469,8 @@ class drcbe_x64 : public drcbe_interface
469469

470470
void * stacksave; // saved stack pointer
471471

472-
uint8_t flagsmap[0x1000]; // flags map
473-
uint64_t flagsunmap[0x20]; // flags unmapper
472+
uint8_t flagsmap[0x100]; // flags map
473+
uint32_t flagsunmap[0x20]; // flags unmapper
474474
};
475475

476476
// resolved memory handler functions
@@ -1047,17 +1047,17 @@ drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, u
10471047
if (entry & 0x004) flags |= FLAG_U;
10481048
if (entry & 0x040) flags |= FLAG_Z;
10491049
if (entry & 0x080) flags |= FLAG_S;
1050-
if (entry & 0x800) flags |= FLAG_V;
1050+
// can't get FLAG_V from lahf
10511051
m_near.flagsmap[entry] = flags;
10521052
}
10531053
for (int entry = 0; entry < std::size(m_near.flagsunmap); entry++)
10541054
{
1055-
uint64_t flags = 0;
1056-
if (entry & FLAG_C) flags |= 0x001;
1057-
if (entry & FLAG_U) flags |= 0x004;
1058-
if (entry & FLAG_Z) flags |= 0x040;
1059-
if (entry & FLAG_S) flags |= 0x080;
1060-
if (entry & FLAG_V) flags |= 0x800;
1055+
uint32_t flags = 0;
1056+
if (entry & FLAG_C) flags |= 0x001 << 8;
1057+
if (entry & FLAG_U) flags |= 0x004 << 8;
1058+
if (entry & FLAG_Z) flags |= 0x040 << 8;
1059+
if (entry & FLAG_S) flags |= 0x080 << 8;
1060+
// can't set V -> O with sahf
10611061
m_near.flagsunmap[entry] = flags;
10621062
}
10631063

@@ -2527,27 +2527,31 @@ void drcbe_x64::op_setflgs(Assembler &a, const instruction &inst)
25272527

25282528
be_parameter srcp(*this, inst.param(0), PTYPE_MRI);
25292529

2530-
a.pushfq();
2531-
a.and_(qword_ptr(rsp), ~0x8c5);
2532-
25332530
if (srcp.is_immediate())
25342531
{
2535-
uint64_t const flags = m_near.flagsunmap[srcp.immediate() & FLAGS_ALL];
2532+
uint32_t const flags = m_near.flagsunmap[srcp.immediate() & FLAGS_ALL];
25362533
if (!flags)
2537-
a.xor_(rax, rax);
2534+
a.xor_(eax, eax);
2535+
else
2536+
a.mov(eax, flags);
2537+
2538+
if (srcp.immediate() & FLAG_V)
2539+
a.mov(ecx, 1);
25382540
else
2539-
a.mov(rax, flags);
2541+
a.xor_(ecx, ecx);
25402542
}
25412543
else
25422544
{
2543-
mov_reg_param(a, rax, srcp);
2544-
a.and_(rax, FLAGS_ALL);
2545+
mov_reg_param(a, eax, srcp);
2546+
a.mov(ecx, FLAG_V);
2547+
a.and_(ecx, eax);
2548+
a.and_(eax, FLAGS_ALL);
25452549

2546-
a.mov(rax, ptr(rbp, rax, 3, offset_from_rbp(&m_near.flagsunmap[0])));
2550+
a.mov(eax, ptr(rbp, rax, 2, offset_from_rbp(&m_near.flagsunmap[0])));
25472551
}
2548-
a.or_(qword_ptr(rsp), rax);
25492552

2550-
a.popfq();
2553+
a.add(cl, 0x7f);
2554+
a.sahf();
25512555
}
25522556

25532557

@@ -2566,20 +2570,25 @@ void drcbe_x64::op_save(Assembler &a, const instruction &inst)
25662570
be_parameter dstp(*this, inst.param(0), PTYPE_M);
25672571

25682572
// copy live state to the destination
2569-
mov_r64_imm(a, rcx, (uintptr_t)dstp.memory()); // mov rcx,dstp
2573+
mov_r64_imm(a, rcx, (uintptr_t)dstp.memory());
25702574

25712575
// copy flags
2572-
a.pushfq(); // pushf
2573-
a.pop(rax); // pop rax
2574-
a.and_(eax, 0x8c5); // and eax,0x8c5
2575-
a.mov(al, ptr(rbp, rax, 0, offset_from_rbp(&m_near.flagsmap[0]))); // mov al,[flags_map]
2576-
a.mov(ptr(rcx, offsetof(drcuml_machine_state, flags)), al); // mov state->flags,al
2576+
a.lahf();
2577+
a.seto(dl);
2578+
a.shr(eax, 8);
2579+
a.movzx(edx, dl);
2580+
a.and_(eax, 0x0c5);
2581+
a.movzx(eax, byte_ptr(rbp, rax, 0, offset_from_rbp(&m_near.flagsmap[0])));
2582+
a.lea(rax, ptr(rax, rdx, 1));
2583+
a.mov(ptr(rcx, offsetof(drcuml_machine_state, flags)), al);
25772584

25782585
// copy fmod and exp
2579-
a.mov(al, MABS(&m_state.fmod)); // mov al,[fmod]
2580-
a.mov(ptr(rcx, offsetof(drcuml_machine_state, fmod)), al); // mov state->fmod,al
2581-
a.mov(eax, MABS(&m_state.exp)); // mov eax,[exp]
2582-
a.mov(ptr(rcx, offsetof(drcuml_machine_state, exp)), eax); // mov state->exp,eax
2586+
Mem fmod = MABS(&m_state.fmod);
2587+
fmod.setSize(1);
2588+
a.movzx(eax, fmod);
2589+
a.mov(ptr(rcx, offsetof(drcuml_machine_state, fmod)), al);
2590+
a.mov(eax, MABS(&m_state.exp));
2591+
a.mov(ptr(rcx, offsetof(drcuml_machine_state, exp)), eax);
25832592

25842593
// copy integer registers
25852594
int regoffs = offsetof(drcuml_machine_state, r);
@@ -2627,7 +2636,7 @@ void drcbe_x64::op_restore(Assembler &a, const instruction &inst)
26272636
be_parameter srcp(*this, inst.param(0), PTYPE_M);
26282637

26292638
// copy live state from the destination
2630-
mov_r64_imm(a, rcx, (uintptr_t)srcp.memory()); // mov rcx,dstp
2639+
mov_r64_imm(a, rcx, (uintptr_t)srcp.memory());
26312640

26322641
// copy integer registers
26332642
int regoffs = offsetof(drcuml_machine_state, r);
@@ -2667,13 +2676,13 @@ void drcbe_x64::op_restore(Assembler &a, const instruction &inst)
26672676
a.mov(MABS(&m_state.exp), eax);
26682677

26692678
// copy flags
2670-
a.pushfq();
2671-
a.and_(qword_ptr(rsp), ~0x8c5);
26722679
a.movzx(eax, byte_ptr(rcx, offsetof(drcuml_machine_state, flags)));
2680+
a.mov(ecx, FLAG_V); // don't need pointer to src any more
2681+
a.and_(ecx, eax);
26732682
a.and_(eax, FLAGS_ALL);
2674-
a.mov(rax, ptr(rbp, rax, 3, offset_from_rbp(&m_near.flagsunmap[0])));
2675-
a.or_(qword_ptr(rsp), rax);
2676-
a.popfq();
2683+
a.mov(eax, ptr(rbp, rax, 2, offset_from_rbp(&m_near.flagsunmap[0])));
2684+
a.add(cl, 0x7f);
2685+
a.sahf();
26772686
}
26782687

26792688

0 commit comments

Comments
 (0)