From 62b745c111ce8e8afb32394b8ca975ad0c685b68 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 4 Dec 2024 13:17:32 +0000 Subject: [PATCH] vdp: add support for targets that don't sufficiently waitstate the TMS I/O Based on all the work from wigglethemouse and productiondave's testing on the Z80 Retro discord What seems to happen is that the Z80 at 10MHz without waitstates is enough that out (c),l out (c),h sometimes loses stuff and it needs a nop in the middle. We don't see this on slower systems or on other fast ports as the other fast ports wait state the TMS9918A so meet the timings. --- Kernel/dev/vdp1.s | 20 +++++++++++++++++++- Kernel/platform/platform-2063/vdp.s | 25 +++++++++++++------------ Kernel/platform/platform-msx1/vdp.s | 2 ++ Kernel/platform/platform-mtx/vdp.s | 4 +++- Kernel/platform/platform-n8/vdp.s | 4 ++++ 5 files changed, 41 insertions(+), 14 deletions(-) diff --git a/Kernel/dev/vdp1.s b/Kernel/dev/vdp1.s index 9ecf7b8926..0d2d82d362 100644 --- a/Kernel/dev/vdp1.s +++ b/Kernel/dev/vdp1.s @@ -170,6 +170,7 @@ _vdp_load_font: ld bc,(_vdpport) ld de,#0x7C00 out (c),e ; set write and font area + VDP_DELAY3 out (c),d ld hl,#_fontdata_6x8 ld b,e @@ -202,18 +203,22 @@ _vdp_restore_font: in a,(c) fontnext: out (c),e + VDP_DELAY3 out (c),d VDP_DELAY2 dec c in a,(c) inc c + VDP_DELAY3 ; shouldn't need a normal delay here out (c),e + VDP_DELAY3 out (c),h dec c out (c),a VDP_DELAY inc c out (c),e + VDP_DELAY3 out (c),l cpl dec c @@ -234,6 +239,7 @@ _vdp_wipe_consoles: ld b,#0 ld a,#0x40 out (c),b ; 0x0000 for writing + VDP_DELAY3 out (c),a dec c ld a,#32 @@ -251,6 +257,7 @@ _vdp_setcolour: ld b,#0 ld a,#0x60 ; 0x2000 in the VDP, for write out (c),b + VDP_DELAY3 out (c),a dec c ld b,#32 @@ -270,6 +277,7 @@ _vdp_set: ex de, hl vdpout: ld bc, (_vdpport) out (c), e ; Write the data + VDP_DELAY3 out (c), d ; and then the register | 0x80 ret @@ -335,6 +343,7 @@ plotit: plotit2: ld bc, (_vdpport) out (c), l ; address + VDP_DELAY3 out (c), h ; address | 0x40 dec 
c out (c), a ; character @@ -380,6 +389,7 @@ upline: ld bc, (_vdpport) ; vdpport + 1 always holds #80 ld hl, #scrollbuf out (c), e ; our position + VDP_DELAY3 out (c), d dec c down_0: @@ -390,8 +400,8 @@ down_0: ld hl, (_scrolld_s1); go down one line and into write mode add hl, de ; relative to our position out (c), l + VDP_DELAY3 out (c), h - ; FIME - from vdpport - and into other copies ld b, #40 ld hl, #scrollbuf dec c @@ -430,6 +440,7 @@ downline: push bc ld bc, (_vdpport) ld hl, #scrollbuf out (c), e + VDP_DELAY3 out (c), d dec c up_0: @@ -442,6 +453,7 @@ up_0: ; nybble) add hl, de out (c), l + VDP_DELAY3 out (c), h dec c ld hl, #scrollbuf @@ -479,6 +491,7 @@ clear_lines: ld e, c ld bc, (_vdpport) out (c), l + VDP_DELAY3 out (c), h ; Safe on MSX 2 to loop the data with IRQ on ; but *not* on MSX 1 @@ -516,6 +529,7 @@ clear_across: ld a, c ld bc, (_vdpport) out (c), l + VDP_DELAY3 out (c), h ld b, a ld a, #' ' @@ -550,6 +564,7 @@ cursor_on: ld a, c ld bc, (_vdpport) out (c), l ; address + VDP_DELAY3 out (c), h ; address dec c VDP_DELAY2 @@ -627,6 +642,7 @@ _vdp_rop: ld d, #0 ld bc, (_vdpport) out (c), l + VDP_DELAY3 out (c), h ; Set starting pointer exx ld l, (ix) ; User pointer @@ -648,6 +664,7 @@ ropc: cp h ; just 3F jr c, bounds out (c), l ; next line + VDP_DELAY3 out (c), h exx dec e @@ -675,6 +692,7 @@ _vdp_wop: ld e, 6(ix) ; Stride ld d, #0 out (c),l + VDP_DELAY3 out (c),h ; Set starting pointer exx ld l,(ix) ; User pointer diff --git a/Kernel/platform/platform-2063/vdp.s b/Kernel/platform/platform-2063/vdp.s index 01f617a3b5..0a952633b0 100644 --- a/Kernel/platform/platform-2063/vdp.s +++ b/Kernel/platform/platform-2063/vdp.s @@ -27,23 +27,24 @@ VDP_ROP .equ 1 ; We need 2us between the last control write and a data read. We might ; be running at 10MHz or faster so use long delays ; -; -; These are 38 cycles a pair so 72 cycles all in. That should be -; good for 10MHz and a fair bit more. 
We could tune the delays down -; a bit if we were sure nobody went over 10MHz. +; These will only work on the TMS9918A text mode but fail on the +; later chips. There is no 9938/58 for the 2063 however ; .macro VDP_DELAY - ex (sp),hl - ex (sp),hl - ex (sp),hl - ex (sp),hl + nop + nop + nop .endm .macro VDP_DELAY2 - ex (sp),hl - ex (sp),hl - ex (sp),hl - ex (sp),hl + nop + nop + nop +.endm + +; The 2063 doesn't waitstate the I/O so back to back outs go funny +.macro VDP_DELAY3 + nop .endm .include "../../dev/vdp1.s" diff --git a/Kernel/platform/platform-msx1/vdp.s b/Kernel/platform/platform-msx1/vdp.s index 09ea4fb6d8..a2a0c1779e 100644 --- a/Kernel/platform/platform-msx1/vdp.s +++ b/Kernel/platform/platform-msx1/vdp.s @@ -35,6 +35,8 @@ VDP_ROP .equ 1 .endm .macro VDP_DELAY2 nop +.endm +.macro VDP_DELAY3 .endm .include "../../dev/vdp1.s" diff --git a/Kernel/platform/platform-mtx/vdp.s b/Kernel/platform/platform-mtx/vdp.s index 4e8a50ba3e..151657168f 100644 --- a/Kernel/platform/platform-mtx/vdp.s +++ b/Kernel/platform/platform-mtx/vdp.s @@ -24,7 +24,7 @@ VDP_IRQ .equ 0 ; leave the vdp irq off VDP_ROP .equ 1 ; -; On an MSX at 4Mhz our loop worst case is 26 clocks so for +; On an MTX at 4Mhz our loop worst case is 26 clocks so for ; graphics one we need a nop ; .macro VDP_DELAY @@ -32,6 +32,8 @@ VDP_ROP .equ 1 .endm .macro VDP_DELAY2 nop +.endm +.macro VDP_DELAY3 .endm .area _COMMONMEM diff --git a/Kernel/platform/platform-n8/vdp.s b/Kernel/platform/platform-n8/vdp.s index 23a1bcdf26..0252bcd96b 100644 --- a/Kernel/platform/platform-n8/vdp.s +++ b/Kernel/platform/platform-n8/vdp.s @@ -44,6 +44,10 @@ VDP_ROP .equ 1 call twiddle_thumbs .endm +; This is covered by the I/O wait states +.macro VDP_DELAY3 +.endm + twiddle_thumbs: ; Burn 125 clocks including the call return ; We spend 27 getting here and going back ex (sp),ix ; 19