From 372595f18c83bf1f337a3bf3bc55d69cddfbca1a Mon Sep 17 00:00:00 2001 From: johnny9 Date: Sat, 14 Feb 2026 23:58:20 -0500 Subject: [PATCH 01/19] feat(board): add BIRDS board stub with USB detection --- mujina-miner/src/board/birds.rs | 107 ++++++++++++++++++++++++++++++++ mujina-miner/src/board/mod.rs | 1 + 2 files changed, 108 insertions(+) create mode 100644 mujina-miner/src/board/birds.rs diff --git a/mujina-miner/src/board/birds.rs b/mujina-miner/src/board/birds.rs new file mode 100644 index 0000000..dc960c8 --- /dev/null +++ b/mujina-miner/src/board/birds.rs @@ -0,0 +1,107 @@ +//! BIRDS mining board support (stub). +//! +//! The BIRDS board is a mining board with 4 BZM2 ASIC chips, communicating via +//! USB using two serial ports: a control UART for GPIO/I2C and a data UART for +//! ASIC communication with 8-bit to 9-bit serial translation. +//! +//! This is currently a stub implementation pending full BZM2 ASIC support. + +use async_trait::async_trait; + +use super::{ + Board, BoardDescriptor, BoardError, BoardInfo, + pattern::{BoardPattern, Match, StringMatch}, +}; +use crate::{asic::hash_thread::HashThread, error::Error, transport::UsbDeviceInfo}; + +/// Number of BZM2 ASICs on a BIRDS board. +#[expect(dead_code, reason = "will be used during ASIC init")] +const ASICS_PER_BOARD: usize = 4; + +/// Default baud rate for the BIRDS data UART (5 Mbps). +#[expect(dead_code, reason = "will be used when opening data port")] +const DATA_UART_BAUD: u32 = 5_000_000; + +/// Baud rate for the BIRDS control UART. +#[expect(dead_code, reason = "will be used when opening control port")] +const CONTROL_UART_BAUD: u32 = 115_200; + +/// BIRDS mining board. +pub struct BirdsBoard { + device_info: UsbDeviceInfo, +} + +impl BirdsBoard { + /// Create a new BIRDS board instance. 
+ pub fn new(device_info: UsbDeviceInfo) -> Result { + Ok(Self { device_info }) + } +} + +#[async_trait] +impl Board for BirdsBoard { + fn board_info(&self) -> BoardInfo { + BoardInfo { + model: "BIRDS".to_string(), + firmware_version: None, + serial_number: self.device_info.serial_number.clone(), + } + } + + async fn shutdown(&mut self) -> Result<(), BoardError> { + tracing::info!("BIRDS stub shutdown (no-op)"); + Ok(()) + } + + async fn create_hash_threads(&mut self) -> Result>, BoardError> { + Err(BoardError::InitializationFailed( + "BIRDS not yet implemented".into(), + )) + } +} + +// Factory function to create BIRDS board from USB device info +async fn create_from_usb(device: UsbDeviceInfo) -> crate::error::Result> { + let board = BirdsBoard::new(device) + .map_err(|e| Error::Hardware(format!("Failed to create board: {}", e)))?; + + Ok(Box::new(board)) +} + +// Register this board type with the inventory system +inventory::submit! { + BoardDescriptor { + pattern: BoardPattern { + vid: Match::Any, + pid: Match::Any, + manufacturer: Match::Specific(StringMatch::Exact("OSMU")), + product: Match::Specific(StringMatch::Exact("BIRDS")), + serial_pattern: Match::Any, + }, + name: "BIRDS", + create_fn: |device| Box::pin(create_from_usb(device)), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_board_creation() { + let device = UsbDeviceInfo::new_for_test( + 0xc0de, + 0xcafe, + Some("TEST001".to_string()), + Some("BIRDS".to_string()), + Some("Mining Board".to_string()), + "/sys/devices/test".to_string(), + ); + + let board = BirdsBoard::new(device); + assert!(board.is_ok()); + + let board = board.unwrap(); + assert_eq!(board.board_info().model, "BIRDS"); + } +} diff --git a/mujina-miner/src/board/mod.rs b/mujina-miner/src/board/mod.rs index d8ce752..331db23 100644 --- a/mujina-miner/src/board/mod.rs +++ b/mujina-miner/src/board/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod birds; pub(crate) mod bitaxe; pub mod cpu; pub(crate) mod emberone; From 
1b0fe17191faf9c3ae3601c2f76f3b9c196c1006 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Sun, 15 Feb 2026 00:09:07 -0500 Subject: [PATCH 02/19] feat(transport): add 9-bit serial encoding for BZM2 --- mujina-miner/src/transport/mod.rs | 1 + mujina-miner/src/transport/nine_bit.rs | 105 +++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 mujina-miner/src/transport/nine_bit.rs diff --git a/mujina-miner/src/transport/mod.rs b/mujina-miner/src/transport/mod.rs index 45457d9..0ea83ca 100644 --- a/mujina-miner/src/transport/mod.rs +++ b/mujina-miner/src/transport/mod.rs @@ -6,6 +6,7 @@ //! events when devices are connected or disconnected. pub mod cpu; +pub mod nine_bit; pub mod serial; pub mod usb; diff --git a/mujina-miner/src/transport/nine_bit.rs b/mujina-miner/src/transport/nine_bit.rs new file mode 100644 index 0000000..446d2f9 --- /dev/null +++ b/mujina-miner/src/transport/nine_bit.rs @@ -0,0 +1,105 @@ +//! 9-bit serial TX encoding for BZM2 ASIC communication. +//! +//! The BZM2 ASIC uses 9-bit serial (9N1), where the 9th bit marks the start +//! of a new command frame (address byte). When communicating through a USB-CDC +//! bridge (like bitaxe-raw firmware on RP2350), each outgoing 9-bit word is +//! encoded as a pair of bytes over USB: +//! +//! - First byte: lower 8 bits of the 9-bit word (data) +//! - Second byte: bit 8 (0x00 = data, 0x01 = address/frame start) +//! +//! The firmware strips the 9th bit on RX, so responses come back as plain +//! 8-bit bytes and no decoding is needed on the read path. + +use bytes::{BufMut, BytesMut}; + +/// Encode a complete command frame into 9-bit serial format. +/// +/// The first byte of the frame gets flag=0x01 (address byte, 9th bit set), +/// all subsequent bytes get flag=0x00 (data bytes). This matches the encoding +/// expected by the bitaxe-raw firmware's PIO UART bridge. 
+/// +/// # Arguments +/// +/// * `frame` - Raw protocol bytes for one complete command frame +/// +/// # Returns +/// +/// Encoded bytes with interleaved flag bytes (2x the input length). +pub fn nine_bit_encode_frame(frame: &[u8]) -> BytesMut { + let mut encoded = BytesMut::with_capacity(frame.len() * 2); + for (i, &byte) in frame.iter().enumerate() { + encoded.put_u8(byte); + encoded.put_u8(if i == 0 { 0x01 } else { 0x00 }); + } + encoded +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_encode_frame_single_byte() { + let encoded = nine_bit_encode_frame(&[0xAA]); + assert_eq!(encoded.as_ref(), &[0xAA, 0x01]); + } + + #[test] + fn test_encode_frame_multi_byte() { + let encoded = nine_bit_encode_frame(&[0xFA, 0x0F, 0x42, 0x00]); + assert_eq!( + encoded.as_ref(), + &[ + 0xFA, 0x01, // first byte: flag=0x01 (address) + 0x0F, 0x00, // subsequent: flag=0x00 (data) + 0x42, 0x00, 0x00, 0x00, + ] + ); + } + + #[test] + fn test_encode_frame_empty() { + let encoded = nine_bit_encode_frame(&[]); + assert!(encoded.is_empty()); + } + + #[test] + fn test_encode_noop_command() { + // BZM2 NOOP command (non-EHL): [length_lo, length_hi, header_hi, header_lo] + // Example: asic_id=0xFA, opcode=NOOP(0xF) + // header = (0xFA << 8) | (0xF << 4) = 0xFAF0 + // length = 4 + let frame = [0x04, 0x00, 0xFA, 0xF0]; + let encoded = nine_bit_encode_frame(&frame); + assert_eq!( + encoded.as_ref(), + &[ + 0x04, 0x01, // length LSB: address byte + 0x00, 0x00, // length MSB: data byte + 0xFA, 0x00, // header byte 1: data byte + 0xF0, 0x00, // header byte 2: data byte + ] + ); + } + + #[test] + fn test_roundtrip() { + // Encode a frame, then verify the raw pairs match expected format + let original = vec![0x07, 0x00, 0xFA, 0x20, 0x00, 0x03, 0xFF]; + let encoded = nine_bit_encode_frame(&original); + + // Verify length doubled + assert_eq!(encoded.len(), original.len() * 2); + + // Verify first pair has flag=0x01 + assert_eq!(encoded[0], original[0]); + assert_eq!(encoded[1], 
0x01); + + // Verify remaining pairs have flag=0x00 + for i in 1..original.len() { + assert_eq!(encoded[i * 2], original[i]); + assert_eq!(encoded[i * 2 + 1], 0x00); + } + } +} From 7555f166957a775de5a18df89f8953874b05a2ce Mon Sep 17 00:00:00 2001 From: johnny9 Date: Mon, 16 Feb 2026 00:52:35 -0500 Subject: [PATCH 03/19] feat(bzm2): add protocol definitions and codec --- mujina-miner/src/asic/bzm2/error.rs | 27 ++ mujina-miner/src/asic/bzm2/mod.rs | 7 + mujina-miner/src/asic/bzm2/protocol.rs | 606 +++++++++++++++++++++++++ mujina-miner/src/asic/mod.rs | 1 + 4 files changed, 641 insertions(+) create mode 100644 mujina-miner/src/asic/bzm2/error.rs create mode 100644 mujina-miner/src/asic/bzm2/mod.rs create mode 100644 mujina-miner/src/asic/bzm2/protocol.rs diff --git a/mujina-miner/src/asic/bzm2/error.rs b/mujina-miner/src/asic/bzm2/error.rs new file mode 100644 index 0000000..00feb23 --- /dev/null +++ b/mujina-miner/src/asic/bzm2/error.rs @@ -0,0 +1,27 @@ +//! Error types for BZM2 protocol operations. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ProtocolError { + #[error("register write payload cannot be empty")] + EmptyWritePayload, + + #[error("register write payload too large: {0} bytes")] + WritePayloadTooLarge(usize), + + #[error("invalid read register byte count: {0} (expected 1, 2, or 4)")] + InvalidReadRegCount(u8), + + #[error("unsupported read register response size: {0} (expected 1 or 4)")] + UnsupportedReadRegResponseSize(usize), + + #[error("frame too large to encode: {0} bytes")] + FrameTooLarge(usize), + + #[error("invalid NOOP signature: {0:02x?}")] + InvalidNoopSignature([u8; 3]), + + #[error("unsupported response opcode: 0x{0:02x}")] + UnsupportedResponseOpcode(u8), +} diff --git a/mujina-miner/src/asic/bzm2/mod.rs b/mujina-miner/src/asic/bzm2/mod.rs new file mode 100644 index 0000000..a6c6d61 --- /dev/null +++ b/mujina-miner/src/asic/bzm2/mod.rs @@ -0,0 +1,7 @@ +//! BZM2 ASIC protocol support. 
+ +pub mod error; +pub mod protocol; + +pub use error::ProtocolError; +pub use protocol::{Command, FrameCodec, Opcode, ReadRegData, Response}; diff --git a/mujina-miner/src/asic/bzm2/protocol.rs b/mujina-miner/src/asic/bzm2/protocol.rs new file mode 100644 index 0000000..e3a5932 --- /dev/null +++ b/mujina-miner/src/asic/bzm2/protocol.rs @@ -0,0 +1,606 @@ +//! BZM2 wire protocol and frame codec. +//! +//! This module implements pass-1 support for bring-up: +//! - Command encoding for `NOOP`, `READREG`, `WRITEREG` +//! - Response decoding for `NOOP` and `READREG` +//! - 9-bit TX framing via the BIRDS USB bridge format + +use std::io; + +use bytes::{Buf, BufMut, Bytes, BytesMut}; +use strum::FromRepr; +use tokio_util::codec::{Decoder, Encoder}; + +use super::error::ProtocolError; +use crate::transport::nine_bit::nine_bit_encode_frame; + +pub const ASIC_STRING: &[u8; 3] = b"BZ2"; +pub const NOOP_STRING: &[u8; 3] = b"2ZB"; +pub const DEFAULT_ASIC_ID: u8 = 0xfa; + +pub const ASIC_HW_ID_STRIDE: u8 = 10; +pub const ENGINES_PER_ASIC: usize = 240; + +pub const NOTCH_REG: u16 = 0x0fff; +pub const BIST_REG: u16 = 0x0fc0; +pub const BROADCAST_ASIC: u8 = 0xff; +pub const BROADCAST_ENGINE: u16 = 0x00ff; + +pub const TERM_BYTE: u8 = 0xa5; +pub const TAR_BYTE: u8 = 0x08; + +pub mod engine_reg { + pub const STATUS: u16 = 0x00; + pub const CONFIG: u16 = 0x01; + pub const DELAY: u16 = 0x0c; + pub const MIDSTATE: u16 = 0x10; + pub const MRRESIDUE: u16 = 0x30; + pub const START_TIMESTAMP: u16 = 0x34; + pub const SEQUENCE_ID: u16 = 0x38; + pub const JOB_CONTROL: u16 = 0x39; + pub const START_NONCE: u16 = 0x3c; + pub const END_NONCE: u16 = 0x40; + pub const TARGET: u16 = 0x44; + pub const TIMESTAMP_COUNT: u16 = 0x48; + pub const ZEROS_TO_FIND: u16 = 0x49; + pub const RESULT_VALID: u16 = 0x70; + pub const RESULT_SEQUENCE: u16 = 0x71; + pub const RESULT_TIME: u16 = 0x72; + pub const RESULT_NONCE: u16 = 0x73; + pub const RESULT_POP: u16 = 0x77; +} + +pub mod local_reg { + pub const 
RESULT_STS_CTL: u16 = 0x00; + pub const ERROR_LOG0: u16 = 0x01; + pub const ERROR_LOG1: u16 = 0x02; + pub const ERROR_LOG2: u16 = 0x03; + pub const ERROR_LOG3: u16 = 0x04; + pub const SPI_STS_CTL: u16 = 0x05; + pub const UART_LINE_CTL: u16 = 0x06; + pub const UART_TDM_CTL: u16 = 0x07; + pub const SLOW_CLK_DIV: u16 = 0x08; + pub const TDM_DELAY: u16 = 0x09; + pub const UART_TX: u16 = 0x0a; + pub const ASIC_ID: u16 = 0x0b; + pub const PLL_CNTRL: u16 = 0x0f; + pub const PLL_POSTDIV: u16 = 0x10; + pub const PLL_FBDIV: u16 = 0x11; + pub const PLL_ENABLE: u16 = 0x12; + pub const PLL_MISC: u16 = 0x13; + pub const ENG_SOFT_RESET: u16 = 0x16; + pub const PLL1_CNTRL: u16 = 0x19; + pub const PLL1_POSTDIV: u16 = 0x1a; + pub const PLL1_FBDIV: u16 = 0x1b; + pub const PLL1_ENABLE: u16 = 0x1c; + pub const PLL1_MISC: u16 = 0x1d; + pub const UART_SPI_TAP: u16 = 0x20; + pub const SENS_TDM_GAP_CNT: u16 = 0x2d; + pub const DTS_SRST_PD: u16 = 0x2e; + pub const DTS_CFG: u16 = 0x2f; + pub const TEMPSENSOR_TUNE_CODE: u16 = 0x30; + pub const THERMAL_TRIP_STATUS: u16 = 0x31; + pub const THERMAL_TEMP_CODE: u16 = 0x32; + pub const THERMAL_SAR_COUNT_LOAD: u16 = 0x34; + pub const THERMAL_SAR_STATE_RESET: u16 = 0x35; + pub const SENSOR_THRS_CNT: u16 = 0x3c; + pub const SENSOR_CLK_DIV: u16 = 0x3d; + pub const VSENSOR_SRST_PD: u16 = 0x3e; + pub const VSENSOR_CFG: u16 = 0x3f; + pub const VOLTAGE_SENSOR_ENABLE: u16 = 0x40; + pub const VOLTAGE_SENSOR_STATUS: u16 = 0x41; + pub const VOLTAGE_SENSOR_MISC: u16 = 0x42; + pub const VOLTAGE_SENSOR_DFT: u16 = 0x43; + pub const BANDGAP: u16 = 0x45; + pub const LDO_0_CTL_STS: u16 = 0x46; + pub const LDO_1_CTL_STS: u16 = 0x47; + pub const IO_PEPS: u16 = 0x50; + pub const IO_PEPS_DS: u16 = 0x51; + pub const IO_PUPDST: u16 = 0x52; + pub const IO_NON_CLK_DS: u16 = 0x53; + pub const CKDCCR_0_0: u16 = 0x54; + pub const CKDCCR_1_0: u16 = 0x55; + pub const CKDCCR_2_0: u16 = 0x56; + pub const CKDCCR_3_0: u16 = 0x57; + pub const CKDCCR_4_0: u16 = 0x58; + pub const 
CKDCCR_5_0: u16 = 0x59; + pub const CKDLLR_0_0: u16 = 0x5a; + pub const CKDLLR_1_0: u16 = 0x5b; + pub const CKDCCR_0_1: u16 = 0x5c; + pub const CKDCCR_1_1: u16 = 0x5d; + pub const CKDCCR_2_1: u16 = 0x5e; + pub const CKDCCR_3_1: u16 = 0x5f; + pub const CKDCCR_4_1: u16 = 0x60; + pub const CKDCCR_5_1: u16 = 0x61; + pub const CKDLLR_0_1: u16 = 0x62; + pub const CKDLLR_1_1: u16 = 0x63; +} + +pub mod bist_reg { + pub const RESULT_FSM_CTL: u16 = 0x00; + pub const ERROR_LOG0: u16 = 0x01; + pub const ERROR_LOG1: u16 = 0x02; + pub const ERROR_LOG2: u16 = 0x03; + pub const ERROR_LOG3: u16 = 0x04; + pub const ENABLE: u16 = 0x06; + pub const CONTROL: u16 = 0x07; + pub const RESULT_TIMEOUT: u16 = 0x08; + pub const STATUS: u16 = 0x09; + pub const JOB_COUNT: u16 = 0x0a; + pub const GAP_COUNT: u16 = 0x0b; + pub const ENG_CLK_GATE: u16 = 0x0c; + pub const INT_START_NONCE: u16 = 0x0d; + pub const INT_END_NONCE: u16 = 0x0e; + pub const RESULT_SEL: u16 = 0x17; + pub const EXPECTED_RES_REG0: u16 = 0x18; + pub const EXPECTED_RES_REG1: u16 = 0x19; + pub const EXPECTED_RES_REG2: u16 = 0x1a; + pub const EXPECTED_RES_REG3: u16 = 0x1b; + pub const EXP_PAT_REG0: u16 = 0x1c; + pub const EXP_PAT_REG1: u16 = 0x1d; + pub const EXP_PAT_REG2: u16 = 0x1e; + pub const EXP_PAT_REG3: u16 = 0x1f; + + pub const fn exp_pat_subjob0(n: u16) -> u16 { + 0x20 + n + } + + pub const fn exp_pat_subjob1(n: u16) -> u16 { + 0x80 + n + } + + pub const fn exp_pat_subjob2(n: u16) -> u16 { + 0x94 + n + } + + pub const fn exp_pat_subjob3(n: u16) -> u16 { + 0xa8 + n + } + + pub const fn job_tce_row(j: u16, t: u16, r: u16) -> u16 { + 0x30 + (0x50 * j) + (0x14 * t) + r + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, FromRepr)] +#[repr(u8)] +pub enum Opcode { + WriteJob = 0x0, + ReadResult = 0x1, + WriteReg = 0x2, + ReadReg = 0x3, + MulticastWrite = 0x4, + DtsVs = 0x0d, + Loopback = 0x0e, + Noop = 0x0f, +} + +/// Translate logical ASIC index (0..N) to hardware ASIC ID used on UART. 
+pub fn logical_to_hw_asic_id(logical_asic: u8) -> u8 { + logical_asic + .saturating_add(1) + .saturating_mul(ASIC_HW_ID_STRIDE) +} + +/// Translate hardware ASIC ID from UART into logical ASIC index. +pub fn hw_to_logical_asic_id(hw_asic_id: u8) -> Option { + if hw_asic_id < ASIC_HW_ID_STRIDE || hw_asic_id % ASIC_HW_ID_STRIDE != 0 { + return None; + } + + Some((hw_asic_id / ASIC_HW_ID_STRIDE) - 1) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Command { + /// Send short NOOP command. + Noop { asic_hw_id: u8 }, + + /// Read register value (1/2/4 bytes). + ReadReg { + asic_hw_id: u8, + engine: u16, + offset: u16, + count: u8, + }, + + /// Write register value (1-255 bytes). + WriteReg { + asic_hw_id: u8, + engine: u16, + offset: u16, + value: Bytes, + }, +} + +impl Command { + pub fn read_reg_u32(asic_hw_id: u8, engine: u16, offset: u16) -> Self { + Self::ReadReg { + asic_hw_id, + engine, + offset, + count: 4, + } + } + + pub fn write_reg_u8(asic_hw_id: u8, engine: u16, offset: u16, value: u8) -> Self { + Self::WriteReg { + asic_hw_id, + engine, + offset, + value: Bytes::copy_from_slice(&[value]), + } + } + + pub fn write_reg_u32_le(asic_hw_id: u8, engine: u16, offset: u16, value: u32) -> Self { + Self::WriteReg { + asic_hw_id, + engine, + offset, + value: Bytes::copy_from_slice(&value.to_le_bytes()), + } + } + + fn encode_raw(&self) -> Result { + let mut raw = BytesMut::new(); + + match self { + Self::Noop { asic_hw_id } => { + // NOOP command: + // [asic_hw_id][opcode<<4] + raw.reserve(2); + raw.put_u16(build_short_header(*asic_hw_id, Opcode::Noop)); + } + Self::ReadReg { + asic_hw_id, + engine, + offset, + count, + } => { + if !matches!(*count, 1 | 2 | 4) { + return Err(ProtocolError::InvalidReadRegCount(*count)); + } + + // READREG command + // [header:u32_be][count-1][TAR_BYTE] + raw.reserve(6); + raw.put_u32(build_full_header( + *asic_hw_id, + Opcode::ReadReg, + *engine, + *offset, + )); + raw.put_u8(count.saturating_sub(1)); + raw.put_u8(TAR_BYTE); + 
} + Self::WriteReg { + asic_hw_id, + engine, + offset, + value, + } => { + if value.is_empty() { + return Err(ProtocolError::EmptyWritePayload); + } + if value.len() > usize::from(u8::MAX) { + return Err(ProtocolError::WritePayloadTooLarge(value.len())); + } + + // WRITEREG command (no length prefix): + // [header:u32_be][count-1][data...] + raw.reserve(5 + value.len()); + raw.put_u32(build_full_header( + *asic_hw_id, + Opcode::WriteReg, + *engine, + *offset, + )); + raw.put_u8((value.len() as u8).saturating_sub(1)); + raw.extend_from_slice(value); + } + } + + Ok(raw) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ReadRegData { + U8(u8), + U32(u32), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Response { + Noop { asic_hw_id: u8, signature: [u8; 3] }, + ReadReg { asic_hw_id: u8, data: ReadRegData }, +} + +/// BZM2 frame codec. +/// +/// Encoder emits 9-bit-translated TX bytes (`[data, flag]` pairs) using +/// `nine_bit_encode_frame`. Decoder expects plain 8-bit RX bytes in TDM mode. +#[derive(Debug, Clone)] +pub struct FrameCodec { + readreg_response_size: usize, +} + +impl FrameCodec { + /// Create codec with explicit READREG response payload size (1 or 4 bytes). 
+ pub fn new(readreg_response_size: usize) -> Result { + if !matches!(readreg_response_size, 1 | 4) { + return Err(ProtocolError::UnsupportedReadRegResponseSize( + readreg_response_size, + )); + } + + Ok(Self { + readreg_response_size, + }) + } + + fn io_error(err: ProtocolError) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, err) + } +} + +impl Default for FrameCodec { + fn default() -> Self { + Self { + readreg_response_size: 4, + } + } +} + +impl Encoder for FrameCodec { + type Error = io::Error; + + fn encode(&mut self, item: Command, dst: &mut BytesMut) -> Result<(), Self::Error> { + let raw = item.encode_raw().map_err(Self::io_error)?; + let encoded = nine_bit_encode_frame(&raw); + dst.extend_from_slice(&encoded); + Ok(()) + } +} + +impl Decoder for FrameCodec { + type Item = Response; + type Error = io::Error; + + fn decode(&mut self, src: &mut BytesMut) -> Result, Self::Error> { + loop { + // Minimum frame is [asic_hw_id, opcode] + if src.len() < 2 { + return Ok(None); + } + + let opcode = match Opcode::from_repr(src[1]) { + Some(op) => op, + None => { + // Byte-level resync when stream is misaligned. 
+ src.advance(1); + continue; + } + }; + + match opcode { + Opcode::Noop => { + if src.len() < 5 { + return Ok(None); + } + + let mut frame = src.split_to(5); + let asic_hw_id = frame.get_u8(); + let _opcode = frame.get_u8(); + let mut signature = [0u8; 3]; + frame.copy_to_slice(&mut signature); + + if signature != *NOOP_STRING { + return Err(Self::io_error(ProtocolError::InvalidNoopSignature( + signature, + ))); + } + + return Ok(Some(Response::Noop { + asic_hw_id, + signature, + })); + } + Opcode::ReadReg => { + let frame_len = 2 + self.readreg_response_size; + if src.len() < frame_len { + return Ok(None); + } + + let mut frame = src.split_to(frame_len); + let asic_hw_id = frame.get_u8(); + let _opcode = frame.get_u8(); + let data = match self.readreg_response_size { + 1 => ReadRegData::U8(frame.get_u8()), + 4 => ReadRegData::U32(frame.get_u32_le()), + n => { + return Err(Self::io_error( + ProtocolError::UnsupportedReadRegResponseSize(n), + )); + } + }; + + return Ok(Some(Response::ReadReg { asic_hw_id, data })); + } + // Pass-1 decoder only surfaces NOOP and READREG. Drop other + // fixed-length TDM messages so callers can keep waiting for + // the response type they care about. 
+                Opcode::ReadResult | Opcode::DtsVs => {
+                    const TDM_FIXED_LEN: usize = 10; // [asic:u8][opcode:u8][payload:8]
+                    if src.len() < TDM_FIXED_LEN {
+                        return Ok(None);
+                    }
+                    src.advance(TDM_FIXED_LEN);
+                    continue;
+                }
+                other => {
+                    src.advance(1); // drop one byte so a retry resyncs instead of wedging
+                    let err = ProtocolError::UnsupportedResponseOpcode(other as u8);
+                    return Err(Self::io_error(err));
+                }
+            }
+        }
+    }
+}
+
+fn build_short_header(asic_hw_id: u8, opcode: Opcode) -> u16 {
+    ((asic_hw_id as u16) << 8) | ((opcode as u16) << 4)
+}
+
+fn build_full_header(asic_hw_id: u8, opcode: Opcode, engine: u16, offset: u16) -> u32 {
+    ((asic_hw_id as u32) << 24) | ((opcode as u32) << 20) | ((engine as u32) << 8) | (offset as u32)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_asic_id_translation() {
+        assert_eq!(logical_to_hw_asic_id(0), 10);
+        assert_eq!(logical_to_hw_asic_id(1), 20);
+        assert_eq!(hw_to_logical_asic_id(10), Some(0));
+        assert_eq!(hw_to_logical_asic_id(20), Some(1));
+        assert_eq!(hw_to_logical_asic_id(9), None);
+        assert_eq!(hw_to_logical_asic_id(11), None);
+    }
+
+    #[test]
+    fn test_encode_noop_frame() {
+        let cmd = Command::Noop { asic_hw_id: 0xfa };
+        let raw = cmd.encode_raw().expect("encode should succeed");
+        assert_eq!(raw.as_ref(), &[0xfa, 0xf0]);
+
+        let mut codec = FrameCodec::default();
+        let mut encoded = BytesMut::new();
+        codec
+            .encode(cmd, &mut encoded)
+            .expect("encode should succeed");
+
+        assert_eq!(encoded.as_ref(), &[0xfa, 0x01, 0xf0, 0x00]);
+    }
+
+    #[test]
+    fn test_encode_readreg_u32_frame() {
+        let cmd = Command::read_reg_u32(0x0a, NOTCH_REG, local_reg::ASIC_ID);
+        let raw = cmd.encode_raw().expect("encode should succeed");
+
+        // header = (0x0a << 24) | (0x3 << 20) | (0x0fff << 8) | 0x0b
+        assert_eq!(raw.as_ref(), &[0x0a, 0x3f, 0xff, 0x0b, 0x03, TAR_BYTE]);
+    }
+
+    #[test]
+    fn test_encode_writereg_u32_frame() {
+        let cmd = Command::write_reg_u32_le(0x0a, NOTCH_REG, local_reg::UART_TX, 0x1234_5678);
+        let raw = cmd.encode_raw().expect("encode should succeed");
+
+        // count byte = 4 - 1 
= 3 + assert_eq!( + raw.as_ref(), + &[0x0a, 0x2f, 0xff, 0x0a, 0x03, 0x78, 0x56, 0x34, 0x12,] + ); + } + + #[test] + fn test_decode_noop_response() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from(&[0x0a, Opcode::Noop as u8, b'2', b'Z', b'B'][..]); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::Noop { + asic_hw_id: 0x0a, + signature: *NOOP_STRING, + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_readreg_u32_response() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from(&[0x0a, Opcode::ReadReg as u8, 0x78, 0x56, 0x34, 0x12][..]); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::ReadReg { + asic_hw_id: 0x0a, + data: ReadRegData::U32(0x1234_5678), + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_readreg_u8_response() { + let mut codec = FrameCodec::new(1).expect("codec should construct"); + let mut src = BytesMut::from(&[0x0a, Opcode::ReadReg as u8, 0xab][..]); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::ReadReg { + asic_hw_id: 0x0a, + data: ReadRegData::U8(0xab), + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_resync_from_garbage() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from(&[0xaa, 0xbb, 0x0a, Opcode::Noop as u8, b'2', b'Z', b'B'][..]); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::Noop { + asic_hw_id: 0x0a, + signature: *NOOP_STRING, + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_skips_tdm_telemetry_before_noop() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from( + &[ + 0x0a, + Opcode::DtsVs as u8, + 0x00, + 0x01, + 0x02, + 0x03, + 0x04, + 0x05, + 0x06, + 0x07, + 0x0a, + Opcode::Noop as u8, + b'2', + 
b'Z', + b'B', + ][..], + ); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::Noop { + asic_hw_id: 0x0a, + signature: *NOOP_STRING, + }) + ); + assert!(src.is_empty()); + } +} diff --git a/mujina-miner/src/asic/mod.rs b/mujina-miner/src/asic/mod.rs index 062ee8f..d5a74cb 100644 --- a/mujina-miner/src/asic/mod.rs +++ b/mujina-miner/src/asic/mod.rs @@ -1,4 +1,5 @@ pub mod bm13xx; +pub mod bzm2; pub mod hash_thread; use async_trait::async_trait; From af62af3a8dba485a68233a0c7487bfeba7f087b2 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Mon, 16 Feb 2026 22:51:03 -0500 Subject: [PATCH 04/19] feat(board): wire BIRDS board to BZM2 hash thread --- mujina-miner/src/asic/bzm2/mod.rs | 2 + mujina-miner/src/asic/bzm2/thread.rs | 296 +++++++++++++++++++++++++++ mujina-miner/src/board/birds.rs | 268 ++++++++++++++++++++++-- 3 files changed, 554 insertions(+), 12 deletions(-) create mode 100644 mujina-miner/src/asic/bzm2/thread.rs diff --git a/mujina-miner/src/asic/bzm2/mod.rs b/mujina-miner/src/asic/bzm2/mod.rs index a6c6d61..6cf0b87 100644 --- a/mujina-miner/src/asic/bzm2/mod.rs +++ b/mujina-miner/src/asic/bzm2/mod.rs @@ -2,6 +2,8 @@ pub mod error; pub mod protocol; +pub mod smoke; +pub mod thread; pub use error::ProtocolError; pub use protocol::{Command, FrameCodec, Opcode, ReadRegData, Response}; diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs new file mode 100644 index 0000000..5d01736 --- /dev/null +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -0,0 +1,296 @@ +//! BZM2 HashThread implementation. +//! +//! This is the first thread integration pass: it mirrors the BM13xx actor +//! structure and wiring while keeping mining/job execution minimal until +//! WRITEJOB/READRESULT support lands. 
+ +use std::sync::{Arc, RwLock}; + +use async_trait::async_trait; +use futures::{SinkExt, sink::Sink, stream::Stream}; +use tokio::sync::{mpsc, oneshot, watch}; +use tokio_stream::StreamExt; + +use super::protocol; +use crate::{ + asic::hash_thread::{ + BoardPeripherals, HashTask, HashThread, HashThreadCapabilities, HashThreadError, + HashThreadEvent, HashThreadStatus, ThreadRemovalSignal, + }, + tracing::prelude::*, + types::HashRate, +}; + +#[derive(Debug)] +enum ThreadCommand { + UpdateTask { + new_task: HashTask, + response_tx: oneshot::Sender, HashThreadError>>, + }, + ReplaceTask { + new_task: HashTask, + response_tx: oneshot::Sender, HashThreadError>>, + }, + GoIdle { + response_tx: oneshot::Sender, HashThreadError>>, + }, + #[expect(unused)] + Shutdown, +} + +/// HashThread wrapper for a BZM2 board worker. +pub struct Bzm2Thread { + name: String, + command_tx: mpsc::Sender, + event_rx: Option>, + capabilities: HashThreadCapabilities, + status: Arc>, +} + +impl Bzm2Thread { + pub fn new( + name: String, + chip_responses: R, + chip_commands: W, + peripherals: BoardPeripherals, + removal_rx: watch::Receiver, + ) -> Self + where + R: Stream> + Unpin + Send + 'static, + W: Sink + Unpin + Send + 'static, + W::Error: std::fmt::Debug, + { + let (cmd_tx, cmd_rx) = mpsc::channel(10); + let (evt_tx, evt_rx) = mpsc::channel(100); + + let status = Arc::new(RwLock::new(HashThreadStatus::default())); + let status_clone = Arc::clone(&status); + + tokio::spawn(async move { + bzm2_thread_actor( + cmd_rx, + evt_tx, + removal_rx, + status_clone, + chip_responses, + chip_commands, + peripherals, + ) + .await; + }); + + Self { + name, + command_tx: cmd_tx, + event_rx: Some(evt_rx), + capabilities: HashThreadCapabilities { + hashrate_estimate: HashRate::from_terahashes(1.0), // Stub + }, + status, + } + } +} + +#[async_trait] +impl HashThread for Bzm2Thread { + fn name(&self) -> &str { + &self.name + } + + fn capabilities(&self) -> &HashThreadCapabilities { + &self.capabilities + 
} + + async fn update_task( + &mut self, + new_task: HashTask, + ) -> std::result::Result, HashThreadError> { + let (response_tx, response_rx) = oneshot::channel(); + self.command_tx + .send(ThreadCommand::UpdateTask { + new_task, + response_tx, + }) + .await + .map_err(|_| HashThreadError::ChannelClosed("command channel closed".into()))?; + + response_rx + .await + .map_err(|_| HashThreadError::WorkAssignmentFailed("no response from thread".into()))? + } + + async fn replace_task( + &mut self, + new_task: HashTask, + ) -> std::result::Result, HashThreadError> { + let (response_tx, response_rx) = oneshot::channel(); + self.command_tx + .send(ThreadCommand::ReplaceTask { + new_task, + response_tx, + }) + .await + .map_err(|_| HashThreadError::ChannelClosed("command channel closed".into()))?; + + response_rx + .await + .map_err(|_| HashThreadError::WorkAssignmentFailed("no response from thread".into()))? + } + + async fn go_idle(&mut self) -> std::result::Result, HashThreadError> { + let (response_tx, response_rx) = oneshot::channel(); + self.command_tx + .send(ThreadCommand::GoIdle { response_tx }) + .await + .map_err(|_| HashThreadError::ChannelClosed("command channel closed".into()))?; + + response_rx + .await + .map_err(|_| HashThreadError::WorkAssignmentFailed("no response from thread".into()))? 
+ } + + fn take_event_receiver(&mut self) -> Option> { + self.event_rx.take() + } + + fn status(&self) -> HashThreadStatus { + self.status.read().expect("status lock poisoned").clone() + } +} + +async fn initialize_chip( + chip_commands: &mut W, + peripherals: &mut BoardPeripherals, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + if let Some(ref mut asic_enable) = peripherals.asic_enable { + asic_enable.enable().await.map_err(|e| { + HashThreadError::InitializationFailed(format!("failed to enable ASIC: {}", e)) + })?; + } + + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + + chip_commands + .send(protocol::Command::Noop { + asic_hw_id: protocol::DEFAULT_ASIC_ID, + }) + .await + .map_err(|e| { + HashThreadError::InitializationFailed(format!("failed to send BZM2 NOOP: {:?}", e)) + })?; + + Ok(()) +} + +async fn bzm2_thread_actor( + mut cmd_rx: mpsc::Receiver, + evt_tx: mpsc::Sender, + mut removal_rx: watch::Receiver, + status: Arc>, + mut chip_responses: R, + mut chip_commands: W, + mut peripherals: BoardPeripherals, +) where + R: Stream> + Unpin, + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + if let Some(ref mut asic_enable) = peripherals.asic_enable + && let Err(e) = asic_enable.disable().await + { + warn!(error = %e, "Failed to disable BZM2 ASIC on thread startup"); + } + + let mut chip_initialized = false; + let mut current_task: Option = None; + let mut status_ticker = tokio::time::interval(std::time::Duration::from_secs(5)); + status_ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + + loop { + tokio::select! 
{
+            changed = removal_rx.changed() => {
+                // `changed()` returns Err once the sender is dropped, and then completes
+                // immediately on every poll. Treat a closed channel as removal so the
+                // select loop exits instead of busy-spinning.
+                let signal = removal_rx.borrow().clone();
+                if changed.is_err() || signal != ThreadRemovalSignal::Running {
+                    status.write().expect("status lock poisoned").is_active = false;
+                    break;
+                }
+            }
+
+            Some(cmd) = cmd_rx.recv() => {
+                match cmd {
+                    ThreadCommand::UpdateTask { new_task, response_tx } => {
+                        if !chip_initialized {
+                            if let Err(e) = initialize_chip(&mut chip_commands, &mut peripherals).await {
+                                error!(error = %e, "BZM2 chip initialization failed");
+                                let _ = response_tx.send(Err(e));
+                                continue;
+                            }
+                            chip_initialized = true;
+                        }
+
+                        let old_task = current_task.replace(new_task);
+                        {
+                            let mut s = status.write().expect("status lock poisoned");
+                            s.is_active = true;
+                        }
+                        let _ = response_tx.send(Ok(old_task));
+                    }
+                    ThreadCommand::ReplaceTask { new_task, response_tx } => {
+                        if !chip_initialized {
+                            if let Err(e) = initialize_chip(&mut chip_commands, &mut peripherals).await {
+                                error!(error = %e, "BZM2 chip initialization failed");
+                                let _ = response_tx.send(Err(e));
+                                continue;
+                            }
+                            chip_initialized = true;
+                        }
+
+                        let old_task = current_task.replace(new_task);
+                        {
+                            let mut s = status.write().expect("status lock poisoned");
+                            s.is_active = true;
+                        }
+                        let _ = response_tx.send(Ok(old_task));
+                    }
+                    ThreadCommand::GoIdle { response_tx } => {
+                        let old_task = current_task.take();
+                        {
+                            let mut s = status.write().expect("status lock poisoned");
+                            s.is_active = false;
+                        }
+                        let _ = response_tx.send(Ok(old_task));
+                    }
+                    ThreadCommand::Shutdown => {
+                        break;
+                    }
+                }
+            }
+
+            Some(result) = chip_responses.next() => {
+                match result {
+                    Ok(protocol::Response::Noop { asic_hw_id, signature }) => {
+                        trace!(asic_hw_id, signature = ?signature, "BZM2 NOOP response");
+                    }
+                    Ok(protocol::Response::ReadReg { asic_hw_id, data }) => {
+                        trace!(asic_hw_id, data = ?data, "BZM2 READREG response");
+                    }
+                    Err(e) => {
+                        warn!(error = %e, "Error reading BZM2 response stream");
+                    }
+                }
+            }
+
+            _ = status_ticker.tick() => {
+                let snapshot = 
status.read().expect("status lock poisoned").clone(); + let _ = evt_tx.send(HashThreadEvent::StatusUpdate(snapshot)).await; + } + } + } +} diff --git a/mujina-miner/src/board/birds.rs b/mujina-miner/src/board/birds.rs index dc960c8..7036f26 100644 --- a/mujina-miner/src/board/birds.rs +++ b/mujina-miner/src/board/birds.rs @@ -3,38 +3,233 @@ //! The BIRDS board is a mining board with 4 BZM2 ASIC chips, communicating via //! USB using two serial ports: a control UART for GPIO/I2C and a data UART for //! ASIC communication with 8-bit to 9-bit serial translation. -//! -//! This is currently a stub implementation pending full BZM2 ASIC support. use async_trait::async_trait; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::sync::watch; +use tokio::time::{Duration, sleep}; +use tokio_serial::SerialPortBuilderExt; +use tokio_util::codec::{FramedRead, FramedWrite}; use super::{ Board, BoardDescriptor, BoardError, BoardInfo, pattern::{BoardPattern, Match, StringMatch}, }; -use crate::{asic::hash_thread::HashThread, error::Error, transport::UsbDeviceInfo}; +use crate::{ + asic::{ + bzm2::{FrameCodec, smoke, thread::Bzm2Thread}, + hash_thread::{AsicEnable, BoardPeripherals, HashThread, ThreadRemovalSignal}, + }, + error::Error, + transport::{ + UsbDeviceInfo, + serial::{SerialControl, SerialReader, SerialStream, SerialWriter}, + }, +}; /// Number of BZM2 ASICs on a BIRDS board. -#[expect(dead_code, reason = "will be used during ASIC init")] const ASICS_PER_BOARD: usize = 4; /// Default baud rate for the BIRDS data UART (5 Mbps). -#[expect(dead_code, reason = "will be used when opening data port")] const DATA_UART_BAUD: u32 = 5_000_000; /// Baud rate for the BIRDS control UART. -#[expect(dead_code, reason = "will be used when opening control port")] const CONTROL_UART_BAUD: u32 = 115_200; +/// BIRDS control GPIO: 5V power enable. +const GPIO_5V_EN: u8 = 1; +/// BIRDS control GPIO: ASIC reset (active-low). 
+const GPIO_ASIC_RST: u8 = 2; +/// BIRDS control board ID for 5V/ASIC reset GPIO operations. +const CTRL_ID_POWER_RESET: u8 = 0xAB; +/// Control protocol page for GPIO. +const CTRL_PAGE_GPIO: u8 = 0x06; + /// BIRDS mining board. pub struct BirdsBoard { device_info: UsbDeviceInfo, + control_port: Option, + data_reader: Option>, + data_writer: Option>, + data_control: Option, + thread_shutdown: Option>, } impl BirdsBoard { /// Create a new BIRDS board instance. pub fn new(device_info: UsbDeviceInfo) -> Result { - Ok(Self { device_info }) + Ok(Self { + device_info, + control_port: None, + data_reader: None, + data_writer: None, + data_control: None, + thread_shutdown: None, + }) + } + + /// Early bring-up init path. + /// + /// Until full thread integration lands, we run a basic UART smoke test + /// (NOOP + READREG ASIC_ID) during board initialization. + pub async fn initialize(&mut self) -> Result<(), BoardError> { + let (control_port, data_port) = { + let serial_ports = self.device_info.serial_ports().map_err(|e| { + BoardError::InitializationFailed(format!("Failed to enumerate serial ports: {}", e)) + })?; + + if serial_ports.len() != 2 { + return Err(BoardError::InitializationFailed(format!( + "BIRDS requires exactly 2 serial ports, found {}", + serial_ports.len() + ))); + } + + (serial_ports[0].clone(), serial_ports[1].clone()) + }; + + tracing::info!( + serial = ?self.device_info.serial_number, + control_port = %control_port, + data_port = %data_port, + data_baud = DATA_UART_BAUD, + control_baud = CONTROL_UART_BAUD, + asics = ASICS_PER_BOARD, + "Running BIRDS ASIC smoke test during initialization" + ); + + // Match known-good bring-up sequence from reference scripts: + // 1) Enable 5V rail + // 2) Pulse ASIC reset low/high + // 3) Wait for UART startup + self.bringup_power_and_reset(&control_port).await?; + self.control_port = Some(control_port); + + let result = smoke::run_smoke(&data_port, 0).await.map_err(|e| { + 
BoardError::InitializationFailed(format!("BIRDS ASIC smoke test failed: {:#}", e)) + })?; + + tracing::info!( + logical_asic = result.logical_asic, + asic_hw_id = result.asic_hw_id, + asic_id = format_args!("0x{:08x}", result.asic_id), + "BIRDS ASIC smoke test succeeded" + ); + + let data_stream = SerialStream::new(&data_port, DATA_UART_BAUD).map_err(|e| { + BoardError::InitializationFailed(format!("Failed to open BIRDS data port: {}", e)) + })?; + let (data_reader, data_writer, data_control) = data_stream.split(); + self.data_reader = Some(FramedRead::new(data_reader, FrameCodec::default())); + self.data_writer = Some(FramedWrite::new(data_writer, FrameCodec::default())); + self.data_control = Some(data_control); + + Ok(()) + } + + async fn bringup_power_and_reset(&self, control_port: &str) -> Result<(), BoardError> { + let mut control_stream = tokio_serial::new(control_port, CONTROL_UART_BAUD) + .open_native_async() + .map_err(|e| { + BoardError::InitializationFailed(format!( + "Failed to open BIRDS control port {}: {}", + control_port, e + )) + })?; + + Self::control_gpio_write(&mut control_stream, GPIO_5V_EN, true).await?; + sleep(Duration::from_millis(100)).await; + + Self::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, false).await?; + sleep(Duration::from_millis(100)).await; + + Self::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, true).await?; + sleep(Duration::from_millis(1000)).await; + + Ok(()) + } + + async fn control_gpio_write( + stream: &mut tokio_serial::SerialStream, + pin: u8, + value_high: bool, + ) -> Result<(), BoardError> { + // Packet format: [len:u16_le][id][bus][page][cmd=pin][value] + // For BIRDS, id is the board target (0xAB for 5V/RST). 
+ let packet: [u8; 7] = [ + 0x07, + 0x00, + CTRL_ID_POWER_RESET, + 0x00, + CTRL_PAGE_GPIO, + pin, + if value_high { 0x01 } else { 0x00 }, + ]; + stream.write_all(&packet).await.map_err(|e| { + BoardError::HardwareControl(format!( + "Failed to write GPIO control packet (pin {}): {}", + pin, e + )) + })?; + + // Ack is 4 bytes. Byte[2] should echo board id. + let mut ack = [0u8; 4]; + stream.read_exact(&mut ack).await.map_err(|e| { + BoardError::HardwareControl(format!( + "Failed to read GPIO control ack (pin {}): {}", + pin, e + )) + })?; + if ack[2] != CTRL_ID_POWER_RESET { + return Err(BoardError::HardwareControl(format!( + "GPIO ack ID mismatch for pin {}: expected 0x{:02x}, got 0x{:02x}", + pin, CTRL_ID_POWER_RESET, ack[2] + ))); + } + + Ok(()) + } + + async fn hold_in_reset(&self) -> Result<(), BoardError> { + let control_port = self.control_port.as_ref().ok_or_else(|| { + BoardError::InitializationFailed("BIRDS control port not initialized".into()) + })?; + + let mut control_stream = tokio_serial::new(control_port, CONTROL_UART_BAUD) + .open_native_async() + .map_err(|e| { + BoardError::InitializationFailed(format!( + "Failed to open BIRDS control port {}: {}", + control_port, e + )) + })?; + + Self::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, false).await + } +} + +struct BirdsAsicEnable { + control_port: String, +} + +#[async_trait] +impl AsicEnable for BirdsAsicEnable { + async fn enable(&mut self) -> anyhow::Result<()> { + let mut control_stream = tokio_serial::new(&self.control_port, CONTROL_UART_BAUD) + .open_native_async() + .map_err(|e| anyhow::anyhow!("failed to open control port: {}", e))?; + BirdsBoard::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, true) + .await + .map_err(|e| anyhow::anyhow!("failed to release BZM2 reset: {}", e)) + } + + async fn disable(&mut self) -> anyhow::Result<()> { + let mut control_stream = tokio_serial::new(&self.control_port, CONTROL_UART_BAUD) + .open_native_async() + .map_err(|e| 
anyhow::anyhow!("failed to open control port: {}", e))?; + BirdsBoard::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, false) + .await + .map_err(|e| anyhow::anyhow!("failed to assert BZM2 reset: {}", e)) } } @@ -49,22 +244,71 @@ impl Board for BirdsBoard { } async fn shutdown(&mut self) -> Result<(), BoardError> { - tracing::info!("BIRDS stub shutdown (no-op)"); + if let Some(ref tx) = self.thread_shutdown { + if let Err(e) = tx.send(ThreadRemovalSignal::Shutdown) { + tracing::warn!("Failed to send shutdown signal to BIRDS thread: {}", e); + } + } + + self.hold_in_reset().await?; Ok(()) } async fn create_hash_threads(&mut self) -> Result>, BoardError> { - Err(BoardError::InitializationFailed( - "BIRDS not yet implemented".into(), - )) + let (removal_tx, removal_rx) = watch::channel(ThreadRemovalSignal::Running); + self.thread_shutdown = Some(removal_tx); + + let data_reader = self + .data_reader + .take() + .ok_or(BoardError::InitializationFailed( + "No BIRDS data reader available".into(), + ))?; + let data_writer = self + .data_writer + .take() + .ok_or(BoardError::InitializationFailed( + "No BIRDS data writer available".into(), + ))?; + + let control_port = self + .control_port + .clone() + .ok_or(BoardError::InitializationFailed( + "No BIRDS control port available".into(), + ))?; + let asic_enable = BirdsAsicEnable { control_port }; + let peripherals = BoardPeripherals { + asic_enable: Some(Box::new(asic_enable)), + voltage_regulator: None, + }; + + let thread_name = match &self.device_info.serial_number { + Some(serial) => format!("BIRDS-{}", &serial[..8.min(serial.len())]), + None => "BIRDS".to_string(), + }; + + let thread = Bzm2Thread::new( + thread_name, + data_reader, + data_writer, + peripherals, + removal_rx, + ); + Ok(vec![Box::new(thread)]) } } // Factory function to create BIRDS board from USB device info async fn create_from_usb(device: UsbDeviceInfo) -> crate::error::Result> { - let board = BirdsBoard::new(device) + let mut board = 
BirdsBoard::new(device) .map_err(|e| Error::Hardware(format!("Failed to create board: {}", e)))?; + board + .initialize() + .await + .map_err(|e| Error::Hardware(format!("Failed to initialize BIRDS board: {}", e)))?; + Ok(Box::new(board)) } From bfea9d6115d7a0bd7b980300d0b8510452e0784b Mon Sep 17 00:00:00 2001 From: johnny9 Date: Mon, 16 Feb 2026 23:24:55 -0500 Subject: [PATCH 05/19] feat(bzm2): implement bring-up initialization flow --- mujina-miner/src/asic/bzm2/protocol.rs | 88 ++- mujina-miner/src/asic/bzm2/smoke.rs | 147 +++++ mujina-miner/src/asic/bzm2/thread.rs | 797 ++++++++++++++++++++++++- mujina-miner/src/board/birds.rs | 100 +++- 4 files changed, 1077 insertions(+), 55 deletions(-) create mode 100644 mujina-miner/src/asic/bzm2/smoke.rs diff --git a/mujina-miner/src/asic/bzm2/protocol.rs b/mujina-miner/src/asic/bzm2/protocol.rs index e3a5932..fc44a38 100644 --- a/mujina-miner/src/asic/bzm2/protocol.rs +++ b/mujina-miner/src/asic/bzm2/protocol.rs @@ -29,6 +29,13 @@ pub const BROADCAST_ENGINE: u16 = 0x00ff; pub const TERM_BYTE: u8 = 0xa5; pub const TAR_BYTE: u8 = 0x08; +fn format_hex(data: &[u8]) -> String { + data.iter() + .map(|byte| format!("{:02X}", byte)) + .collect::>() + .join(" ") +} + pub mod engine_reg { pub const STATUS: u16 = 0x00; pub const CONFIG: u16 = 0x01; @@ -193,7 +200,7 @@ pub fn hw_to_logical_asic_id(hw_asic_id: u8) -> Option { #[derive(Debug, Clone, PartialEq, Eq)] pub enum Command { - /// Send short NOOP command. + /// Send NOOP command. Noop { asic_hw_id: u8 }, /// Read register value (1/2/4 bytes). @@ -211,6 +218,14 @@ pub enum Command { offset: u16, value: Bytes, }, + + /// Write one or more bytes using opcode 0x4 (row/group write). 
+ MulticastWrite { + asic_hw_id: u8, + group: u16, + offset: u16, + value: Bytes, + }, } impl Command { @@ -241,6 +256,15 @@ impl Command { } } + pub fn multicast_write_u8(asic_hw_id: u8, group: u16, offset: u16, value: u8) -> Self { + Self::MulticastWrite { + asic_hw_id, + group, + offset, + value: Bytes::copy_from_slice(&[value]), + } + } + fn encode_raw(&self) -> Result { let mut raw = BytesMut::new(); @@ -298,6 +322,29 @@ impl Command { raw.put_u8((value.len() as u8).saturating_sub(1)); raw.extend_from_slice(value); } + Self::MulticastWrite { + asic_hw_id, + group, + offset, + value, + } => { + if value.is_empty() { + return Err(ProtocolError::EmptyWritePayload); + } + if value.len() > usize::from(u8::MAX) { + return Err(ProtocolError::WritePayloadTooLarge(value.len())); + } + + raw.reserve(5 + value.len()); + raw.put_u32(build_full_header( + *asic_hw_id, + Opcode::MulticastWrite, + *group, + *offset, + )); + raw.put_u8((value.len() as u8).saturating_sub(1)); + raw.extend_from_slice(value); + } } Ok(raw) @@ -358,6 +405,11 @@ impl Encoder for FrameCodec { fn encode(&mut self, item: Command, dst: &mut BytesMut) -> Result<(), Self::Error> { let raw = item.encode_raw().map_err(Self::io_error)?; let encoded = nine_bit_encode_frame(&raw); + tracing::debug!( + raw = %format_hex(&raw), + encoded = %format_hex(&encoded), + "BZM2 tx frame" + ); dst.extend_from_slice(&encoded); Ok(()) } @@ -378,6 +430,11 @@ impl Decoder for FrameCodec { Some(op) => op, None => { // Byte-level resync when stream is misaligned. 
+ tracing::debug!( + dropped = format_args!("0x{:02X}", src[0]), + next = format_args!("0x{:02X}", src[1]), + "BZM2 rx resync: dropping byte" + ); src.advance(1); continue; } @@ -388,6 +445,7 @@ impl Decoder for FrameCodec { if src.len() < 5 { return Ok(None); } + tracing::debug!(rx = %format_hex(&src[..5]), "BZM2 rx NOOP frame"); let mut frame = src.split_to(5); let asic_hw_id = frame.get_u8(); @@ -411,6 +469,10 @@ impl Decoder for FrameCodec { if src.len() < frame_len { return Ok(None); } + tracing::debug!( + rx = %format_hex(&src[..frame_len]), + "BZM2 rx READREG frame" + ); let mut frame = src.split_to(frame_len); let asic_hw_id = frame.get_u8(); @@ -435,13 +497,24 @@ impl Decoder for FrameCodec { if src.len() < TDM_FIXED_LEN { return Ok(None); } + tracing::trace!( + opcode = opcode as u8, + rx = %format_hex(&src[..TDM_FIXED_LEN]), + "BZM2 rx skipping telemetry frame" + ); src.advance(TDM_FIXED_LEN); continue; } other => { - return Err(Self::io_error(ProtocolError::UnsupportedResponseOpcode( - other as u8, - ))); + let preview_len = src.len().min(32); + tracing::debug!( + opcode = format_args!("0x{:02X}", other as u8), + buffer_len = src.len(), + buffer_preview = %format_hex(&src[..preview_len]), + "BZM2 rx unsupported opcode, resync by dropping one byte" + ); + src.advance(1); + continue; } } } @@ -506,6 +579,13 @@ mod tests { ); } + #[test] + fn test_encode_multicast_write_u8_frame() { + let cmd = Command::multicast_write_u8(0x0a, 0x0012, engine_reg::CONFIG, 0x04); + let raw = cmd.encode_raw().expect("encode should succeed"); + assert_eq!(raw.as_ref(), &[0x0a, 0x40, 0x12, 0x01, 0x00, 0x04]); + } + #[test] fn test_decode_noop_response() { let mut codec = FrameCodec::default(); diff --git a/mujina-miner/src/asic/bzm2/smoke.rs b/mujina-miner/src/asic/bzm2/smoke.rs new file mode 100644 index 0000000..2b8c041 --- /dev/null +++ b/mujina-miner/src/asic/bzm2/smoke.rs @@ -0,0 +1,147 @@ +//! BZM2 UART smoke test helpers. +//! +//! 
Used for early bring-up to verify basic command/response on the ASIC UART. + +use anyhow::{Context, Result, bail}; +use futures::SinkExt; +use tokio::io::AsyncReadExt; +use tokio::time::{self, Duration}; +use tokio_util::codec::FramedWrite; + +use super::{ + Command, FrameCodec, + protocol::{DEFAULT_ASIC_ID, NOOP_STRING, NOTCH_REG, local_reg}, +}; +use crate::transport::serial::SerialStream; + +/// Default BZM2 UART baud rate used by BIRDS data port. +pub const DEFAULT_BZM2_DATA_BAUD: u32 = 5_000_000; + +/// Default timeout for each request/response step. +pub const DEFAULT_IO_TIMEOUT: Duration = Duration::from_secs(2); + +fn format_hex(data: &[u8]) -> String { + data.iter() + .map(|byte| format!("{:02X}", byte)) + .collect::>() + .join(" ") +} + +/// Output from the smoke test. +#[derive(Debug, Clone, Copy)] +pub struct SmokeResult { + pub logical_asic: u8, + pub asic_hw_id: u8, + pub asic_id: u32, +} + +/// Run NOOP + READREG(ASIC_ID) smoke test on a BZM2 UART port. +pub async fn run_smoke(serial_port: &str, logical_asic: u8) -> Result { + run_smoke_with_options( + serial_port, + logical_asic, + DEFAULT_BZM2_DATA_BAUD, + DEFAULT_IO_TIMEOUT, + ) + .await +} + +/// Run smoke test with explicit baud and timeout. +pub async fn run_smoke_with_options( + serial_port: &str, + logical_asic: u8, + baud: u32, + timeout: Duration, +) -> Result { + // Initial bring-up uses BZM2 default ASIC ID (0xFA) before ID assignment. + let asic_hw_id = DEFAULT_ASIC_ID; + + let serial = SerialStream::new(serial_port, baud) + .with_context(|| format!("failed to open serial port {}", serial_port))?; + let (mut reader, writer, _control) = serial.split(); + let mut tx = FramedWrite::new( + writer, + FrameCodec::new(4).context("failed to construct BZM2 codec")?, + ); + + // Reset/power-up can leave transient bytes on the data UART. Drain any + // pending bytes before issuing the first command. 
+ drain_input_noise(&mut reader).await; + + // Step 1: NOOP + tx.send(Command::Noop { asic_hw_id }) + .await + .context("failed to send NOOP")?; + + let mut noop_raw = [0u8; 5]; + time::timeout(timeout, reader.read_exact(&mut noop_raw)) + .await + .context("timeout waiting for NOOP response")? + .context("read error while waiting for NOOP response")?; + tracing::debug!( + asic_hw_id = format_args!("0x{:02X}", asic_hw_id), + rx = %format_hex(&noop_raw), + "BZM2 smoke NOOP rx" + ); + + let mut signature = [0u8; 3]; + signature.copy_from_slice(&noop_raw[2..5]); + if signature != *NOOP_STRING { + bail!( + "NOOP signature mismatch: got {:02x?} (raw={:02x?})", + signature, + noop_raw + ); + } + + // Step 2: READREG NOTCH_REG:LOCAL_REG_ASIC_ID + tx.send(Command::read_reg_u32( + asic_hw_id, + NOTCH_REG, + local_reg::ASIC_ID, + )) + .await + .context("failed to send READREG(ASIC_ID)")?; + + let mut readreg_raw = [0u8; 6]; + time::timeout(timeout, reader.read_exact(&mut readreg_raw)) + .await + .context("timeout waiting for READREG response")? 
+ .context("read error while waiting for READREG response")?; + tracing::debug!( + asic_hw_id = format_args!("0x{:02X}", asic_hw_id), + rx = %format_hex(&readreg_raw), + "BZM2 smoke READREG rx" + ); + + let asic_id = u32::from_le_bytes( + readreg_raw[2..6] + .try_into() + .expect("slice is exactly 4 bytes"), + ); + + Ok(SmokeResult { + logical_asic, + asic_hw_id, + asic_id, + }) +} + +async fn drain_input_noise(reader: &mut crate::transport::serial::SerialReader) { + let mut scratch = [0u8; 256]; + loop { + match time::timeout(Duration::from_millis(20), reader.read(&mut scratch)).await { + Ok(Ok(0)) => break, + Ok(Ok(n)) => { + tracing::debug!( + bytes = n, + rx = %format_hex(&scratch[..n]), + "BZM2 smoke drained residual input" + ); + continue; + } + Ok(Err(_)) => break, + Err(_elapsed) => break, + } + } +} diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index 5d01736..31170a1 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -1,14 +1,17 @@ //! BZM2 HashThread implementation. //! -//! This is the first thread integration pass: it mirrors the BM13xx actor -//! structure and wiring while keeping mining/job execution minimal until -//! WRITEJOB/READRESULT support lands. +//! This module mirrors the BM13xx actor model and performs full BZM2 bring-up +//! before the first task is accepted. 
-use std::sync::{Arc, RwLock}; +use std::{ + io, + sync::{Arc, RwLock}, +}; use async_trait::async_trait; use futures::{SinkExt, sink::Sink, stream::Stream}; use tokio::sync::{mpsc, oneshot, watch}; +use tokio::time::{self, Duration, Instant}; use tokio_stream::StreamExt; use super::protocol; @@ -21,6 +24,30 @@ use crate::{ types::HashRate, }; +const FIRST_ASIC_ID: u8 = 0x0a; +const ENGINE_ROWS: u16 = 20; +const ENGINE_COLS: u16 = 10; + +const SENSOR_REPORT_INTERVAL: u32 = 63; +const THERMAL_TRIP_C: f32 = 115.0; +const VOLTAGE_TRIP_MV: f32 = 500.0; + +const PLL_LOCK_MASK: u32 = 0x4; +const REF_CLK_MHZ: f32 = 50.0; +const REF_DIVIDER: u32 = 2; +const POST2_DIVIDER: u32 = 1; +const POST1_DIVIDER: u8 = 1; +const TARGET_FREQ_MHZ: f32 = 800.0; + +const DRIVE_STRENGTH_STRONG: u32 = 0x4448_4444; +const ENGINE_CONFIG_ENHANCED_MODE_BIT: u8 = 1 << 2; + +const INIT_NOOP_TIMEOUT: Duration = Duration::from_millis(500); +const INIT_READREG_TIMEOUT: Duration = Duration::from_millis(500); +const PLL_LOCK_TIMEOUT: Duration = Duration::from_secs(3); +const PLL_POLL_DELAY: Duration = Duration::from_millis(100); +const SOFT_RESET_DELAY: Duration = Duration::from_millis(1); + #[derive(Debug)] enum ThreadCommand { UpdateTask { @@ -54,9 +81,10 @@ impl Bzm2Thread { chip_commands: W, peripherals: BoardPeripherals, removal_rx: watch::Receiver, + asic_count: u8, ) -> Self where - R: Stream> + Unpin + Send + 'static, + R: Stream> + Unpin + Send + 'static, W: Sink + Unpin + Send + 'static, W::Error: std::fmt::Debug, { @@ -75,6 +103,7 @@ impl Bzm2Thread { chip_responses, chip_commands, peripherals, + asic_count, ) .await; }); @@ -158,32 +187,719 @@ impl HashThread for Bzm2Thread { } } -async fn initialize_chip( +fn init_failed(msg: impl Into) -> HashThreadError { + HashThreadError::InitializationFailed(msg.into()) +} + +async fn send_command( chip_commands: &mut W, - peripherals: &mut BoardPeripherals, + command: protocol::Command, + context: &str, +) -> Result<(), HashThreadError> +where + W: 
Sink + Unpin, + W::Error: std::fmt::Debug, +{ + chip_commands + .send(command) + .await + .map_err(|e| init_failed(format!("{context}: {e:?}"))) +} + +async fn drain_input(chip_responses: &mut R) +where + R: Stream> + Unpin, +{ + loop { + match time::timeout(Duration::from_millis(20), chip_responses.next()).await { + Ok(Some(_)) => continue, + _ => break, + } + } +} + +async fn wait_for_noop( + chip_responses: &mut R, + expected_asic_id: u8, + timeout: Duration, ) -> Result<(), HashThreadError> where + R: Stream> + Unpin, +{ + let deadline = Instant::now() + timeout; + loop { + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + return Err(init_failed(format!( + "timeout waiting for NOOP response from ASIC 0x{expected_asic_id:02x}" + ))); + } + + match time::timeout(remaining, chip_responses.next()).await { + Ok(Some(Ok(protocol::Response::Noop { asic_hw_id, .. }))) + if asic_hw_id == expected_asic_id => + { + return Ok(()); + } + Ok(Some(Ok(_))) => continue, + Ok(Some(Err(e))) => { + return Err(init_failed(format!("failed while waiting for NOOP: {e}"))); + } + Ok(None) => { + return Err(init_failed("response stream closed while waiting for NOOP")); + } + Err(_) => { + return Err(init_failed(format!( + "timeout waiting for NOOP response from ASIC 0x{expected_asic_id:02x}" + ))); + } + } + } +} + +async fn read_reg_u32( + chip_responses: &mut R, + chip_commands: &mut W, + asic_id: u8, + engine: u16, + offset: u16, + timeout: Duration, + context: &str, +) -> Result +where + R: Stream> + Unpin, W: Sink + Unpin, W::Error: std::fmt::Debug, { + send_command( + chip_commands, + protocol::Command::read_reg_u32(asic_id, engine, offset), + context, + ) + .await?; + + let deadline = Instant::now() + timeout; + loop { + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + return Err(init_failed(format!( + "{context}: timeout waiting for READREG response" + ))); + } + + match 
time::timeout(remaining, chip_responses.next()).await { + Ok(Some(Ok(protocol::Response::ReadReg { asic_hw_id, data }))) + if asic_hw_id == asic_id => + { + return match data { + protocol::ReadRegData::U32(value) => Ok(value), + protocol::ReadRegData::U8(value) => Ok(value as u32), + }; + } + Ok(Some(Ok(_))) => continue, + Ok(Some(Err(e))) => { + return Err(init_failed(format!("{context}: stream read error: {e}"))); + } + Ok(None) => { + return Err(init_failed(format!("{context}: response stream closed"))); + } + Err(_) => { + return Err(init_failed(format!( + "{context}: timeout waiting for response" + ))); + } + } + } +} + +async fn write_reg_u32( + chip_commands: &mut W, + asic_id: u8, + engine: u16, + offset: u16, + value: u32, + context: &str, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + send_command( + chip_commands, + protocol::Command::write_reg_u32_le(asic_id, engine, offset, value), + context, + ) + .await +} + +async fn write_reg_u8( + chip_commands: &mut W, + asic_id: u8, + engine: u16, + offset: u16, + value: u8, + context: &str, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + send_command( + chip_commands, + protocol::Command::write_reg_u8(asic_id, engine, offset, value), + context, + ) + .await +} + +async fn group_write_u8( + chip_commands: &mut W, + asic_id: u8, + group: u16, + offset: u16, + value: u8, + context: &str, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + send_command( + chip_commands, + protocol::Command::multicast_write_u8(asic_id, group, offset, value), + context, + ) + .await +} + +fn thermal_c_to_tune_code(thermal_c: f32) -> u32 { + let tune_code = (2048.0 / 4096.0) + (4096.0 * (thermal_c + 293.8) / 631.8); + tune_code.max(0.0) as u32 +} + +fn voltage_mv_to_tune_code(voltage_mv: f32) -> u32 { + let tune_code = (16384.0 / 6.0) * (2.5 * voltage_mv / 706.7 + 3.0 / 16384.0 + 1.0); + 
tune_code.max(0.0) as u32 +} + +fn calc_pll_dividers(freq_mhz: f32, post1_divider: u8) -> (u32, u32) { + let fb = + REF_DIVIDER as f32 * (post1_divider as f32 + 1.0) * (POST2_DIVIDER as f32 + 1.0) * freq_mhz + / REF_CLK_MHZ; + let mut fb_div = fb as u32; + if fb - fb_div as f32 > 0.5 { + fb_div += 1; + } + + let post_div = (1 << 12) | (POST2_DIVIDER << 9) | ((post1_divider as u32) << 6) | REF_DIVIDER; + (post_div, fb_div) +} + +fn engine_id(row: u16, col: u16) -> u16 { + ((col & 0x3f) << 6) | (row & 0x3f) +} + +async fn configure_sensors( + chip_responses: &mut R, + chip_commands: &mut W, + read_asic_id: u8, +) -> Result<(), HashThreadError> +where + R: Stream> + Unpin, + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + let thermal_trip_code = thermal_c_to_tune_code(THERMAL_TRIP_C); + let voltage_trip_code = voltage_mv_to_tune_code(VOLTAGE_TRIP_MV); + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::UART_TX, + 0xF, + "enable sensors: UART_TX", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::SLOW_CLK_DIV, + 2, + "enable sensors: SLOW_CLK_DIV", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::SENSOR_CLK_DIV, + (8 << 5) | 8, + "enable sensors: SENSOR_CLK_DIV", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::DTS_SRST_PD, + 1 << 8, + "enable sensors: DTS_SRST_PD", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::SENS_TDM_GAP_CNT, + SENSOR_REPORT_INTERVAL, + "enable sensors: SENS_TDM_GAP_CNT", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::DTS_CFG, + 0, + "enable sensors: DTS_CFG", + ) + .await?; + + write_reg_u32( + chip_commands, + 
protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::SENSOR_THRS_CNT, + (10 << 16) | 10, + "enable sensors: SENSOR_THRS_CNT", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::TEMPSENSOR_TUNE_CODE, + 0x8001 | (thermal_trip_code << 1), + "enable sensors: TEMPSENSOR_TUNE_CODE", + ) + .await?; + + let bandgap = read_reg_u32( + chip_responses, + chip_commands, + read_asic_id, + protocol::NOTCH_REG, + protocol::local_reg::BANDGAP, + INIT_READREG_TIMEOUT, + "enable sensors: read BANDGAP", + ) + .await?; + let bandgap_updated = (bandgap & !0xF) | 0x3; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::BANDGAP, + bandgap_updated, + "enable sensors: write BANDGAP", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::VSENSOR_SRST_PD, + 1 << 8, + "enable sensors: VSENSOR_SRST_PD", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::VSENSOR_CFG, + (8 << 28) | (1 << 24), + "enable sensors: VSENSOR_CFG", + ) + .await?; + + let vs_enable = (voltage_trip_code << 16) | (voltage_trip_code << 1) | 1; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::VOLTAGE_SENSOR_ENABLE, + vs_enable, + "enable sensors: VOLTAGE_SENSOR_ENABLE", + ) + .await?; + + Ok(()) +} + +async fn set_frequency( + chip_responses: &mut R, + chip_commands: &mut W, + read_asic_id: u8, +) -> Result<(), HashThreadError> +where + R: Stream> + Unpin, + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + let (post_div, fb_div) = calc_pll_dividers(TARGET_FREQ_MHZ, POST1_DIVIDER); + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL_FBDIV, + fb_div, + "set frequency: PLL_FBDIV", + ) + .await?; + write_reg_u32( + 
chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL_POSTDIV, + post_div, + "set frequency: PLL_POSTDIV", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL1_FBDIV, + fb_div, + "set frequency: PLL1_FBDIV", + ) + .await?; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL1_POSTDIV, + post_div, + "set frequency: PLL1_POSTDIV", + ) + .await?; + + time::sleep(Duration::from_millis(1)).await; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL_ENABLE, + 1, + "set frequency: PLL_ENABLE", + ) + .await?; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL1_ENABLE, + 1, + "set frequency: PLL1_ENABLE", + ) + .await?; + + let deadline = Instant::now() + PLL_LOCK_TIMEOUT; + for pll_enable_offset in [ + protocol::local_reg::PLL_ENABLE, + protocol::local_reg::PLL1_ENABLE, + ] { + loop { + let lock = read_reg_u32( + chip_responses, + chip_commands, + read_asic_id, + protocol::NOTCH_REG, + pll_enable_offset, + INIT_READREG_TIMEOUT, + "set frequency: wait PLL lock", + ) + .await?; + if (lock & PLL_LOCK_MASK) != 0 { + break; + } + + if Instant::now() >= deadline { + return Err(init_failed(format!( + "set frequency: PLL at offset 0x{pll_enable_offset:02x} failed to lock" + ))); + } + + time::sleep(PLL_POLL_DELAY).await; + } + } + + Ok(()) +} + +async fn soft_reset(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + write_reg_u32( + chip_commands, + asic_id, + protocol::NOTCH_REG, + protocol::local_reg::ENG_SOFT_RESET, + 0, + "soft reset assert", + ) + .await?; + time::sleep(SOFT_RESET_DELAY).await; + write_reg_u32( + chip_commands, + asic_id, + protocol::NOTCH_REG, + protocol::local_reg::ENG_SOFT_RESET, + 1, + 
"soft reset release", + ) + .await?; + time::sleep(SOFT_RESET_DELAY).await; + Ok(()) +} + +async fn set_all_clock_gates(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + for group_id in 0..ENGINE_ROWS { + group_write_u8( + chip_commands, + asic_id, + group_id, + protocol::engine_reg::CONFIG, + ENGINE_CONFIG_ENHANCED_MODE_BIT, + "set all clock gates", + ) + .await?; + } + Ok(()) +} + +async fn start_warm_up_jobs(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + for col in 0..ENGINE_COLS { + for row in 0..ENGINE_ROWS { + let engine = engine_id(row, col); + for _ in 0..2 { + write_reg_u8( + chip_commands, + asic_id, + engine, + protocol::engine_reg::TIMESTAMP_COUNT, + 0xff, + "warm-up: TIMESTAMP_COUNT", + ) + .await?; + + for seq in [0xfc, 0xfd, 0xfe, 0xff] { + write_reg_u8( + chip_commands, + asic_id, + engine, + protocol::engine_reg::SEQUENCE_ID, + seq, + "warm-up: SEQUENCE_ID", + ) + .await?; + } + + write_reg_u8( + chip_commands, + asic_id, + engine, + protocol::engine_reg::JOB_CONTROL, + 1, + "warm-up: JOB_CONTROL", + ) + .await?; + } + } + } + Ok(()) +} + +async fn initialize_chip( + chip_responses: &mut R, + chip_commands: &mut W, + peripherals: &mut BoardPeripherals, + asic_count: u8, +) -> Result, HashThreadError> +where + R: Stream> + Unpin, + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + if asic_count == 0 { + return Err(init_failed("asic_count must be > 0")); + } + if let Some(ref mut asic_enable) = peripherals.asic_enable { - asic_enable.enable().await.map_err(|e| { - HashThreadError::InitializationFailed(format!("failed to enable ASIC: {}", e)) - })?; + asic_enable + .enable() + .await + .map_err(|e| init_failed(format!("failed to release reset for BZM2 bring-up: {e}")))?; } + time::sleep(Duration::from_millis(200)).await; - tokio::time::sleep(std::time::Duration::from_millis(200)).await; + 
drain_input(chip_responses).await; - chip_commands - .send(protocol::Command::Noop { + send_command( + chip_commands, + protocol::Command::Noop { asic_hw_id: protocol::DEFAULT_ASIC_ID, - }) - .await - .map_err(|e| { - HashThreadError::InitializationFailed(format!("failed to send BZM2 NOOP: {:?}", e)) - })?; + }, + "default ping", + ) + .await?; + wait_for_noop(chip_responses, protocol::DEFAULT_ASIC_ID, INIT_NOOP_TIMEOUT).await?; + debug!("BZM2 default ASIC ID ping succeeded"); - Ok(()) + let mut asic_ids = Vec::with_capacity(asic_count as usize); + for index in 0..asic_count { + let asic_id = FIRST_ASIC_ID + .checked_add(index) + .ok_or_else(|| init_failed("ASIC ID overflow while programming chain IDs"))?; + + write_reg_u32( + chip_commands, + protocol::DEFAULT_ASIC_ID, + protocol::NOTCH_REG, + protocol::local_reg::ASIC_ID, + asic_id as u32, + "program chain IDs", + ) + .await?; + time::sleep(Duration::from_millis(50)).await; + + let readback = read_reg_u32( + chip_responses, + chip_commands, + asic_id, + protocol::NOTCH_REG, + protocol::local_reg::ASIC_ID, + INIT_READREG_TIMEOUT, + "verify programmed ASIC ID", + ) + .await?; + + if (readback & 0xff) as u8 != asic_id { + return Err(init_failed(format!( + "ASIC ID verify mismatch for 0x{asic_id:02x}: read 0x{readback:08x}" + ))); + } + + asic_ids.push(asic_id); + } + debug!(asic_ids = ?asic_ids, "BZM2 chain IDs programmed"); + + drain_input(chip_responses).await; + for &asic_id in &asic_ids { + send_command( + chip_commands, + protocol::Command::Noop { + asic_hw_id: asic_id, + }, + "per-ASIC ping", + ) + .await?; + wait_for_noop(chip_responses, asic_id, INIT_NOOP_TIMEOUT).await?; + } + debug!("BZM2 per-ASIC ping succeeded"); + + let first_asic = *asic_ids + .first() + .ok_or_else(|| init_failed("no ASIC IDs programmed"))?; + + debug!("Configuring BZM2 sensors"); + configure_sensors(chip_responses, chip_commands, first_asic).await?; + debug!("Configuring BZM2 PLL"); + set_frequency(chip_responses, chip_commands, 
first_asic).await?; + + write_reg_u8( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::CKDCCR_5_0, + 0x00, + "disable DLL0", + ) + .await?; + write_reg_u8( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::CKDCCR_5_1, + 0x00, + "disable DLL1", + ) + .await?; + + let uart_tdm_control = (0x7f << 9) | (100 << 1) | 1; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::UART_TDM_CTL, + uart_tdm_control, + "enable UART TDM mode", + ) + .await?; + + write_reg_u32( + chip_commands, + first_asic, + protocol::NOTCH_REG, + protocol::local_reg::IO_PEPS_DS, + DRIVE_STRENGTH_STRONG, + "set drive strength", + ) + .await?; + + for &asic_id in &asic_ids { + debug!(asic_id, "BZM2 soft reset + clock gate + warm-up start"); + soft_reset(chip_commands, asic_id).await?; + set_all_clock_gates(chip_commands, asic_id).await?; + start_warm_up_jobs(chip_commands, asic_id).await?; + debug!(asic_id, "BZM2 warm-up complete"); + } + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::RESULT_STS_CTL, + 0x10, + "enable TDM results", + ) + .await?; + + Ok(asic_ids) } async fn bzm2_thread_actor( @@ -194,8 +910,9 @@ async fn bzm2_thread_actor( mut chip_responses: R, mut chip_commands: W, mut peripherals: BoardPeripherals, + asic_count: u8, ) where - R: Stream> + Unpin, + R: Stream> + Unpin, W: Sink + Unpin, W::Error: std::fmt::Debug, { @@ -207,8 +924,8 @@ async fn bzm2_thread_actor( let mut chip_initialized = false; let mut current_task: Option = None; - let mut status_ticker = tokio::time::interval(std::time::Duration::from_secs(5)); - status_ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + let mut status_ticker = time::interval(Duration::from_secs(5)); + status_ticker.set_missed_tick_behavior(time::MissedTickBehavior::Skip); loop { tokio::select! 
{ @@ -227,12 +944,20 @@ async fn bzm2_thread_actor( match cmd { ThreadCommand::UpdateTask { new_task, response_tx } => { if !chip_initialized { - if let Err(e) = initialize_chip(&mut chip_commands, &mut peripherals).await { - error!(error = %e, "BZM2 chip initialization failed"); - let _ = response_tx.send(Err(e)); - continue; + match initialize_chip(&mut chip_responses, &mut chip_commands, &mut peripherals, asic_count).await { + Ok(ids) => { + chip_initialized = true; + info!( + asic_ids = ?ids, + "BZM2 initialization completed" + ); + } + Err(e) => { + error!(error = %e, "BZM2 chip initialization failed"); + let _ = response_tx.send(Err(e)); + continue; + } } - chip_initialized = true; } let old_task = current_task.replace(new_task); @@ -244,12 +969,20 @@ async fn bzm2_thread_actor( } ThreadCommand::ReplaceTask { new_task, response_tx } => { if !chip_initialized { - if let Err(e) = initialize_chip(&mut chip_commands, &mut peripherals).await { - error!(error = %e, "BZM2 chip initialization failed"); - let _ = response_tx.send(Err(e)); - continue; + match initialize_chip(&mut chip_responses, &mut chip_commands, &mut peripherals, asic_count).await { + Ok(ids) => { + chip_initialized = true; + info!( + asic_ids = ?ids, + "BZM2 initialization completed" + ); + } + Err(e) => { + error!(error = %e, "BZM2 chip initialization failed"); + let _ = response_tx.send(Err(e)); + continue; + } } - chip_initialized = true; } let old_task = current_task.replace(new_task); diff --git a/mujina-miner/src/board/birds.rs b/mujina-miner/src/board/birds.rs index 7036f26..639ef4c 100644 --- a/mujina-miner/src/board/birds.rs +++ b/mujina-miner/src/board/birds.rs @@ -38,13 +38,24 @@ const CONTROL_UART_BAUD: u32 = 115_200; /// BIRDS control GPIO: 5V power enable. const GPIO_5V_EN: u8 = 1; +/// BIRDS control GPIO: VR power enable. +const GPIO_VR_EN: u8 = 0; /// BIRDS control GPIO: ASIC reset (active-low). 
const GPIO_ASIC_RST: u8 = 2; /// BIRDS control board ID for 5V/ASIC reset GPIO operations. const CTRL_ID_POWER_RESET: u8 = 0xAB; +/// BIRDS control board ID for VR GPIO operations. +const CTRL_ID_VR: u8 = 0xAA; /// Control protocol page for GPIO. const CTRL_PAGE_GPIO: u8 = 0x06; +fn format_hex(data: &[u8]) -> String { + data.iter() + .map(|byte| format!("{:02X}", byte)) + .collect::>() + .join(" ") +} + /// BIRDS mining board. pub struct BirdsBoard { device_info: UsbDeviceInfo, @@ -98,10 +109,12 @@ impl BirdsBoard { "Running BIRDS ASIC smoke test during initialization" ); - // Match known-good bring-up sequence from reference scripts: - // 1) Enable 5V rail - // 2) Pulse ASIC reset low/high - // 3) Wait for UART startup + // Match known-good bring-up sequence from birds_asyncio.py: + // 1) VR off and settle + // 2) Enable 5V rail + // 3) Enable VR + // 4) Pulse ASIC reset low/high + // 5) Wait for UART startup self.bringup_power_and_reset(&control_port).await?; self.control_port = Some(control_port); @@ -137,13 +150,32 @@ impl BirdsBoard { )) })?; - Self::control_gpio_write(&mut control_stream, GPIO_5V_EN, true).await?; + Self::control_gpio_write(&mut control_stream, CTRL_ID_VR, GPIO_VR_EN, false).await?; + sleep(Duration::from_millis(2000)).await; + + Self::control_gpio_write(&mut control_stream, CTRL_ID_POWER_RESET, GPIO_5V_EN, true) + .await?; sleep(Duration::from_millis(100)).await; - Self::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, false).await?; + Self::control_gpio_write(&mut control_stream, CTRL_ID_VR, GPIO_VR_EN, true).await?; sleep(Duration::from_millis(100)).await; - Self::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, true).await?; + Self::control_gpio_write( + &mut control_stream, + CTRL_ID_POWER_RESET, + GPIO_ASIC_RST, + false, + ) + .await?; + sleep(Duration::from_millis(100)).await; + + Self::control_gpio_write( + &mut control_stream, + CTRL_ID_POWER_RESET, + GPIO_ASIC_RST, + true, + ) + .await?; 
sleep(Duration::from_millis(1000)).await; Ok(()) @@ -151,20 +183,27 @@ impl BirdsBoard { async fn control_gpio_write( stream: &mut tokio_serial::SerialStream, + dev_id: u8, pin: u8, value_high: bool, ) -> Result<(), BoardError> { - // Packet format: [len:u16_le][id][bus][page][cmd=pin][value] - // For BIRDS, id is the board target (0xAB for 5V/RST). + // Packet format: [len:u16_le][id][bus][page][cmd=pin][value]. let packet: [u8; 7] = [ 0x07, 0x00, - CTRL_ID_POWER_RESET, + dev_id, 0x00, CTRL_PAGE_GPIO, pin, if value_high { 0x01 } else { 0x00 }, ]; + tracing::debug!( + dev_id = format_args!("0x{:02X}", dev_id), + pin, + value = if value_high { 1 } else { 0 }, + tx = %format_hex(&packet), + "BIRDS ctrl gpio tx" + ); stream.write_all(&packet).await.map_err(|e| { BoardError::HardwareControl(format!( "Failed to write GPIO control packet (pin {}): {}", @@ -180,10 +219,16 @@ impl BirdsBoard { pin, e )) })?; - if ack[2] != CTRL_ID_POWER_RESET { + tracing::debug!( + dev_id = format_args!("0x{:02X}", dev_id), + pin, + rx = %format_hex(&ack), + "BIRDS ctrl gpio rx" + ); + if ack[2] != dev_id { return Err(BoardError::HardwareControl(format!( "GPIO ack ID mismatch for pin {}: expected 0x{:02x}, got 0x{:02x}", - pin, CTRL_ID_POWER_RESET, ack[2] + pin, dev_id, ack[2] ))); } @@ -204,7 +249,13 @@ impl BirdsBoard { )) })?; - Self::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, false).await + Self::control_gpio_write( + &mut control_stream, + CTRL_ID_POWER_RESET, + GPIO_ASIC_RST, + false, + ) + .await } } @@ -218,18 +269,28 @@ impl AsicEnable for BirdsAsicEnable { let mut control_stream = tokio_serial::new(&self.control_port, CONTROL_UART_BAUD) .open_native_async() .map_err(|e| anyhow::anyhow!("failed to open control port: {}", e))?; - BirdsBoard::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, true) - .await - .map_err(|e| anyhow::anyhow!("failed to release BZM2 reset: {}", e)) + BirdsBoard::control_gpio_write( + &mut control_stream, + CTRL_ID_POWER_RESET, + 
GPIO_ASIC_RST, + true, + ) + .await + .map_err(|e| anyhow::anyhow!("failed to release BZM2 reset: {}", e)) } async fn disable(&mut self) -> anyhow::Result<()> { let mut control_stream = tokio_serial::new(&self.control_port, CONTROL_UART_BAUD) .open_native_async() .map_err(|e| anyhow::anyhow!("failed to open control port: {}", e))?; - BirdsBoard::control_gpio_write(&mut control_stream, GPIO_ASIC_RST, false) - .await - .map_err(|e| anyhow::anyhow!("failed to assert BZM2 reset: {}", e)) + BirdsBoard::control_gpio_write( + &mut control_stream, + CTRL_ID_POWER_RESET, + GPIO_ASIC_RST, + false, + ) + .await + .map_err(|e| anyhow::anyhow!("failed to assert BZM2 reset: {}", e)) } } @@ -294,6 +355,7 @@ impl Board for BirdsBoard { data_writer, peripherals, removal_rx, + ASICS_PER_BOARD as u8, ); Ok(vec![Box::new(thread)]) } From e93009a0ac0167d8308d4d820153cb4408f1a343 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Mon, 16 Feb 2026 23:27:42 -0500 Subject: [PATCH 06/19] feat(bzm2): add WRITEJOB command encoding --- mujina-miner/src/asic/bzm2/protocol.rs | 69 ++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/mujina-miner/src/asic/bzm2/protocol.rs b/mujina-miner/src/asic/bzm2/protocol.rs index fc44a38..440df1e 100644 --- a/mujina-miner/src/asic/bzm2/protocol.rs +++ b/mujina-miner/src/asic/bzm2/protocol.rs @@ -200,6 +200,17 @@ pub fn hw_to_logical_asic_id(hw_asic_id: u8) -> Option { #[derive(Debug, Clone, PartialEq, Eq)] pub enum Command { + /// Push a job payload to one engine. + WriteJob { + asic_hw_id: u8, + engine: u16, + midstate: [u8; 32], + merkle_residue: u32, + timestamp: u32, + sequence: u8, + job_ctl: u8, + }, + /// Send NOOP command. 
Noop { asic_hw_id: u8 }, @@ -229,6 +240,26 @@ pub enum Command { } impl Command { + pub fn write_job( + asic_hw_id: u8, + engine: u16, + midstate: [u8; 32], + merkle_residue: u32, + timestamp: u32, + sequence: u8, + job_ctl: u8, + ) -> Self { + Self::WriteJob { + asic_hw_id, + engine, + midstate, + merkle_residue, + timestamp, + sequence, + job_ctl, + } + } + pub fn read_reg_u32(asic_hw_id: u8, engine: u16, offset: u16) -> Self { Self::ReadReg { asic_hw_id, @@ -269,6 +300,26 @@ impl Command { let mut raw = BytesMut::new(); match self { + Self::WriteJob { + asic_hw_id, + engine, + midstate, + merkle_residue, + timestamp, + sequence, + job_ctl, + } => { + // WRITEJOB command: + // [header:u32_be][midstate:32][merkle_residue:u32_le] + // [timestamp:u32_le][sequence:u8][job_ctl:u8] + raw.reserve(46); + raw.put_u32(build_full_header(*asic_hw_id, Opcode::WriteJob, *engine, 0)); + raw.extend_from_slice(midstate); + raw.put_u32_le(*merkle_residue); + raw.put_u32_le(*timestamp); + raw.put_u8(*sequence); + raw.put_u8(*job_ctl); + } Self::Noop { asic_hw_id } => { // NOOP command: // [asic_hw_id][opcode<<4] @@ -586,6 +637,24 @@ mod tests { assert_eq!(raw.as_ref(), &[0x0a, 0x40, 0x12, 0x01, 0x00, 0x04]); } + #[test] + fn test_encode_writejob_frame() { + let mut midstate = [0u8; 32]; + for (i, byte) in midstate.iter_mut().enumerate() { + *byte = i as u8; + } + + let cmd = Command::write_job(0x0a, 0x0123, midstate, 0x1122_3344, 0x5566_7788, 0xfe, 0x03); + + let raw = cmd.encode_raw().expect("encode should succeed"); + assert_eq!(&raw[..4], [0x0a, 0x01, 0x23, 0x00]); + assert_eq!(&raw[4..36], midstate); + assert_eq!(&raw[36..40], 0x1122_3344u32.to_le_bytes()); + assert_eq!(&raw[40..44], 0x5566_7788u32.to_le_bytes()); + assert_eq!(raw[44], 0xfe); + assert_eq!(raw[45], 0x03); + } + #[test] fn test_decode_noop_response() { let mut codec = FrameCodec::default(); From 116b7b3aba7660157636263cd56a3b0513347bda Mon Sep 17 00:00:00 2001 From: johnny9 Date: Mon, 16 Feb 2026 23:33:29 -0500 
Subject: [PATCH 07/19] feat(bzm2): enforce enhanced-mode WRITEJOB framing --- mujina-miner/src/asic/bzm2/error.rs | 3 + mujina-miner/src/asic/bzm2/protocol.rs | 109 ++++++++++++++++++++++++- 2 files changed, 110 insertions(+), 2 deletions(-) diff --git a/mujina-miner/src/asic/bzm2/error.rs b/mujina-miner/src/asic/bzm2/error.rs index 00feb23..4ada16c 100644 --- a/mujina-miner/src/asic/bzm2/error.rs +++ b/mujina-miner/src/asic/bzm2/error.rs @@ -13,6 +13,9 @@ pub enum ProtocolError { #[error("invalid read register byte count: {0} (expected 1, 2, or 4)")] InvalidReadRegCount(u8), + #[error("invalid job control value: {0} (expected 1 or 3)")] + InvalidJobControl(u8), + #[error("unsupported read register response size: {0} (expected 1 or 4)")] UnsupportedReadRegResponseSize(usize), diff --git a/mujina-miner/src/asic/bzm2/protocol.rs b/mujina-miner/src/asic/bzm2/protocol.rs index 440df1e..726bb90 100644 --- a/mujina-miner/src/asic/bzm2/protocol.rs +++ b/mujina-miner/src/asic/bzm2/protocol.rs @@ -28,6 +28,7 @@ pub const BROADCAST_ENGINE: u16 = 0x00ff; pub const TERM_BYTE: u8 = 0xa5; pub const TAR_BYTE: u8 = 0x08; +pub const WRITEJOB_OFFSET: u16 = 41; fn format_hex(data: &[u8]) -> String { data.iter() @@ -260,6 +261,66 @@ impl Command { } } + /// Build the 4-command enhanced-mode WRITEJOB burst. + /// + /// Sequence mapping follows bzmd: + /// `seq_start = (sequence_id % 2) * 4`, then `seq_start + [0,1,2,3]`. + /// The first three commands carry `job_ctl=0`; the final command carries + /// the requested `job_ctl` (must be 1 or 3). 
+ pub fn write_job_enhanced( + asic_hw_id: u8, + engine: u16, + midstates: [[u8; 32]; 4], + merkle_residue: u32, + timestamp: u32, + sequence_id: u8, + job_ctl: u8, + ) -> Result<[Self; 4], ProtocolError> { + if !matches!(job_ctl, 1 | 3) { + return Err(ProtocolError::InvalidJobControl(job_ctl)); + } + + let seq_start = (sequence_id % 2) * 4; + Ok([ + Self::write_job( + asic_hw_id, + engine, + midstates[0], + merkle_residue, + timestamp, + seq_start, + 0, + ), + Self::write_job( + asic_hw_id, + engine, + midstates[1], + merkle_residue, + timestamp, + seq_start + 1, + 0, + ), + Self::write_job( + asic_hw_id, + engine, + midstates[2], + merkle_residue, + timestamp, + seq_start + 2, + 0, + ), + Self::write_job( + asic_hw_id, + engine, + midstates[3], + merkle_residue, + timestamp, + seq_start + 3, + job_ctl, + ), + ]) + } + pub fn read_reg_u32(asic_hw_id: u8, engine: u16, offset: u16) -> Self { Self::ReadReg { asic_hw_id, @@ -313,7 +374,12 @@ impl Command { // [header:u32_be][midstate:32][merkle_residue:u32_le] // [timestamp:u32_le][sequence:u8][job_ctl:u8] raw.reserve(46); - raw.put_u32(build_full_header(*asic_hw_id, Opcode::WriteJob, *engine, 0)); + raw.put_u32(build_full_header( + *asic_hw_id, + Opcode::WriteJob, + *engine, + WRITEJOB_OFFSET, + )); raw.extend_from_slice(midstate); raw.put_u32_le(*merkle_residue); raw.put_u32_le(*timestamp); @@ -647,7 +713,7 @@ mod tests { let cmd = Command::write_job(0x0a, 0x0123, midstate, 0x1122_3344, 0x5566_7788, 0xfe, 0x03); let raw = cmd.encode_raw().expect("encode should succeed"); - assert_eq!(&raw[..4], [0x0a, 0x01, 0x23, 0x00]); + assert_eq!(&raw[..4], [0x0a, 0x01, 0x23, 0x29]); assert_eq!(&raw[4..36], midstate); assert_eq!(&raw[36..40], 0x1122_3344u32.to_le_bytes()); assert_eq!(&raw[40..44], 0x5566_7788u32.to_le_bytes()); @@ -655,6 +721,45 @@ mod tests { assert_eq!(raw[45], 0x03); } + #[test] + fn test_writejob_enhanced_builds_four_commands() { + let mut midstates = [[0u8; 32]; 4]; + midstates[0][0] = 0x10; + 
midstates[1][0] = 0x20; + midstates[2][0] = 0x30; + midstates[3][0] = 0x40; + + let cmds = + Command::write_job_enhanced(0x0a, 0x0123, midstates, 0x1122_3344, 0x5566_7788, 0xff, 3) + .expect("enhanced writejob should build"); + + let raw0 = cmds[0].clone().encode_raw().expect("encode should succeed"); + let raw1 = cmds[1].clone().encode_raw().expect("encode should succeed"); + let raw2 = cmds[2].clone().encode_raw().expect("encode should succeed"); + let raw3 = cmds[3].clone().encode_raw().expect("encode should succeed"); + + assert_eq!(raw0[44], 4); + assert_eq!(raw1[44], 5); + assert_eq!(raw2[44], 6); + assert_eq!(raw3[44], 7); + assert_eq!(raw0[45], 0); + assert_eq!(raw1[45], 0); + assert_eq!(raw2[45], 0); + assert_eq!(raw3[45], 3); + assert_eq!(raw0[4], 0x10); + assert_eq!(raw1[4], 0x20); + assert_eq!(raw2[4], 0x30); + assert_eq!(raw3[4], 0x40); + } + + #[test] + fn test_writejob_enhanced_rejects_invalid_job_ctl() { + let midstates = [[0u8; 32]; 4]; + let err = Command::write_job_enhanced(0x0a, 0x0123, midstates, 0, 0, 0, 0x02) + .expect_err("invalid job_ctl should fail"); + assert!(matches!(err, ProtocolError::InvalidJobControl(0x02))); + } + #[test] fn test_decode_noop_response() { let mut codec = FrameCodec::default(); From b1a9d92cedc414fe47dc4559894b4507bfb611b3 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Mon, 16 Feb 2026 23:49:58 -0500 Subject: [PATCH 08/19] feat(bzm2): send rolled-midstate jobs from HashTask --- mujina-miner/src/asic/bzm2/thread.rs | 238 +++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index 31170a1..f2e1705 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -9,6 +9,12 @@ use std::{ }; use async_trait::async_trait; +use bitcoin::{ + TxMerkleNode, + block::{Header as BlockHeader, Version as BlockVersion}, + consensus, + hashes::{HashEngine as _, sha256}, +}; use futures::{SinkExt, sink::Sink, 
stream::Stream}; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{self, Duration, Instant}; @@ -20,6 +26,7 @@ use crate::{ BoardPeripherals, HashTask, HashThread, HashThreadCapabilities, HashThreadError, HashThreadEvent, HashThreadStatus, ThreadRemovalSignal, }, + job_source::{GeneralPurposeBits, MerkleRootKind}, tracing::prelude::*, types::HashRate, }; @@ -47,6 +54,8 @@ const INIT_READREG_TIMEOUT: Duration = Duration::from_millis(500); const PLL_LOCK_TIMEOUT: Duration = Duration::from_secs(3); const PLL_POLL_DELAY: Duration = Duration::from_millis(100); const SOFT_RESET_DELAY: Duration = Duration::from_millis(1); +const MIDSTATE_COUNT: usize = 4; +const WRITEJOB_CTL_REPLACE: u8 = 3; #[derive(Debug)] enum ThreadCommand { @@ -399,6 +408,166 @@ fn engine_id(row: u16, col: u16) -> u16 { ((col & 0x3f) << 6) | (row & 0x3f) } +struct TaskJobPayload { + midstates: [[u8; 32]; MIDSTATE_COUNT], + merkle_residue: u32, + timestamp: u32, +} + +fn expand_counter_into_mask(mask: u16, mut counter: u16) -> u16 { + let mut rolled = 0u16; + for bit in 0..16 { + let bit_mask = 1u16 << bit; + if (mask & bit_mask) != 0 { + if (counter & 1) != 0 { + rolled |= bit_mask; + } + counter >>= 1; + } + } + rolled +} + +fn compute_task_merkle_root(task: &HashTask) -> Result { + let template = task.template.as_ref(); + match &template.merkle_root { + MerkleRootKind::Computed(_) => { + let en2 = task.en2.as_ref().ok_or_else(|| { + HashThreadError::WorkAssignmentFailed( + "EN2 is required for computed merkle roots".into(), + ) + })?; + template.compute_merkle_root(en2).map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!("failed to compute merkle root: {e}")) + }) + } + MerkleRootKind::Fixed(merkle_root) => Ok(*merkle_root), + } +} + +fn build_header_bytes( + task: &HashTask, + version: BlockVersion, + merkle_root: TxMerkleNode, +) -> Result<[u8; 80], HashThreadError> { + let template = task.template.as_ref(); + let header = BlockHeader { + version, + prev_blockhash: 
template.prev_blockhash, + merkle_root, + time: task.ntime, + bits: template.bits, + nonce: 0, + }; + + let bytes = consensus::serialize(&header); + let len = bytes.len(); + bytes.try_into().map_err(|_| { + HashThreadError::WorkAssignmentFailed(format!("unexpected serialized header size: {}", len)) + }) +} + +fn compute_midstate_le(header_prefix_64: &[u8; 64]) -> [u8; 32] { + let mut engine = sha256::HashEngine::default(); + engine.input(header_prefix_64); + let mut midstate = engine.midstate().to_byte_array(); + for word in midstate.chunks_exact_mut(4) { + // Firmware expects each state word little-endian on the wire. + word.reverse(); + } + midstate +} + +fn task_to_bzm2_payload( + task: &HashTask, + version_counter: u16, +) -> Result { + let template = task.template.as_ref(); + let merkle_root = compute_task_merkle_root(task)?; + let base_version = template.version.base(); + let version_mask = u16::from_be_bytes(*template.version.gp_bits_mask().as_bytes()); + + let mut midstates = [[0u8; 32]; MIDSTATE_COUNT]; + let mut merkle_residue = 0u32; + let mut timestamp = 0u32; + + for (idx, midstate) in midstates.iter_mut().enumerate() { + let rolled_bits_u16 = + expand_counter_into_mask(version_mask, version_counter.wrapping_add(idx as u16)); + let rolled_bits = GeneralPurposeBits::new(rolled_bits_u16.to_be_bytes()); + let rolled_version = rolled_bits.apply_to_version(base_version); + + let header = build_header_bytes(task, rolled_version, merkle_root)?; + let header_prefix: [u8; 64] = header[..64] + .try_into() + .expect("header prefix length is fixed"); + + *midstate = compute_midstate_le(&header_prefix); + + if idx == 0 { + merkle_residue = u32::from_le_bytes( + header[64..68] + .try_into() + .expect("slice length is exactly 4 bytes"), + ); + timestamp = u32::from_le_bytes( + header[68..72] + .try_into() + .expect("slice length is exactly 4 bytes"), + ); + } + } + + Ok(TaskJobPayload { + midstates, + merkle_residue, + timestamp, + }) +} + +async fn 
send_task_to_all_engines( + chip_commands: &mut W, + task: &HashTask, + version_counter: u16, + sequence_id: u8, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + let payload = task_to_bzm2_payload(task, version_counter)?; + + for col in 0..ENGINE_COLS { + for row in 0..ENGINE_ROWS { + let engine = engine_id(row, col); + let commands = protocol::Command::write_job_enhanced( + protocol::BROADCAST_ASIC, + engine, + payload.midstates, + payload.merkle_residue, + payload.timestamp, + sequence_id, + WRITEJOB_CTL_REPLACE, + ) + .map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!( + "failed to build WRITEJOB payload for engine 0x{engine:03x}: {e}" + )) + })?; + + for command in commands { + chip_commands.send(command).await.map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!( + "failed to send WRITEJOB to engine 0x{engine:03x}: {e:?}" + )) + })?; + } + } + } + + Ok(()) +} + async fn configure_sensors( chip_responses: &mut R, chip_commands: &mut W, @@ -924,6 +1093,8 @@ async fn bzm2_thread_actor( let mut chip_initialized = false; let mut current_task: Option = None; + let mut next_sequence_id: u8 = 0; + let mut next_version_counter: u16 = 0; let mut status_ticker = time::interval(Duration::from_secs(5)); status_ticker.set_missed_tick_behavior(time::MissedTickBehavior::Skip); @@ -960,6 +1131,29 @@ async fn bzm2_thread_actor( } } + if let Err(e) = send_task_to_all_engines( + &mut chip_commands, + &new_task, + next_version_counter, + next_sequence_id, + ) + .await + { + error!(error = %e, "Failed to send BZM2 work during update_task"); + let _ = response_tx.send(Err(e)); + continue; + } + + debug!( + job_id = %new_task.template.id, + sequence_id = next_sequence_id, + version_counter = next_version_counter, + "Sent BZM2 WRITEJOB payloads for update_task" + ); + next_sequence_id = next_sequence_id.wrapping_add(1); + next_version_counter = + next_version_counter.wrapping_add(MIDSTATE_COUNT as u16); + let old_task = 
current_task.replace(new_task); { let mut s = status.write().expect("status lock poisoned"); @@ -985,6 +1179,29 @@ async fn bzm2_thread_actor( } } + if let Err(e) = send_task_to_all_engines( + &mut chip_commands, + &new_task, + next_version_counter, + next_sequence_id, + ) + .await + { + error!(error = %e, "Failed to send BZM2 work during replace_task"); + let _ = response_tx.send(Err(e)); + continue; + } + + debug!( + job_id = %new_task.template.id, + sequence_id = next_sequence_id, + version_counter = next_version_counter, + "Sent BZM2 WRITEJOB payloads for replace_task" + ); + next_sequence_id = next_sequence_id.wrapping_add(1); + next_version_counter = + next_version_counter.wrapping_add(MIDSTATE_COUNT as u16); + let old_task = current_task.replace(new_task); { let mut s = status.write().expect("status lock poisoned"); @@ -1027,3 +1244,24 @@ async fn bzm2_thread_actor( } } } + +#[cfg(test)] +mod tests { + use super::expand_counter_into_mask; + + #[test] + fn test_expand_counter_into_contiguous_mask() { + assert_eq!(expand_counter_into_mask(0b0011, 0), 0b0000); + assert_eq!(expand_counter_into_mask(0b0011, 1), 0b0001); + assert_eq!(expand_counter_into_mask(0b0011, 2), 0b0010); + assert_eq!(expand_counter_into_mask(0b0011, 3), 0b0011); + } + + #[test] + fn test_expand_counter_into_sparse_mask() { + assert_eq!(expand_counter_into_mask(0b1010, 0), 0b0000); + assert_eq!(expand_counter_into_mask(0b1010, 1), 0b0010); + assert_eq!(expand_counter_into_mask(0b1010, 2), 0b1000); + assert_eq!(expand_counter_into_mask(0b1010, 3), 0b1010); + } +} From da88e6c103408b9a90d7a143a2e00be30d363c2f Mon Sep 17 00:00:00 2001 From: johnny9 Date: Mon, 16 Feb 2026 23:55:45 -0500 Subject: [PATCH 09/19] feat(bzm2): map READRESULT responses to shares --- mujina-miner/src/asic/bzm2/protocol.rs | 79 ++++++++++++- mujina-miner/src/asic/bzm2/thread.rs | 153 +++++++++++++++++++++++-- 2 files changed, 218 insertions(+), 14 deletions(-) diff --git a/mujina-miner/src/asic/bzm2/protocol.rs 
b/mujina-miner/src/asic/bzm2/protocol.rs index 726bb90..f2c6a65 100644 --- a/mujina-miner/src/asic/bzm2/protocol.rs +++ b/mujina-miner/src/asic/bzm2/protocol.rs @@ -476,8 +476,22 @@ pub enum ReadRegData { #[derive(Debug, Clone, PartialEq, Eq)] pub enum Response { - Noop { asic_hw_id: u8, signature: [u8; 3] }, - ReadReg { asic_hw_id: u8, data: ReadRegData }, + Noop { + asic_hw_id: u8, + signature: [u8; 3], + }, + ReadReg { + asic_hw_id: u8, + data: ReadRegData, + }, + ReadResult { + asic_hw_id: u8, + engine_id: u16, + status: u8, + nonce: u32, + sequence: u8, + timecode: u8, + }, } /// BZM2 frame codec. @@ -606,10 +620,36 @@ impl Decoder for FrameCodec { return Ok(Some(Response::ReadReg { asic_hw_id, data })); } + Opcode::ReadResult => { + const FRAME_LEN: usize = 10; // [asic:u8][opcode:u8][engine+status:2][nonce:4][sequence:1][time:1] + if src.len() < FRAME_LEN { + return Ok(None); + } + tracing::trace!(rx = %format_hex(&src[..FRAME_LEN]), "BZM2 rx READRESULT frame"); + + let mut frame = src.split_to(FRAME_LEN); + let asic_hw_id = frame.get_u8(); + let _opcode = frame.get_u8(); + let engine_status = frame.get_u16(); + let engine_id = (engine_status >> 4) & 0x0fff; + let status = (engine_status & 0x000f) as u8; + let nonce = frame.get_u32_le(); + let sequence = frame.get_u8(); + let timecode = frame.get_u8(); + + return Ok(Some(Response::ReadResult { + asic_hw_id, + engine_id, + status, + nonce, + sequence, + timecode, + })); + } // Pass-1 decoder only surfaces NOOP and READREG. Drop other // fixed-length TDM messages so callers can keep waiting for // the response type they care about. 
- Opcode::ReadResult | Opcode::DtsVs => { + Opcode::DtsVs => { const TDM_FIXED_LEN: usize = 10; // [asic:u8][opcode:u8][payload:8] if src.len() < TDM_FIXED_LEN { return Ok(None); @@ -808,6 +848,39 @@ mod tests { assert!(src.is_empty()); } + #[test] + fn test_decode_readresult_response() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from( + &[ + 0x0a, + Opcode::ReadResult as u8, + 0x12, + 0x34, // engine_status: engine_id=0x123, status=0x4 + 0x78, + 0x56, + 0x34, + 0x12, // nonce LE + 0x07, // sequence + 0x2a, // timecode + ][..], + ); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::ReadResult { + asic_hw_id: 0x0a, + engine_id: 0x123, + status: 0x4, + nonce: 0x1234_5678, + sequence: 0x07, + timecode: 0x2a, + }) + ); + assert!(src.is_empty()); + } + #[test] fn test_decode_resync_from_garbage() { let mut codec = FrameCodec::default(); diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index f2e1705..acde086 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -24,7 +24,7 @@ use super::protocol; use crate::{ asic::hash_thread::{ BoardPeripherals, HashTask, HashThread, HashThreadCapabilities, HashThreadError, - HashThreadEvent, HashThreadStatus, ThreadRemovalSignal, + HashThreadEvent, HashThreadStatus, Share, ThreadRemovalSignal, }, job_source::{GeneralPurposeBits, MerkleRootKind}, tracing::prelude::*, @@ -414,6 +414,13 @@ struct TaskJobPayload { timestamp: u32, } +#[derive(Clone)] +struct AssignedTask { + task: HashTask, + merkle_root: TxMerkleNode, + version_counter: u16, +} + fn expand_counter_into_mask(mask: u16, mut counter: u16) -> u16 { let mut rolled = 0u16; for bit in 0..16 { @@ -478,24 +485,27 @@ fn compute_midstate_le(header_prefix_64: &[u8; 64]) -> [u8; 32] { midstate } -fn task_to_bzm2_payload( - task: &HashTask, - version_counter: u16, -) -> Result { +fn task_version_for_counter(task: 
&HashTask, version_counter: u16) -> BlockVersion { let template = task.template.as_ref(); - let merkle_root = compute_task_merkle_root(task)?; let base_version = template.version.base(); let version_mask = u16::from_be_bytes(*template.version.gp_bits_mask().as_bytes()); + let rolled_bits_u16 = expand_counter_into_mask(version_mask, version_counter); + let rolled_bits = GeneralPurposeBits::new(rolled_bits_u16.to_be_bytes()); + rolled_bits.apply_to_version(base_version) +} +fn task_to_bzm2_payload( + task: &HashTask, + merkle_root: TxMerkleNode, + version_counter: u16, +) -> Result { let mut midstates = [[0u8; 32]; MIDSTATE_COUNT]; let mut merkle_residue = 0u32; let mut timestamp = 0u32; for (idx, midstate) in midstates.iter_mut().enumerate() { - let rolled_bits_u16 = - expand_counter_into_mask(version_mask, version_counter.wrapping_add(idx as u16)); - let rolled_bits = GeneralPurposeBits::new(rolled_bits_u16.to_be_bytes()); - let rolled_version = rolled_bits.apply_to_version(base_version); + let rolled_version = + task_version_for_counter(task, version_counter.wrapping_add(idx as u16)); let header = build_header_bytes(task, rolled_version, merkle_root)?; let header_prefix: [u8; 64] = header[..64] @@ -528,6 +538,7 @@ fn task_to_bzm2_payload( async fn send_task_to_all_engines( chip_commands: &mut W, task: &HashTask, + merkle_root: TxMerkleNode, version_counter: u16, sequence_id: u8, ) -> Result<(), HashThreadError> @@ -535,7 +546,7 @@ where W: Sink + Unpin, W::Error: std::fmt::Debug, { - let payload = task_to_bzm2_payload(task, version_counter)?; + let payload = task_to_bzm2_payload(task, merkle_root, version_counter)?; for col in 0..ENGINE_COLS { for row in 0..ENGINE_ROWS { @@ -1093,6 +1104,7 @@ async fn bzm2_thread_actor( let mut chip_initialized = false; let mut current_task: Option = None; + let mut assigned_tasks: [Option; 2] = [None, None]; let mut next_sequence_id: u8 = 0; let mut next_version_counter: u16 = 0; let mut status_ticker = 
time::interval(Duration::from_secs(5)); @@ -1131,9 +1143,19 @@ async fn bzm2_thread_actor( } } + let merkle_root = match compute_task_merkle_root(&new_task) { + Ok(root) => root, + Err(e) => { + error!(error = %e, "Failed to derive merkle root for update_task"); + let _ = response_tx.send(Err(e)); + continue; + } + }; + if let Err(e) = send_task_to_all_engines( &mut chip_commands, &new_task, + merkle_root, next_version_counter, next_sequence_id, ) @@ -1144,6 +1166,13 @@ async fn bzm2_thread_actor( continue; } + let slot = (next_sequence_id as usize) & 0x01; + assigned_tasks[slot] = Some(AssignedTask { + task: new_task.clone(), + merkle_root, + version_counter: next_version_counter, + }); + debug!( job_id = %new_task.template.id, sequence_id = next_sequence_id, @@ -1179,9 +1208,19 @@ async fn bzm2_thread_actor( } } + let merkle_root = match compute_task_merkle_root(&new_task) { + Ok(root) => root, + Err(e) => { + error!(error = %e, "Failed to derive merkle root for replace_task"); + let _ = response_tx.send(Err(e)); + continue; + } + }; + if let Err(e) = send_task_to_all_engines( &mut chip_commands, &new_task, + merkle_root, next_version_counter, next_sequence_id, ) @@ -1192,6 +1231,13 @@ async fn bzm2_thread_actor( continue; } + let slot = (next_sequence_id as usize) & 0x01; + assigned_tasks[slot] = Some(AssignedTask { + task: new_task.clone(), + merkle_root, + version_counter: next_version_counter, + }); + debug!( job_id = %new_task.template.id, sequence_id = next_sequence_id, @@ -1211,6 +1257,7 @@ async fn bzm2_thread_actor( } ThreadCommand::GoIdle { response_tx } => { let old_task = current_task.take(); + assigned_tasks = [None, None]; { let mut s = status.write().expect("status lock poisoned"); s.is_active = false; @@ -1231,6 +1278,90 @@ async fn bzm2_thread_actor( Ok(protocol::Response::ReadReg { asic_hw_id, data }) => { trace!(asic_hw_id, data = ?data, "BZM2 READREG response"); } + Ok(protocol::Response::ReadResult { + asic_hw_id, + engine_id, + status: 
result_status, + nonce, + sequence, + timecode, + }) => { + // status bit3 indicates a valid nonce candidate. + if (result_status & 0x8) == 0 { + trace!( + asic_hw_id, + engine_id, + result_status, + nonce, + sequence, + timecode, + "Ignoring BZM2 READRESULT without valid-nonce flag" + ); + continue; + } + + let slot = (sequence as usize) / MIDSTATE_COUNT; + if slot >= assigned_tasks.len() { + trace!( + asic_hw_id, + engine_id, + sequence, + "Ignoring BZM2 READRESULT with unsupported sequence slot" + ); + continue; + } + + let Some(assigned) = assigned_tasks[slot].as_ref() else { + trace!( + asic_hw_id, + engine_id, + sequence, + "Ignoring BZM2 READRESULT with no assigned task for slot" + ); + continue; + }; + + let micro_job = (sequence % (MIDSTATE_COUNT as u8)) as u16; + let share_version = + task_version_for_counter(&assigned.task, assigned.version_counter.wrapping_add(micro_job)); + // BZM2 result timecode identifies which rolled ntime found the nonce. + let share_ntime = assigned.task.ntime.wrapping_add(timecode as u32); + + let header = BlockHeader { + version: share_version, + prev_blockhash: assigned.task.template.prev_blockhash, + merkle_root: assigned.merkle_root, + time: share_ntime, + bits: assigned.task.template.bits, + nonce, + }; + let hash = header.block_hash(); + + if assigned.task.share_target.is_met_by(hash) { + let share = Share { + nonce, + hash, + version: share_version, + ntime: share_ntime, + extranonce2: assigned.task.en2, + expected_work: assigned.task.share_target.to_work(), + }; + + if assigned.task.share_tx.send(share).await.is_ok() { + let mut s = status.write().expect("status lock poisoned"); + s.chip_shares_found = s.chip_shares_found.saturating_add(1); + } + } else { + trace!( + asic_hw_id, + engine_id, + nonce, + sequence, + timecode, + "BZM2 nonce filtered by share target" + ); + } + } Err(e) => { warn!(error = %e, "Error reading BZM2 response stream"); } From 23eb1adf7028c64d385bcec2a891fdcab7c8b3f7 Mon Sep 17 00:00:00 2001 From: 
johnny9 Date: Tue, 24 Feb 2026 10:11:19 -0500 Subject: [PATCH 10/19] fix(bzm2): harden frame decoding and clarify WRITEJOB API --- mujina-miner/src/asic/bzm2/protocol.rs | 491 +++++++++++++++++++++---- 1 file changed, 419 insertions(+), 72 deletions(-) diff --git a/mujina-miner/src/asic/bzm2/protocol.rs b/mujina-miner/src/asic/bzm2/protocol.rs index f2c6a65..3479ba6 100644 --- a/mujina-miner/src/asic/bzm2/protocol.rs +++ b/mujina-miner/src/asic/bzm2/protocol.rs @@ -2,7 +2,7 @@ //! //! This module implements pass-1 support for bring-up: //! - Command encoding for `NOOP`, `READREG`, `WRITEREG` -//! - Response decoding for `NOOP` and `READREG` +//! - Response decoding for `NOOP`, `READREG`, `READRESULT`, and `DTS/VS` //! - 9-bit TX framing via the BIRDS USB bridge format use std::io; @@ -241,7 +241,7 @@ pub enum Command { } impl Command { - pub fn write_job( + pub fn write_job_single_midstate( asic_hw_id: u8, engine: u16, midstate: [u8; 32], @@ -261,13 +261,13 @@ impl Command { } } - /// Build the 4-command enhanced-mode WRITEJOB burst. + /// Build the 4-command WRITEJOB burst. /// - /// Sequence mapping follows bzmd: + /// Sequence mapping: /// `seq_start = (sequence_id % 2) * 4`, then `seq_start + [0,1,2,3]`. /// The first three commands carry `job_ctl=0`; the final command carries /// the requested `job_ctl` (must be 1 or 3). 
- pub fn write_job_enhanced( + pub fn write_job( asic_hw_id: u8, engine: u16, midstates: [[u8; 32]; 4], @@ -282,7 +282,7 @@ impl Command { let seq_start = (sequence_id % 2) * 4; Ok([ - Self::write_job( + Self::write_job_single_midstate( asic_hw_id, engine, midstates[0], @@ -291,7 +291,7 @@ impl Command { seq_start, 0, ), - Self::write_job( + Self::write_job_single_midstate( asic_hw_id, engine, midstates[1], @@ -300,7 +300,7 @@ impl Command { seq_start + 1, 0, ), - Self::write_job( + Self::write_job_single_midstate( asic_hw_id, engine, midstates[2], @@ -309,7 +309,7 @@ impl Command { seq_start + 2, 0, ), - Self::write_job( + Self::write_job_single_midstate( asic_hw_id, engine, midstates[3], @@ -471,6 +471,7 @@ impl Command { #[derive(Debug, Clone, PartialEq, Eq)] pub enum ReadRegData { U8(u8), + U16(u16), U32(u32), } @@ -492,6 +493,18 @@ pub enum Response { sequence: u8, timecode: u8, }, + DtsVs { + asic_hw_id: u8, + data: DtsVsData, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DtsVsData { + /// Generation-1 payload (`uart_dts_vs_msg`) represented as a big-endian `u32`. + Gen1(u32), + /// Generation-2 payload (`uart_gen2_dts_vs_msg`) represented as a big-endian `u64`. + Gen2(u64), } /// BZM2 frame codec. @@ -501,12 +514,13 @@ pub enum Response { #[derive(Debug, Clone)] pub struct FrameCodec { readreg_response_size: usize, + dts_vs_payload_size: Option, } impl FrameCodec { - /// Create codec with explicit READREG response payload size (1 or 4 bytes). + /// Create codec with explicit READREG response payload size (1, 2, or 4 bytes). 
pub fn new(readreg_response_size: usize) -> Result { - if !matches!(readreg_response_size, 1 | 4) { + if !matches!(readreg_response_size, 1 | 2 | 4) { return Err(ProtocolError::UnsupportedReadRegResponseSize( readreg_response_size, )); @@ -514,6 +528,7 @@ impl FrameCodec { Ok(Self { readreg_response_size, + dts_vs_payload_size: None, }) } @@ -526,10 +541,39 @@ impl Default for FrameCodec { fn default() -> Self { Self { readreg_response_size: 4, + dts_vs_payload_size: None, } } } +impl FrameCodec { + const DTS_VS_GEN1_PAYLOAD_LEN: usize = 4; + const DTS_VS_GEN2_PAYLOAD_LEN: usize = 8; + + fn is_plausible_asic_hw_id(asic_hw_id: u8) -> bool { + asic_hw_id == BROADCAST_ASIC + || asic_hw_id == DEFAULT_ASIC_ID + || hw_to_logical_asic_id(asic_hw_id).is_some() + } + + fn response_opcode(opcode: u8) -> Option { + match opcode { + 0x01 => Some(Opcode::ReadResult), + 0x03 => Some(Opcode::ReadReg), + 0x0d => Some(Opcode::DtsVs), + 0x0f => Some(Opcode::Noop), + _ => None, + } + } + + fn is_plausible_response_header(buf: &[u8]) -> bool { + if buf.len() < 2 { + return false; + } + Self::is_plausible_asic_hw_id(buf[0]) && Self::response_opcode(buf[1]).is_some() + } +} + impl Encoder for FrameCodec { type Error = io::Error; @@ -557,19 +601,28 @@ impl Decoder for FrameCodec { return Ok(None); } - let opcode = match Opcode::from_repr(src[1]) { + let opcode = match Self::response_opcode(src[1]) { Some(op) => op, None => { // Byte-level resync when stream is misaligned. 
- tracing::debug!( - dropped = format_args!("0x{:02X}", src[0]), - next = format_args!("0x{:02X}", src[1]), - "BZM2 rx resync: dropping byte" - ); + // tracing::debug!( + // dropped = format_args!("0x{:02X}", src[0]), + // next = format_args!("0x{:02X}", src[1]), + // "BZM2 rx resync: dropping byte" + // ); src.advance(1); continue; } }; + if !Self::is_plausible_asic_hw_id(src[0]) { + // tracing::debug!( + // dropped = format_args!("0x{:02X}", src[0]), + // next = format_args!("0x{:02X}", src[1]), + // "BZM2 rx resync: dropping byte" + // ); + src.advance(1); + continue; + } match opcode { Opcode::Noop => { @@ -578,18 +631,20 @@ impl Decoder for FrameCodec { } tracing::debug!(rx = %format_hex(&src[..5]), "BZM2 rx NOOP frame"); - let mut frame = src.split_to(5); - let asic_hw_id = frame.get_u8(); - let _opcode = frame.get_u8(); - let mut signature = [0u8; 3]; - frame.copy_to_slice(&mut signature); - + let asic_hw_id = src[0]; + let signature = [src[2], src[3], src[4]]; if signature != *NOOP_STRING { - return Err(Self::io_error(ProtocolError::InvalidNoopSignature( - signature, - ))); + // tracing::debug!( + // asic_hw_id, + // signature = %format_hex(&signature), + // buffer_len = src.len(), + // "BZM2 rx NOOP signature mismatch, resync by dropping one byte" + // ); + src.advance(1); + continue; } + src.advance(5); return Ok(Some(Response::Noop { asic_hw_id, signature, @@ -610,6 +665,7 @@ impl Decoder for FrameCodec { let _opcode = frame.get_u8(); let data = match self.readreg_response_size { 1 => ReadRegData::U8(frame.get_u8()), + 2 => ReadRegData::U16(frame.get_u16_le()), 4 => ReadRegData::U32(frame.get_u32_le()), n => { return Err(Self::io_error( @@ -625,17 +681,18 @@ impl Decoder for FrameCodec { if src.len() < FRAME_LEN { return Ok(None); } + + let engine_status = u16::from_be_bytes([src[2], src[3]]); + // BIRDS/bzm2 layout packs [status:4 | engine_id:12] in network byte order. 
+ let engine_id = engine_status & 0x0fff; + let status = ((engine_status >> 12) & 0x000f) as u8; tracing::trace!(rx = %format_hex(&src[..FRAME_LEN]), "BZM2 rx READRESULT frame"); - let mut frame = src.split_to(FRAME_LEN); - let asic_hw_id = frame.get_u8(); - let _opcode = frame.get_u8(); - let engine_status = frame.get_u16(); - let engine_id = (engine_status >> 4) & 0x0fff; - let status = (engine_status & 0x000f) as u8; - let nonce = frame.get_u32_le(); - let sequence = frame.get_u8(); - let timecode = frame.get_u8(); + let asic_hw_id = src[0]; + let nonce = u32::from_le_bytes([src[4], src[5], src[6], src[7]]); + let sequence = src[8]; + let timecode = src[9]; + src.advance(FRAME_LEN); return Ok(Some(Response::ReadResult { asic_hw_id, @@ -646,30 +703,68 @@ impl Decoder for FrameCodec { timecode, })); } - // Pass-1 decoder only surfaces NOOP and READREG. Drop other - // fixed-length TDM messages so callers can keep waiting for - // the response type they care about. Opcode::DtsVs => { - const TDM_FIXED_LEN: usize = 10; // [asic:u8][opcode:u8][payload:8] - if src.len() < TDM_FIXED_LEN { + let gen1_frame_len = 2 + Self::DTS_VS_GEN1_PAYLOAD_LEN; + let gen2_frame_len = 2 + Self::DTS_VS_GEN2_PAYLOAD_LEN; + if src.len() < gen1_frame_len { return Ok(None); } - tracing::trace!( - opcode = opcode as u8, - rx = %format_hex(&src[..TDM_FIXED_LEN]), - "BZM2 rx skipping telemetry frame" - ); - src.advance(TDM_FIXED_LEN); - continue; + + let payload_len = if let Some(payload_len) = self.dts_vs_payload_size { + payload_len + } else { + // Don't lock to gen1 on a fragmented gen2 frame. + // Wait until we have enough bytes to disambiguate. 
+ if src.len() < gen2_frame_len { + return Ok(None); + } + + let gen1_boundary_ok = + Self::is_plausible_response_header(&src[gen1_frame_len..]); + let gen2_boundary_ok = + Self::is_plausible_response_header(&src[gen2_frame_len..]); + + let chosen = match (gen1_boundary_ok, gen2_boundary_ok) { + (true, false) => Self::DTS_VS_GEN1_PAYLOAD_LEN, + (false, true) => Self::DTS_VS_GEN2_PAYLOAD_LEN, + // Prefer gen2 in ambiguous cases: this aligns with existing boards. + _ => Self::DTS_VS_GEN2_PAYLOAD_LEN, + }; + self.dts_vs_payload_size = Some(chosen); + chosen + }; + + let frame_len = 2 + payload_len; + if src.len() < frame_len { + return Ok(None); + } + // tracing::trace!( + // payload_len, + // rx = %format_hex(&src[..frame_len]), + // "BZM2 rx DTS/VS frame" + // ); + + let mut frame = src.split_to(frame_len); + let asic_hw_id = frame.get_u8(); + let _opcode = frame.get_u8(); + let data = match payload_len { + Self::DTS_VS_GEN1_PAYLOAD_LEN => DtsVsData::Gen1(frame.get_u32()), + Self::DTS_VS_GEN2_PAYLOAD_LEN => DtsVsData::Gen2(frame.get_u64()), + n => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("unsupported DTS/VS payload size: {n}"), + )); + } + }; + + return Ok(Some(Response::DtsVs { asic_hw_id, data })); } - other => { - let preview_len = src.len().min(32); - tracing::debug!( - opcode = format_args!("0x{:02X}", other as u8), - buffer_len = src.len(), - buffer_preview = %format_hex(&src[..preview_len]), - "BZM2 rx unsupported opcode, resync by dropping one byte" - ); + _other => { + // tracing::debug!( + // opcode = format_args!("0x{:02X}", _other as u8), + // "BZM2 rx non-response opcode after header check, resync by dropping one byte" + // ); src.advance(1); continue; } @@ -700,6 +795,18 @@ mod tests { assert_eq!(hw_to_logical_asic_id(11), None); } + #[test] + fn test_response_header_rejects_command_opcode() { + assert!(!FrameCodec::is_plausible_response_header(&[ + 0x0a, + Opcode::WriteReg as u8 + ])); + 
assert!(FrameCodec::is_plausible_response_header(&[ + 0x0a, + Opcode::ReadReg as u8 + ])); + } + #[test] fn test_encode_noop_frame() { let cmd = Command::Noop { asic_hw_id: 0xfa }; @@ -744,13 +851,21 @@ mod tests { } #[test] - fn test_encode_writejob_frame() { + fn test_encode_writejob_single_midstate_frame() { let mut midstate = [0u8; 32]; for (i, byte) in midstate.iter_mut().enumerate() { *byte = i as u8; } - let cmd = Command::write_job(0x0a, 0x0123, midstate, 0x1122_3344, 0x5566_7788, 0xfe, 0x03); + let cmd = Command::write_job_single_midstate( + 0x0a, + 0x0123, + midstate, + 0x1122_3344, + 0x5566_7788, + 0xfe, + 0x03, + ); let raw = cmd.encode_raw().expect("encode should succeed"); assert_eq!(&raw[..4], [0x0a, 0x01, 0x23, 0x29]); @@ -762,7 +877,7 @@ mod tests { } #[test] - fn test_writejob_enhanced_builds_four_commands() { + fn test_writejob_builds_four_commands() { let mut midstates = [[0u8; 32]; 4]; midstates[0][0] = 0x10; midstates[1][0] = 0x20; @@ -770,8 +885,8 @@ mod tests { midstates[3][0] = 0x40; let cmds = - Command::write_job_enhanced(0x0a, 0x0123, midstates, 0x1122_3344, 0x5566_7788, 0xff, 3) - .expect("enhanced writejob should build"); + Command::write_job(0x0a, 0x0123, midstates, 0x1122_3344, 0x5566_7788, 0xff, 3) + .expect("writejob should build"); let raw0 = cmds[0].clone().encode_raw().expect("encode should succeed"); let raw1 = cmds[1].clone().encode_raw().expect("encode should succeed"); @@ -793,9 +908,9 @@ mod tests { } #[test] - fn test_writejob_enhanced_rejects_invalid_job_ctl() { + fn test_writejob_rejects_invalid_job_ctl() { let midstates = [[0u8; 32]; 4]; - let err = Command::write_job_enhanced(0x0a, 0x0123, midstates, 0, 0, 0, 0x02) + let err = Command::write_job(0x0a, 0x0123, midstates, 0, 0, 0, 0x02) .expect_err("invalid job_ctl should fail"); assert!(matches!(err, ProtocolError::InvalidJobControl(0x02))); } @@ -848,6 +963,22 @@ mod tests { assert!(src.is_empty()); } + #[test] + fn test_decode_readreg_u16_response() { + let mut codec 
= FrameCodec::new(2).expect("codec should construct"); + let mut src = BytesMut::from(&[0x0a, Opcode::ReadReg as u8, 0x34, 0x12][..]); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::ReadReg { + asic_hw_id: 0x0a, + data: ReadRegData::U16(0x1234), + }) + ); + assert!(src.is_empty()); + } + #[test] fn test_decode_readresult_response() { let mut codec = FrameCodec::default(); @@ -855,8 +986,8 @@ mod tests { &[ 0x0a, Opcode::ReadResult as u8, - 0x12, - 0x34, // engine_status: engine_id=0x123, status=0x4 + 0x40, + 0xe3, // engine_status: status=0x4, engine_id=0x0e3 0x78, 0x56, 0x34, @@ -871,7 +1002,7 @@ mod tests { response, Some(Response::ReadResult { asic_hw_id: 0x0a, - engine_id: 0x123, + engine_id: 0x0e3, status: 0x4, nonce: 0x1234_5678, sequence: 0x07, @@ -898,20 +1029,174 @@ mod tests { } #[test] - fn test_decode_skips_tdm_telemetry_before_noop() { + fn test_decode_resyncs_from_invalid_noop_signature() { let mut codec = FrameCodec::default(); let mut src = BytesMut::from( &[ 0x0a, - Opcode::DtsVs as u8, + Opcode::Noop as u8, + 0xfd, + 0x7f, + 0x0a, // bogus NOOP-like frame + 0x0a, + Opcode::Noop as u8, + b'2', + b'Z', + b'B', // valid NOOP frame + ][..], + ); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::Noop { + asic_hw_id: 0x0a, + signature: *NOOP_STRING, + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_resyncs_from_implausible_asic_id() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from( + &[ + 0x6b, + Opcode::ReadResult as u8, + 0x8f, + 0xff, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, // bogus frame with impossible ASIC ID + 0x0a, + Opcode::Noop as u8, + b'2', + b'Z', + b'B', + ][..], + ); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::Noop { + asic_hw_id: 0x0a, + signature: *NOOP_STRING, + }) + ); + 
assert!(src.is_empty()); + } + + #[test] + fn test_decode_accepts_high_readresult_engine_id() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from( + &[ + 0x0a, + Opcode::ReadResult as u8, + 0x8f, + 0xff, // engine_id=0x0fff + 0x00, + 0x00, + 0x00, 0x00, 0x01, - 0x02, - 0x03, - 0x04, - 0x05, - 0x06, + 0x00, + 0x0a, + Opcode::ReadResult as u8, + 0x80, + 0xe3, // engine_id=0x0e3, status=0x8 + 0x78, + 0x56, + 0x34, + 0x12, 0x07, + 0x2a, + ][..], + ); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::ReadResult { + asic_hw_id: 0x0a, + engine_id: 0x0fff, + status: 0x8, + nonce: 0x0000_0000, + sequence: 0x01, + timecode: 0x00, + }) + ); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::ReadResult { + asic_hw_id: 0x0a, + engine_id: 0x0e3, + status: 0x8, + nonce: 0x1234_5678, + sequence: 0x07, + timecode: 0x2a, + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_drops_echoed_tx_pairs_before_noop() { + let mut codec = FrameCodec::default(); + + let echoed_raw = [0x0a, 0x2f, 0xff, 0x00, 0x00, 0xaa, 0xbb, 0xcc]; + let mut src = nine_bit_encode_frame(&echoed_raw); + src.extend_from_slice(&[0x0a, Opcode::Noop as u8, b'2', b'Z', b'B']); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::Noop { + asic_hw_id: 0x0a, + signature: *NOOP_STRING, + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_realigns_and_drops_echoed_tx_pairs() { + let mut codec = FrameCodec::default(); + + let echoed_raw = [0xff, 0x40, 0x12, 0x01, 0x00, 0x04, 0x08]; + let mut src = BytesMut::from(&[0x99][..]); // misalignment byte + src.extend_from_slice(&nine_bit_encode_frame(&echoed_raw)); + src.extend_from_slice(&[0x0a, Opcode::Noop as u8, b'2', b'Z', b'B']); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + 
response, + Some(Response::Noop { + asic_hw_id: 0x0a, + signature: *NOOP_STRING, + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_dts_vs_gen1_before_noop() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from( + &[ + 0x0a, + Opcode::DtsVs as u8, + 0x12, + 0x34, + 0x56, + 0x78, 0x0a, Opcode::Noop as u8, b'2', @@ -920,6 +1205,15 @@ mod tests { ][..], ); + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::DtsVs { + asic_hw_id: 0x0a, + data: DtsVsData::Gen1(0x1234_5678), + }) + ); + let response = codec.decode(&mut src).expect("decode should succeed"); assert_eq!( response, @@ -930,4 +1224,57 @@ mod tests { ); assert!(src.is_empty()); } + + #[test] + fn test_decode_dts_vs_gen2_response() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from( + &[ + 0x0a, + Opcode::DtsVs as u8, + 0x01, + 0x02, + 0x03, + 0x04, + 0x05, + 0x06, + 0x07, + 0x08, + ][..], + ); + + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::DtsVs { + asic_hw_id: 0x0a, + data: DtsVsData::Gen2(0x0102_0304_0506_0708), + }) + ); + assert!(src.is_empty()); + } + + #[test] + fn test_decode_dts_vs_gen2_fragmented_does_not_lock_gen1() { + let mut codec = FrameCodec::default(); + let mut src = BytesMut::from(&[0x0a, Opcode::DtsVs as u8, 0x01, 0x02, 0x03, 0x04][..]); + + assert!( + codec + .decode(&mut src) + .expect("decode should succeed") + .is_none() + ); + + src.extend_from_slice(&[0x05, 0x06, 0x07, 0x08]); + let response = codec.decode(&mut src).expect("decode should succeed"); + assert_eq!( + response, + Some(Response::DtsVs { + asic_hw_id: 0x0a, + data: DtsVsData::Gen2(0x0102_0304_0506_0708), + }) + ); + assert!(src.is_empty()); + } } From 0ff5db7e7cfa246eb134b90fe5126054cf17da6d Mon Sep 17 00:00:00 2001 From: johnny9 Date: Tue, 24 Feb 2026 10:11:24 -0500 Subject: [PATCH 11/19] feat(hash_thread): implement BZM2 share 
validation --- mujina-miner/src/asic/bzm2/thread.rs | 2281 ++++++++++++++++++++++++-- 1 file changed, 2126 insertions(+), 155 deletions(-) diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index acde086..e04a6e2 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -4,7 +4,8 @@ //! before the first task is accepted. use std::{ - io, + collections::VecDeque, + env, io, sync::{Arc, RwLock}, }; @@ -13,7 +14,7 @@ use bitcoin::{ TxMerkleNode, block::{Header as BlockHeader, Version as BlockVersion}, consensus, - hashes::{HashEngine as _, sha256}, + hashes::Hash as _, }; use futures::{SinkExt, sink::Sink, stream::Stream}; use tokio::sync::{mpsc, oneshot, watch}; @@ -26,14 +27,26 @@ use crate::{ BoardPeripherals, HashTask, HashThread, HashThreadCapabilities, HashThreadError, HashThreadEvent, HashThreadStatus, Share, ThreadRemovalSignal, }, - job_source::{GeneralPurposeBits, MerkleRootKind}, + job_source::{Extranonce2, MerkleRootKind}, tracing::prelude::*, - types::HashRate, + types::{Difficulty, HashRate}, }; -const FIRST_ASIC_ID: u8 = 0x0a; const ENGINE_ROWS: u16 = 20; -const ENGINE_COLS: u16 = 10; +const ENGINE_COLS: u16 = 12; +// Invalid or non-existent engine coordinates. 
+const INVALID_ENGINE_0_ROW: u16 = 0; +const INVALID_ENGINE_0_COL: u16 = 4; +const INVALID_ENGINE_1_ROW: u16 = 0; +const INVALID_ENGINE_1_COL: u16 = 5; +const INVALID_ENGINE_2_ROW: u16 = 19; +const INVALID_ENGINE_2_COL: u16 = 5; +const INVALID_ENGINE_3_ROW: u16 = 19; +const INVALID_ENGINE_3_COL: u16 = 11; +const INVALID_ENGINE_COUNT: usize = 4; +const WORK_ENGINE_COUNT: usize = + (ENGINE_ROWS as usize * ENGINE_COLS as usize) - INVALID_ENGINE_COUNT; +const ENGINE_EN2_OFFSET_START: u64 = 1; const SENSOR_REPORT_INTERVAL: u32 = 63; const THERMAL_TRIP_C: f32 = 115.0; @@ -45,8 +58,7 @@ const REF_DIVIDER: u32 = 2; const POST2_DIVIDER: u32 = 1; const POST1_DIVIDER: u8 = 1; const TARGET_FREQ_MHZ: f32 = 800.0; - -const DRIVE_STRENGTH_STRONG: u32 = 0x4448_4444; +const DRIVE_STRENGTH_STRONG: u32 = 0x4446_4444; const ENGINE_CONFIG_ENHANCED_MODE_BIT: u8 = 1 << 2; const INIT_NOOP_TIMEOUT: Duration = Duration::from_millis(500); @@ -56,6 +68,38 @@ const PLL_POLL_DELAY: Duration = Duration::from_millis(100); const SOFT_RESET_DELAY: Duration = Duration::from_millis(1); const MIDSTATE_COUNT: usize = 4; const WRITEJOB_CTL_REPLACE: u8 = 3; +const MIN_LEADING_ZEROS: u8 = 32; +const ENGINE_LEADING_ZEROS: u8 = 36; +const ENGINE_ZEROS_TO_FIND: u8 = ENGINE_LEADING_ZEROS - MIN_LEADING_ZEROS; +// Timestamp register uses bit7 for AUTO_CLOCK_UNGATE, so max counter value is 0x7f. +const ENGINE_TIMESTAMP_COUNT: u8 = 0x7f; +const AUTO_CLOCK_UNGATE: u8 = 1; +// Runtime nonce gap value. +const BZM2_NONCE_MINUS: u32 = 0x4c; +// Per-ASIC nonce assignment: each active ASIC searches the full +// nonce space (except 0xffff_ffff). 
+const BZM2_START_NONCE: u32 = 0x0000_0000; +const BZM2_END_NONCE: u32 = 0xffff_fffe; +const READRESULT_SEQUENCE_SPACE: usize = 64; // sequence byte carries 4 micro-jobs => 6 visible sequence bits +const READRESULT_SLOT_HISTORY: usize = 16; +const READRESULT_ASSIGNMENT_HISTORY_LIMIT: usize = + READRESULT_SEQUENCE_SPACE * READRESULT_SLOT_HISTORY; +const SANITY_DIAGNOSTIC_LIMIT: u64 = 24; +const SEQUENCE_LOOKUP_DIAGNOSTIC_LIMIT: u64 = 24; +const ZERO_LZ_DIAGNOSTIC_LIMIT: u64 = 24; +const SHA256_IV: [u32; 8] = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; +const SHA256_K: [u32; 64] = [ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +]; #[derive(Debug)] enum ThreadCommand { @@ -302,6 +346,7 @@ where { return match data { protocol::ReadRegData::U32(value) => Ok(value), + protocol::ReadRegData::U16(value) => Ok(value as u32), protocol::ReadRegData::U8(value) => Ok(value as u32), }; } @@ -408,31 +453,817 @@ fn engine_id(row: u16, col: u16) -> u16 { ((col & 0x3f) << 6) | (row & 0x3f) } +fn is_invalid_engine(row: u16, col: u16) -> bool { + (row == INVALID_ENGINE_0_ROW && col == INVALID_ENGINE_0_COL) + || (row == INVALID_ENGINE_1_ROW && col == INVALID_ENGINE_1_COL) + || (row == 
INVALID_ENGINE_2_ROW && col == INVALID_ENGINE_2_COL) + || (row == INVALID_ENGINE_3_ROW && col == INVALID_ENGINE_3_COL) +} + +fn logical_engine_index(row: u16, col: u16) -> Option { + if row >= ENGINE_ROWS || col >= ENGINE_COLS || is_invalid_engine(row, col) { + return None; + } + + let mut logical = 0usize; + for r in 0..ENGINE_ROWS { + for c in 0..ENGINE_COLS { + if is_invalid_engine(r, c) { + continue; + } + if r == row && c == col { + return Some(logical); + } + logical = logical.saturating_add(1); + } + } + + None +} + +fn engine_extranonce2_for_logical_engine( + task: &HashTask, + logical_engine: usize, +) -> Option { + let base = task.en2?; + let offset = (logical_engine as u64).saturating_add(ENGINE_EN2_OFFSET_START); + + if let Some(range) = task.en2_range.as_ref() + && range.size == base.size() + { + let value = if range.min == 0 && range.max == u64::MAX { + base.value().wrapping_add(offset) + } else { + let span = range.max.saturating_sub(range.min).saturating_add(1); + let base_value = if base.value() < range.min || base.value() > range.max { + range.min + } else { + base.value() + }; + let rel = base_value.saturating_sub(range.min); + range + .min + .saturating_add((rel.saturating_add(offset % span)) % span) + }; + return Extranonce2::new(value, base.size()).ok(); + } + + let width_bits = u32::from(base.size()).saturating_mul(8); + let max = if width_bits >= 64 { + u64::MAX + } else { + (1u64 << width_bits) - 1 + }; + let value = if max == u64::MAX { + base.value().wrapping_add(offset) + } else { + base.value().wrapping_add(offset) & max + }; + Extranonce2::new(value, base.size()).ok() +} + +fn readresult_sequence_slot(sequence_id: u8) -> u8 { + sequence_id & 0x3f +} + +fn writejob_effective_sequence_id(sequence_id: u8) -> u8 { + // Keep the thread's assignment tracking in the same sequence domain as + // Command::write_job (seq_start = (sequence_id % 2) * 4). 
+ sequence_id % 2 +} + +fn retain_assigned_task(assigned_tasks: &mut VecDeque, new_task: AssignedTask) { + let slot = readresult_sequence_slot(new_task.sequence_id); + assigned_tasks.push_back(new_task); + + // Keep a small per-slot history so delayed READRESULT frames can still be + // validated against recent predecessors in the same visible sequence slot. + let mut slot_count = assigned_tasks + .iter() + .filter(|task| readresult_sequence_slot(task.sequence_id) == slot) + .count(); + while slot_count > READRESULT_SLOT_HISTORY { + if let Some(index) = assigned_tasks + .iter() + .position(|task| readresult_sequence_slot(task.sequence_id) == slot) + { + let _ = assigned_tasks.remove(index); + slot_count = slot_count.saturating_sub(1); + } else { + break; + } + } + + while assigned_tasks.len() > READRESULT_ASSIGNMENT_HISTORY_LIMIT { + let _ = assigned_tasks.pop_front(); + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct ReadResultFields { + sequence: u8, + timecode: u8, + sequence_id: u8, + micro_job_id: u8, + used_masked_fields: bool, +} + +fn resolve_readresult_fields( + sequence_raw: u8, + timecode_raw: u8, + has_sequence_slot: impl Fn(u8) -> bool, +) -> Option { + let sequence_id_raw = sequence_raw / (MIDSTATE_COUNT as u8); + let sequence_slot_raw = readresult_sequence_slot(sequence_id_raw); + if has_sequence_slot(sequence_slot_raw) { + return Some(ReadResultFields { + sequence: sequence_raw, + timecode: timecode_raw, + sequence_id: sequence_id_raw, + micro_job_id: sequence_raw % (MIDSTATE_COUNT as u8), + used_masked_fields: false, + }); + } + + let sequence_masked = sequence_raw & 0x7f; + let timecode_masked = timecode_raw & 0x7f; + let sequence_id_masked = sequence_masked / (MIDSTATE_COUNT as u8); + let sequence_slot_masked = readresult_sequence_slot(sequence_id_masked); + if (sequence_masked != sequence_raw || timecode_masked != timecode_raw) + && has_sequence_slot(sequence_slot_masked) + { + return Some(ReadResultFields { + sequence: 
sequence_masked, + timecode: timecode_masked, + sequence_id: sequence_id_masked, + micro_job_id: sequence_masked % (MIDSTATE_COUNT as u8), + used_masked_fields: true, + }); + } + + None +} + struct TaskJobPayload { midstates: [[u8; 32]; MIDSTATE_COUNT], merkle_residue: u32, timestamp: u32, } +#[derive(Clone)] +struct EngineAssignment { + merkle_root: TxMerkleNode, + extranonce2: Option, + midstates: [[u8; 32]; MIDSTATE_COUNT], +} + #[derive(Clone)] struct AssignedTask { task: HashTask, merkle_root: TxMerkleNode, - version_counter: u16, + engine_assignments: Arc<[EngineAssignment]>, + microjob_versions: [BlockVersion; MIDSTATE_COUNT], + sequence_id: u8, + timestamp_count: u8, + leading_zeros: u8, + nonce_minus_value: u32, +} + +#[derive(Clone, Debug)] +struct ReplayCheckConfig { + job_id: Option, + en2_value: u64, + en2_size: u8, + ntime: u32, + nonce: u32, + version_bits: u32, +} + +#[derive(Clone, Debug)] +struct FocusedReadResultConfig { + adjusted_nonce: Option, + raw_nonce: Option, + break_on_match: bool, +} + +fn format_replay_en2_hex(value: u64, size: u8) -> String { + format!("{:0width$x}", value, width = size as usize * 2) } -fn expand_counter_into_mask(mask: u16, mut counter: u16) -> u16 { - let mut rolled = 0u16; - for bit in 0..16 { - let bit_mask = 1u16 << bit; - if (mask & bit_mask) != 0 { - if (counter & 1) != 0 { - rolled |= bit_mask; +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum Bzm2CheckResult { + Correct, + NotMeetTarget, + Error, +} + +// Compute the four version-mask deltas used across the 4-midstate micro-jobs. 
+fn midstate_version_mask_variants(version_mask: u32) -> [u32; MIDSTATE_COUNT] { + if version_mask == 0 { + return [0, 0, 0, 0]; + } + + let mut mask = version_mask; + let mut cnt: u32 = 0; + while (mask % 16) == 0 { + cnt = cnt.saturating_add(1); + mask /= 16; + } + + let mut tmp_mask = 0u32; + if (mask % 16) != 0 { + tmp_mask = mask % 16; + } else if (mask % 8) != 0 { + tmp_mask = mask % 8; + } else if (mask % 4) != 0 { + tmp_mask = mask % 4; + } else if (mask % 2) != 0 { + tmp_mask = mask % 2; + } + + for _ in 0..cnt { + tmp_mask = tmp_mask.saturating_mul(16); + } + + [ + 0, + tmp_mask, + version_mask.saturating_sub(tmp_mask), + version_mask, + ] +} + +// Derive per-midstate block versions from the template base version and gp_bits mask. +fn task_midstate_versions(task: &HashTask) -> [BlockVersion; MIDSTATE_COUNT] { + let template = task.template.as_ref(); + let base = template.version.base().to_consensus() as u32; + let gp_mask = u16::from_be_bytes(*template.version.gp_bits_mask().as_bytes()) as u32; + let version_mask = gp_mask << 13; + let variants = midstate_version_mask_variants(version_mask); + + variants.map(|variant| BlockVersion::from_consensus((base | variant) as i32)) +} + +fn check_result( + sha256_le: &[u8; 32], + target_le: &[u8; 32], + leading_zeros: u8, +) -> Bzm2CheckResult { + let mut i: usize = 31; + while i > 0 && sha256_le[i] == 0 { + i -= 1; + } + + let threshold = 31i32 - i32::from(leading_zeros / 8); + if (i as i32) > threshold { + return Bzm2CheckResult::Error; + } + if (i as i32) == threshold { + let mut bit_count = leading_zeros % 8; + let mut bit_index = 7u8; + while bit_count > 0 { + if (sha256_le[i] & (1u8 << bit_index)) != 0 { + return Bzm2CheckResult::Error; } - counter >>= 1; + bit_count -= 1; + bit_index = bit_index.saturating_sub(1); + } + } + + for k in (1..=31).rev() { + if sha256_le[k] < target_le[k] { + return Bzm2CheckResult::Correct; + } + if sha256_le[k] > target_le[k] { + return Bzm2CheckResult::NotMeetTarget; + } + } + 
+ Bzm2CheckResult::Correct +} + +fn leading_zero_bits(sha256_le: &[u8; 32]) -> u16 { + let mut bits = 0u16; + for byte in sha256_le.iter().rev() { + if *byte == 0 { + bits = bits.saturating_add(8); + continue; + } + bits = bits.saturating_add(byte.leading_zeros() as u16); + return bits; + } + bits +} + +fn sha256_compress_state(initial_state: [u32; 8], block: &[u8; 64]) -> [u32; 8] { + let mut w = [0u32; 64]; + for (i, chunk) in block.chunks_exact(4).enumerate() { + w[i] = u32::from_be_bytes(chunk.try_into().expect("chunk size is 4")); + } + for i in 16..64 { + let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); + let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); + w[i] = w[i - 16] + .wrapping_add(s0) + .wrapping_add(w[i - 7]) + .wrapping_add(s1); + } + + let mut a = initial_state[0]; + let mut b = initial_state[1]; + let mut c = initial_state[2]; + let mut d = initial_state[3]; + let mut e = initial_state[4]; + let mut f = initial_state[5]; + let mut g = initial_state[6]; + let mut h = initial_state[7]; + + for i in 0..64 { + let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); + let ch = (e & f) ^ ((!e) & g); + let t1 = h + .wrapping_add(s1) + .wrapping_add(ch) + .wrapping_add(SHA256_K[i]) + .wrapping_add(w[i]); + let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); + let maj = (a & b) ^ (a & c) ^ (b & c); + let t2 = s0.wrapping_add(maj); + + h = g; + g = f; + f = e; + e = d.wrapping_add(t1); + d = c; + c = b; + b = a; + a = t1.wrapping_add(t2); + } + + [ + initial_state[0].wrapping_add(a), + initial_state[1].wrapping_add(b), + initial_state[2].wrapping_add(c), + initial_state[3].wrapping_add(d), + initial_state[4].wrapping_add(e), + initial_state[5].wrapping_add(f), + initial_state[6].wrapping_add(g), + initial_state[7].wrapping_add(h), + ] +} + +fn sha256_state_to_be_bytes(state: [u32; 8]) -> [u8; 32] { + let mut out = [0u8; 32]; + for (i, word) in 
/// Format bytes as uppercase two-digit hex values separated by single spaces.
///
/// An empty slice produces an empty string.
fn format_hex(data: &[u8]) -> String {
    let mut rendered = String::with_capacity(data.len() * 3);
    for (position, byte) in data.iter().enumerate() {
        if position > 0 {
            rendered.push(' ');
        }
        rendered.push_str(&format!("{:02X}", byte));
    }
    rendered
}
prev_blockhash: assigned.task.template.prev_blockhash, + merkle_root: assigned.merkle_root, + time: ntime, + bits: assigned.task.template.bits, + nonce, + }; + let header_bytes = consensus::serialize(&header); + let header_prefix: [u8; 64] = header_bytes[..64] + .try_into() + .expect("header prefix length is fixed"); + let midstate = compute_midstate_le(&header_prefix); + let tail16 = bzm2_tail16_bytes(assigned, ntime, nonce); + let hash_bytes = bzm2_double_sha_from_midstate_and_tail(&midstate, &tail16); + let target_bytes = assigned.task.share_target.to_le_bytes(); + let check = check_result(&hash_bytes, &target_bytes, assigned.leading_zeros); + let lz_bits = leading_zero_bits(&hash_bytes); + format!( + "v={:#010x},t={:#010x},n={:#010x},chk={:?},lz={},msb={:#04x}", + version.to_consensus() as u32, + ntime, + nonce, + check, + lz_bits, + hash_bytes[31] + ) +} + +fn evaluate_check_with_hash_orders( + assigned: &AssignedTask, + version: BlockVersion, + ntime: u32, + nonce_submit: u32, +) -> (Bzm2CheckResult, u8, Bzm2CheckResult, u8) { + let evaluate = |candidate_nonce: u32| { + let header = BlockHeader { + version, + prev_blockhash: assigned.task.template.prev_blockhash, + merkle_root: assigned.merkle_root, + time: ntime, + bits: assigned.task.template.bits, + nonce: candidate_nonce, + }; + let header_bytes = consensus::serialize(&header); + let header_prefix: [u8; 64] = header_bytes[..64] + .try_into() + .expect("header prefix length is fixed"); + let midstate = compute_midstate_le(&header_prefix); + let tail16 = bzm2_tail16_bytes(assigned, ntime, candidate_nonce); + let hash_bytes = bzm2_double_sha_from_midstate_and_tail(&midstate, &tail16); + let target_bytes = assigned.task.share_target.to_le_bytes(); + let check = check_result(&hash_bytes, &target_bytes, assigned.leading_zeros); + (check, hash_bytes[31]) + }; + + // Keep the legacy "le/be" labels in focused diagnostics, but compare + // submit-order nonce vs swapped-order nonce to surface byte-order mistakes. 
+ let (submit_check, submit_msb) = evaluate(nonce_submit); + let (swapped_check, swapped_msb) = evaluate(nonce_submit.swap_bytes()); + (submit_check, submit_msb, swapped_check, swapped_msb) +} + +fn focused_validation_entry( + label: &str, + assigned: &AssignedTask, + sequence: u8, + timecode: u8, + nonce: u32, +) -> String { + let sequence_id = sequence / (MIDSTATE_COUNT as u8); + let micro_job_id = (sequence % (MIDSTATE_COUNT as u8)) as usize; + let version = assigned.microjob_versions[micro_job_id]; + let ntime_rev = assigned + .task + .ntime + .wrapping_add(u32::from(assigned.timestamp_count.wrapping_sub(timecode))); + let ntime_plus = assigned.task.ntime.wrapping_add(u32::from(timecode)); + let (rev_le, rev_le_msb, rev_be, rev_be_msb) = + evaluate_check_with_hash_orders(assigned, version, ntime_rev, nonce); + let (plus_le, plus_le_msb, plus_be, plus_be_msb) = + evaluate_check_with_hash_orders(assigned, version, ntime_plus, nonce); + + format!( + "{label}(seq={:#04x}/sid={}/mj={},time={:#04x},n={:#010x},rev(le={:?}/{:#04x},be={:?}/{:#04x}),plus(le={:?}/{:#04x},be={:?}/{:#04x}))", + sequence, + sequence_id, + micro_job_id, + timecode, + nonce, + rev_le, + rev_le_msb, + rev_be, + rev_be_msb, + plus_le, + plus_le_msb, + plus_be, + plus_be_msb + ) +} + +fn focused_readresult_diagnostic( + assigned: &AssignedTask, + sequence_raw: u8, + timecode_raw: u8, + nonce_raw: u32, +) -> String { + let sequence_masked = sequence_raw & 0x7f; + let timecode_masked = timecode_raw & 0x7f; + let nonce_adjusted = nonce_raw.wrapping_sub(assigned.nonce_minus_value); + let entries = [ + focused_validation_entry( + "raw_adj", + assigned, + sequence_raw, + timecode_raw, + nonce_adjusted, + ), + focused_validation_entry("raw_raw", assigned, sequence_raw, timecode_raw, nonce_raw), + focused_validation_entry( + "m7_adj", + assigned, + sequence_masked, + timecode_masked, + nonce_adjusted, + ), + focused_validation_entry( + "m7_raw", + assigned, + sequence_masked, + timecode_masked, + 
/// Parse a hexadecimal `u32`, with or without a single `0x`/`0X` prefix.
///
/// Surrounding whitespace is ignored. Returns `None` for empty input, for a
/// bare prefix, for any non-hex-digit character (including a sign or a
/// repeated prefix such as `0x0x10`), and for values that do not fit in 32
/// bits.
fn parse_hex_u32(input: &str) -> Option<u32> {
    let text = input.trim();
    // Strip at most one prefix; `trim_start_matches` would strip repeats.
    let digits = text
        .strip_prefix("0x")
        .or_else(|| text.strip_prefix("0X"))
        .unwrap_or(text);
    // `from_str_radix` tolerates a leading `+`, so validate the digits first.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(digits, 16).ok()
}
env::var("MUJINA_BZM2_REPLAY_NONCE") { + Ok(v) => v, + Err(_) => { + warn!("MUJINA_BZM2_REPLAY_EN2 is set but MUJINA_BZM2_REPLAY_NONCE is missing"); + return None; + } + }; + let version_bits_s = match env::var("MUJINA_BZM2_REPLAY_VERSION_BITS") { + Ok(v) => v, + Err(_) => { + warn!("MUJINA_BZM2_REPLAY_EN2 is set but MUJINA_BZM2_REPLAY_VERSION_BITS is missing"); + return None; + } + }; + + let en2_trim = en2_hex + .trim() + .trim_start_matches("0x") + .trim_start_matches("0X"); + if en2_trim.is_empty() || (en2_trim.len() % 2) != 0 || en2_trim.len() > 16 { + warn!( + en2 = %en2_hex, + "Invalid MUJINA_BZM2_REPLAY_EN2 (must be 1-8 bytes of hex)" + ); + return None; } - rolled + + let en2_size = (en2_trim.len() / 2) as u8; + let mut en2_bytes = [0u8; 8]; + for (idx, pair) in en2_trim.as_bytes().chunks_exact(2).enumerate() { + let Ok(byte_str) = std::str::from_utf8(pair) else { + warn!(en2 = %en2_hex, "Invalid UTF-8 in MUJINA_BZM2_REPLAY_EN2"); + return None; + }; + let Ok(byte) = u8::from_str_radix(byte_str, 16) else { + warn!(en2 = %en2_hex, "Invalid MUJINA_BZM2_REPLAY_EN2 hex"); + return None; + }; + en2_bytes[idx] = byte; + } + // Stratum submit extranonce2 is sent as raw bytes hex. Extranonce2 stores value as little-endian. 
+ let en2_value = u64::from_le_bytes(en2_bytes); + + let Some(ntime) = parse_hex_u32(&ntime_s) else { + warn!(ntime = %ntime_s, "Invalid MUJINA_BZM2_REPLAY_NTIME hex"); + return None; + }; + let Some(nonce) = parse_hex_u32(&nonce_s) else { + warn!(nonce = %nonce_s, "Invalid MUJINA_BZM2_REPLAY_NONCE hex"); + return None; + }; + let Some(version_bits) = parse_hex_u32(&version_bits_s) else { + warn!( + version_bits = %version_bits_s, + "Invalid MUJINA_BZM2_REPLAY_VERSION_BITS hex" + ); + return None; + }; + let job_id = env::var("MUJINA_BZM2_REPLAY_JOB_ID") + .ok() + .filter(|s| !s.trim().is_empty()); + + Some(ReplayCheckConfig { + job_id, + en2_value, + en2_size, + ntime, + nonce, + version_bits, + }) +} + +fn log_replay_check_for_task(config: &ReplayCheckConfig, assigned: &AssignedTask) -> bool { + if let Some(job_id) = &config.job_id + && assigned.task.template.id.as_str() != job_id + { + debug!( + configured_job_id = %job_id, + assigned_job_id = %assigned.task.template.id, + "BZM2 replay check skipped (job_id mismatch)" + ); + return false; + } + + let Ok(config_en2) = Extranonce2::new(config.en2_value, config.en2_size) else { + debug!( + job_id = %assigned.task.template.id, + configured_en2 = %format_replay_en2_hex(config.en2_value, config.en2_size), + "BZM2 replay check skipped (configured extranonce2 invalid for configured size)" + ); + return false; + }; + + let matched_engine = assigned + .engine_assignments + .iter() + .position(|engine| engine.extranonce2 == Some(config_en2)); + let (task_en2, replay_merkle_root) = if let Some(logical_engine_id) = matched_engine { + ( + config_en2, + assigned.engine_assignments[logical_engine_id].merkle_root, + ) + } else { + let Some(task_en2) = assigned.task.en2 else { + debug!( + job_id = %assigned.task.template.id, + configured_en2 = %format_replay_en2_hex(config.en2_value, config.en2_size), + "BZM2 replay check skipped (assigned task has no extranonce2)" + ); + return false; + }; + if task_en2 != config_en2 { + debug!( 
+ job_id = %assigned.task.template.id, + configured_en2 = %format_replay_en2_hex(config.en2_value, config.en2_size), + assigned_en2 = %task_en2, + "BZM2 replay check skipped (extranonce2 mismatch)" + ); + return false; + } + (task_en2, assigned.merkle_root) + }; + + let base_version = assigned.task.template.version.base().to_consensus() as u32; + let replay_version_u32 = base_version | config.version_bits; + let replay_version = BlockVersion::from_consensus(replay_version_u32 as i32); + let matched_microjob = assigned + .microjob_versions + .iter() + .position(|v| v.to_consensus() as u32 == replay_version_u32); + + let header = BlockHeader { + version: replay_version, + prev_blockhash: assigned.task.template.prev_blockhash, + merkle_root: replay_merkle_root, + time: config.ntime, + bits: assigned.task.template.bits, + nonce: config.nonce, + }; + let header_bytes = consensus::serialize(&header); + let replay_midstate = matched_microjob + .and_then(|idx| { + matched_engine.map(|logical_engine_id| { + assigned.engine_assignments[logical_engine_id].midstates[idx] + }) + }) + .unwrap_or_else(|| { + let header_prefix: [u8; 64] = header_bytes[..64] + .try_into() + .expect("header prefix length is fixed"); + compute_midstate_le(&header_prefix) + }); + let replay_tail16 = bzm2_tail16_bytes(assigned, config.ntime, config.nonce); + let hash_bzm2 = bzm2_double_sha_from_midstate_and_tail(&replay_midstate, &replay_tail16); + let hash = bitcoin::BlockHash::from_byte_array(hash_bzm2); + let target_bytes = assigned.task.share_target.to_le_bytes(); + let check_result = check_result(&hash_bzm2, &target_bytes, assigned.leading_zeros); + let achieved_difficulty = Difficulty::from_hash(&hash); + let target_difficulty = Difficulty::from_target(assigned.task.share_target); + + debug!( + job_id = %assigned.task.template.id, + assigned_sequence_id = assigned.sequence_id, + assigned_en2 = %task_en2, + replay_en2 = format_args!("{:0width$x}", config.en2_value, width = config.en2_size as usize 
* 2), + replay_ntime = format_args!("{:#010x}", config.ntime), + replay_nonce = format_args!("{:#010x}", config.nonce), + replay_version_bits = format_args!("{:#010x}", config.version_bits), + replay_version = format_args!("{:#010x}", replay_version_u32), + matched_logical_engine = ?matched_engine, + matched_microjob = ?matched_microjob, + check_result = ?check_result, + achieved_difficulty = %achieved_difficulty, + target_difficulty = %target_difficulty, + hash_bzm2 = %format_hex(&hash_bzm2), + header = %format_hex(&header_bytes), + "BZM2 replay check" + ); + true } fn compute_task_merkle_root(task: &HashTask) -> Result { @@ -475,39 +1306,81 @@ fn build_header_bytes( } fn compute_midstate_le(header_prefix_64: &[u8; 64]) -> [u8; 32] { - let mut engine = sha256::HashEngine::default(); - engine.input(header_prefix_64); - let mut midstate = engine.midstate().to_byte_array(); - for word in midstate.chunks_exact_mut(4) { - // Firmware expects each state word little-endian on the wire. - word.reverse(); + // Midstate derivation: SHA256-compress the first 64-byte header block and + // send the raw SHA256 state words in little-endian byte order (OpenSSL ctx.h on x86). 
+ let mut w = [0u32; 64]; + for (i, chunk) in header_prefix_64.chunks_exact(4).enumerate() { + w[i] = u32::from_be_bytes(chunk.try_into().expect("chunk size is 4")); + } + for i in 16..64 { + let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); + let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); + w[i] = w[i - 16] + .wrapping_add(s0) + .wrapping_add(w[i - 7]) + .wrapping_add(s1); } - midstate -} -fn task_version_for_counter(task: &HashTask, version_counter: u16) -> BlockVersion { - let template = task.template.as_ref(); - let base_version = template.version.base(); - let version_mask = u16::from_be_bytes(*template.version.gp_bits_mask().as_bytes()); - let rolled_bits_u16 = expand_counter_into_mask(version_mask, version_counter); - let rolled_bits = GeneralPurposeBits::new(rolled_bits_u16.to_be_bytes()); - rolled_bits.apply_to_version(base_version) + let mut a = SHA256_IV[0]; + let mut b = SHA256_IV[1]; + let mut c = SHA256_IV[2]; + let mut d = SHA256_IV[3]; + let mut e = SHA256_IV[4]; + let mut f = SHA256_IV[5]; + let mut g = SHA256_IV[6]; + let mut h = SHA256_IV[7]; + + for i in 0..64 { + let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); + let ch = (e & f) ^ ((!e) & g); + let t1 = h + .wrapping_add(s1) + .wrapping_add(ch) + .wrapping_add(SHA256_K[i]) + .wrapping_add(w[i]); + let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); + let maj = (a & b) ^ (a & c) ^ (b & c); + let t2 = s0.wrapping_add(maj); + + h = g; + g = f; + f = e; + e = d.wrapping_add(t1); + d = c; + c = b; + b = a; + a = t1.wrapping_add(t2); + } + + let state = [ + SHA256_IV[0].wrapping_add(a), + SHA256_IV[1].wrapping_add(b), + SHA256_IV[2].wrapping_add(c), + SHA256_IV[3].wrapping_add(d), + SHA256_IV[4].wrapping_add(e), + SHA256_IV[5].wrapping_add(f), + SHA256_IV[6].wrapping_add(g), + SHA256_IV[7].wrapping_add(h), + ]; + + let mut out = [0u8; 32]; + for (i, word) in state.iter().copied().enumerate() 
{ + out[i * 4..i * 4 + 4].copy_from_slice(&word.to_le_bytes()); + } + out } fn task_to_bzm2_payload( task: &HashTask, merkle_root: TxMerkleNode, - version_counter: u16, + versions: [BlockVersion; MIDSTATE_COUNT], ) -> Result { let mut midstates = [[0u8; 32]; MIDSTATE_COUNT]; let mut merkle_residue = 0u32; let mut timestamp = 0u32; for (idx, midstate) in midstates.iter_mut().enumerate() { - let rolled_version = - task_version_for_counter(task, version_counter.wrapping_add(idx as u16)); - - let header = build_header_bytes(task, rolled_version, merkle_root)?; + let header = build_header_bytes(task, versions[idx], merkle_root)?; let header_prefix: [u8; 64] = header[..64] .try_into() .expect("header prefix length is fixed"); @@ -515,12 +1388,12 @@ fn task_to_bzm2_payload( *midstate = compute_midstate_le(&header_prefix); if idx == 0 { - merkle_residue = u32::from_le_bytes( + merkle_residue = u32::from_be_bytes( header[64..68] .try_into() .expect("slice length is exactly 4 bytes"), ); - timestamp = u32::from_le_bytes( + timestamp = u32::from_be_bytes( header[68..72] .try_into() .expect("slice length is exactly 4 bytes"), @@ -535,23 +1408,160 @@ fn task_to_bzm2_payload( }) } +fn log_bzm2_job_fingerprint( + task: &HashTask, + merkle_root: TxMerkleNode, + versions: [BlockVersion; MIDSTATE_COUNT], + payload: &TaskJobPayload, + sequence_id: u8, + zeros_to_find: u8, + timestamp_count: u8, +) -> Result<(), HashThreadError> { + let target_swapped = task.template.bits.to_consensus().swap_bytes(); + let target_reg_bytes = target_swapped.to_le_bytes(); + let merkle_root_bytes = consensus::serialize(&merkle_root); + let en2_dbg = task + .en2 + .as_ref() + .map(|v| format!("{v:?}")) + .unwrap_or_else(|| "None".to_owned()); + + let mut version_map = Vec::with_capacity(MIDSTATE_COUNT); + let mut header_tails = Vec::with_capacity(MIDSTATE_COUNT); + let mut header_full = Vec::with_capacity(MIDSTATE_COUNT); + let mut midstates_hex = Vec::with_capacity(MIDSTATE_COUNT); + + for (idx, 
version) in versions.iter().copied().enumerate() { + let header = build_header_bytes(task, version, merkle_root)?; + version_map.push(format!("mj{idx}={:#010x}", version.to_consensus() as u32)); + header_tails.push(format!("mj{idx}={}", format_hex(&header[64..80]))); + header_full.push(format!("mj{idx}={}", format_hex(&header))); + midstates_hex.push(format!("mj{idx}={}", format_hex(&payload.midstates[idx]))); + } + + debug!( + job_id = %task.template.id, + sequence_id, + ntime = format_args!("{:#x}", task.ntime), + template_time = format_args!("{:#x}", task.template.time), + bits = format_args!("{:#x}", task.template.bits.to_consensus()), + share_target = %task.share_target, + en2 = %en2_dbg, + zeros_to_find, + timestamp_count, + target_reg = %format_hex(&target_reg_bytes), + merkle_root = %format_hex(&merkle_root_bytes), + payload_merkle_residue = format_args!("{:#010x}", payload.merkle_residue), + payload_timestamp = format_args!("{:#010x}", payload.timestamp), + versions = %version_map.join(" "), + header_tail = %header_tails.join(" | "), + midstates = %midstates_hex.join(" | "), + headers = %header_full.join(" | "), + "BZM2 job fingerprint" + ); + + Ok(()) +} + async fn send_task_to_all_engines( chip_commands: &mut W, task: &HashTask, - merkle_root: TxMerkleNode, - version_counter: u16, + versions: [BlockVersion; MIDSTATE_COUNT], sequence_id: u8, -) -> Result<(), HashThreadError> + zeros_to_find: u8, + timestamp_count: u8, +) -> Result, HashThreadError> where W: Sink + Unpin, W::Error: std::fmt::Debug, { - let payload = task_to_bzm2_payload(task, merkle_root, version_counter)?; + // `data[2]` comes from big-endian nbits bytes copied into + // a little-endian u32, so the numeric value is byte-swapped consensus nbits. 
+ let target = task.template.bits.to_consensus().swap_bytes(); + let timestamp_reg_value = ((AUTO_CLOCK_UNGATE & 0x1) << 7) | (timestamp_count & 0x7f); + let mut engine_assignments = Vec::with_capacity(WORK_ENGINE_COUNT); + let mut fingerprint_logged = false; + + for row in 0..ENGINE_ROWS { + for col in 0..ENGINE_COLS { + if is_invalid_engine(row, col) { + continue; + } - for col in 0..ENGINE_COLS { - for row in 0..ENGINE_ROWS { + let Some(logical_engine_id) = logical_engine_index(row, col) else { + continue; + }; let engine = engine_id(row, col); - let commands = protocol::Command::write_job_enhanced( + let mut engine_task = task.clone(); + engine_task.en2 = engine_extranonce2_for_logical_engine(task, logical_engine_id); + let merkle_root = compute_task_merkle_root(&engine_task).map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!( + "failed to derive per-engine merkle root for logical engine {logical_engine_id} (row {row} col {col}): {e}" + )) + })?; + let payload = task_to_bzm2_payload(&engine_task, merkle_root, versions).map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!( + "failed to derive per-engine payload for logical engine {logical_engine_id} (row {row} col {col}): {e}" + )) + })?; + if !fingerprint_logged { + fingerprint_logged = true; + if let Err(e) = log_bzm2_job_fingerprint( + &engine_task, + merkle_root, + versions, + &payload, + sequence_id, + zeros_to_find, + timestamp_count, + ) { + warn!(error = %e, "Failed to emit BZM2 job fingerprint"); + } + } + debug!( + logical_engine_id, + engine_hw_id = format_args!("{:#05x}", engine), + row, + column = col, + sequence_id, + extranonce2 = ?engine_task.en2, + data0 = format_args!("{:#010x}", payload.merkle_residue), + data1 = format_args!("{:#010x}", payload.timestamp), + data2 = format_args!("{:#010x}", target), + "BZM2 dispatch map" + ); + + write_reg_u8( + chip_commands, + protocol::BROADCAST_ASIC, + engine, + protocol::engine_reg::ZEROS_TO_FIND, + zeros_to_find, + "task assign: 
ZEROS_TO_FIND", + ) + .await?; + + write_reg_u8( + chip_commands, + protocol::BROADCAST_ASIC, + engine, + protocol::engine_reg::TIMESTAMP_COUNT, + timestamp_reg_value, + "task assign: TIMESTAMP_COUNT", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + engine, + protocol::engine_reg::TARGET, + target, + "task assign: TARGET", + ) + .await?; + + let commands = protocol::Command::write_job( protocol::BROADCAST_ASIC, engine, payload.midstates, @@ -573,10 +1583,23 @@ where )) })?; } + engine_assignments.push(EngineAssignment { + merkle_root, + extranonce2: engine_task.en2, + midstates: payload.midstates, + }); } } - Ok(()) + if engine_assignments.len() != WORK_ENGINE_COUNT { + return Err(HashThreadError::WorkAssignmentFailed(format!( + "unexpected BZM2 engine assignment count: got {}, expected {}", + engine_assignments.len(), + WORK_ENGINE_COUNT + ))); + } + + Ok(engine_assignments) } async fn configure_sensors( @@ -878,6 +1901,47 @@ where Ok(()) } +async fn set_asic_nonce_range( + chip_commands: &mut W, + asic_id: u8, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + let start_nonce = BZM2_START_NONCE; + let end_nonce = BZM2_END_NONCE; + + for col in 0..ENGINE_COLS { + for row in 0..ENGINE_ROWS { + if is_invalid_engine(row, col) { + continue; + } + let engine = engine_id(row, col); + write_reg_u32( + chip_commands, + asic_id, + engine, + protocol::engine_reg::START_NONCE, + start_nonce, + "set nonce range: START_NONCE", + ) + .await?; + write_reg_u32( + chip_commands, + asic_id, + engine, + protocol::engine_reg::END_NONCE, + end_nonce, + "set nonce range: END_NONCE", + ) + .await?; + } + } + + Ok(()) +} + async fn start_warm_up_jobs(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> where W: Sink + Unpin, @@ -885,40 +1949,42 @@ where { for col in 0..ENGINE_COLS { for row in 0..ENGINE_ROWS { + if is_invalid_engine(row, col) { + continue; + } let engine = engine_id(row, col); - for 
_ in 0..2 { - write_reg_u8( - chip_commands, - asic_id, - engine, - protocol::engine_reg::TIMESTAMP_COUNT, - 0xff, - "warm-up: TIMESTAMP_COUNT", - ) - .await?; - for seq in [0xfc, 0xfd, 0xfe, 0xff] { - write_reg_u8( - chip_commands, - asic_id, - engine, - protocol::engine_reg::SEQUENCE_ID, - seq, - "warm-up: SEQUENCE_ID", - ) - .await?; - } + write_reg_u8( + chip_commands, + asic_id, + engine, + protocol::engine_reg::TIMESTAMP_COUNT, + 0xff, + "warm-up: TIMESTAMP_COUNT", + ) + .await?; + for seq in [0xfc, 0xfd, 0xfe, 0xff] { write_reg_u8( chip_commands, asic_id, engine, - protocol::engine_reg::JOB_CONTROL, - 1, - "warm-up: JOB_CONTROL", + protocol::engine_reg::SEQUENCE_ID, + seq, + "warm-up: SEQUENCE_ID", ) .await?; } + + write_reg_u8( + chip_commands, + asic_id, + engine, + protocol::engine_reg::JOB_CONTROL, + 1, + "warm-up: JOB_CONTROL", + ) + .await?; } } Ok(()) @@ -962,9 +2028,13 @@ where let mut asic_ids = Vec::with_capacity(asic_count as usize); for index in 0..asic_count { - let asic_id = FIRST_ASIC_ID - .checked_add(index) - .ok_or_else(|| init_failed("ASIC ID overflow while programming chain IDs"))?; + let asic_id = protocol::logical_to_hw_asic_id(index); + if protocol::hw_to_logical_asic_id(asic_id) != Some(index) { + return Err(init_failed(format!( + "invalid ASIC ID mapping for logical index {} -> 0x{:02x}", + index, asic_id + ))); + } write_reg_u32( chip_commands, @@ -1065,6 +2135,7 @@ where debug!(asic_id, "BZM2 soft reset + clock gate + warm-up start"); soft_reset(chip_commands, asic_id).await?; set_all_clock_gates(chip_commands, asic_id).await?; + set_asic_nonce_range(chip_commands, asic_id).await?; start_warm_up_jobs(chip_commands, asic_id).await?; debug!(asic_id, "BZM2 warm-up complete"); } @@ -1104,9 +2175,36 @@ async fn bzm2_thread_actor( let mut chip_initialized = false; let mut current_task: Option = None; - let mut assigned_tasks: [Option; 2] = [None, None]; + let mut assigned_tasks: VecDeque = + 
VecDeque::with_capacity(READRESULT_ASSIGNMENT_HISTORY_LIMIT); let mut next_sequence_id: u8 = 0; - let mut next_version_counter: u16 = 0; + let mut sanity_candidates_total: u64 = 0; + let mut sanity_candidates_meet_task: u64 = 0; + let mut sanity_best_difficulty: Option = None; + let mut sanity_diagnostic_samples: u64 = 0; + let mut sequence_lookup_diagnostic_samples: u64 = 0; + let mut zero_lz_diagnostic_samples: u64 = 0; + let replay_check_config = parse_replay_check_config_from_env(); + let focused_readresult_config = parse_focused_readresult_config_from_env(); + if let Some(cfg) = replay_check_config.as_ref() { + info!( + replay_job_id = ?cfg.job_id, + replay_en2 = %format_replay_en2_hex(cfg.en2_value, cfg.en2_size), + replay_ntime = format_args!("{:#010x}", cfg.ntime), + replay_nonce = format_args!("{:#010x}", cfg.nonce), + replay_version_bits = format_args!("{:#010x}", cfg.version_bits), + "BZM2 replay check configured" + ); + } + if let Some(cfg) = focused_readresult_config.as_ref() { + info!( + trace_nonce = ?cfg.adjusted_nonce.map(|n| format!("{:#010x}", n)), + trace_raw_nonce = ?cfg.raw_nonce.map(|n| format!("{:#010x}", n)), + break_on_match = cfg.break_on_match, + "BZM2 focused READRESULT tracing configured" + ); + } + let mut replay_check_hits: u64 = 0; let mut status_ticker = time::interval(Duration::from_secs(5)); status_ticker.set_missed_tick_behavior(time::MissedTickBehavior::Skip); @@ -1143,45 +2241,66 @@ async fn bzm2_thread_actor( } } - let merkle_root = match compute_task_merkle_root(&new_task) { - Ok(root) => root, - Err(e) => { - error!(error = %e, "Failed to derive merkle root for update_task"); - let _ = response_tx.send(Err(e)); - continue; - } - }; + let microjob_versions = task_midstate_versions(&new_task); + let write_sequence_id = writejob_effective_sequence_id(next_sequence_id); - if let Err(e) = send_task_to_all_engines( + let engine_assignments = match send_task_to_all_engines( &mut chip_commands, &new_task, - merkle_root, - 
next_version_counter, - next_sequence_id, + microjob_versions, + write_sequence_id, + ENGINE_ZEROS_TO_FIND, + ENGINE_TIMESTAMP_COUNT, ) .await { + Ok(assignments) => assignments, + Err(e) => { + error!(error = %e, "Failed to send BZM2 work during update_task"); + let _ = response_tx.send(Err(e)); + continue; + } + }; + let Some(default_assignment) = engine_assignments.first().cloned() else { + let e = HashThreadError::WorkAssignmentFailed( + "no engine assignments produced for update_task".into(), + ); error!(error = %e, "Failed to send BZM2 work during update_task"); let _ = response_tx.send(Err(e)); continue; - } + }; - let slot = (next_sequence_id as usize) & 0x01; - assigned_tasks[slot] = Some(AssignedTask { + // `job_ctl=3` behavior: old jobs are canceled on every assign. + let new_assigned_task = AssignedTask { task: new_task.clone(), - merkle_root, - version_counter: next_version_counter, - }); + merkle_root: default_assignment.merkle_root, + engine_assignments: Arc::from(engine_assignments.into_boxed_slice()), + microjob_versions, + sequence_id: write_sequence_id, + timestamp_count: ENGINE_TIMESTAMP_COUNT, + leading_zeros: ENGINE_LEADING_ZEROS, + nonce_minus_value: BZM2_NONCE_MINUS, + }; + retain_assigned_task(&mut assigned_tasks, new_assigned_task); + if let Some(cfg) = replay_check_config.as_ref() + && let Some(assigned) = assigned_tasks.back() + { + if log_replay_check_for_task(cfg, assigned) { + replay_check_hits = replay_check_hits.saturating_add(1); + trace!( + replay_check_hits, + "BZM2 replay check matched on update_task" + ); + } + } debug!( job_id = %new_task.template.id, sequence_id = next_sequence_id, - version_counter = next_version_counter, + write_sequence_id, "Sent BZM2 WRITEJOB payloads for update_task" ); next_sequence_id = next_sequence_id.wrapping_add(1); - next_version_counter = - next_version_counter.wrapping_add(MIDSTATE_COUNT as u16); let old_task = current_task.replace(new_task); { @@ -1208,45 +2327,66 @@ async fn bzm2_thread_actor( 
} } - let merkle_root = match compute_task_merkle_root(&new_task) { - Ok(root) => root, - Err(e) => { - error!(error = %e, "Failed to derive merkle root for replace_task"); - let _ = response_tx.send(Err(e)); - continue; - } - }; + let microjob_versions = task_midstate_versions(&new_task); + let write_sequence_id = writejob_effective_sequence_id(next_sequence_id); - if let Err(e) = send_task_to_all_engines( + let engine_assignments = match send_task_to_all_engines( &mut chip_commands, &new_task, - merkle_root, - next_version_counter, - next_sequence_id, + microjob_versions, + write_sequence_id, + ENGINE_ZEROS_TO_FIND, + ENGINE_TIMESTAMP_COUNT, ) .await { + Ok(assignments) => assignments, + Err(e) => { + error!(error = %e, "Failed to send BZM2 work during replace_task"); + let _ = response_tx.send(Err(e)); + continue; + } + }; + let Some(default_assignment) = engine_assignments.first().cloned() else { + let e = HashThreadError::WorkAssignmentFailed( + "no engine assignments produced for replace_task".into(), + ); error!(error = %e, "Failed to send BZM2 work during replace_task"); let _ = response_tx.send(Err(e)); continue; - } + }; - let slot = (next_sequence_id as usize) & 0x01; - assigned_tasks[slot] = Some(AssignedTask { + // `job_ctl=3` behavior: old jobs are canceled on every assign. 
+ let new_assigned_task = AssignedTask { task: new_task.clone(), - merkle_root, - version_counter: next_version_counter, - }); + merkle_root: default_assignment.merkle_root, + engine_assignments: Arc::from(engine_assignments.into_boxed_slice()), + microjob_versions, + sequence_id: write_sequence_id, + timestamp_count: ENGINE_TIMESTAMP_COUNT, + leading_zeros: ENGINE_LEADING_ZEROS, + nonce_minus_value: BZM2_NONCE_MINUS, + }; + retain_assigned_task(&mut assigned_tasks, new_assigned_task); + if let Some(cfg) = replay_check_config.as_ref() + && let Some(assigned) = assigned_tasks.back() + { + if log_replay_check_for_task(cfg, assigned) { + replay_check_hits = replay_check_hits.saturating_add(1); + trace!( + replay_check_hits, + "BZM2 replay check matched on replace_task" + ); + } + } debug!( job_id = %new_task.template.id, sequence_id = next_sequence_id, - version_counter = next_version_counter, + write_sequence_id, "Sent BZM2 WRITEJOB payloads for replace_task" ); next_sequence_id = next_sequence_id.wrapping_add(1); - next_version_counter = - next_version_counter.wrapping_add(MIDSTATE_COUNT as u16); let old_task = current_task.replace(new_task); { @@ -1257,7 +2397,7 @@ async fn bzm2_thread_actor( } ThreadCommand::GoIdle { response_tx } => { let old_task = current_task.take(); - assigned_tasks = [None, None]; + assigned_tasks.clear(); { let mut s = status.write().expect("status lock poisoned"); s.is_active = false; @@ -1278,6 +2418,10 @@ async fn bzm2_thread_actor( Ok(protocol::Response::ReadReg { asic_hw_id, data }) => { trace!(asic_hw_id, data = ?data, "BZM2 READREG response"); } + Ok(protocol::Response::DtsVs { asic_hw_id, data }) => { + // Temporarily suppress noisy DTS/VS logging while debugging share flow. 
+ let _ = (asic_hw_id, data); + } Ok(protocol::Response::ReadResult { asic_hw_id, engine_id, @@ -1300,46 +2444,387 @@ async fn bzm2_thread_actor( continue; } - let slot = (sequence as usize) / MIDSTATE_COUNT; - if slot >= assigned_tasks.len() { + let row = engine_id & 0x3f; + let column = engine_id >> 6; + if row >= ENGINE_ROWS || column >= ENGINE_COLS { trace!( asic_hw_id, engine_id, + row, + column, sequence, - "Ignoring BZM2 READRESULT with unsupported sequence slot" + "Ignoring BZM2 READRESULT with unmapped engine coordinates" ); continue; } + if is_invalid_engine(row, column) { + trace!( + asic_hw_id, + engine_id, + row, + column, + sequence, + "Ignoring BZM2 READRESULT from invalid engine coordinate" + ); + continue; + } + let Some(logical_engine_id) = logical_engine_index(row, column) else { + trace!( + asic_hw_id, + engine_id, + row, + column, + sequence, + "Ignoring BZM2 READRESULT with unmapped logical engine index" + ); + continue; + }; - let Some(assigned) = assigned_tasks[slot].as_ref() else { + let sequence_id_raw = sequence / (MIDSTATE_COUNT as u8); + let sequence_masked = sequence & 0x7f; + let sequence_id_masked = sequence_masked / (MIDSTATE_COUNT as u8); + let micro_job_id_masked = sequence_masked % (MIDSTATE_COUNT as u8); + let timecode_masked = timecode & 0x7f; + let Some(resolved_fields) = + resolve_readresult_fields(sequence, timecode, |slot| { + assigned_tasks.iter().rev().any(|task| { + readresult_sequence_slot(task.sequence_id) == slot + }) + }) + else { + if sequence_lookup_diagnostic_samples < SEQUENCE_LOOKUP_DIAGNOSTIC_LIMIT { + sequence_lookup_diagnostic_samples = + sequence_lookup_diagnostic_samples.saturating_add(1); + let masked_match = assigned_tasks + .iter() + .rev() + .find(|task| { + readresult_sequence_slot(task.sequence_id) + == sequence_id_masked + }) + .map(|task| task.sequence_id); + let recent_slots: Vec = assigned_tasks + .iter() + .rev() + .take(6) + .map(|task| readresult_sequence_slot(task.sequence_id)) + .collect(); + 
let recent_sequence_ids: Vec = assigned_tasks + .iter() + .rev() + .take(6) + .map(|task| task.sequence_id) + .collect(); + debug!( + asic_hw_id, + engine_id, + sequence_raw = format_args!("{:#04x}", sequence), + sequence_id_raw, + sequence_masked = format_args!("{:#04x}", sequence_masked), + sequence_id_masked, + micro_job_id_masked, + timecode_raw = format_args!("{:#04x}", timecode), + timecode_masked = format_args!("{:#04x}", timecode_masked), + masked_lookup_hit = masked_match.is_some(), + masked_lookup_sequence_id = ?masked_match, + recent_slots = ?recent_slots, + recent_sequence_ids = ?recent_sequence_ids, + "BZM2 READRESULT lookup diagnostic" + ); + } trace!( asic_hw_id, engine_id, + sequence_id_raw, sequence, - "Ignoring BZM2 READRESULT with no assigned task for slot" + timecode, + "Ignoring BZM2 READRESULT with no assigned task" ); continue; }; + let sequence_id = resolved_fields.sequence_id; + let micro_job_id = resolved_fields.micro_job_id; + let sequence_effective = resolved_fields.sequence; + let timecode_effective = resolved_fields.timecode; + let sequence_slot = readresult_sequence_slot(sequence_id); + let slot_candidates: Vec = assigned_tasks + .iter() + .rev() + .filter(|task| readresult_sequence_slot(task.sequence_id) == sequence_slot) + .cloned() + .collect(); + let slot_candidate_count = slot_candidates.len(); + if slot_candidate_count == 0 { + trace!( + asic_hw_id, + engine_id, + sequence_id, + sequence_raw = sequence, + sequence_effective, + "Ignoring BZM2 READRESULT with no assigned task after field resolution" + ); + continue; + } + + let nonce_raw = nonce; + let mut selected_candidate: Option<( + AssignedTask, + BlockVersion, + [u8; 32], + u32, + u32, + u32, + u32, + BlockHeader, + [u8; 16], + [u8; 32], + bitcoin::BlockHash, + [u8; 32], + Bzm2CheckResult, + u16, + Difficulty, + Difficulty, + f64, + f64, + )> = None; + let mut selected_rank = 0u8; + + for mut candidate in slot_candidates { + let Some(engine_assignment) = + 
candidate.engine_assignments.get(logical_engine_id).cloned() + else { + continue; + }; + candidate.merkle_root = engine_assignment.merkle_root; + candidate.task.en2 = engine_assignment.extranonce2; + let share_version = candidate.microjob_versions[micro_job_id as usize]; + let selected_midstate = engine_assignment.midstates[micro_job_id as usize]; + // Result time is reverse-counted and must be + // converted into a forward ntime offset. + let ntime_offset = + u32::from(candidate.timestamp_count.wrapping_sub(timecode_effective)); + let share_ntime = candidate.task.ntime.wrapping_add(ntime_offset); + // READRESULT mapping: + // READRESULT nonce is first adjusted by nonce_minus, then byte-swapped + // for reconstructed header hashing and Stratum submit nonce field. + let nonce_adjusted = nonce_raw.wrapping_sub(candidate.nonce_minus_value); + let nonce_submit = nonce_adjusted.swap_bytes(); + + // Build a canonical header for logging/replay diagnostics. + let header = BlockHeader { + version: share_version, + prev_blockhash: candidate.task.template.prev_blockhash, + merkle_root: candidate.merkle_root, + time: share_ntime, + bits: candidate.task.template.bits, + nonce: nonce_submit, + }; + let tail16 = bzm2_tail16_bytes(&candidate, share_ntime, nonce_submit); + let hash_bytes = + bzm2_double_sha_from_midstate_and_tail(&selected_midstate, &tail16); + let hash = bitcoin::BlockHash::from_byte_array(hash_bytes); + let target_bytes = candidate.task.share_target.to_le_bytes(); + let check_result = check_result( + &hash_bytes, + &target_bytes, + candidate.leading_zeros, + ); + let observed_leading_zeros = + leading_zero_bits(&hash_bytes); + let achieved_difficulty = Difficulty::from_hash(&hash); + let target_difficulty = + Difficulty::from_target(candidate.task.share_target); + let achieved_difficulty_f64 = achieved_difficulty.as_f64(); + let target_difficulty_f64 = target_difficulty.as_f64(); + let rank = match check_result { + Bzm2CheckResult::Correct => 3, + 
Bzm2CheckResult::NotMeetTarget => 2, + Bzm2CheckResult::Error => 1, + }; + + if selected_candidate.is_none() || rank > selected_rank { + selected_rank = rank; + selected_candidate = Some(( + candidate, + share_version, + selected_midstate, + ntime_offset, + share_ntime, + nonce_adjusted, + nonce_submit, + header, + tail16, + hash_bytes, + hash, + target_bytes, + check_result, + observed_leading_zeros, + achieved_difficulty, + target_difficulty, + achieved_difficulty_f64, + target_difficulty_f64, + )); + if rank == 3 { + break; + } + } + } - let micro_job = (sequence % (MIDSTATE_COUNT as u8)) as u16; - let share_version = - task_version_for_counter(&assigned.task, assigned.version_counter.wrapping_add(micro_job)); - // BZM2 result timecode identifies which rolled ntime found the nonce. - let share_ntime = assigned.task.ntime.wrapping_add(timecode as u32); - - let header = BlockHeader { - version: share_version, - prev_blockhash: assigned.task.template.prev_blockhash, - merkle_root: assigned.merkle_root, - time: share_ntime, - bits: assigned.task.template.bits, - nonce, + let Some(( + assigned, + share_version, + selected_midstate, + ntime_offset, + share_ntime, + nonce_adjusted, + nonce_submit, + header, + tail16, + hash_bytes, + hash, + target_bytes, + check_result, + observed_leading_zeros, + achieved_difficulty, + target_difficulty, + achieved_difficulty_f64, + target_difficulty_f64, + )) = selected_candidate + else { + trace!( + asic_hw_id, + engine_id, + logical_engine_id, + sequence_id, + slot_candidate_count, + "Ignoring BZM2 READRESULT without a usable retained assignment" + ); + continue; }; - let hash = header.block_hash(); - if assigned.task.share_target.is_met_by(hash) { + if slot_candidate_count > 1 { + trace!( + asic_hw_id, + engine_id, + logical_engine_id, + sequence_id, + matched_sequence_id = assigned.sequence_id, + slot_candidate_count, + "BZM2 READRESULT evaluated retained slot history" + ); + } + + if resolved_fields.used_masked_fields { + 
trace!( + asic_hw_id, + engine_id, + sequence_raw = format_args!("{:#04x}", sequence), + sequence_effective = format_args!("{:#04x}", sequence_effective), + timecode_raw = format_args!("{:#04x}", timecode), + timecode_effective = format_args!("{:#04x}", timecode_effective), + "BZM2 READRESULT using masked sequence/timecode fields" + ); + } + + sanity_candidates_total = sanity_candidates_total.saturating_add(1); + if sanity_best_difficulty.map_or(true, |best| achieved_difficulty > best) { + sanity_best_difficulty = Some(achieved_difficulty); + } + + if let Some(cfg) = focused_readresult_config.as_ref() { + let adjusted_match = cfg.adjusted_nonce.map_or(true, |n| n == nonce_adjusted); + let raw_match = cfg.raw_nonce.map_or(true, |n| n == nonce_raw); + if adjusted_match && raw_match { + let header_bytes = consensus::serialize(&header); + let merkle_root_bytes = consensus::serialize(&assigned.merkle_root); + let header_prefix: [u8; 64] = header_bytes[..64] + .try_into() + .expect("header prefix length is fixed"); + let derived_midstate = compute_midstate_le(&header_prefix); + let mut hash_rev = hash_bytes; + hash_rev.reverse(); + debug!( + asic_hw_id, + engine_hw_id = engine_id, + logical_engine_id, + sequence_raw = format_args!("{:#04x}", sequence), + sequence_effective = format_args!("{:#04x}", sequence_effective), + sequence_id, + micro_job_id, + timecode_raw = format_args!("{:#04x}", timecode), + timecode_effective = format_args!("{:#04x}", timecode_effective), + nonce_raw = format_args!("{:#010x}", nonce_raw), + nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), + nonce_submit = format_args!("{:#010x}", nonce_submit), + nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), + ntime_offset, + ntime = format_args!("{:#010x}", share_ntime), + version = format_args!("{:#010x}", share_version.to_consensus() as u32), + bits = format_args!("{:#010x}", assigned.task.template.bits.to_consensus()), + extranonce2 = ?assigned.task.en2, + merkle_root = 
%format_hex(&merkle_root_bytes), + midstate = %format_hex(&selected_midstate), + derived_midstate = %format_hex(&derived_midstate), + header = %format_hex(&header_bytes), + tail16 = %format_hex(&tail16), + hash_bzm2_order = %format_hex(&hash_bytes), + hash_reversed = %format_hex(&hash_rev), + hash_msb_bzm2 = format_args!("{:#04x}", hash_bytes[31]), + target = %format_hex(&target_bytes), + check_result = ?check_result, + observed_leading_zeros_bits = observed_leading_zeros, + achieved_difficulty = %achieved_difficulty, + achieved_difficulty_f64 = format_args!("{:.3e}", achieved_difficulty_f64), + target_difficulty = %target_difficulty, + target_difficulty_f64 = format_args!("{:.3e}", target_difficulty_f64), + "BZM2 focused READRESULT trace" + ); + if cfg.break_on_match { + panic!( + "BZM2 focused READRESULT breakpoint hit: engine_hw_id={:#x} logical_engine_id={} sequence={:#x} timecode={:#x} raw_nonce={:#010x} adjusted_nonce={:#010x}", + engine_id, logical_engine_id, sequence, timecode, nonce_raw, nonce_adjusted + ); + } + } + } + + if check_result == Bzm2CheckResult::Error + && observed_leading_zeros == 0 + && zero_lz_diagnostic_samples < ZERO_LZ_DIAGNOSTIC_LIMIT + { + zero_lz_diagnostic_samples = + zero_lz_diagnostic_samples.saturating_add(1); + warn!( + asic_hw_id, + engine_hw_id = engine_id, + logical_engine_id, + sequence_raw = format_args!("{:#04x}", sequence), + sequence_effective = format_args!("{:#04x}", sequence_effective), + sequence_id, + matched_sequence_id = assigned.sequence_id, + micro_job_id, + timecode_raw = format_args!("{:#04x}", timecode), + timecode_effective = format_args!("{:#04x}", timecode_effective), + slot_candidate_count, + nonce_raw = format_args!("{:#010x}", nonce_raw), + nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), + nonce_submit = format_args!("{:#010x}", nonce_submit), + nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), + ntime_offset, + ntime = format_args!("{:#010x}", share_ntime), + version = 
format_args!("{:#010x}", share_version.to_consensus() as u32), + observed_leading_zeros_bits = observed_leading_zeros, + required_leading_zeros_bits = assigned.leading_zeros, + hash_msb = format_args!("{:#04x}", hash_bytes[31]), + "BZM2 READRESULT valid-flag nonce reconstructed with zero leading zeros" + ); + } + + if check_result == Bzm2CheckResult::Correct { + sanity_candidates_meet_task = + sanity_candidates_meet_task.saturating_add(1); let share = Share { - nonce, + nonce: nonce_submit, hash, version: share_version, ntime: share_ntime, @@ -1351,15 +2836,184 @@ async fn bzm2_thread_actor( let mut s = status.write().expect("status lock poisoned"); s.chip_shares_found = s.chip_shares_found.saturating_add(1); } - } else { + trace!( asic_hw_id, - engine_id, - nonce, + engine_hw_id = engine_id, + logical_engine_id, + sequence_id, + micro_job_id, + nonce = format_args!("{:#010x}", nonce_submit), + nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), + sequence, + timecode, + ntime_offset, + expected_sequence_id = assigned.sequence_id, + nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), + observed_leading_zeros_bits = observed_leading_zeros, + achieved_difficulty = %achieved_difficulty, + achieved_difficulty_f64 = format_args!("{:.3e}", achieved_difficulty_f64), + target_difficulty = %target_difficulty, + target_difficulty_f64 = format_args!("{:.3e}", target_difficulty_f64), + "BZM2 candidate met task share target" + ); + } else if check_result == Bzm2CheckResult::NotMeetTarget { + trace!( + asic_hw_id, + engine_hw_id = engine_id, + logical_engine_id, + sequence_id, + micro_job_id, + nonce = format_args!("{:#010x}", nonce_submit), + nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), sequence, timecode, + ntime_offset, + expected_sequence_id = assigned.sequence_id, + nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), + observed_leading_zeros_bits = observed_leading_zeros, + achieved_difficulty = 
%achieved_difficulty, + achieved_difficulty_f64 = format_args!("{:.3e}", achieved_difficulty_f64), + target_difficulty = %target_difficulty, + target_difficulty_f64 = format_args!("{:.3e}", target_difficulty_f64), "BZM2 nonce filtered by share target" ); + } else { + if sanity_diagnostic_samples < SANITY_DIAGNOSTIC_LIMIT { + sanity_diagnostic_samples = sanity_diagnostic_samples.saturating_add(1); + + let header_bytes = consensus::serialize(&header); + let base_ntime = assigned.task.ntime; + let mut probes = Vec::new(); + let focused = focused_readresult_diagnostic( + &assigned, + sequence, + timecode, + nonce_raw, + ); + + probes.push(format!( + "current({})", + validation_probe_summary( + &assigned, + share_version, + share_ntime, + nonce_submit, + ) + )); + probes.push(format!( + "raw_nonce({})", + validation_probe_summary(&assigned, share_version, share_ntime, nonce_raw) + )); + for gap in [0x14u32, 0x28, 0x4c, 0x98] { + probes.push(format!( + "gap_{gap:#x}({})", + validation_probe_summary( + &assigned, + share_version, + share_ntime, + nonce_raw.wrapping_sub(gap).swap_bytes(), + ) + )); + } + probes.push(format!( + "time_base({})", + validation_probe_summary( + &assigned, + share_version, + base_ntime, + nonce_submit, + ) + )); + probes.push(format!( + "time_plus_tc({})", + validation_probe_summary( + &assigned, + share_version, + base_ntime.wrapping_add(u32::from(timecode)), + nonce_submit, + ) + )); + probes.push(format!( + "time_minus_tc({})", + validation_probe_summary( + &assigned, + share_version, + base_ntime.wrapping_sub(u32::from(timecode)), + nonce_submit, + ) + )); + for (alt_idx, alt_version) in + assigned.microjob_versions.iter().copied().enumerate() + { + probes.push(format!( + "ver_mj{alt_idx}({})", + validation_probe_summary( + &assigned, + alt_version, + share_ntime, + nonce_submit, + ) + )); + } + + debug!( + asic_hw_id, + engine_id, + sequence_id, + micro_job_id, + sequence, + timecode, + result_status, + nonce_raw, + nonce_adjusted = 
format_args!("{:#010x}", nonce_adjusted), + nonce_submit = format_args!("{:#010x}", nonce_submit), + assigned_sequence_id = assigned.sequence_id, + assigned_timestamp_count = assigned.timestamp_count, + assigned_nonce_minus = format_args!("{:#x}", assigned.nonce_minus_value), + base_ntime = format_args!("{:#x}", base_ntime), + selected_ntime = format_args!("{:#x}", share_ntime), + selected_version = format_args!("{:#x}", share_version.to_consensus() as u32), + bits = format_args!("{:#x}", assigned.task.template.bits.to_consensus()), + header = %format_hex(&header_bytes), + focused = %focused, + probes = %probes.join(" | "), + "BZM2 READRESULT sanity diagnostic" + ); + } + + trace!( + asic_hw_id, + engine_hw_id = engine_id, + logical_engine_id, + sequence_id, + micro_job_id, + nonce = format_args!("{:#010x}", nonce_submit), + nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), + sequence, + timecode, + ntime_offset, + expected_sequence_id = assigned.sequence_id, + nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), + observed_leading_zeros_bits = observed_leading_zeros, + hash_msb = format_args!("{:#04x}", hash_bytes[31]), + "BZM2 nonce rejected by leading-zeros sanity check" + ); + } + + if sanity_candidates_total % 500 == 0 { + debug!( + total_candidates = sanity_candidates_total, + candidates_meeting_task_target = sanity_candidates_meet_task, + best_achieved_difficulty = %sanity_best_difficulty + .expect("sanity_best_difficulty is set when total_candidates > 0"), + best_achieved_difficulty_f64 = format_args!("{:.3e}", sanity_best_difficulty + .expect("sanity_best_difficulty is set when total_candidates > 0") + .as_f64()), + current_target_difficulty = %target_difficulty, + current_target_difficulty_f64 = format_args!("{:.3e}", target_difficulty_f64), + "BZM2 candidate sanity summary" + ); } } Err(e) => { @@ -1378,21 +3032,338 @@ async fn bzm2_thread_actor( #[cfg(test)] mod tests { - use super::expand_counter_into_mask; + use 
std::sync::Arc; + + use bitcoin::{block::Header as BlockHeader, hashes::Hash as _}; + use bytes::BytesMut; + use serde_json::json; + use tokio::sync::mpsc; + use tokio_util::codec::Decoder as _; + + use crate::{ + asic::hash_thread::HashTask, + job_source::{ + Extranonce2, Extranonce2Range, GeneralPurposeBits, JobTemplate, MerkleRootKind, + MerkleRootTemplate, VersionTemplate, + }, + stratum_v1::JobNotification, + types::Difficulty, + }; + + use super::{ + AssignedTask, BZM2_NONCE_MINUS, Bzm2CheckResult, ENGINE_LEADING_ZEROS, + ENGINE_TIMESTAMP_COUNT, EngineAssignment, MIDSTATE_COUNT, WORK_ENGINE_COUNT, + bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, midstate_version_mask_variants, + check_result, hash_bytes_bzm2_order, protocol, resolve_readresult_fields, + task_to_bzm2_payload, task_midstate_versions, + }; + + #[test] + fn test_midstate_version_mask_variants_for_full_mask() { + assert_eq!( + midstate_version_mask_variants(0x1fff_e000), + [0x0000_0000, 0x0000_e000, 0x1fff_0000, 0x1fff_e000] + ); + } + + #[test] + fn test_midstate_version_mask_variants_for_zero_mask() { + assert_eq!(midstate_version_mask_variants(0), [0, 0, 0, 0]); + } + + #[test] + fn test_resolve_readresult_fields_prefers_raw_when_slot_exists() { + let active_slots = [32u8, 0u8]; + let fields = resolve_readresult_fields(0x80, 0xbc, |slot| active_slots.contains(&slot)) + .expect("raw slot should resolve"); + assert_eq!(fields.sequence, 0x80); + assert_eq!(fields.timecode, 0xbc); + assert_eq!(fields.sequence_id, 32); + assert_eq!(fields.micro_job_id, 0); + assert!(!fields.used_masked_fields); + } #[test] - fn test_expand_counter_into_contiguous_mask() { - assert_eq!(expand_counter_into_mask(0b0011, 0), 0b0000); - assert_eq!(expand_counter_into_mask(0b0011, 1), 0b0001); - assert_eq!(expand_counter_into_mask(0b0011, 2), 0b0010); - assert_eq!(expand_counter_into_mask(0b0011, 3), 0b0011); + fn test_resolve_readresult_fields_uses_masked_fallback() { + let active_slots = [0u8]; + let fields 
= resolve_readresult_fields(0x82, 0xbc, |slot| active_slots.contains(&slot)) + .expect("masked slot should resolve"); + assert_eq!(fields.sequence, 0x02); + assert_eq!(fields.timecode, 0x3c); + assert_eq!(fields.sequence_id, 0); + assert_eq!(fields.micro_job_id, 2); + assert!(fields.used_masked_fields); } #[test] - fn test_expand_counter_into_sparse_mask() { - assert_eq!(expand_counter_into_mask(0b1010, 0), 0b0000); - assert_eq!(expand_counter_into_mask(0b1010, 1), 0b0010); - assert_eq!(expand_counter_into_mask(0b1010, 2), 0b1000); - assert_eq!(expand_counter_into_mask(0b1010, 3), 0b1010); + fn test_resolve_readresult_fields_none_when_no_slot_matches() { + let active_slots = [0u8]; + let fields = resolve_readresult_fields(0xfd, 0x7f, |slot| active_slots.contains(&slot)); + assert!(fields.is_none()); + } + + #[test] + fn test_check_result_leading_zeros_error() { + let mut hash = [0u8; 32]; + let target = [0xffu8; 32]; + hash[31] = 0x80; + assert_eq!( + check_result(&hash, &target, 32), + Bzm2CheckResult::Error + ); + } + + #[test] + fn test_check_result_accepts_required_leading_zeros() { + let mut hash = [0u8; 32]; + let target = [0xffu8; 32]; + hash[27] = 0x3f; + assert_eq!( + check_result(&hash, &target, 34), + Bzm2CheckResult::Correct + ); + } + + #[test] + fn test_check_result_rejects_missing_partial_zero_bits() { + let mut hash = [0u8; 32]; + let target = [0xffu8; 32]; + hash[27] = 0x40; + assert_eq!( + check_result(&hash, &target, 34), + Bzm2CheckResult::Error + ); + } + + #[test] + fn test_check_result_target_compare() { + let mut hash = [0u8; 32]; + let mut target = [0u8; 32]; + + hash[1] = 0x10; + target[1] = 0x20; + assert_eq!( + check_result(&hash, &target, 32), + Bzm2CheckResult::Correct + ); + + hash[1] = 0x30; + target[1] = 0x20; + assert_eq!( + check_result(&hash, &target, 32), + Bzm2CheckResult::NotMeetTarget + ); + } + + #[test] + fn test_hash_bytes_bzm2_order_keeps_digest_order() { + let src = core::array::from_fn(|i| i as u8); + let hash = 
bitcoin::BlockHash::from_byte_array(src); + assert_eq!(hash_bytes_bzm2_order(&hash), src); + } + + #[test] + fn test_bzm2_double_sha_matches_known_trace_sample() { + // Captured from birds-bzm2 valid-share-hash-input logging. + let midstate = + hex::decode("07348faef527b8ec3733171cb0781bc545efb4220d71e0a5b54af23de2106bfd") + .expect("midstate hex should parse"); + let tail16 = + hex::decode("ef70e3ac38979a6903f301176467a52b").expect("tail16 hex should parse"); + let expected_double_sha = + hex::decode("25ef6a2327c5304bd263126a6a38ad16c3b27cd8b647085624a7130000000000") + .expect("double sha hex should parse"); + let midstate: [u8; 32] = midstate.try_into().expect("midstate must be 32 bytes"); + let tail16: [u8; 16] = tail16.try_into().expect("tail16 must be 16 bytes"); + let expected_double_sha: [u8; 32] = expected_double_sha + .try_into() + .expect("double sha must be 32 bytes"); + assert_eq!( + bzm2_double_sha_from_midstate_and_tail(&midstate, &tail16), + expected_double_sha + ); + } + + #[test] + fn test_readresult_hash_check_with_known_good_bzm2_share() { + // Job + accepted share captured from known working messages + // - notify: job_id=18965aa3c6b2c4cf, ntime=0x699a9733, version mask 0x1fffe000 + // - accepted submit: en2=7200000000000000, ntime=699a9735, nonce=1c1a2bff, vmask=1fff0000 + let notify_params = json!([ + "18965aa3c6b2c4cf", + "fe207277906478ce38c2ea1089c75d1da29c36ff0000a8a70000000000000000", + "02000000010000000000000000000000000000000000000000000000000000000000000000ffffffff2c03304f0e01000438979a69041dd270030c", + "0e6879647261706f6f6c2f32353666ffffffff02e575a31200000000160014c64b1b9283ba1ea86bb9e7b696b0c8f68dad04000000000000000000266a24aa21a9ed413814acda23cadaad2f189d0dd7794ab6892d1eaad4b1a1433156a31ccb62a800000000", + [ + "be51038f82c6f95e407ff56a88a85e179935927e20ec26994e453c858c52b2d5", + "41f1b3ef96540488c96e6a53ca5156541082ab6d670e87069d84ca600fe32323", + "ef3b47f15c4e98960b53cbd23c6bc6ce29ffcfa6d5c23b869db0a8e5699e7b0d", + 
"48653d2575674cfd6417dee08bafd2de5246ff615c8b3af9829d21d972ad4e73", + "2013f4b7781327c760228203e073a252ed48547770c7033fb283e521dbf062d2", + "a83041e9c9bdc76e5fe2be707c6b114d6f33a4e42632fe8d79f1015e1a0c8caf", + "8f136aca72f1f36a1e7ac1a40b3a2dd0cf7fc8e36be6a8c1f520933b1511cdf0", + "93dc2365dce4dece9d317654715c0a7bcfa6a175afba9693199dd0dacb9bab15", + "3f11ffc73e9f01af072a495c47b03bec824eeab3fc7e92e1f52907d16516764d" + ], + "20000000", + "1701f303", + "699a9733", + true + ]); + let job = JobNotification::from_stratum_params( + notify_params + .as_array() + .expect("notify_params must be an array"), + ) + .expect("notify params should parse"); + + let en2_size = 8u8; + let en2_bytes = hex::decode("7200000000000000").expect("en2 hex should parse"); + let en2_value = + u64::from_le_bytes(en2_bytes.as_slice().try_into().expect("en2 size must be 8")); + let en2 = Extranonce2::new(en2_value, en2_size).expect("en2 should construct"); + + let template = Arc::new(JobTemplate { + id: job.job_id, + prev_blockhash: job.prev_hash, + version: VersionTemplate::new( + job.version, + GeneralPurposeBits::from(&0x1fffe000u32.to_be_bytes()), + ) + .expect("version template should construct"), + bits: job.nbits, + share_target: Difficulty::from(1000u64).to_target(), + time: job.ntime, + merkle_root: MerkleRootKind::Computed(MerkleRootTemplate { + coinbase1: job.coinbase1, + extranonce1: hex::decode("e1a253ac").expect("extranonce1 hex should parse"), + extranonce2_range: Extranonce2Range::new(en2_size) + .expect("en2 range should construct"), + coinbase2: job.coinbase2, + merkle_branches: job.merkle_branches, + }), + }); + + let (share_tx, _share_rx) = mpsc::channel(1); + let task = HashTask { + template: Arc::clone(&template), + en2_range: Some(Extranonce2Range::new(en2_size).expect("en2 range should construct")), + en2: Some(en2), + share_target: Difficulty::from(1000u64).to_target(), + ntime: template.time, + share_tx, + }; + + let merkle_root = template + .compute_merkle_root(&en2) + 
.expect("merkle root should compute"); + let microjob_versions = task_midstate_versions(&task); + let payload = task_to_bzm2_payload(&task, merkle_root, microjob_versions) + .expect("payload should derive"); + let engine_assignments = vec![ + EngineAssignment { + merkle_root, + extranonce2: task.en2, + midstates: payload.midstates, + }; + WORK_ENGINE_COUNT + ]; + let assigned = AssignedTask { + task, + merkle_root, + engine_assignments: Arc::from(engine_assignments.into_boxed_slice()), + microjob_versions, + sequence_id: 0, + timestamp_count: ENGINE_TIMESTAMP_COUNT, + leading_zeros: ENGINE_LEADING_ZEROS, + nonce_minus_value: BZM2_NONCE_MINUS, + }; + + // Reconstruct an on-wire READRESULT frame for the accepted share: + // status=0x8 (valid), engine_id=0x001, sequence=2 (micro-job 2), timecode=0x3a. + // READRESULT adjusted nonce is byte-swapped before Stratum submit. + let expected_nonce_submit = 0x1c1a_2bffu32; + let expected_nonce_adjusted = expected_nonce_submit.swap_bytes(); + let expected_ntime = 0x699a_9735u32; + let expected_version = 0x3fff_0000u32; + let ntime_delta = expected_ntime.wrapping_sub(assigned.task.ntime); + assert_eq!( + ntime_delta, 2, + "test fixture ntime delta must match capture" + ); + + let raw_nonce = expected_nonce_adjusted.wrapping_add(BZM2_NONCE_MINUS); + let raw_frame = [ + 0x0a, + protocol::Opcode::ReadResult as u8, + 0x80, + 0x01, + (raw_nonce & 0xff) as u8, + ((raw_nonce >> 8) & 0xff) as u8, + ((raw_nonce >> 16) & 0xff) as u8, + ((raw_nonce >> 24) & 0xff) as u8, + 0x02, + ENGINE_TIMESTAMP_COUNT.wrapping_sub(ntime_delta as u8), + ]; + + let mut codec = protocol::FrameCodec::default(); + let mut src = BytesMut::from(&raw_frame[..]); + let response = codec + .decode(&mut src) + .expect("decode should succeed") + .expect("frame should decode"); + + let protocol::Response::ReadResult { + engine_id, + status, + nonce: nonce_raw, + sequence, + timecode, + .. 
+ } = response + else { + panic!("expected READRESULT response"); + }; + assert_eq!(engine_id, 0x001); + assert_eq!(status, 0x8); + + let sequence_id = sequence / (MIDSTATE_COUNT as u8); + let micro_job_id = sequence % (MIDSTATE_COUNT as u8); + assert_eq!(sequence_id, assigned.sequence_id); + + let share_version = assigned.microjob_versions[micro_job_id as usize]; + let ntime_offset = u32::from(assigned.timestamp_count.wrapping_sub(timecode)); + let share_ntime = assigned.task.ntime.wrapping_add(ntime_offset); + let nonce_adjusted = nonce_raw.wrapping_sub(assigned.nonce_minus_value); + let nonce_submit = nonce_adjusted.swap_bytes(); + + assert_eq!(share_version.to_consensus() as u32, expected_version); + assert_eq!(share_ntime, expected_ntime); + assert_eq!(nonce_adjusted, expected_nonce_adjusted); + assert_eq!(nonce_submit, expected_nonce_submit); + + let header = BlockHeader { + version: share_version, + prev_blockhash: assigned.task.template.prev_blockhash, + merkle_root: assigned.merkle_root, + time: share_ntime, + bits: assigned.task.template.bits, + nonce: nonce_submit, + }; + let hash = header.block_hash(); + let hash_bytes = hash_bytes_bzm2_order(&hash); + let tail16 = bzm2_tail16_bytes(&assigned, share_ntime, nonce_submit); + let bzm2_hash_bytes = bzm2_double_sha_from_midstate_and_tail( + &assigned.engine_assignments[0].midstates[micro_job_id as usize], + &tail16, + ); + let target_bytes = assigned.task.share_target.to_le_bytes(); + + assert_eq!(hash_bytes, bzm2_hash_bytes); + assert_eq!( + check_result(&hash_bytes, &target_bytes, assigned.leading_zeros), + Bzm2CheckResult::Correct + ); + assert!(assigned.task.share_target.is_met_by(hash)); } } From 1ad085e7b8df498619c55f888cdd54ba057dc331 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Fri, 6 Mar 2026 08:55:38 -0500 Subject: [PATCH 12/19] feat(board): initialize BIRDS data port over BZM2 --- mujina-miner/src/asic/bzm2/init.rs | 223 +++++++++++++++++ mujina-miner/src/asic/bzm2/mod.rs | 34 ++- 
mujina-miner/src/asic/bzm2/protocol.rs | 186 +++++++++++--- mujina-miner/src/asic/bzm2/smoke.rs | 147 ----------- mujina-miner/src/board/birds.rs | 330 ++++++++++++++++--------- 5 files changed, 626 insertions(+), 294 deletions(-) create mode 100644 mujina-miner/src/asic/bzm2/init.rs delete mode 100644 mujina-miner/src/asic/bzm2/smoke.rs diff --git a/mujina-miner/src/asic/bzm2/init.rs b/mujina-miner/src/asic/bzm2/init.rs new file mode 100644 index 0000000..7c43d8e --- /dev/null +++ b/mujina-miner/src/asic/bzm2/init.rs @@ -0,0 +1,223 @@ +//! BZM2 data-port initialization helpers. +//! +//! This module performs the board-time transport probe that happens before the +//! hashing thread takes ownership of the UART. Initialization here uses the +//! real protocol codec and returns a ready-to-use framed transport on success. + +use anyhow::{Context, Result, anyhow, bail}; +use futures::SinkExt; +use tokio::io::AsyncReadExt; +use tokio::time::{self, Duration}; +use tokio_stream::StreamExt; +use tokio_util::codec::{FramedRead, FramedWrite}; + +use super::{ + Bzm2Protocol, FrameCodec, HexBytes, ReadRegData, Response, + protocol::{DEFAULT_ASIC_ID, NOOP_STRING}, +}; +use crate::transport::serial::{SerialControl, SerialReader, SerialStream, SerialWriter}; + +/// Default BZM2 UART baud rate used by the BIRDS data port. +pub const DEFAULT_BZM2_DATA_BAUD: u32 = 5_000_000; + +/// Default timeout for each initialization request/response step. +pub const DEFAULT_IO_TIMEOUT: Duration = Duration::from_secs(2); + +/// Result of probing one ASIC during board initialization. +#[derive(Debug, Clone, Copy)] +pub struct ProbeResult { + /// Logical ASIC index that was probed. + pub logical_asic: u8, + /// Hardware UART ID observed on the response path. + pub asic_hw_id: u8, + /// Raw `ASIC_ID` register value returned by the chip. + pub asic_id: u32, +} + +/// Framed BZM2 data-port transport that has already passed initialization. 
+pub struct InitializedDataPort { + /// Probe metadata collected during initialization. + pub probe: ProbeResult, + /// Decoded response stream for subsequent hashing logic. + pub reader: FramedRead, + /// Encoded command sink for subsequent hashing logic. + pub writer: FramedWrite, + /// Control handle associated with the serial data port. + pub control: SerialControl, +} + +fn expect_noop_response(response: Response) -> Result { + match response { + Response::Noop { + asic_hw_id, + signature, + } if signature == *NOOP_STRING => Ok(asic_hw_id), + Response::Noop { signature, .. } => { + bail!("NOOP signature mismatch: got {:02x?}", signature) + } + other => bail!("expected NOOP response, got {:?}", other), + } +} + +fn expect_asic_id_response(expected_asic_hw_id: u8, response: Response) -> Result { + match response { + Response::ReadReg { + asic_hw_id, + data: ReadRegData::U32(asic_id), + } if asic_hw_id == expected_asic_hw_id => Ok(asic_id), + Response::ReadReg { asic_hw_id, data } => bail!( + "READREG(ASIC_ID) response mismatch: expected ASIC 0x{expected_asic_hw_id:02X}, got ASIC 0x{asic_hw_id:02X} with payload {:?}", + data + ), + other => bail!("expected READREG(ASIC_ID) response, got {:?}", other), + } +} + +async fn next_response( + reader: &mut FramedRead, + timeout: Duration, + context: &str, +) -> Result { + let response = time::timeout(timeout, reader.next()) + .await + .with_context(|| format!("timeout waiting for {context}"))? + .transpose() + .with_context(|| format!("read error while waiting for {context}"))? + .ok_or_else(|| anyhow!("BZM2 response stream closed while waiting for {context}"))?; + Ok(response) +} + +/// Open, probe, and return an initialized BZM2 data port using default +/// transport settings. 
+pub async fn initialize_data_port( + serial_port: &str, + logical_asic: u8, +) -> Result { + initialize_data_port_with_options( + serial_port, + logical_asic, + DEFAULT_BZM2_DATA_BAUD, + DEFAULT_IO_TIMEOUT, + ) + .await +} + +/// Open, probe, and return an initialized BZM2 data port using explicit +/// transport settings. +pub async fn initialize_data_port_with_options( + serial_port: &str, + logical_asic: u8, + baud: u32, + timeout: Duration, +) -> Result { + let protocol = Bzm2Protocol::new(); + let serial = SerialStream::new(serial_port, baud) + .with_context(|| format!("failed to open serial port {}", serial_port))?; + let (mut raw_reader, writer, control) = serial.split(); + + // Reset/power-up can leave transient bytes on the data UART. Drain any + // pending bytes before issuing the first command. + drain_input_noise(&mut raw_reader).await; + + let mut reader = FramedRead::new(raw_reader, FrameCodec::default()); + let mut writer = FramedWrite::new(writer, FrameCodec::default()); + + writer + .send(protocol.noop(DEFAULT_ASIC_ID)) + .await + .context("failed to send NOOP")?; + let noop_response = next_response(&mut reader, timeout, "NOOP response").await?; + let asic_hw_id = expect_noop_response(noop_response)?; + + writer + .send(protocol.read_asic_id(asic_hw_id)) + .await + .context("failed to send READREG(ASIC_ID)")?; + let asic_id_response = next_response(&mut reader, timeout, "READREG(ASIC_ID) response").await?; + let asic_id = expect_asic_id_response(asic_hw_id, asic_id_response)?; + + Ok(InitializedDataPort { + probe: ProbeResult { + logical_asic, + asic_hw_id, + asic_id, + }, + reader, + writer, + control, + }) +} + +async fn drain_input_noise(reader: &mut SerialReader) { + let mut scratch = [0u8; 256]; + loop { + match time::timeout(Duration::from_millis(20), reader.read(&mut scratch)).await { + Ok(Ok(0)) => break, + Ok(Ok(n)) => { + tracing::debug!( + bytes = n, + rx = %HexBytes(&scratch[..n]), + "BZM2 init drained residual input" + ); + continue; + 
} + Ok(Err(_)) => break, + Err(_elapsed) => break, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_expect_noop_response_accepts_expected_signature() { + let asic_hw_id = expect_noop_response(Response::Noop { + asic_hw_id: DEFAULT_ASIC_ID, + signature: *NOOP_STRING, + }) + .unwrap(); + assert_eq!(asic_hw_id, DEFAULT_ASIC_ID); + } + + #[test] + fn test_expect_noop_response_rejects_non_noop_response() { + let error = expect_noop_response(Response::ReadReg { + asic_hw_id: DEFAULT_ASIC_ID, + data: ReadRegData::U32(0x1234_5678), + }) + .expect_err("non-NOOP response must fail"); + assert!(error.to_string().contains("expected NOOP response")); + } + + #[test] + fn test_expect_asic_id_response_accepts_matching_u32_payload() { + let asic_id = expect_asic_id_response( + DEFAULT_ASIC_ID, + Response::ReadReg { + asic_hw_id: DEFAULT_ASIC_ID, + data: ReadRegData::U32(0x1234_5678), + }, + ) + .unwrap(); + assert_eq!(asic_id, 0x1234_5678); + } + + #[test] + fn test_expect_asic_id_response_rejects_mismatched_payload_type() { + let error = expect_asic_id_response( + DEFAULT_ASIC_ID, + Response::ReadReg { + asic_hw_id: DEFAULT_ASIC_ID, + data: ReadRegData::U8(0x12), + }, + ) + .expect_err("unexpected payload type must fail"); + assert!( + error + .to_string() + .contains("READREG(ASIC_ID) response mismatch") + ); + } +} diff --git a/mujina-miner/src/asic/bzm2/mod.rs b/mujina-miner/src/asic/bzm2/mod.rs index 6cf0b87..d2f9868 100644 --- a/mujina-miner/src/asic/bzm2/mod.rs +++ b/mujina-miner/src/asic/bzm2/mod.rs @@ -1,9 +1,37 @@ -//! BZM2 ASIC protocol support. +//! BZM2 ASIC family support. +//! +//! The BZM2 implementation is split into focused modules: +//! - [`protocol`] owns wire-format types and the Tokio codec. +//! - [`thread`] owns the `HashThread` actor and chip bring-up sequence. +//! - [`init`] owns board-time transport probing before the hash thread takes +//! over the UART. +//! - [`error`] contains protocol-specific validation errors. +//! 
+//! BIRDS boards use this module for both board-time initialization and +//! production hashing. Keeping the low-level helpers centralized avoids board +//! code having to duplicate protocol details. + +use std::fmt; + +/// Wrapper for formatting byte slices as space-separated uppercase hex. +pub(crate) struct HexBytes<'a>(pub(crate) &'a [u8]); + +impl fmt::Display for HexBytes<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (i, byte) in self.0.iter().enumerate() { + if i > 0 { + write!(f, " ")?; + } + write!(f, "{:02X}", byte)?; + } + Ok(()) + } +} pub mod error; +pub mod init; pub mod protocol; -pub mod smoke; pub mod thread; pub use error::ProtocolError; -pub use protocol::{Command, FrameCodec, Opcode, ReadRegData, Response}; +pub use protocol::{Bzm2Protocol, Command, FrameCodec, Opcode, ReadRegData, Response}; diff --git a/mujina-miner/src/asic/bzm2/protocol.rs b/mujina-miner/src/asic/bzm2/protocol.rs index 3479ba6..7df487c 100644 --- a/mujina-miner/src/asic/bzm2/protocol.rs +++ b/mujina-miner/src/asic/bzm2/protocol.rs @@ -1,9 +1,14 @@ //! BZM2 wire protocol and frame codec. //! -//! This module implements pass-1 support for bring-up: -//! - Command encoding for `NOOP`, `READREG`, `WRITEREG` +//! This module defines the wire-format types used by the BIRDS BZM2 transport: +//! - Command encoding for `NOOP`, `READREG`, `WRITEREG`, multicast writes, and +//! `WRITEJOB` //! - Response decoding for `NOOP`, `READREG`, `READRESULT`, and `DTS/VS` -//! - 9-bit TX framing via the BIRDS USB bridge format +//! - 9-bit transmit framing via the BIRDS USB bridge format +//! +//! The API surface is intentionally narrow: higher-level chip initialization and +//! mining logic live in [`super::thread`], while this module stays responsible +//! for translating typed commands and responses to bytes. 
use std::io; @@ -11,32 +16,38 @@ use bytes::{Buf, BufMut, Bytes, BytesMut}; use strum::FromRepr; use tokio_util::codec::{Decoder, Encoder}; -use super::error::ProtocolError; +use super::{HexBytes, error::ProtocolError}; use crate::transport::nine_bit::nine_bit_encode_frame; +/// ASCII marker returned by the ASIC in identification contexts. pub const ASIC_STRING: &[u8; 3] = b"BZ2"; +/// Signature returned by a successful `NOOP` response. pub const NOOP_STRING: &[u8; 3] = b"2ZB"; +/// Default hardware ASIC ID before per-chip addressing is assigned. pub const DEFAULT_ASIC_ID: u8 = 0xfa; +/// Distance between logical ASIC indices and hardware UART IDs. pub const ASIC_HW_ID_STRIDE: u8 = 10; +/// Total engine count exposed by one BZM2 ASIC package. pub const ENGINES_PER_ASIC: usize = 240; +/// Engine selector used for local-register accesses. pub const NOTCH_REG: u16 = 0x0fff; +/// Engine selector used for BIST register accesses. pub const BIST_REG: u16 = 0x0fc0; +/// Broadcast ASIC target used by commands that address every device. pub const BROADCAST_ASIC: u8 = 0xff; +/// Broadcast engine/group target used for wide writes. pub const BROADCAST_ENGINE: u16 = 0x00ff; +/// Terminator byte appended to transmit frames by the bridge format. pub const TERM_BYTE: u8 = 0xa5; +/// Target selector used by `READREG` requests. pub const TAR_BYTE: u8 = 0x08; +/// Register offset encoded in `WRITEJOB` headers. pub const WRITEJOB_OFFSET: u16 = 41; -fn format_hex(data: &[u8]) -> String { - data.iter() - .map(|byte| format!("{:02X}", byte)) - .collect::>() - .join(" ") -} - +/// Engine-local register offsets used by job dispatch and result handling. pub mod engine_reg { pub const STATUS: u16 = 0x00; pub const CONFIG: u16 = 0x01; @@ -58,6 +69,7 @@ pub mod engine_reg { pub const RESULT_POP: u16 = 0x77; } +/// Local control and sensor register offsets for one ASIC. 
pub mod local_reg { pub const RESULT_STS_CTL: u16 = 0x00; pub const ERROR_LOG0: u16 = 0x01; @@ -124,6 +136,7 @@ pub mod local_reg { pub const CKDLLR_1_1: u16 = 0x63; } +/// BIST register offsets used during engine self-test programming. pub mod bist_reg { pub const RESULT_FSM_CTL: u16 = 0x00; pub const ERROR_LOG0: u16 = 0x01; @@ -170,16 +183,25 @@ pub mod bist_reg { } } +/// Supported BZM2 opcodes. #[derive(Debug, Clone, Copy, PartialEq, Eq, FromRepr)] #[repr(u8)] pub enum Opcode { + /// Push one micro-job into an engine. WriteJob = 0x0, + /// Read a completed result from an engine. ReadResult = 0x1, + /// Write one or more bytes to a register region. WriteReg = 0x2, + /// Read one, two, or four bytes from a register region. ReadReg = 0x3, + /// Write one or more bytes to a register group or broadcast target. MulticastWrite = 0x4, + /// Read temperature/voltage sensor telemetry. DtsVs = 0x0d, + /// Loopback command used by bring-up and diagnostics. Loopback = 0x0e, + /// Lightweight connectivity check that returns `NOOP_STRING`. Noop = 0x0f, } @@ -192,13 +214,14 @@ pub fn logical_to_hw_asic_id(logical_asic: u8) -> u8 { /// Translate hardware ASIC ID from UART into logical ASIC index. pub fn hw_to_logical_asic_id(hw_asic_id: u8) -> Option { - if hw_asic_id < ASIC_HW_ID_STRIDE || hw_asic_id % ASIC_HW_ID_STRIDE != 0 { + if hw_asic_id < ASIC_HW_ID_STRIDE || !hw_asic_id.is_multiple_of(ASIC_HW_ID_STRIDE) { return None; } Some((hw_asic_id / ASIC_HW_ID_STRIDE) - 1) } +/// Commands that can be sent to one or more BZM2 ASICs. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Command { /// Push a job payload to one engine. @@ -241,6 +264,7 @@ pub enum Command { } impl Command { + /// Build a single `WRITEJOB` command containing one midstate variant. pub fn write_job_single_midstate( asic_hw_id: u8, engine: u16, @@ -321,6 +345,7 @@ impl Command { ]) } + /// Build a `READREG` command for a four-byte little-endian register. 
pub fn read_reg_u32(asic_hw_id: u8, engine: u16, offset: u16) -> Self { Self::ReadReg { asic_hw_id, @@ -330,6 +355,7 @@ impl Command { } } + /// Build a single-byte `WRITEREG` command. pub fn write_reg_u8(asic_hw_id: u8, engine: u16, offset: u16, value: u8) -> Self { Self::WriteReg { asic_hw_id, @@ -339,6 +365,7 @@ impl Command { } } + /// Build a four-byte little-endian `WRITEREG` command. pub fn write_reg_u32_le(asic_hw_id: u8, engine: u16, offset: u16, value: u32) -> Self { Self::WriteReg { asic_hw_id, @@ -348,6 +375,7 @@ impl Command { } } + /// Build a single-byte multicast register write. pub fn multicast_write_u8(asic_hw_id: u8, group: u16, offset: u16, value: u8) -> Self { Self::MulticastWrite { asic_hw_id, @@ -468,23 +496,25 @@ impl Command { } } +/// Typed payload returned by a `READREG` response. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ReadRegData { + /// One-byte register payload. U8(u8), + /// Two-byte little-endian register payload. U16(u16), + /// Four-byte little-endian register payload. U32(u32), } +/// Responses decoded from the BZM2 UART receive stream. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Response { - Noop { - asic_hw_id: u8, - signature: [u8; 3], - }, - ReadReg { - asic_hw_id: u8, - data: ReadRegData, - }, + /// Response to a `NOOP` request. + Noop { asic_hw_id: u8, signature: [u8; 3] }, + /// Response to a `READREG` request. + ReadReg { asic_hw_id: u8, data: ReadRegData }, + /// Completed result from one engine. ReadResult { asic_hw_id: u8, engine_id: u16, @@ -493,12 +523,11 @@ pub enum Response { sequence: u8, timecode: u8, }, - DtsVs { - asic_hw_id: u8, - data: DtsVsData, - }, + /// Temperature and voltage sensor telemetry. + DtsVs { asic_hw_id: u8, data: DtsVsData }, } +/// Decoded payload for the `DTS/VS` response family. #[derive(Debug, Clone, PartialEq, Eq)] pub enum DtsVsData { /// Generation-1 payload (`uart_dts_vs_msg`) represented as a big-endian `u32`. 
@@ -572,6 +601,24 @@ impl FrameCodec { } Self::is_plausible_asic_hw_id(buf[0]) && Self::response_opcode(buf[1]).is_some() } + + fn echoed_tx_prefix_len(buf: &[u8]) -> Option { + if buf.len() < 6 || buf[1] != 0x01 { + return None; + } + + for offset in (4..=buf.len().saturating_sub(2)).step_by(2) { + if buf[offset - 1] != 0x00 { + return None; + } + + if Self::is_plausible_response_header(&buf[offset..]) { + return Some(offset); + } + } + + None + } } impl Encoder for FrameCodec { @@ -581,8 +628,8 @@ impl Encoder for FrameCodec { let raw = item.encode_raw().map_err(Self::io_error)?; let encoded = nine_bit_encode_frame(&raw); tracing::debug!( - raw = %format_hex(&raw), - encoded = %format_hex(&encoded), + raw = %HexBytes(&raw), + encoded = %HexBytes(&encoded), "BZM2 tx frame" ); dst.extend_from_slice(&encoded); @@ -601,6 +648,11 @@ impl Decoder for FrameCodec { return Ok(None); } + if let Some(prefix_len) = Self::echoed_tx_prefix_len(src) { + src.advance(prefix_len); + continue; + } + let opcode = match Self::response_opcode(src[1]) { Some(op) => op, None => { @@ -629,7 +681,7 @@ impl Decoder for FrameCodec { if src.len() < 5 { return Ok(None); } - tracing::debug!(rx = %format_hex(&src[..5]), "BZM2 rx NOOP frame"); + tracing::debug!(rx = %HexBytes(&src[..5]), "BZM2 rx NOOP frame"); let asic_hw_id = src[0]; let signature = [src[2], src[3], src[4]]; @@ -656,7 +708,7 @@ impl Decoder for FrameCodec { return Ok(None); } tracing::debug!( - rx = %format_hex(&src[..frame_len]), + rx = %HexBytes(&src[..frame_len]), "BZM2 rx READREG frame" ); @@ -686,7 +738,7 @@ impl Decoder for FrameCodec { // BIRDS/bzm2 layout packs [status:4 | engine_id:12] in network byte order. 
let engine_id = engine_status & 0x0fff; let status = ((engine_status >> 12) & 0x000f) as u8; - tracing::trace!(rx = %format_hex(&src[..FRAME_LEN]), "BZM2 rx READRESULT frame"); + tracing::trace!(rx = %HexBytes(&src[..FRAME_LEN]), "BZM2 rx READRESULT frame"); let asic_hw_id = src[0]; let nonce = u32::from_le_bytes([src[4], src[5], src[6], src[7]]); @@ -781,6 +833,77 @@ fn build_full_header(asic_hw_id: u8, opcode: Opcode, engine: u16, offset: u16) - ((asic_hw_id as u32) << 24) | ((opcode as u32) << 20) | ((engine as u32) << 8) | (offset as u32) } +/// Stateless command factory for BZM2 protocol operations. +/// +/// Callers that want a protocol-centric entry point can build common commands +/// through this type instead of constructing enum variants directly. +pub struct Bzm2Protocol; + +impl Default for Bzm2Protocol { + fn default() -> Self { + Self::new() + } +} + +impl Bzm2Protocol { + /// Create a new protocol helper. + pub fn new() -> Self { + Self + } + + /// Create a `NOOP` command for one ASIC. + pub fn noop(&self, asic_hw_id: u8) -> Command { + Command::Noop { asic_hw_id } + } + + /// Create a `READREG` command. + pub fn read_register(&self, asic_hw_id: u8, engine: u16, offset: u16, count: u8) -> Command { + Command::ReadReg { + asic_hw_id, + engine, + offset, + count, + } + } + + /// Create a four-byte `READREG` command for the `ASIC_ID` local register. + pub fn read_asic_id(&self, asic_hw_id: u8) -> Command { + self.read_register(asic_hw_id, NOTCH_REG, local_reg::ASIC_ID, 4) + } + + /// Create a `WRITEREG` command. + pub fn write_register( + &self, + asic_hw_id: u8, + engine: u16, + offset: u16, + value: Bytes, + ) -> Command { + Command::WriteReg { + asic_hw_id, + engine, + offset, + value, + } + } + + /// Create a multicast write command. 
+ pub fn multicast_write( + &self, + asic_hw_id: u8, + group: u16, + offset: u16, + value: Bytes, + ) -> Command { + Command::MulticastWrite { + asic_hw_id, + group, + offset, + value, + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -884,9 +1007,8 @@ mod tests { midstates[2][0] = 0x30; midstates[3][0] = 0x40; - let cmds = - Command::write_job(0x0a, 0x0123, midstates, 0x1122_3344, 0x5566_7788, 0xff, 3) - .expect("writejob should build"); + let cmds = Command::write_job(0x0a, 0x0123, midstates, 0x1122_3344, 0x5566_7788, 0xff, 3) + .expect("writejob should build"); let raw0 = cmds[0].clone().encode_raw().expect("encode should succeed"); let raw1 = cmds[1].clone().encode_raw().expect("encode should succeed"); diff --git a/mujina-miner/src/asic/bzm2/smoke.rs b/mujina-miner/src/asic/bzm2/smoke.rs deleted file mode 100644 index 2b8c041..0000000 --- a/mujina-miner/src/asic/bzm2/smoke.rs +++ /dev/null @@ -1,147 +0,0 @@ -//! BZM2 UART smoke test helpers. -//! -//! Used for early bring-up to verify basic command/response on the ASIC UART. - -use anyhow::{Context, Result, bail}; -use futures::SinkExt; -use tokio::io::AsyncReadExt; -use tokio::time::{self, Duration}; -use tokio_util::codec::FramedWrite; - -use super::{ - Command, FrameCodec, - protocol::{DEFAULT_ASIC_ID, NOOP_STRING, NOTCH_REG, local_reg}, -}; -use crate::transport::serial::SerialStream; - -/// Default BZM2 UART baud rate used by BIRDS data port. -pub const DEFAULT_BZM2_DATA_BAUD: u32 = 5_000_000; - -/// Default timeout for each request/response step. -pub const DEFAULT_IO_TIMEOUT: Duration = Duration::from_secs(2); - -fn format_hex(data: &[u8]) -> String { - data.iter() - .map(|byte| format!("{:02X}", byte)) - .collect::>() - .join(" ") -} - -/// Output from the smoke test. -#[derive(Debug, Clone, Copy)] -pub struct SmokeResult { - pub logical_asic: u8, - pub asic_hw_id: u8, - pub asic_id: u32, -} - -/// Run NOOP + READREG(ASIC_ID) smoke test on a BZM2 UART port. 
-pub async fn run_smoke(serial_port: &str, logical_asic: u8) -> Result { - run_smoke_with_options( - serial_port, - logical_asic, - DEFAULT_BZM2_DATA_BAUD, - DEFAULT_IO_TIMEOUT, - ) - .await -} - -/// Run smoke test with explicit baud and timeout. -pub async fn run_smoke_with_options( - serial_port: &str, - logical_asic: u8, - baud: u32, - timeout: Duration, -) -> Result { - // Initial bring-up uses BZM2 default ASIC ID (0xFA) before ID assignment. - let asic_hw_id = DEFAULT_ASIC_ID; - - let serial = SerialStream::new(serial_port, baud) - .with_context(|| format!("failed to open serial port {}", serial_port))?; - let (mut reader, writer, _control) = serial.split(); - let mut tx = FramedWrite::new( - writer, - FrameCodec::new(4).context("failed to construct BZM2 codec")?, - ); - - // Reset/power-up can leave transient bytes on the data UART. Drain any - // pending bytes before issuing the first command. - drain_input_noise(&mut reader).await; - - // Step 1: NOOP - tx.send(Command::Noop { asic_hw_id }) - .await - .context("failed to send NOOP")?; - - let mut noop_raw = [0u8; 5]; - time::timeout(timeout, reader.read_exact(&mut noop_raw)) - .await - .context("timeout waiting for NOOP response")? - .context("read error while waiting for NOOP response")?; - tracing::debug!( - asic_hw_id = format_args!("0x{:02X}", asic_hw_id), - rx = %format_hex(&noop_raw), - "BZM2 smoke NOOP rx" - ); - - let mut signature = [0u8; 3]; - signature.copy_from_slice(&noop_raw[2..5]); - if signature != *NOOP_STRING { - bail!( - "NOOP signature mismatch: got {:02x?} (raw={:02x?})", - signature, - noop_raw - ); - } - - // Step 2: READREG NOTCH_REG:LOCAL_REG_ASIC_ID - tx.send(Command::read_reg_u32( - asic_hw_id, - NOTCH_REG, - local_reg::ASIC_ID, - )) - .await - .context("failed to send READREG(ASIC_ID)")?; - - let mut readreg_raw = [0u8; 6]; - time::timeout(timeout, reader.read_exact(&mut readreg_raw)) - .await - .context("timeout waiting for READREG response")? 
- .context("read error while waiting for READREG response")?; - tracing::debug!( - asic_hw_id = format_args!("0x{:02X}", asic_hw_id), - rx = %format_hex(&readreg_raw), - "BZM2 smoke READREG rx" - ); - - let asic_id = u32::from_le_bytes( - readreg_raw[2..6] - .try_into() - .expect("slice is exactly 4 bytes"), - ); - - Ok(SmokeResult { - logical_asic, - asic_hw_id, - asic_id, - }) -} - -async fn drain_input_noise(reader: &mut crate::transport::serial::SerialReader) { - let mut scratch = [0u8; 256]; - loop { - match time::timeout(Duration::from_millis(20), reader.read(&mut scratch)).await { - Ok(Ok(0)) => break, - Ok(Ok(n)) => { - tracing::debug!( - bytes = n, - rx = %format_hex(&scratch[..n]), - "BZM2 smoke drained residual input" - ); - continue; - } - Ok(Err(_)) => break, - Err(_elapsed) => break, - } - } -} diff --git a/mujina-miner/src/board/birds.rs b/mujina-miner/src/board/birds.rs index 639ef4c..193d0b2 100644 --- a/mujina-miner/src/board/birds.rs +++ b/mujina-miner/src/board/birds.rs @@ -1,8 +1,15 @@ -//! BIRDS mining board support (stub). +//! BIRDS mining board support. //! //! The BIRDS board is a mining board with 4 BZM2 ASIC chips, communicating via //! USB using two serial ports: a control UART for GPIO/I2C and a data UART for //! ASIC communication with 8-bit to 9-bit serial translation. +//! +//! This module follows the same split of responsibilities as the BM13xx-backed +//! boards: +//! - the board owns USB discovery, power sequencing, and reset control +//! - the [`Bzm2Thread`] owns chip bring-up after the data path is handed off +//! - board-only protocol helpers stay local so they can be unit tested without +//! 
requiring attached hardware use async_trait::async_trait; use tokio::io::{AsyncReadExt, AsyncWriteExt}; @@ -16,14 +23,15 @@ use super::{ pattern::{BoardPattern, Match, StringMatch}, }; use crate::{ + api_client::types::BoardState, asic::{ - bzm2::{FrameCodec, smoke, thread::Bzm2Thread}, + bzm2::{FrameCodec, HexBytes, init, thread::Bzm2Thread}, hash_thread::{AsicEnable, BoardPeripherals, HashThread, ThreadRemovalSignal}, }, error::Error, transport::{ UsbDeviceInfo, - serial::{SerialControl, SerialReader, SerialStream, SerialWriter}, + serial::{SerialControl, SerialReader, SerialWriter}, }, }; @@ -49,16 +57,62 @@ const CTRL_ID_VR: u8 = 0xAA; /// Control protocol page for GPIO. const CTRL_PAGE_GPIO: u8 = 0x06; -fn format_hex(data: &[u8]) -> String { - data.iter() - .map(|byte| format!("{:02X}", byte)) - .collect::>() - .join(" ") +#[derive(Debug, Clone, PartialEq, Eq)] +struct BirdsPorts { + control_port: String, + data_port: String, +} + +impl BirdsPorts { + fn from_slice(serial_ports: &[String]) -> Result { + if serial_ports.len() != 2 { + return Err(BoardError::InitializationFailed(format!( + "BIRDS requires exactly 2 serial ports, found {}", + serial_ports.len() + ))); + } + + Ok(Self { + control_port: serial_ports[0].clone(), + data_port: serial_ports[1].clone(), + }) + } + + fn from_device_info(device_info: &UsbDeviceInfo) -> Result { + let serial_ports = device_info.serial_ports().map_err(|e| { + BoardError::InitializationFailed(format!("Failed to enumerate serial ports: {}", e)) + })?; + Self::from_slice(serial_ports) + } +} + +fn build_gpio_write_packet(dev_id: u8, pin: u8, value_high: bool) -> [u8; 7] { + [ + 0x07, + 0x00, + dev_id, + 0x00, + CTRL_PAGE_GPIO, + pin, + if value_high { 0x01 } else { 0x00 }, + ] +} + +fn validate_gpio_ack(dev_id: u8, pin: u8, ack: [u8; 4]) -> Result<(), BoardError> { + if ack[2] != dev_id { + return Err(BoardError::HardwareControl(format!( + "GPIO ack ID mismatch for pin {}: expected 0x{:02x}, got 0x{:02x}", + pin, dev_id, 
ack[2] + ))); + } + + Ok(()) } /// BIRDS mining board. pub struct BirdsBoard { device_info: UsbDeviceInfo, + state_tx: watch::Sender, control_port: Option, data_reader: Option>, data_writer: Option>, @@ -69,8 +123,18 @@ pub struct BirdsBoard { impl BirdsBoard { /// Create a new BIRDS board instance. pub fn new(device_info: UsbDeviceInfo) -> Result { + let serial = device_info.serial_number.clone(); + let initial_state = BoardState { + name: format!("birds-{}", serial.as_deref().unwrap_or("unknown")), + model: "BIRDS".into(), + serial, + ..Default::default() + }; + let (state_tx, _) = watch::channel(initial_state); + Ok(Self { device_info, + state_tx, control_port: None, data_reader: None, data_writer: None, @@ -81,23 +145,14 @@ impl BirdsBoard { /// Early bring-up init path. /// - /// Until full thread integration lands, we run a basic UART smoke test - /// (NOOP + READREG ASIC_ID) during board initialization. + /// During board initialization we verify that control sequencing works and + /// that at least one ASIC answers protocol-level initialization traffic + /// before exposing the data channel to a hashing thread. 
pub async fn initialize(&mut self) -> Result<(), BoardError> { - let (control_port, data_port) = { - let serial_ports = self.device_info.serial_ports().map_err(|e| { - BoardError::InitializationFailed(format!("Failed to enumerate serial ports: {}", e)) - })?; - - if serial_ports.len() != 2 { - return Err(BoardError::InitializationFailed(format!( - "BIRDS requires exactly 2 serial ports, found {}", - serial_ports.len() - ))); - } - - (serial_ports[0].clone(), serial_ports[1].clone()) - }; + let BirdsPorts { + control_port, + data_port, + } = BirdsPorts::from_device_info(&self.device_info)?; tracing::info!( serial = ?self.device_info.serial_number, @@ -106,7 +161,7 @@ impl BirdsBoard { data_baud = DATA_UART_BAUD, control_baud = CONTROL_UART_BAUD, asics = ASICS_PER_BOARD, - "Running BIRDS ASIC smoke test during initialization" + "Running BIRDS ASIC data-port initialization" ); // Match known-good bring-up sequence from birds_asyncio.py: @@ -118,37 +173,62 @@ impl BirdsBoard { self.bringup_power_and_reset(&control_port).await?; self.control_port = Some(control_port); - let result = smoke::run_smoke(&data_port, 0).await.map_err(|e| { - BoardError::InitializationFailed(format!("BIRDS ASIC smoke test failed: {:#}", e)) - })?; + let initialized_data_port = + init::initialize_data_port(&data_port, 0) + .await + .map_err(|e| { + BoardError::InitializationFailed(format!( + "BIRDS ASIC data-port initialization failed: {:#}", + e + )) + })?; + let result = initialized_data_port.probe; tracing::info!( logical_asic = result.logical_asic, asic_hw_id = result.asic_hw_id, asic_id = format_args!("0x{:08x}", result.asic_id), - "BIRDS ASIC smoke test succeeded" + "BIRDS ASIC data-port initialization succeeded" ); - let data_stream = SerialStream::new(&data_port, DATA_UART_BAUD).map_err(|e| { - BoardError::InitializationFailed(format!("Failed to open BIRDS data port: {}", e)) - })?; - let (data_reader, data_writer, data_control) = data_stream.split(); - self.data_reader = 
Some(FramedRead::new(data_reader, FrameCodec::default())); - self.data_writer = Some(FramedWrite::new(data_writer, FrameCodec::default())); - self.data_control = Some(data_control); + self.data_reader = Some(initialized_data_port.reader); + self.data_writer = Some(initialized_data_port.writer); + self.data_control = Some(initialized_data_port.control); Ok(()) } - async fn bringup_power_and_reset(&self, control_port: &str) -> Result<(), BoardError> { - let mut control_stream = tokio_serial::new(control_port, CONTROL_UART_BAUD) + fn open_control_stream(control_port: &str) -> Result { + tokio_serial::new(control_port, CONTROL_UART_BAUD) .open_native_async() .map_err(|e| { BoardError::InitializationFailed(format!( "Failed to open BIRDS control port {}: {}", control_port, e )) - })?; + }) + } + + async fn set_asic_reset(control_port: &str, value_high: bool) -> Result<(), BoardError> { + let mut control_stream = Self::open_control_stream(control_port)?; + Self::control_gpio_write( + &mut control_stream, + CTRL_ID_POWER_RESET, + GPIO_ASIC_RST, + value_high, + ) + .await + } + + fn thread_name_for_serial(serial_number: Option<&str>) -> String { + match serial_number { + Some(serial) => format!("BIRDS-{}", &serial[..8.min(serial.len())]), + None => "BIRDS".to_string(), + } + } + + async fn bringup_power_and_reset(&self, control_port: &str) -> Result<(), BoardError> { + let mut control_stream = Self::open_control_stream(control_port)?; Self::control_gpio_write(&mut control_stream, CTRL_ID_VR, GPIO_VR_EN, false).await?; sleep(Duration::from_millis(2000)).await; @@ -188,20 +268,12 @@ impl BirdsBoard { value_high: bool, ) -> Result<(), BoardError> { // Packet format: [len:u16_le][id][bus][page][cmd=pin][value]. 
- let packet: [u8; 7] = [ - 0x07, - 0x00, - dev_id, - 0x00, - CTRL_PAGE_GPIO, - pin, - if value_high { 0x01 } else { 0x00 }, - ]; + let packet = build_gpio_write_packet(dev_id, pin, value_high); tracing::debug!( dev_id = format_args!("0x{:02X}", dev_id), pin, value = if value_high { 1 } else { 0 }, - tx = %format_hex(&packet), + tx = %HexBytes(&packet), "BIRDS ctrl gpio tx" ); stream.write_all(&packet).await.map_err(|e| { @@ -222,17 +294,10 @@ impl BirdsBoard { tracing::debug!( dev_id = format_args!("0x{:02X}", dev_id), pin, - rx = %format_hex(&ack), + rx = %HexBytes(&ack), "BIRDS ctrl gpio rx" ); - if ack[2] != dev_id { - return Err(BoardError::HardwareControl(format!( - "GPIO ack ID mismatch for pin {}: expected 0x{:02x}, got 0x{:02x}", - pin, dev_id, ack[2] - ))); - } - - Ok(()) + validate_gpio_ack(dev_id, pin, ack) } async fn hold_in_reset(&self) -> Result<(), BoardError> { @@ -240,22 +305,7 @@ impl BirdsBoard { BoardError::InitializationFailed("BIRDS control port not initialized".into()) })?; - let mut control_stream = tokio_serial::new(control_port, CONTROL_UART_BAUD) - .open_native_async() - .map_err(|e| { - BoardError::InitializationFailed(format!( - "Failed to open BIRDS control port {}: {}", - control_port, e - )) - })?; - - Self::control_gpio_write( - &mut control_stream, - CTRL_ID_POWER_RESET, - GPIO_ASIC_RST, - false, - ) - .await + Self::set_asic_reset(control_port, false).await } } @@ -266,31 +316,15 @@ struct BirdsAsicEnable { #[async_trait] impl AsicEnable for BirdsAsicEnable { async fn enable(&mut self) -> anyhow::Result<()> { - let mut control_stream = tokio_serial::new(&self.control_port, CONTROL_UART_BAUD) - .open_native_async() - .map_err(|e| anyhow::anyhow!("failed to open control port: {}", e))?; - BirdsBoard::control_gpio_write( - &mut control_stream, - CTRL_ID_POWER_RESET, - GPIO_ASIC_RST, - true, - ) - .await - .map_err(|e| anyhow::anyhow!("failed to release BZM2 reset: {}", e)) + BirdsBoard::set_asic_reset(&self.control_port, true) + 
.await + .map_err(|e| anyhow::anyhow!("failed to release BZM2 reset: {}", e)) } async fn disable(&mut self) -> anyhow::Result<()> { - let mut control_stream = tokio_serial::new(&self.control_port, CONTROL_UART_BAUD) - .open_native_async() - .map_err(|e| anyhow::anyhow!("failed to open control port: {}", e))?; - BirdsBoard::control_gpio_write( - &mut control_stream, - CTRL_ID_POWER_RESET, - GPIO_ASIC_RST, - false, - ) - .await - .map_err(|e| anyhow::anyhow!("failed to assert BZM2 reset: {}", e)) + BirdsBoard::set_asic_reset(&self.control_port, false) + .await + .map_err(|e| anyhow::anyhow!("failed to assert BZM2 reset: {}", e)) } } @@ -305,10 +339,10 @@ impl Board for BirdsBoard { } async fn shutdown(&mut self) -> Result<(), BoardError> { - if let Some(ref tx) = self.thread_shutdown { - if let Err(e) = tx.send(ThreadRemovalSignal::Shutdown) { - tracing::warn!("Failed to send shutdown signal to BIRDS thread: {}", e); - } + if let Some(ref tx) = self.thread_shutdown + && let Err(e) = tx.send(ThreadRemovalSignal::Shutdown) + { + tracing::warn!("Failed to send shutdown signal to BIRDS thread: {}", e); } self.hold_in_reset().await?; @@ -344,10 +378,7 @@ impl Board for BirdsBoard { voltage_regulator: None, }; - let thread_name = match &self.device_info.serial_number { - Some(serial) => format!("BIRDS-{}", &serial[..8.min(serial.len())]), - None => "BIRDS".to_string(), - }; + let thread_name = Self::thread_name_for_serial(self.device_info.serial_number.as_deref()); let thread = Bzm2Thread::new( thread_name, @@ -362,7 +393,9 @@ impl Board for BirdsBoard { } // Factory function to create BIRDS board from USB device info -async fn create_from_usb(device: UsbDeviceInfo) -> crate::error::Result> { +async fn create_from_usb( + device: UsbDeviceInfo, +) -> crate::error::Result<(Box, super::BoardRegistration)> { let mut board = BirdsBoard::new(device) .map_err(|e| Error::Hardware(format!("Failed to create board: {}", e)))?; @@ -371,7 +404,10 @@ async fn create_from_usb(device: 
UsbDeviceInfo) -> crate::error::Result) -> UsbDeviceInfo { + UsbDeviceInfo::new_for_test( 0xc0de, 0xcafe, - Some("TEST001".to_string()), + serial.map(str::to_string), Some("BIRDS".to_string()), Some("Mining Board".to_string()), "/sys/devices/test".to_string(), - ); + ) + } - let board = BirdsBoard::new(device); + #[test] + fn test_board_creation() { + let board = BirdsBoard::new(test_device(Some("TEST001"))); assert!(board.is_ok()); let board = board.unwrap(); assert_eq!(board.board_info().model, "BIRDS"); } + + #[test] + fn test_birds_ports_requires_exactly_two_serial_ports() { + let ports = vec!["/dev/ttyACM0".to_string()]; + let error = BirdsPorts::from_slice(&ports).expect_err("one port should be rejected"); + assert_eq!( + error.to_string(), + "Board initialization failed: BIRDS requires exactly 2 serial ports, found 1" + ); + } + + #[test] + fn test_birds_ports_preserves_control_and_data_order() { + let ports = vec!["/dev/ttyACM0".to_string(), "/dev/ttyACM1".to_string()]; + let birds_ports = BirdsPorts::from_slice(&ports).unwrap(); + assert_eq!(birds_ports.control_port, "/dev/ttyACM0"); + assert_eq!(birds_ports.data_port, "/dev/ttyACM1"); + } + + #[test] + fn test_build_gpio_write_packet_layout() { + let packet = build_gpio_write_packet(CTRL_ID_POWER_RESET, GPIO_ASIC_RST, true); + assert_eq!( + packet, + [ + 0x07, + 0x00, + CTRL_ID_POWER_RESET, + 0x00, + CTRL_PAGE_GPIO, + GPIO_ASIC_RST, + 0x01 + ] + ); + } + + #[test] + fn test_validate_gpio_ack_accepts_matching_device_id() { + let ack = [0x04, 0x00, CTRL_ID_VR, 0x00]; + assert!(validate_gpio_ack(CTRL_ID_VR, GPIO_VR_EN, ack).is_ok()); + } + + #[test] + fn test_validate_gpio_ack_rejects_mismatched_device_id() { + let ack = [0x04, 0x00, CTRL_ID_POWER_RESET, 0x00]; + let error = + validate_gpio_ack(CTRL_ID_VR, GPIO_VR_EN, ack).expect_err("mismatched ack must fail"); + assert_eq!( + error.to_string(), + format!( + "Hardware control error: GPIO ack ID mismatch for pin {}: expected 0x{:02x}, got 0x{:02x}", + 
GPIO_VR_EN, CTRL_ID_VR, CTRL_ID_POWER_RESET + ) + ); + } + + #[test] + fn test_thread_name_uses_serial_prefix() { + assert_eq!( + BirdsBoard::thread_name_for_serial(Some("1234567890")), + "BIRDS-12345678" + ); + } + + #[test] + fn test_thread_name_falls_back_when_serial_is_missing() { + assert_eq!(BirdsBoard::thread_name_for_serial(None), "BIRDS"); + } } From f40971da73914c37d3596eae7e8a33e7d7ee8343 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Thu, 5 Mar 2026 22:57:56 -0500 Subject: [PATCH 13/19] refactor(bzm2): simplify hash thread internals Document the BZM2 thread and protocol error surface more clearly, centralize hex formatting usage, and clean up several thread internals that were carrying avoidable complexity. This keeps the actor-based hashing path easier to follow while preserving the existing behavior and test coverage. --- mujina-miner/src/asic/bzm2/error.rs | 10 ++ mujina-miner/src/asic/bzm2/thread.rs | 237 +++++++++++++-------------- 2 files changed, 128 insertions(+), 119 deletions(-) diff --git a/mujina-miner/src/asic/bzm2/error.rs b/mujina-miner/src/asic/bzm2/error.rs index 4ada16c..591c85e 100644 --- a/mujina-miner/src/asic/bzm2/error.rs +++ b/mujina-miner/src/asic/bzm2/error.rs @@ -2,29 +2,39 @@ use thiserror::Error; +/// Validation failures detected while encoding or decoding BZM2 frames. #[derive(Error, Debug)] pub enum ProtocolError { + /// A register write command was constructed without any payload bytes. #[error("register write payload cannot be empty")] EmptyWritePayload, + /// A register write payload exceeded the 8-bit on-wire length field. #[error("register write payload too large: {0} bytes")] WritePayloadTooLarge(usize), + /// READREG only supports 1-, 2-, or 4-byte responses. #[error("invalid read register byte count: {0} (expected 1, 2, or 4)")] InvalidReadRegCount(u8), + /// WRITEJOB only accepts `job_ctl` values that the hardware understands. 
#[error("invalid job control value: {0} (expected 1 or 3)")] InvalidJobControl(u8), + /// The codec was asked to decode a READREG response size it does not + /// implement. #[error("unsupported read register response size: {0} (expected 1 or 4)")] UnsupportedReadRegResponseSize(usize), + /// A frame exceeded what the bridge format can encode in one command. #[error("frame too large to encode: {0} bytes")] FrameTooLarge(usize), + /// A NOOP response did not return the expected `2ZB` signature bytes. #[error("invalid NOOP signature: {0:02x?}")] InvalidNoopSignature([u8; 3]), + /// The decoder saw a response opcode that is not currently supported. #[error("unsupported response opcode: 0x{0:02x}")] UnsupportedResponseOpcode(u8), } diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index e04a6e2..44cdd42 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -1,7 +1,14 @@ //! BZM2 HashThread implementation. //! -//! This module mirrors the BM13xx actor model and performs full BZM2 bring-up -//! before the first task is accepted. +//! This module uses an actor-style `HashThread` implementation and performs +//! full BZM2 bring-up before the first task is accepted. +//! +//! A `Bzm2Thread` represents the hashing worker for one BIRDS board data path. +//! It is responsible for: +//! - asserting and releasing ASIC reset through board-provided peripherals +//! - programming the chip register set needed for mining +//! - translating scheduler work into BZM2 micro-jobs +//! 
- validating returned results before forwarding shares upstream use std::{ collections::VecDeque, @@ -21,7 +28,7 @@ use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{self, Duration, Instant}; use tokio_stream::StreamExt; -use super::protocol; +use super::{HexBytes, protocol}; use crate::{ asic::hash_thread::{ BoardPeripherals, HashTask, HashThread, HashThreadCapabilities, HashThreadError, @@ -118,7 +125,11 @@ enum ThreadCommand { Shutdown, } -/// HashThread wrapper for a BZM2 board worker. +/// `HashThread` wrapper for a BZM2 board worker. +/// +/// This is a thin handle around a spawned actor task. The actor owns the +/// serial transport and emits [`HashThreadEvent`] updates as it initializes +/// the ASICs and processes work. pub struct Bzm2Thread { name: String, command_tx: mpsc::Sender, @@ -128,6 +139,11 @@ pub struct Bzm2Thread { } impl Bzm2Thread { + /// Create a new BZM2 hashing worker. + /// + /// The thread starts in an uninitialized state. Hardware bring-up happens + /// lazily when the first task is assigned so board discovery can complete + /// without immediately programming the ASICs. 
pub fn new( name: String, chip_responses: R, @@ -148,16 +164,16 @@ impl Bzm2Thread { let status_clone = Arc::clone(&status); tokio::spawn(async move { - bzm2_thread_actor( + bzm2_thread_actor(Bzm2ThreadActor { cmd_rx, evt_tx, removal_rx, - status_clone, + status: status_clone, chip_responses, chip_commands, peripherals, asic_count, - ) + }) .await; }); @@ -263,12 +279,7 @@ async fn drain_input(chip_responses: &mut R) where R: Stream> + Unpin, { - loop { - match time::timeout(Duration::from_millis(20), chip_responses.next()).await { - Ok(Some(_)) => continue, - _ => break, - } - } + while let Ok(Some(_)) = time::timeout(Duration::from_millis(20), chip_responses.next()).await {} } async fn wait_for_noop( @@ -657,6 +668,27 @@ enum Bzm2CheckResult { Error, } +struct SelectedReadResultCandidate { + assigned: AssignedTask, + share_version: BlockVersion, + selected_midstate: [u8; 32], + ntime_offset: u32, + share_ntime: u32, + nonce_adjusted: u32, + nonce_submit: u32, + header: BlockHeader, + tail16: [u8; 16], + hash_bytes: [u8; 32], + hash: bitcoin::BlockHash, + target_bytes: [u8; 32], + check_result: Bzm2CheckResult, + observed_leading_zeros: u16, + achieved_difficulty: Difficulty, + target_difficulty: Difficulty, + achieved_difficulty_f64: f64, + target_difficulty_f64: f64, +} + // Compute the four version-mask deltas used across the 4-midstate micro-jobs. 
fn midstate_version_mask_variants(version_mask: u32) -> [u32; MIDSTATE_COUNT] { if version_mask == 0 { @@ -665,19 +697,19 @@ fn midstate_version_mask_variants(version_mask: u32) -> [u32; MIDSTATE_COUNT] { let mut mask = version_mask; let mut cnt: u32 = 0; - while (mask % 16) == 0 { + while mask.is_multiple_of(16) { cnt = cnt.saturating_add(1); mask /= 16; } let mut tmp_mask = 0u32; - if (mask % 16) != 0 { + if !mask.is_multiple_of(16) { tmp_mask = mask % 16; - } else if (mask % 8) != 0 { + } else if !mask.is_multiple_of(8) { tmp_mask = mask % 8; - } else if (mask % 4) != 0 { + } else if !mask.is_multiple_of(4) { tmp_mask = mask % 4; - } else if (mask % 2) != 0 { + } else if !mask.is_multiple_of(2) { tmp_mask = mask % 2; } @@ -704,11 +736,7 @@ fn task_midstate_versions(task: &HashTask) -> [BlockVersion; MIDSTATE_COUNT] { variants.map(|variant| BlockVersion::from_consensus((base | variant) as i32)) } -fn check_result( - sha256_le: &[u8; 32], - target_le: &[u8; 32], - leading_zeros: u8, -) -> Bzm2CheckResult { +fn check_result(sha256_le: &[u8; 32], target_le: &[u8; 32], leading_zeros: u8) -> Bzm2CheckResult { let mut i: usize = 31; while i > 0 && sha256_le[i] == 0 { i -= 1; @@ -858,13 +886,6 @@ fn hash_bytes_bzm2_order(hash: &bitcoin::BlockHash) -> [u8; 32] { *hash.as_byte_array() } -fn format_hex(data: &[u8]) -> String { - data.iter() - .map(|byte| format!("{:02X}", byte)) - .collect::>() - .join(" ") -} - fn validation_probe_summary( assigned: &AssignedTask, version: BlockVersion, @@ -1259,8 +1280,8 @@ fn log_replay_check_for_task(config: &ReplayCheckConfig, assigned: &AssignedTask check_result = ?check_result, achieved_difficulty = %achieved_difficulty, target_difficulty = %target_difficulty, - hash_bzm2 = %format_hex(&hash_bzm2), - header = %format_hex(&header_bytes), + hash_bzm2 = %HexBytes(&hash_bzm2), + header = %HexBytes(&header_bytes), "BZM2 replay check" ); true @@ -1434,9 +1455,9 @@ fn log_bzm2_job_fingerprint( for (idx, version) in 
versions.iter().copied().enumerate() { let header = build_header_bytes(task, version, merkle_root)?; version_map.push(format!("mj{idx}={:#010x}", version.to_consensus() as u32)); - header_tails.push(format!("mj{idx}={}", format_hex(&header[64..80]))); - header_full.push(format!("mj{idx}={}", format_hex(&header))); - midstates_hex.push(format!("mj{idx}={}", format_hex(&payload.midstates[idx]))); + header_tails.push(format!("mj{idx}={}", HexBytes(&header[64..80]))); + header_full.push(format!("mj{idx}={}", HexBytes(&header))); + midstates_hex.push(format!("mj{idx}={}", HexBytes(&payload.midstates[idx]))); } debug!( @@ -1449,8 +1470,8 @@ fn log_bzm2_job_fingerprint( en2 = %en2_dbg, zeros_to_find, timestamp_count, - target_reg = %format_hex(&target_reg_bytes), - merkle_root = %format_hex(&merkle_root_bytes), + target_reg = %HexBytes(&target_reg_bytes), + merkle_root = %HexBytes(&merkle_root_bytes), payload_merkle_residue = format_args!("{:#010x}", payload.merkle_residue), payload_timestamp = format_args!("{:#010x}", payload.timestamp), versions = %version_map.join(" "), @@ -1901,10 +1922,7 @@ where Ok(()) } -async fn set_asic_nonce_range( - chip_commands: &mut W, - asic_id: u8, -) -> Result<(), HashThreadError> +async fn set_asic_nonce_range(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> where W: Sink + Unpin, W::Error: std::fmt::Debug, @@ -2153,20 +2171,34 @@ where Ok(asic_ids) } -async fn bzm2_thread_actor( - mut cmd_rx: mpsc::Receiver, +struct Bzm2ThreadActor { + cmd_rx: mpsc::Receiver, evt_tx: mpsc::Sender, - mut removal_rx: watch::Receiver, + removal_rx: watch::Receiver, status: Arc>, - mut chip_responses: R, - mut chip_commands: W, - mut peripherals: BoardPeripherals, + chip_responses: R, + chip_commands: W, + peripherals: BoardPeripherals, asic_count: u8, -) where +} + +async fn bzm2_thread_actor(actor: Bzm2ThreadActor) +where R: Stream> + Unpin, W: Sink + Unpin, W::Error: std::fmt::Debug, { + let Bzm2ThreadActor { + mut cmd_rx, + evt_tx, + 
mut removal_rx, + status, + mut chip_responses, + mut chip_commands, + mut peripherals, + asic_count, + } = actor; + if let Some(ref mut asic_enable) = peripherals.asic_enable && let Err(e) = asic_enable.disable().await { @@ -2284,14 +2316,10 @@ async fn bzm2_thread_actor( retain_assigned_task(&mut assigned_tasks, new_assigned_task); if let Some(cfg) = replay_check_config.as_ref() && let Some(assigned) = assigned_tasks.back() + && log_replay_check_for_task(cfg, assigned) { - if log_replay_check_for_task(cfg, assigned) { - replay_check_hits = replay_check_hits.saturating_add(1); - trace!( - replay_check_hits, - "BZM2 replay check matched on update_task" - ); - } + replay_check_hits = replay_check_hits.saturating_add(1); + trace!(replay_check_hits, "BZM2 replay check matched on update_task"); } debug!( @@ -2370,14 +2398,13 @@ async fn bzm2_thread_actor( retain_assigned_task(&mut assigned_tasks, new_assigned_task); if let Some(cfg) = replay_check_config.as_ref() && let Some(assigned) = assigned_tasks.back() + && log_replay_check_for_task(cfg, assigned) { - if log_replay_check_for_task(cfg, assigned) { - replay_check_hits = replay_check_hits.saturating_add(1); - trace!( - replay_check_hits, - "BZM2 replay check matched on replace_task" - ); - } + replay_check_hits = replay_check_hits.saturating_add(1); + trace!( + replay_check_hits, + "BZM2 replay check matched on replace_task" + ); } debug!( @@ -2567,26 +2594,7 @@ async fn bzm2_thread_actor( } let nonce_raw = nonce; - let mut selected_candidate: Option<( - AssignedTask, - BlockVersion, - [u8; 32], - u32, - u32, - u32, - u32, - BlockHeader, - [u8; 16], - [u8; 32], - bitcoin::BlockHash, - [u8; 32], - Bzm2CheckResult, - u16, - Difficulty, - Difficulty, - f64, - f64, - )> = None; + let mut selected_candidate: Option = None; let mut selected_rank = 0u8; for mut candidate in slot_candidates { @@ -2644,8 +2652,8 @@ async fn bzm2_thread_actor( if selected_candidate.is_none() || rank > selected_rank { selected_rank = rank; - 
selected_candidate = Some(( - candidate, + selected_candidate = Some(SelectedReadResultCandidate { + assigned: candidate, share_version, selected_midstate, ntime_offset, @@ -2663,14 +2671,14 @@ async fn bzm2_thread_actor( target_difficulty, achieved_difficulty_f64, target_difficulty_f64, - )); + }); if rank == 3 { break; } } } - let Some(( + let Some(SelectedReadResultCandidate { assigned, share_version, selected_midstate, @@ -2689,7 +2697,7 @@ async fn bzm2_thread_actor( target_difficulty, achieved_difficulty_f64, target_difficulty_f64, - )) = selected_candidate + }) = selected_candidate else { trace!( asic_hw_id, @@ -2727,13 +2735,16 @@ async fn bzm2_thread_actor( } sanity_candidates_total = sanity_candidates_total.saturating_add(1); - if sanity_best_difficulty.map_or(true, |best| achieved_difficulty > best) { + if sanity_best_difficulty + .is_none_or(|best| achieved_difficulty > best) + { sanity_best_difficulty = Some(achieved_difficulty); } if let Some(cfg) = focused_readresult_config.as_ref() { - let adjusted_match = cfg.adjusted_nonce.map_or(true, |n| n == nonce_adjusted); - let raw_match = cfg.raw_nonce.map_or(true, |n| n == nonce_raw); + let adjusted_match = + cfg.adjusted_nonce.is_none_or(|n| n == nonce_adjusted); + let raw_match = cfg.raw_nonce.is_none_or(|n| n == nonce_raw); if adjusted_match && raw_match { let header_bytes = consensus::serialize(&header); let merkle_root_bytes = consensus::serialize(&assigned.merkle_root); @@ -2762,15 +2773,15 @@ async fn bzm2_thread_actor( version = format_args!("{:#010x}", share_version.to_consensus() as u32), bits = format_args!("{:#010x}", assigned.task.template.bits.to_consensus()), extranonce2 = ?assigned.task.en2, - merkle_root = %format_hex(&merkle_root_bytes), - midstate = %format_hex(&selected_midstate), - derived_midstate = %format_hex(&derived_midstate), - header = %format_hex(&header_bytes), - tail16 = %format_hex(&tail16), - hash_bzm2_order = %format_hex(&hash_bytes), - hash_reversed = 
%format_hex(&hash_rev), + merkle_root = %HexBytes(&merkle_root_bytes), + midstate = %HexBytes(&selected_midstate), + derived_midstate = %HexBytes(&derived_midstate), + header = %HexBytes(&header_bytes), + tail16 = %HexBytes(&tail16), + hash_bzm2_order = %HexBytes(&hash_bytes), + hash_reversed = %HexBytes(&hash_rev), hash_msb_bzm2 = format_args!("{:#04x}", hash_bytes[31]), - target = %format_hex(&target_bytes), + target = %HexBytes(&target_bytes), check_result = ?check_result, observed_leading_zeros_bits = observed_leading_zeros, achieved_difficulty = %achieved_difficulty, @@ -2975,7 +2986,7 @@ async fn bzm2_thread_actor( selected_ntime = format_args!("{:#x}", share_ntime), selected_version = format_args!("{:#x}", share_version.to_consensus() as u32), bits = format_args!("{:#x}", assigned.task.template.bits.to_consensus()), - header = %format_hex(&header_bytes), + header = %HexBytes(&header_bytes), focused = %focused, probes = %probes.join(" | "), "BZM2 READRESULT sanity diagnostic" @@ -3001,7 +3012,7 @@ async fn bzm2_thread_actor( ); } - if sanity_candidates_total % 500 == 0 { + if sanity_candidates_total.is_multiple_of(500) { debug!( total_candidates = sanity_candidates_total, candidates_meeting_task_target = sanity_candidates_meet_task, @@ -3053,9 +3064,9 @@ mod tests { use super::{ AssignedTask, BZM2_NONCE_MINUS, Bzm2CheckResult, ENGINE_LEADING_ZEROS, ENGINE_TIMESTAMP_COUNT, EngineAssignment, MIDSTATE_COUNT, WORK_ENGINE_COUNT, - bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, midstate_version_mask_variants, - check_result, hash_bytes_bzm2_order, protocol, resolve_readresult_fields, - task_to_bzm2_payload, task_midstate_versions, + bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, check_result, + hash_bytes_bzm2_order, midstate_version_mask_variants, protocol, resolve_readresult_fields, + task_midstate_versions, task_to_bzm2_payload, }; #[test] @@ -3107,10 +3118,7 @@ mod tests { let mut hash = [0u8; 32]; let target = [0xffu8; 32]; hash[31] = 
0x80; - assert_eq!( - check_result(&hash, &target, 32), - Bzm2CheckResult::Error - ); + assert_eq!(check_result(&hash, &target, 32), Bzm2CheckResult::Error); } #[test] @@ -3118,10 +3126,7 @@ mod tests { let mut hash = [0u8; 32]; let target = [0xffu8; 32]; hash[27] = 0x3f; - assert_eq!( - check_result(&hash, &target, 34), - Bzm2CheckResult::Correct - ); + assert_eq!(check_result(&hash, &target, 34), Bzm2CheckResult::Correct); } #[test] @@ -3129,10 +3134,7 @@ mod tests { let mut hash = [0u8; 32]; let target = [0xffu8; 32]; hash[27] = 0x40; - assert_eq!( - check_result(&hash, &target, 34), - Bzm2CheckResult::Error - ); + assert_eq!(check_result(&hash, &target, 34), Bzm2CheckResult::Error); } #[test] @@ -3142,10 +3144,7 @@ mod tests { hash[1] = 0x10; target[1] = 0x20; - assert_eq!( - check_result(&hash, &target, 32), - Bzm2CheckResult::Correct - ); + assert_eq!(check_result(&hash, &target, 32), Bzm2CheckResult::Correct); hash[1] = 0x30; target[1] = 0x20; From 4ffece7ed4998e475183347226b0e70cf469a553 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Thu, 5 Mar 2026 23:28:46 -0500 Subject: [PATCH 14/19] refactor(bzm2): remove verbose hash thread diagnostics --- mujina-miner/src/asic/bzm2/thread.rs | 1012 +------------------------- 1 file changed, 35 insertions(+), 977 deletions(-) diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index 44cdd42..f413e48 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -12,7 +12,7 @@ use std::{ collections::VecDeque, - env, io, + io, sync::{Arc, RwLock}, }; @@ -28,7 +28,7 @@ use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{self, Duration, Instant}; use tokio_stream::StreamExt; -use super::{HexBytes, protocol}; +use super::protocol; use crate::{ asic::hash_thread::{ BoardPeripherals, HashTask, HashThread, HashThreadCapabilities, HashThreadError, @@ -91,8 +91,6 @@ const READRESULT_SEQUENCE_SPACE: usize = 64; // sequence byte carries 4 micro-jo const 
READRESULT_SLOT_HISTORY: usize = 16; const READRESULT_ASSIGNMENT_HISTORY_LIMIT: usize = READRESULT_SEQUENCE_SPACE * READRESULT_SLOT_HISTORY; -const SANITY_DIAGNOSTIC_LIMIT: u64 = 24; -const SEQUENCE_LOOKUP_DIAGNOSTIC_LIMIT: u64 = 24; const ZERO_LZ_DIAGNOSTIC_LIMIT: u64 = 24; const SHA256_IV: [u32; 8] = [ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, @@ -640,27 +638,6 @@ struct AssignedTask { nonce_minus_value: u32, } -#[derive(Clone, Debug)] -struct ReplayCheckConfig { - job_id: Option, - en2_value: u64, - en2_size: u8, - ntime: u32, - nonce: u32, - version_bits: u32, -} - -#[derive(Clone, Debug)] -struct FocusedReadResultConfig { - adjusted_nonce: Option, - raw_nonce: Option, - break_on_match: bool, -} - -fn format_replay_en2_hex(value: u64, size: u8) -> String { - format!("{:0width$x}", value, width = size as usize * 2) -} - #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum Bzm2CheckResult { Correct, @@ -671,22 +648,14 @@ enum Bzm2CheckResult { struct SelectedReadResultCandidate { assigned: AssignedTask, share_version: BlockVersion, - selected_midstate: [u8; 32], ntime_offset: u32, share_ntime: u32, nonce_adjusted: u32, nonce_submit: u32, - header: BlockHeader, - tail16: [u8; 16], hash_bytes: [u8; 32], hash: bitcoin::BlockHash, - target_bytes: [u8; 32], check_result: Bzm2CheckResult, observed_leading_zeros: u16, - achieved_difficulty: Difficulty, - target_difficulty: Difficulty, - achieved_difficulty_f64: f64, - target_difficulty_f64: f64, } // Compute the four version-mask deltas used across the 4-midstate micro-jobs. 
@@ -886,407 +855,6 @@ fn hash_bytes_bzm2_order(hash: &bitcoin::BlockHash) -> [u8; 32] { *hash.as_byte_array() } -fn validation_probe_summary( - assigned: &AssignedTask, - version: BlockVersion, - ntime: u32, - nonce: u32, -) -> String { - let header = BlockHeader { - version, - prev_blockhash: assigned.task.template.prev_blockhash, - merkle_root: assigned.merkle_root, - time: ntime, - bits: assigned.task.template.bits, - nonce, - }; - let header_bytes = consensus::serialize(&header); - let header_prefix: [u8; 64] = header_bytes[..64] - .try_into() - .expect("header prefix length is fixed"); - let midstate = compute_midstate_le(&header_prefix); - let tail16 = bzm2_tail16_bytes(assigned, ntime, nonce); - let hash_bytes = bzm2_double_sha_from_midstate_and_tail(&midstate, &tail16); - let target_bytes = assigned.task.share_target.to_le_bytes(); - let check = check_result(&hash_bytes, &target_bytes, assigned.leading_zeros); - let lz_bits = leading_zero_bits(&hash_bytes); - format!( - "v={:#010x},t={:#010x},n={:#010x},chk={:?},lz={},msb={:#04x}", - version.to_consensus() as u32, - ntime, - nonce, - check, - lz_bits, - hash_bytes[31] - ) -} - -fn evaluate_check_with_hash_orders( - assigned: &AssignedTask, - version: BlockVersion, - ntime: u32, - nonce_submit: u32, -) -> (Bzm2CheckResult, u8, Bzm2CheckResult, u8) { - let evaluate = |candidate_nonce: u32| { - let header = BlockHeader { - version, - prev_blockhash: assigned.task.template.prev_blockhash, - merkle_root: assigned.merkle_root, - time: ntime, - bits: assigned.task.template.bits, - nonce: candidate_nonce, - }; - let header_bytes = consensus::serialize(&header); - let header_prefix: [u8; 64] = header_bytes[..64] - .try_into() - .expect("header prefix length is fixed"); - let midstate = compute_midstate_le(&header_prefix); - let tail16 = bzm2_tail16_bytes(assigned, ntime, candidate_nonce); - let hash_bytes = bzm2_double_sha_from_midstate_and_tail(&midstate, &tail16); - let target_bytes = 
assigned.task.share_target.to_le_bytes(); - let check = check_result(&hash_bytes, &target_bytes, assigned.leading_zeros); - (check, hash_bytes[31]) - }; - - // Keep the legacy "le/be" labels in focused diagnostics, but compare - // submit-order nonce vs swapped-order nonce to surface byte-order mistakes. - let (submit_check, submit_msb) = evaluate(nonce_submit); - let (swapped_check, swapped_msb) = evaluate(nonce_submit.swap_bytes()); - (submit_check, submit_msb, swapped_check, swapped_msb) -} - -fn focused_validation_entry( - label: &str, - assigned: &AssignedTask, - sequence: u8, - timecode: u8, - nonce: u32, -) -> String { - let sequence_id = sequence / (MIDSTATE_COUNT as u8); - let micro_job_id = (sequence % (MIDSTATE_COUNT as u8)) as usize; - let version = assigned.microjob_versions[micro_job_id]; - let ntime_rev = assigned - .task - .ntime - .wrapping_add(u32::from(assigned.timestamp_count.wrapping_sub(timecode))); - let ntime_plus = assigned.task.ntime.wrapping_add(u32::from(timecode)); - let (rev_le, rev_le_msb, rev_be, rev_be_msb) = - evaluate_check_with_hash_orders(assigned, version, ntime_rev, nonce); - let (plus_le, plus_le_msb, plus_be, plus_be_msb) = - evaluate_check_with_hash_orders(assigned, version, ntime_plus, nonce); - - format!( - "{label}(seq={:#04x}/sid={}/mj={},time={:#04x},n={:#010x},rev(le={:?}/{:#04x},be={:?}/{:#04x}),plus(le={:?}/{:#04x},be={:?}/{:#04x}))", - sequence, - sequence_id, - micro_job_id, - timecode, - nonce, - rev_le, - rev_le_msb, - rev_be, - rev_be_msb, - plus_le, - plus_le_msb, - plus_be, - plus_be_msb - ) -} - -fn focused_readresult_diagnostic( - assigned: &AssignedTask, - sequence_raw: u8, - timecode_raw: u8, - nonce_raw: u32, -) -> String { - let sequence_masked = sequence_raw & 0x7f; - let timecode_masked = timecode_raw & 0x7f; - let nonce_adjusted = nonce_raw.wrapping_sub(assigned.nonce_minus_value); - let entries = [ - focused_validation_entry( - "raw_adj", - assigned, - sequence_raw, - timecode_raw, - nonce_adjusted, 
- ), - focused_validation_entry("raw_raw", assigned, sequence_raw, timecode_raw, nonce_raw), - focused_validation_entry( - "m7_adj", - assigned, - sequence_masked, - timecode_masked, - nonce_adjusted, - ), - focused_validation_entry( - "m7_raw", - assigned, - sequence_masked, - timecode_masked, - nonce_raw, - ), - ]; - entries.join(" | ") -} - -fn parse_hex_u32(input: &str) -> Option { - let trimmed = input - .trim() - .trim_start_matches("0x") - .trim_start_matches("0X"); - u32::from_str_radix(trimmed, 16).ok() -} - -fn parse_u32_env(input: &str) -> Option { - let trimmed = input.trim(); - if trimmed.is_empty() { - return None; - } - if trimmed.starts_with("0x") || trimmed.starts_with("0X") { - return parse_hex_u32(trimmed); - } - trimmed - .parse::() - .ok() - .or_else(|| parse_hex_u32(trimmed)) -} - -fn parse_bool_env_flag(name: &str) -> bool { - let Ok(raw) = env::var(name) else { - return false; - }; - let v = raw.trim(); - v == "1" - || v.eq_ignore_ascii_case("true") - || v.eq_ignore_ascii_case("yes") - || v.eq_ignore_ascii_case("on") -} - -fn parse_focused_readresult_config_from_env() -> Option { - let adjusted_nonce = match env::var("MUJINA_BZM2_TRACE_NONCE") { - Ok(v) => { - let Some(parsed) = parse_u32_env(&v) else { - warn!(value = %v, "Invalid MUJINA_BZM2_TRACE_NONCE (expected hex or decimal u32)"); - return None; - }; - Some(parsed) - } - Err(_) => None, - }; - let raw_nonce = match env::var("MUJINA_BZM2_TRACE_RAW_NONCE") { - Ok(v) => { - let Some(parsed) = parse_u32_env(&v) else { - warn!(value = %v, "Invalid MUJINA_BZM2_TRACE_RAW_NONCE (expected hex or decimal u32)"); - return None; - }; - Some(parsed) - } - Err(_) => None, - }; - if adjusted_nonce.is_none() && raw_nonce.is_none() { - return None; - } - let break_on_match = parse_bool_env_flag("MUJINA_BZM2_TRACE_BREAK_ON_NONCE"); - Some(FocusedReadResultConfig { - adjusted_nonce, - raw_nonce, - break_on_match, - }) -} - -fn parse_replay_check_config_from_env() -> Option { - let en2_hex = match 
env::var("MUJINA_BZM2_REPLAY_EN2") { - Ok(v) => v, - Err(_) => return None, - }; - let ntime_s = match env::var("MUJINA_BZM2_REPLAY_NTIME") { - Ok(v) => v, - Err(_) => { - warn!("MUJINA_BZM2_REPLAY_EN2 is set but MUJINA_BZM2_REPLAY_NTIME is missing"); - return None; - } - }; - let nonce_s = match env::var("MUJINA_BZM2_REPLAY_NONCE") { - Ok(v) => v, - Err(_) => { - warn!("MUJINA_BZM2_REPLAY_EN2 is set but MUJINA_BZM2_REPLAY_NONCE is missing"); - return None; - } - }; - let version_bits_s = match env::var("MUJINA_BZM2_REPLAY_VERSION_BITS") { - Ok(v) => v, - Err(_) => { - warn!("MUJINA_BZM2_REPLAY_EN2 is set but MUJINA_BZM2_REPLAY_VERSION_BITS is missing"); - return None; - } - }; - - let en2_trim = en2_hex - .trim() - .trim_start_matches("0x") - .trim_start_matches("0X"); - if en2_trim.is_empty() || (en2_trim.len() % 2) != 0 || en2_trim.len() > 16 { - warn!( - en2 = %en2_hex, - "Invalid MUJINA_BZM2_REPLAY_EN2 (must be 1-8 bytes of hex)" - ); - return None; - } - - let en2_size = (en2_trim.len() / 2) as u8; - let mut en2_bytes = [0u8; 8]; - for (idx, pair) in en2_trim.as_bytes().chunks_exact(2).enumerate() { - let Ok(byte_str) = std::str::from_utf8(pair) else { - warn!(en2 = %en2_hex, "Invalid UTF-8 in MUJINA_BZM2_REPLAY_EN2"); - return None; - }; - let Ok(byte) = u8::from_str_radix(byte_str, 16) else { - warn!(en2 = %en2_hex, "Invalid MUJINA_BZM2_REPLAY_EN2 hex"); - return None; - }; - en2_bytes[idx] = byte; - } - // Stratum submit extranonce2 is sent as raw bytes hex. Extranonce2 stores value as little-endian. 
- let en2_value = u64::from_le_bytes(en2_bytes); - - let Some(ntime) = parse_hex_u32(&ntime_s) else { - warn!(ntime = %ntime_s, "Invalid MUJINA_BZM2_REPLAY_NTIME hex"); - return None; - }; - let Some(nonce) = parse_hex_u32(&nonce_s) else { - warn!(nonce = %nonce_s, "Invalid MUJINA_BZM2_REPLAY_NONCE hex"); - return None; - }; - let Some(version_bits) = parse_hex_u32(&version_bits_s) else { - warn!( - version_bits = %version_bits_s, - "Invalid MUJINA_BZM2_REPLAY_VERSION_BITS hex" - ); - return None; - }; - let job_id = env::var("MUJINA_BZM2_REPLAY_JOB_ID") - .ok() - .filter(|s| !s.trim().is_empty()); - - Some(ReplayCheckConfig { - job_id, - en2_value, - en2_size, - ntime, - nonce, - version_bits, - }) -} - -fn log_replay_check_for_task(config: &ReplayCheckConfig, assigned: &AssignedTask) -> bool { - if let Some(job_id) = &config.job_id - && assigned.task.template.id.as_str() != job_id - { - debug!( - configured_job_id = %job_id, - assigned_job_id = %assigned.task.template.id, - "BZM2 replay check skipped (job_id mismatch)" - ); - return false; - } - - let Ok(config_en2) = Extranonce2::new(config.en2_value, config.en2_size) else { - debug!( - job_id = %assigned.task.template.id, - configured_en2 = %format_replay_en2_hex(config.en2_value, config.en2_size), - "BZM2 replay check skipped (configured extranonce2 invalid for configured size)" - ); - return false; - }; - - let matched_engine = assigned - .engine_assignments - .iter() - .position(|engine| engine.extranonce2 == Some(config_en2)); - let (task_en2, replay_merkle_root) = if let Some(logical_engine_id) = matched_engine { - ( - config_en2, - assigned.engine_assignments[logical_engine_id].merkle_root, - ) - } else { - let Some(task_en2) = assigned.task.en2 else { - debug!( - job_id = %assigned.task.template.id, - configured_en2 = %format_replay_en2_hex(config.en2_value, config.en2_size), - "BZM2 replay check skipped (assigned task has no extranonce2)" - ); - return false; - }; - if task_en2 != config_en2 { - debug!( 
- job_id = %assigned.task.template.id, - configured_en2 = %format_replay_en2_hex(config.en2_value, config.en2_size), - assigned_en2 = %task_en2, - "BZM2 replay check skipped (extranonce2 mismatch)" - ); - return false; - } - (task_en2, assigned.merkle_root) - }; - - let base_version = assigned.task.template.version.base().to_consensus() as u32; - let replay_version_u32 = base_version | config.version_bits; - let replay_version = BlockVersion::from_consensus(replay_version_u32 as i32); - let matched_microjob = assigned - .microjob_versions - .iter() - .position(|v| v.to_consensus() as u32 == replay_version_u32); - - let header = BlockHeader { - version: replay_version, - prev_blockhash: assigned.task.template.prev_blockhash, - merkle_root: replay_merkle_root, - time: config.ntime, - bits: assigned.task.template.bits, - nonce: config.nonce, - }; - let header_bytes = consensus::serialize(&header); - let replay_midstate = matched_microjob - .and_then(|idx| { - matched_engine.map(|logical_engine_id| { - assigned.engine_assignments[logical_engine_id].midstates[idx] - }) - }) - .unwrap_or_else(|| { - let header_prefix: [u8; 64] = header_bytes[..64] - .try_into() - .expect("header prefix length is fixed"); - compute_midstate_le(&header_prefix) - }); - let replay_tail16 = bzm2_tail16_bytes(assigned, config.ntime, config.nonce); - let hash_bzm2 = bzm2_double_sha_from_midstate_and_tail(&replay_midstate, &replay_tail16); - let hash = bitcoin::BlockHash::from_byte_array(hash_bzm2); - let target_bytes = assigned.task.share_target.to_le_bytes(); - let check_result = check_result(&hash_bzm2, &target_bytes, assigned.leading_zeros); - let achieved_difficulty = Difficulty::from_hash(&hash); - let target_difficulty = Difficulty::from_target(assigned.task.share_target); - - debug!( - job_id = %assigned.task.template.id, - assigned_sequence_id = assigned.sequence_id, - assigned_en2 = %task_en2, - replay_en2 = format_args!("{:0width$x}", config.en2_value, width = config.en2_size as usize 
* 2), - replay_ntime = format_args!("{:#010x}", config.ntime), - replay_nonce = format_args!("{:#010x}", config.nonce), - replay_version_bits = format_args!("{:#010x}", config.version_bits), - replay_version = format_args!("{:#010x}", replay_version_u32), - matched_logical_engine = ?matched_engine, - matched_microjob = ?matched_microjob, - check_result = ?check_result, - achieved_difficulty = %achieved_difficulty, - target_difficulty = %target_difficulty, - hash_bzm2 = %HexBytes(&hash_bzm2), - header = %HexBytes(&header_bytes), - "BZM2 replay check" - ); - true -} - fn compute_task_merkle_root(task: &HashTask) -> Result { let template = task.template.as_ref(); match &template.merkle_root { @@ -1429,61 +997,6 @@ fn task_to_bzm2_payload( }) } -fn log_bzm2_job_fingerprint( - task: &HashTask, - merkle_root: TxMerkleNode, - versions: [BlockVersion; MIDSTATE_COUNT], - payload: &TaskJobPayload, - sequence_id: u8, - zeros_to_find: u8, - timestamp_count: u8, -) -> Result<(), HashThreadError> { - let target_swapped = task.template.bits.to_consensus().swap_bytes(); - let target_reg_bytes = target_swapped.to_le_bytes(); - let merkle_root_bytes = consensus::serialize(&merkle_root); - let en2_dbg = task - .en2 - .as_ref() - .map(|v| format!("{v:?}")) - .unwrap_or_else(|| "None".to_owned()); - - let mut version_map = Vec::with_capacity(MIDSTATE_COUNT); - let mut header_tails = Vec::with_capacity(MIDSTATE_COUNT); - let mut header_full = Vec::with_capacity(MIDSTATE_COUNT); - let mut midstates_hex = Vec::with_capacity(MIDSTATE_COUNT); - - for (idx, version) in versions.iter().copied().enumerate() { - let header = build_header_bytes(task, version, merkle_root)?; - version_map.push(format!("mj{idx}={:#010x}", version.to_consensus() as u32)); - header_tails.push(format!("mj{idx}={}", HexBytes(&header[64..80]))); - header_full.push(format!("mj{idx}={}", HexBytes(&header))); - midstates_hex.push(format!("mj{idx}={}", HexBytes(&payload.midstates[idx]))); - } - - debug!( - job_id = 
%task.template.id, - sequence_id, - ntime = format_args!("{:#x}", task.ntime), - template_time = format_args!("{:#x}", task.template.time), - bits = format_args!("{:#x}", task.template.bits.to_consensus()), - share_target = %task.share_target, - en2 = %en2_dbg, - zeros_to_find, - timestamp_count, - target_reg = %HexBytes(&target_reg_bytes), - merkle_root = %HexBytes(&merkle_root_bytes), - payload_merkle_residue = format_args!("{:#010x}", payload.merkle_residue), - payload_timestamp = format_args!("{:#010x}", payload.timestamp), - versions = %version_map.join(" "), - header_tail = %header_tails.join(" | "), - midstates = %midstates_hex.join(" | "), - headers = %header_full.join(" | "), - "BZM2 job fingerprint" - ); - - Ok(()) -} - async fn send_task_to_all_engines( chip_commands: &mut W, task: &HashTask, @@ -1501,7 +1014,6 @@ where let target = task.template.bits.to_consensus().swap_bytes(); let timestamp_reg_value = ((AUTO_CLOCK_UNGATE & 0x1) << 7) | (timestamp_count & 0x7f); let mut engine_assignments = Vec::with_capacity(WORK_ENGINE_COUNT); - let mut fingerprint_logged = false; for row in 0..ENGINE_ROWS { for col in 0..ENGINE_COLS { @@ -1525,32 +1037,6 @@ where "failed to derive per-engine payload for logical engine {logical_engine_id} (row {row} col {col}): {e}" )) })?; - if !fingerprint_logged { - fingerprint_logged = true; - if let Err(e) = log_bzm2_job_fingerprint( - &engine_task, - merkle_root, - versions, - &payload, - sequence_id, - zeros_to_find, - timestamp_count, - ) { - warn!(error = %e, "Failed to emit BZM2 job fingerprint"); - } - } - debug!( - logical_engine_id, - engine_hw_id = format_args!("{:#05x}", engine), - row, - column = col, - sequence_id, - extranonce2 = ?engine_task.en2, - data0 = format_args!("{:#010x}", payload.merkle_residue), - data1 = format_args!("{:#010x}", payload.timestamp), - data2 = format_args!("{:#010x}", target), - "BZM2 dispatch map" - ); write_reg_u8( chip_commands, @@ -2210,33 +1696,7 @@ where let mut assigned_tasks: 
VecDeque = VecDeque::with_capacity(READRESULT_ASSIGNMENT_HISTORY_LIMIT); let mut next_sequence_id: u8 = 0; - let mut sanity_candidates_total: u64 = 0; - let mut sanity_candidates_meet_task: u64 = 0; - let mut sanity_best_difficulty: Option = None; - let mut sanity_diagnostic_samples: u64 = 0; - let mut sequence_lookup_diagnostic_samples: u64 = 0; let mut zero_lz_diagnostic_samples: u64 = 0; - let replay_check_config = parse_replay_check_config_from_env(); - let focused_readresult_config = parse_focused_readresult_config_from_env(); - if let Some(cfg) = replay_check_config.as_ref() { - info!( - replay_job_id = ?cfg.job_id, - replay_en2 = %format_replay_en2_hex(cfg.en2_value, cfg.en2_size), - replay_ntime = format_args!("{:#010x}", cfg.ntime), - replay_nonce = format_args!("{:#010x}", cfg.nonce), - replay_version_bits = format_args!("{:#010x}", cfg.version_bits), - "BZM2 replay check configured" - ); - } - if let Some(cfg) = focused_readresult_config.as_ref() { - info!( - trace_nonce = ?cfg.adjusted_nonce.map(|n| format!("{:#010x}", n)), - trace_raw_nonce = ?cfg.raw_nonce.map(|n| format!("{:#010x}", n)), - break_on_match = cfg.break_on_match, - "BZM2 focused READRESULT tracing configured" - ); - } - let mut replay_check_hits: u64 = 0; let mut status_ticker = time::interval(Duration::from_secs(5)); status_ticker.set_missed_tick_behavior(time::MissedTickBehavior::Skip); @@ -2256,6 +1716,16 @@ where Some(cmd) = cmd_rx.recv() => { match cmd { ThreadCommand::UpdateTask { new_task, response_tx } => { + if let Some(ref old) = current_task { + debug!( + old_job = %old.template.id, + new_job = %new_task.template.id, + "Updating work" + ); + } else { + debug!(new_job = %new_task.template.id, "Updating work from idle"); + } + if !chip_initialized { match initialize_chip(&mut chip_responses, &mut chip_commands, &mut peripherals, asic_count).await { Ok(ids) => { @@ -2314,19 +1784,11 @@ where nonce_minus_value: BZM2_NONCE_MINUS, }; retain_assigned_task(&mut assigned_tasks, 
new_assigned_task); - if let Some(cfg) = replay_check_config.as_ref() - && let Some(assigned) = assigned_tasks.back() - && log_replay_check_for_task(cfg, assigned) - { - replay_check_hits = replay_check_hits.saturating_add(1); - trace!(replay_check_hits, "BZM2 replay check matched on update_task"); - } debug!( job_id = %new_task.template.id, - sequence_id = next_sequence_id, write_sequence_id, - "Sent BZM2 WRITEJOB payloads for update_task" + "Sent BZM2 work to chip" ); next_sequence_id = next_sequence_id.wrapping_add(1); @@ -2338,6 +1800,16 @@ where let _ = response_tx.send(Ok(old_task)); } ThreadCommand::ReplaceTask { new_task, response_tx } => { + if let Some(ref old) = current_task { + debug!( + old_job = %old.template.id, + new_job = %new_task.template.id, + "Replacing work" + ); + } else { + debug!(new_job = %new_task.template.id, "Replacing work from idle"); + } + if !chip_initialized { match initialize_chip(&mut chip_responses, &mut chip_commands, &mut peripherals, asic_count).await { Ok(ids) => { @@ -2396,22 +1868,11 @@ where nonce_minus_value: BZM2_NONCE_MINUS, }; retain_assigned_task(&mut assigned_tasks, new_assigned_task); - if let Some(cfg) = replay_check_config.as_ref() - && let Some(assigned) = assigned_tasks.back() - && log_replay_check_for_task(cfg, assigned) - { - replay_check_hits = replay_check_hits.saturating_add(1); - trace!( - replay_check_hits, - "BZM2 replay check matched on replace_task" - ); - } debug!( job_id = %new_task.template.id, - sequence_id = next_sequence_id, write_sequence_id, - "Sent BZM2 WRITEJOB payloads for replace_task" + "Sent BZM2 work to chip (old work invalidated)" ); next_sequence_id = next_sequence_id.wrapping_add(1); @@ -2423,6 +1884,8 @@ where let _ = response_tx.send(Ok(old_task)); } ThreadCommand::GoIdle { response_tx } => { + debug!("Going idle"); + let old_task = current_task.take(); assigned_tasks.clear(); { @@ -2432,6 +1895,7 @@ where let _ = response_tx.send(Ok(old_task)); } ThreadCommand::Shutdown => { + 
info!("Shutdown command received"); break; } } @@ -2439,12 +1903,8 @@ where Some(result) = chip_responses.next() => { match result { - Ok(protocol::Response::Noop { asic_hw_id, signature }) => { - trace!(asic_hw_id, signature = ?signature, "BZM2 NOOP response"); - } - Ok(protocol::Response::ReadReg { asic_hw_id, data }) => { - trace!(asic_hw_id, data = ?data, "BZM2 READREG response"); - } + Ok(protocol::Response::Noop { .. }) => {} + Ok(protocol::Response::ReadReg { .. }) => {} Ok(protocol::Response::DtsVs { asic_hw_id, data }) => { // Temporarily suppress noisy DTS/VS logging while debugging share flow. let _ = (asic_hw_id, data); @@ -2459,59 +1919,21 @@ where }) => { // status bit3 indicates a valid nonce candidate. if (result_status & 0x8) == 0 { - trace!( - asic_hw_id, - engine_id, - result_status, - nonce, - sequence, - timecode, - "Ignoring BZM2 READRESULT without valid-nonce flag" - ); continue; } let row = engine_id & 0x3f; let column = engine_id >> 6; if row >= ENGINE_ROWS || column >= ENGINE_COLS { - trace!( - asic_hw_id, - engine_id, - row, - column, - sequence, - "Ignoring BZM2 READRESULT with unmapped engine coordinates" - ); continue; } if is_invalid_engine(row, column) { - trace!( - asic_hw_id, - engine_id, - row, - column, - sequence, - "Ignoring BZM2 READRESULT from invalid engine coordinate" - ); continue; } let Some(logical_engine_id) = logical_engine_index(row, column) else { - trace!( - asic_hw_id, - engine_id, - row, - column, - sequence, - "Ignoring BZM2 READRESULT with unmapped logical engine index" - ); continue; }; - let sequence_id_raw = sequence / (MIDSTATE_COUNT as u8); - let sequence_masked = sequence & 0x7f; - let sequence_id_masked = sequence_masked / (MIDSTATE_COUNT as u8); - let micro_job_id_masked = sequence_masked % (MIDSTATE_COUNT as u8); - let timecode_masked = timecode & 0x7f; let Some(resolved_fields) = resolve_readresult_fields(sequence, timecode, |slot| { assigned_tasks.iter().rev().any(|task| { @@ -2519,59 +1941,10 @@ 
where }) }) else { - if sequence_lookup_diagnostic_samples < SEQUENCE_LOOKUP_DIAGNOSTIC_LIMIT { - sequence_lookup_diagnostic_samples = - sequence_lookup_diagnostic_samples.saturating_add(1); - let masked_match = assigned_tasks - .iter() - .rev() - .find(|task| { - readresult_sequence_slot(task.sequence_id) - == sequence_id_masked - }) - .map(|task| task.sequence_id); - let recent_slots: Vec = assigned_tasks - .iter() - .rev() - .take(6) - .map(|task| readresult_sequence_slot(task.sequence_id)) - .collect(); - let recent_sequence_ids: Vec = assigned_tasks - .iter() - .rev() - .take(6) - .map(|task| task.sequence_id) - .collect(); - debug!( - asic_hw_id, - engine_id, - sequence_raw = format_args!("{:#04x}", sequence), - sequence_id_raw, - sequence_masked = format_args!("{:#04x}", sequence_masked), - sequence_id_masked, - micro_job_id_masked, - timecode_raw = format_args!("{:#04x}", timecode), - timecode_masked = format_args!("{:#04x}", timecode_masked), - masked_lookup_hit = masked_match.is_some(), - masked_lookup_sequence_id = ?masked_match, - recent_slots = ?recent_slots, - recent_sequence_ids = ?recent_sequence_ids, - "BZM2 READRESULT lookup diagnostic" - ); - } - trace!( - asic_hw_id, - engine_id, - sequence_id_raw, - sequence, - timecode, - "Ignoring BZM2 READRESULT with no assigned task" - ); continue; }; let sequence_id = resolved_fields.sequence_id; let micro_job_id = resolved_fields.micro_job_id; - let sequence_effective = resolved_fields.sequence; let timecode_effective = resolved_fields.timecode; let sequence_slot = readresult_sequence_slot(sequence_id); let slot_candidates: Vec = assigned_tasks @@ -2582,14 +1955,6 @@ where .collect(); let slot_candidate_count = slot_candidates.len(); if slot_candidate_count == 0 { - trace!( - asic_hw_id, - engine_id, - sequence_id, - sequence_raw = sequence, - sequence_effective, - "Ignoring BZM2 READRESULT with no assigned task after field resolution" - ); continue; } @@ -2618,15 +1983,6 @@ where let nonce_adjusted = 
nonce_raw.wrapping_sub(candidate.nonce_minus_value); let nonce_submit = nonce_adjusted.swap_bytes(); - // Build a canonical header for logging/replay diagnostics. - let header = BlockHeader { - version: share_version, - prev_blockhash: candidate.task.template.prev_blockhash, - merkle_root: candidate.merkle_root, - time: share_ntime, - bits: candidate.task.template.bits, - nonce: nonce_submit, - }; let tail16 = bzm2_tail16_bytes(&candidate, share_ntime, nonce_submit); let hash_bytes = bzm2_double_sha_from_midstate_and_tail(&selected_midstate, &tail16); @@ -2637,13 +1993,7 @@ where &target_bytes, candidate.leading_zeros, ); - let observed_leading_zeros = - leading_zero_bits(&hash_bytes); - let achieved_difficulty = Difficulty::from_hash(&hash); - let target_difficulty = - Difficulty::from_target(candidate.task.share_target); - let achieved_difficulty_f64 = achieved_difficulty.as_f64(); - let target_difficulty_f64 = target_difficulty.as_f64(); + let observed_leading_zeros = leading_zero_bits(&hash_bytes); let rank = match check_result { Bzm2CheckResult::Correct => 3, Bzm2CheckResult::NotMeetTarget => 2, @@ -2655,22 +2005,14 @@ where selected_candidate = Some(SelectedReadResultCandidate { assigned: candidate, share_version, - selected_midstate, ntime_offset, share_ntime, nonce_adjusted, nonce_submit, - header, - tail16, hash_bytes, hash, - target_bytes, check_result, observed_leading_zeros, - achieved_difficulty, - target_difficulty, - achieved_difficulty_f64, - target_difficulty_f64, }); if rank == 3 { break; @@ -2681,124 +2023,19 @@ where let Some(SelectedReadResultCandidate { assigned, share_version, - selected_midstate, ntime_offset, share_ntime, nonce_adjusted, nonce_submit, - header, - tail16, hash_bytes, hash, - target_bytes, check_result, observed_leading_zeros, - achieved_difficulty, - target_difficulty, - achieved_difficulty_f64, - target_difficulty_f64, }) = selected_candidate else { - trace!( - asic_hw_id, - engine_id, - logical_engine_id, - sequence_id, - 
slot_candidate_count, - "Ignoring BZM2 READRESULT without a usable retained assignment" - ); continue; }; - if slot_candidate_count > 1 { - trace!( - asic_hw_id, - engine_id, - logical_engine_id, - sequence_id, - matched_sequence_id = assigned.sequence_id, - slot_candidate_count, - "BZM2 READRESULT evaluated retained slot history" - ); - } - - if resolved_fields.used_masked_fields { - trace!( - asic_hw_id, - engine_id, - sequence_raw = format_args!("{:#04x}", sequence), - sequence_effective = format_args!("{:#04x}", sequence_effective), - timecode_raw = format_args!("{:#04x}", timecode), - timecode_effective = format_args!("{:#04x}", timecode_effective), - "BZM2 READRESULT using masked sequence/timecode fields" - ); - } - - sanity_candidates_total = sanity_candidates_total.saturating_add(1); - if sanity_best_difficulty - .is_none_or(|best| achieved_difficulty > best) - { - sanity_best_difficulty = Some(achieved_difficulty); - } - - if let Some(cfg) = focused_readresult_config.as_ref() { - let adjusted_match = - cfg.adjusted_nonce.is_none_or(|n| n == nonce_adjusted); - let raw_match = cfg.raw_nonce.is_none_or(|n| n == nonce_raw); - if adjusted_match && raw_match { - let header_bytes = consensus::serialize(&header); - let merkle_root_bytes = consensus::serialize(&assigned.merkle_root); - let header_prefix: [u8; 64] = header_bytes[..64] - .try_into() - .expect("header prefix length is fixed"); - let derived_midstate = compute_midstate_le(&header_prefix); - let mut hash_rev = hash_bytes; - hash_rev.reverse(); - debug!( - asic_hw_id, - engine_hw_id = engine_id, - logical_engine_id, - sequence_raw = format_args!("{:#04x}", sequence), - sequence_effective = format_args!("{:#04x}", sequence_effective), - sequence_id, - micro_job_id, - timecode_raw = format_args!("{:#04x}", timecode), - timecode_effective = format_args!("{:#04x}", timecode_effective), - nonce_raw = format_args!("{:#010x}", nonce_raw), - nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), - 
nonce_submit = format_args!("{:#010x}", nonce_submit), - nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), - ntime_offset, - ntime = format_args!("{:#010x}", share_ntime), - version = format_args!("{:#010x}", share_version.to_consensus() as u32), - bits = format_args!("{:#010x}", assigned.task.template.bits.to_consensus()), - extranonce2 = ?assigned.task.en2, - merkle_root = %HexBytes(&merkle_root_bytes), - midstate = %HexBytes(&selected_midstate), - derived_midstate = %HexBytes(&derived_midstate), - header = %HexBytes(&header_bytes), - tail16 = %HexBytes(&tail16), - hash_bzm2_order = %HexBytes(&hash_bytes), - hash_reversed = %HexBytes(&hash_rev), - hash_msb_bzm2 = format_args!("{:#04x}", hash_bytes[31]), - target = %HexBytes(&target_bytes), - check_result = ?check_result, - observed_leading_zeros_bits = observed_leading_zeros, - achieved_difficulty = %achieved_difficulty, - achieved_difficulty_f64 = format_args!("{:.3e}", achieved_difficulty_f64), - target_difficulty = %target_difficulty, - target_difficulty_f64 = format_args!("{:.3e}", target_difficulty_f64), - "BZM2 focused READRESULT trace" - ); - if cfg.break_on_match { - panic!( - "BZM2 focused READRESULT breakpoint hit: engine_hw_id={:#x} logical_engine_id={} sequence={:#x} timecode={:#x} raw_nonce={:#010x} adjusted_nonce={:#010x}", - engine_id, logical_engine_id, sequence, timecode, nonce_raw, nonce_adjusted - ); - } - } - } - if check_result == Bzm2CheckResult::Error && observed_leading_zeros == 0 && zero_lz_diagnostic_samples < ZERO_LZ_DIAGNOSTIC_LIMIT @@ -2809,12 +2046,9 @@ where asic_hw_id, engine_hw_id = engine_id, logical_engine_id, - sequence_raw = format_args!("{:#04x}", sequence), - sequence_effective = format_args!("{:#04x}", sequence_effective), sequence_id, matched_sequence_id = assigned.sequence_id, micro_job_id, - timecode_raw = format_args!("{:#04x}", timecode), timecode_effective = format_args!("{:#04x}", timecode_effective), slot_candidate_count, nonce_raw = 
format_args!("{:#010x}", nonce_raw), @@ -2832,8 +2066,6 @@ where } if check_result == Bzm2CheckResult::Correct { - sanity_candidates_meet_task = - sanity_candidates_meet_task.saturating_add(1); let share = Share { nonce: nonce_submit, hash, @@ -2843,188 +2075,12 @@ where expected_work: assigned.task.share_target.to_work(), }; - if assigned.task.share_tx.send(share).await.is_ok() { + if assigned.task.share_tx.send(share).await.is_err() { + debug!("Share channel closed (task replaced)"); + } else { let mut s = status.write().expect("status lock poisoned"); s.chip_shares_found = s.chip_shares_found.saturating_add(1); } - - trace!( - asic_hw_id, - engine_hw_id = engine_id, - logical_engine_id, - sequence_id, - micro_job_id, - nonce = format_args!("{:#010x}", nonce_submit), - nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), - sequence, - timecode, - ntime_offset, - expected_sequence_id = assigned.sequence_id, - nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), - observed_leading_zeros_bits = observed_leading_zeros, - achieved_difficulty = %achieved_difficulty, - achieved_difficulty_f64 = format_args!("{:.3e}", achieved_difficulty_f64), - target_difficulty = %target_difficulty, - target_difficulty_f64 = format_args!("{:.3e}", target_difficulty_f64), - "BZM2 candidate met task share target" - ); - } else if check_result == Bzm2CheckResult::NotMeetTarget { - trace!( - asic_hw_id, - engine_hw_id = engine_id, - logical_engine_id, - sequence_id, - micro_job_id, - nonce = format_args!("{:#010x}", nonce_submit), - nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), - sequence, - timecode, - ntime_offset, - expected_sequence_id = assigned.sequence_id, - nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), - observed_leading_zeros_bits = observed_leading_zeros, - achieved_difficulty = %achieved_difficulty, - achieved_difficulty_f64 = format_args!("{:.3e}", achieved_difficulty_f64), - target_difficulty = %target_difficulty, - 
target_difficulty_f64 = format_args!("{:.3e}", target_difficulty_f64), - "BZM2 nonce filtered by share target" - ); - } else { - if sanity_diagnostic_samples < SANITY_DIAGNOSTIC_LIMIT { - sanity_diagnostic_samples = sanity_diagnostic_samples.saturating_add(1); - - let header_bytes = consensus::serialize(&header); - let base_ntime = assigned.task.ntime; - let mut probes = Vec::new(); - let focused = focused_readresult_diagnostic( - &assigned, - sequence, - timecode, - nonce_raw, - ); - - probes.push(format!( - "current({})", - validation_probe_summary( - &assigned, - share_version, - share_ntime, - nonce_submit, - ) - )); - probes.push(format!( - "raw_nonce({})", - validation_probe_summary(&assigned, share_version, share_ntime, nonce_raw) - )); - for gap in [0x14u32, 0x28, 0x4c, 0x98] { - probes.push(format!( - "gap_{gap:#x}({})", - validation_probe_summary( - &assigned, - share_version, - share_ntime, - nonce_raw.wrapping_sub(gap).swap_bytes(), - ) - )); - } - probes.push(format!( - "time_base({})", - validation_probe_summary( - &assigned, - share_version, - base_ntime, - nonce_submit, - ) - )); - probes.push(format!( - "time_plus_tc({})", - validation_probe_summary( - &assigned, - share_version, - base_ntime.wrapping_add(u32::from(timecode)), - nonce_submit, - ) - )); - probes.push(format!( - "time_minus_tc({})", - validation_probe_summary( - &assigned, - share_version, - base_ntime.wrapping_sub(u32::from(timecode)), - nonce_submit, - ) - )); - for (alt_idx, alt_version) in - assigned.microjob_versions.iter().copied().enumerate() - { - probes.push(format!( - "ver_mj{alt_idx}({})", - validation_probe_summary( - &assigned, - alt_version, - share_ntime, - nonce_submit, - ) - )); - } - - debug!( - asic_hw_id, - engine_id, - sequence_id, - micro_job_id, - sequence, - timecode, - result_status, - nonce_raw, - nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), - nonce_submit = format_args!("{:#010x}", nonce_submit), - assigned_sequence_id = assigned.sequence_id, 
- assigned_timestamp_count = assigned.timestamp_count, - assigned_nonce_minus = format_args!("{:#x}", assigned.nonce_minus_value), - base_ntime = format_args!("{:#x}", base_ntime), - selected_ntime = format_args!("{:#x}", share_ntime), - selected_version = format_args!("{:#x}", share_version.to_consensus() as u32), - bits = format_args!("{:#x}", assigned.task.template.bits.to_consensus()), - header = %HexBytes(&header_bytes), - focused = %focused, - probes = %probes.join(" | "), - "BZM2 READRESULT sanity diagnostic" - ); - } - - trace!( - asic_hw_id, - engine_hw_id = engine_id, - logical_engine_id, - sequence_id, - micro_job_id, - nonce = format_args!("{:#010x}", nonce_submit), - nonce_adjusted = format_args!("{:#010x}", nonce_adjusted), - sequence, - timecode, - ntime_offset, - expected_sequence_id = assigned.sequence_id, - nonce_minus_value = format_args!("{:#x}", assigned.nonce_minus_value), - observed_leading_zeros_bits = observed_leading_zeros, - hash_msb = format_args!("{:#04x}", hash_bytes[31]), - "BZM2 nonce rejected by leading-zeros sanity check" - ); - } - - if sanity_candidates_total.is_multiple_of(500) { - debug!( - total_candidates = sanity_candidates_total, - candidates_meeting_task_target = sanity_candidates_meet_task, - best_achieved_difficulty = %sanity_best_difficulty - .expect("sanity_best_difficulty is set when total_candidates > 0"), - best_achieved_difficulty_f64 = format_args!("{:.3e}", sanity_best_difficulty - .expect("sanity_best_difficulty is set when total_candidates > 0") - .as_f64()), - current_target_difficulty = %target_difficulty, - current_target_difficulty_f64 = format_args!("{:.3e}", target_difficulty_f64), - "BZM2 candidate sanity summary" - ); } } Err(e) => { @@ -3039,6 +2095,8 @@ where } } } + + debug!("BZM2 thread actor exiting"); } #[cfg(test)] From 84cb51f9665a252eaef89ff5d991e2d8ff3d8850 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Thu, 5 Mar 2026 23:36:22 -0500 Subject: [PATCH 15/19] refactor(bzm2): extract hash thread 
hashing helpers --- mujina-miner/src/asic/bzm2/thread.rs | 408 +----------------- mujina-miner/src/asic/bzm2/thread/hashing.rs | 413 +++++++++++++++++++ 2 files changed, 425 insertions(+), 396 deletions(-) create mode 100644 mujina-miner/src/asic/bzm2/thread/hashing.rs diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index f413e48..c82c4ec 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -17,12 +17,7 @@ use std::{ }; use async_trait::async_trait; -use bitcoin::{ - TxMerkleNode, - block::{Header as BlockHeader, Version as BlockVersion}, - consensus, - hashes::Hash as _, -}; +use bitcoin::{TxMerkleNode, block::Version as BlockVersion, hashes::Hash as _}; use futures::{SinkExt, sink::Sink, stream::Stream}; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{self, Duration, Instant}; @@ -38,6 +33,14 @@ use crate::{ tracing::prelude::*, types::{Difficulty, HashRate}, }; +#[cfg(test)] +use hashing::hash_bytes_bzm2_order; +use hashing::{ + Bzm2CheckResult, build_header_bytes, bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, + check_result, compute_midstate_le, leading_zero_bits, task_midstate_versions, +}; + +mod hashing; const ENGINE_ROWS: u16 = 20; const ENGINE_COLS: u16 = 12; @@ -92,19 +95,6 @@ const READRESULT_SLOT_HISTORY: usize = 16; const READRESULT_ASSIGNMENT_HISTORY_LIMIT: usize = READRESULT_SEQUENCE_SPACE * READRESULT_SLOT_HISTORY; const ZERO_LZ_DIAGNOSTIC_LIMIT: u64 = 24; -const SHA256_IV: [u32; 8] = [ - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, -]; -const SHA256_K: [u32; 64] = [ - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 
0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, -]; #[derive(Debug)] enum ThreadCommand { @@ -638,13 +628,6 @@ struct AssignedTask { nonce_minus_value: u32, } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum Bzm2CheckResult { - Correct, - NotMeetTarget, - Error, -} - struct SelectedReadResultCandidate { assigned: AssignedTask, share_version: BlockVersion, @@ -658,203 +641,6 @@ struct SelectedReadResultCandidate { observed_leading_zeros: u16, } -// Compute the four version-mask deltas used across the 4-midstate micro-jobs. -fn midstate_version_mask_variants(version_mask: u32) -> [u32; MIDSTATE_COUNT] { - if version_mask == 0 { - return [0, 0, 0, 0]; - } - - let mut mask = version_mask; - let mut cnt: u32 = 0; - while mask.is_multiple_of(16) { - cnt = cnt.saturating_add(1); - mask /= 16; - } - - let mut tmp_mask = 0u32; - if !mask.is_multiple_of(16) { - tmp_mask = mask % 16; - } else if !mask.is_multiple_of(8) { - tmp_mask = mask % 8; - } else if !mask.is_multiple_of(4) { - tmp_mask = mask % 4; - } else if !mask.is_multiple_of(2) { - tmp_mask = mask % 2; - } - - for _ in 0..cnt { - tmp_mask = tmp_mask.saturating_mul(16); - } - - [ - 0, - tmp_mask, - version_mask.saturating_sub(tmp_mask), - version_mask, - ] -} - -// Derive per-midstate block versions from the template base version and gp_bits mask. 
-fn task_midstate_versions(task: &HashTask) -> [BlockVersion; MIDSTATE_COUNT] { - let template = task.template.as_ref(); - let base = template.version.base().to_consensus() as u32; - let gp_mask = u16::from_be_bytes(*template.version.gp_bits_mask().as_bytes()) as u32; - let version_mask = gp_mask << 13; - let variants = midstate_version_mask_variants(version_mask); - - variants.map(|variant| BlockVersion::from_consensus((base | variant) as i32)) -} - -fn check_result(sha256_le: &[u8; 32], target_le: &[u8; 32], leading_zeros: u8) -> Bzm2CheckResult { - let mut i: usize = 31; - while i > 0 && sha256_le[i] == 0 { - i -= 1; - } - - let threshold = 31i32 - i32::from(leading_zeros / 8); - if (i as i32) > threshold { - return Bzm2CheckResult::Error; - } - if (i as i32) == threshold { - let mut bit_count = leading_zeros % 8; - let mut bit_index = 7u8; - while bit_count > 0 { - if (sha256_le[i] & (1u8 << bit_index)) != 0 { - return Bzm2CheckResult::Error; - } - bit_count -= 1; - bit_index = bit_index.saturating_sub(1); - } - } - - for k in (1..=31).rev() { - if sha256_le[k] < target_le[k] { - return Bzm2CheckResult::Correct; - } - if sha256_le[k] > target_le[k] { - return Bzm2CheckResult::NotMeetTarget; - } - } - - Bzm2CheckResult::Correct -} - -fn leading_zero_bits(sha256_le: &[u8; 32]) -> u16 { - let mut bits = 0u16; - for byte in sha256_le.iter().rev() { - if *byte == 0 { - bits = bits.saturating_add(8); - continue; - } - bits = bits.saturating_add(byte.leading_zeros() as u16); - return bits; - } - bits -} - -fn sha256_compress_state(initial_state: [u32; 8], block: &[u8; 64]) -> [u32; 8] { - let mut w = [0u32; 64]; - for (i, chunk) in block.chunks_exact(4).enumerate() { - w[i] = u32::from_be_bytes(chunk.try_into().expect("chunk size is 4")); - } - for i in 16..64 { - let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); - let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); - w[i] = w[i - 16] - .wrapping_add(s0) 
- .wrapping_add(w[i - 7]) - .wrapping_add(s1); - } - - let mut a = initial_state[0]; - let mut b = initial_state[1]; - let mut c = initial_state[2]; - let mut d = initial_state[3]; - let mut e = initial_state[4]; - let mut f = initial_state[5]; - let mut g = initial_state[6]; - let mut h = initial_state[7]; - - for i in 0..64 { - let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); - let ch = (e & f) ^ ((!e) & g); - let t1 = h - .wrapping_add(s1) - .wrapping_add(ch) - .wrapping_add(SHA256_K[i]) - .wrapping_add(w[i]); - let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); - let maj = (a & b) ^ (a & c) ^ (b & c); - let t2 = s0.wrapping_add(maj); - - h = g; - g = f; - f = e; - e = d.wrapping_add(t1); - d = c; - c = b; - b = a; - a = t1.wrapping_add(t2); - } - - [ - initial_state[0].wrapping_add(a), - initial_state[1].wrapping_add(b), - initial_state[2].wrapping_add(c), - initial_state[3].wrapping_add(d), - initial_state[4].wrapping_add(e), - initial_state[5].wrapping_add(f), - initial_state[6].wrapping_add(g), - initial_state[7].wrapping_add(h), - ] -} - -fn sha256_state_to_be_bytes(state: [u32; 8]) -> [u8; 32] { - let mut out = [0u8; 32]; - for (i, word) in state.iter().copied().enumerate() { - out[i * 4..i * 4 + 4].copy_from_slice(&word.to_be_bytes()); - } - out -} - -fn bzm2_double_sha_from_midstate_and_tail(midstate_le: &[u8; 32], tail16: &[u8; 16]) -> [u8; 32] { - // 1) resume SHA256 from midstate with 16-byte tail - // 2) SHA256 the resulting 32-byte digest again. 
- let mut resumed_state = [0u32; 8]; - for (i, chunk) in midstate_le.chunks_exact(4).enumerate() { - resumed_state[i] = u32::from_le_bytes(chunk.try_into().expect("chunk size is 4")); - } - - let mut first_block = [0u8; 64]; - first_block[..16].copy_from_slice(tail16); - first_block[16] = 0x80; - first_block[56..64].copy_from_slice(&(80u64 * 8).to_be_bytes()); - let first_state = sha256_compress_state(resumed_state, &first_block); - let first_digest = sha256_state_to_be_bytes(first_state); - - let mut second_block = [0u8; 64]; - second_block[..32].copy_from_slice(&first_digest); - second_block[32] = 0x80; - second_block[56..64].copy_from_slice(&(32u64 * 8).to_be_bytes()); - let second_state = sha256_compress_state(SHA256_IV, &second_block); - sha256_state_to_be_bytes(second_state) -} - -fn bzm2_tail16_bytes(assigned: &AssignedTask, ntime: u32, nonce_submit: u32) -> [u8; 16] { - let merkle_root_bytes = consensus::serialize(&assigned.merkle_root); - let mut tail16 = [0u8; 16]; - tail16[0..4].copy_from_slice(&merkle_root_bytes[28..32]); - tail16[4..8].copy_from_slice(&ntime.to_le_bytes()); - tail16[8..12].copy_from_slice(&assigned.task.template.bits.to_consensus().to_le_bytes()); - tail16[12..16].copy_from_slice(&nonce_submit.to_le_bytes()); - tail16 -} - -#[cfg(test)] -fn hash_bytes_bzm2_order(hash: &bitcoin::BlockHash) -> [u8; 32] { - *hash.as_byte_array() -} - fn compute_task_merkle_root(task: &HashTask) -> Result { let template = task.template.as_ref(); match &template.merkle_root { @@ -872,93 +658,6 @@ fn compute_task_merkle_root(task: &HashTask) -> Result Result<[u8; 80], HashThreadError> { - let template = task.template.as_ref(); - let header = BlockHeader { - version, - prev_blockhash: template.prev_blockhash, - merkle_root, - time: task.ntime, - bits: template.bits, - nonce: 0, - }; - - let bytes = consensus::serialize(&header); - let len = bytes.len(); - bytes.try_into().map_err(|_| { - HashThreadError::WorkAssignmentFailed(format!("unexpected serialized 
header size: {}", len)) - }) -} - -fn compute_midstate_le(header_prefix_64: &[u8; 64]) -> [u8; 32] { - // Midstate derivation: SHA256-compress the first 64-byte header block and - // send the raw SHA256 state words in little-endian byte order (OpenSSL ctx.h on x86). - let mut w = [0u32; 64]; - for (i, chunk) in header_prefix_64.chunks_exact(4).enumerate() { - w[i] = u32::from_be_bytes(chunk.try_into().expect("chunk size is 4")); - } - for i in 16..64 { - let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); - let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); - w[i] = w[i - 16] - .wrapping_add(s0) - .wrapping_add(w[i - 7]) - .wrapping_add(s1); - } - - let mut a = SHA256_IV[0]; - let mut b = SHA256_IV[1]; - let mut c = SHA256_IV[2]; - let mut d = SHA256_IV[3]; - let mut e = SHA256_IV[4]; - let mut f = SHA256_IV[5]; - let mut g = SHA256_IV[6]; - let mut h = SHA256_IV[7]; - - for i in 0..64 { - let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); - let ch = (e & f) ^ ((!e) & g); - let t1 = h - .wrapping_add(s1) - .wrapping_add(ch) - .wrapping_add(SHA256_K[i]) - .wrapping_add(w[i]); - let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); - let maj = (a & b) ^ (a & c) ^ (b & c); - let t2 = s0.wrapping_add(maj); - - h = g; - g = f; - f = e; - e = d.wrapping_add(t1); - d = c; - c = b; - b = a; - a = t1.wrapping_add(t2); - } - - let state = [ - SHA256_IV[0].wrapping_add(a), - SHA256_IV[1].wrapping_add(b), - SHA256_IV[2].wrapping_add(c), - SHA256_IV[3].wrapping_add(d), - SHA256_IV[4].wrapping_add(e), - SHA256_IV[5].wrapping_add(f), - SHA256_IV[6].wrapping_add(g), - SHA256_IV[7].wrapping_add(h), - ]; - - let mut out = [0u8; 32]; - for (i, word) in state.iter().copied().enumerate() { - out[i * 4..i * 4 + 4].copy_from_slice(&word.to_le_bytes()); - } - out -} - fn task_to_bzm2_payload( task: &HashTask, merkle_root: TxMerkleNode, @@ -2103,7 +1802,7 @@ where mod tests { use 
std::sync::Arc; - use bitcoin::{block::Header as BlockHeader, hashes::Hash as _}; + use bitcoin::block::Header as BlockHeader; use bytes::BytesMut; use serde_json::json; use tokio::sync::mpsc; @@ -2123,23 +1822,10 @@ mod tests { AssignedTask, BZM2_NONCE_MINUS, Bzm2CheckResult, ENGINE_LEADING_ZEROS, ENGINE_TIMESTAMP_COUNT, EngineAssignment, MIDSTATE_COUNT, WORK_ENGINE_COUNT, bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, check_result, - hash_bytes_bzm2_order, midstate_version_mask_variants, protocol, resolve_readresult_fields, - task_midstate_versions, task_to_bzm2_payload, + hash_bytes_bzm2_order, protocol, resolve_readresult_fields, task_midstate_versions, + task_to_bzm2_payload, }; - #[test] - fn test_midstate_version_mask_variants_for_full_mask() { - assert_eq!( - midstate_version_mask_variants(0x1fff_e000), - [0x0000_0000, 0x0000_e000, 0x1fff_0000, 0x1fff_e000] - ); - } - - #[test] - fn test_midstate_version_mask_variants_for_zero_mask() { - assert_eq!(midstate_version_mask_variants(0), [0, 0, 0, 0]); - } - #[test] fn test_resolve_readresult_fields_prefers_raw_when_slot_exists() { let active_slots = [32u8, 0u8]; @@ -2171,76 +1857,6 @@ mod tests { assert!(fields.is_none()); } - #[test] - fn test_check_result_leading_zeros_error() { - let mut hash = [0u8; 32]; - let target = [0xffu8; 32]; - hash[31] = 0x80; - assert_eq!(check_result(&hash, &target, 32), Bzm2CheckResult::Error); - } - - #[test] - fn test_check_result_accepts_required_leading_zeros() { - let mut hash = [0u8; 32]; - let target = [0xffu8; 32]; - hash[27] = 0x3f; - assert_eq!(check_result(&hash, &target, 34), Bzm2CheckResult::Correct); - } - - #[test] - fn test_check_result_rejects_missing_partial_zero_bits() { - let mut hash = [0u8; 32]; - let target = [0xffu8; 32]; - hash[27] = 0x40; - assert_eq!(check_result(&hash, &target, 34), Bzm2CheckResult::Error); - } - - #[test] - fn test_check_result_target_compare() { - let mut hash = [0u8; 32]; - let mut target = [0u8; 32]; - - hash[1] = 0x10; 
- target[1] = 0x20; - assert_eq!(check_result(&hash, &target, 32), Bzm2CheckResult::Correct); - - hash[1] = 0x30; - target[1] = 0x20; - assert_eq!( - check_result(&hash, &target, 32), - Bzm2CheckResult::NotMeetTarget - ); - } - - #[test] - fn test_hash_bytes_bzm2_order_keeps_digest_order() { - let src = core::array::from_fn(|i| i as u8); - let hash = bitcoin::BlockHash::from_byte_array(src); - assert_eq!(hash_bytes_bzm2_order(&hash), src); - } - - #[test] - fn test_bzm2_double_sha_matches_known_trace_sample() { - // Captured from birds-bzm2 valid-share-hash-input logging. - let midstate = - hex::decode("07348faef527b8ec3733171cb0781bc545efb4220d71e0a5b54af23de2106bfd") - .expect("midstate hex should parse"); - let tail16 = - hex::decode("ef70e3ac38979a6903f301176467a52b").expect("tail16 hex should parse"); - let expected_double_sha = - hex::decode("25ef6a2327c5304bd263126a6a38ad16c3b27cd8b647085624a7130000000000") - .expect("double sha hex should parse"); - let midstate: [u8; 32] = midstate.try_into().expect("midstate must be 32 bytes"); - let tail16: [u8; 16] = tail16.try_into().expect("tail16 must be 16 bytes"); - let expected_double_sha: [u8; 32] = expected_double_sha - .try_into() - .expect("double sha must be 32 bytes"); - assert_eq!( - bzm2_double_sha_from_midstate_and_tail(&midstate, &tail16), - expected_double_sha - ); - } - #[test] fn test_readresult_hash_check_with_known_good_bzm2_share() { // Job + accepted share captured from known working messages diff --git a/mujina-miner/src/asic/bzm2/thread/hashing.rs b/mujina-miner/src/asic/bzm2/thread/hashing.rs new file mode 100644 index 0000000..dcbe240 --- /dev/null +++ b/mujina-miner/src/asic/bzm2/thread/hashing.rs @@ -0,0 +1,413 @@ +#[cfg(test)] +use bitcoin::hashes::Hash as _; +use bitcoin::{ + TxMerkleNode, + block::{Header as BlockHeader, Version as BlockVersion}, + consensus, +}; + +use crate::asic::hash_thread::{HashTask, HashThreadError}; + +use super::{AssignedTask, MIDSTATE_COUNT}; + +const SHA256_IV: 
[u32; 8] = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; +const SHA256_K: [u32; 64] = [ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +]; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum Bzm2CheckResult { + Correct, + NotMeetTarget, + Error, +} + +pub(super) fn midstate_version_mask_variants(version_mask: u32) -> [u32; MIDSTATE_COUNT] { + if version_mask == 0 { + return [0, 0, 0, 0]; + } + + let mut mask = version_mask; + let mut cnt: u32 = 0; + while mask.is_multiple_of(16) { + cnt = cnt.saturating_add(1); + mask /= 16; + } + + let mut tmp_mask = 0u32; + if !mask.is_multiple_of(16) { + tmp_mask = mask % 16; + } else if !mask.is_multiple_of(8) { + tmp_mask = mask % 8; + } else if !mask.is_multiple_of(4) { + tmp_mask = mask % 4; + } else if !mask.is_multiple_of(2) { + tmp_mask = mask % 2; + } + + for _ in 0..cnt { + tmp_mask = tmp_mask.saturating_mul(16); + } + + [ + 0, + tmp_mask, + version_mask.saturating_sub(tmp_mask), + version_mask, + ] +} + +pub(super) fn task_midstate_versions(task: &HashTask) -> [BlockVersion; MIDSTATE_COUNT] { + let template = task.template.as_ref(); + let base = template.version.base().to_consensus() as u32; + let gp_mask = 
u16::from_be_bytes(*template.version.gp_bits_mask().as_bytes()) as u32; + let version_mask = gp_mask << 13; + let variants = midstate_version_mask_variants(version_mask); + + variants.map(|variant| BlockVersion::from_consensus((base | variant) as i32)) +} + +pub(super) fn check_result( + sha256_le: &[u8; 32], + target_le: &[u8; 32], + leading_zeros: u8, +) -> Bzm2CheckResult { + let mut i: usize = 31; + while i > 0 && sha256_le[i] == 0 { + i -= 1; + } + + let threshold = 31i32 - i32::from(leading_zeros / 8); + if (i as i32) > threshold { + return Bzm2CheckResult::Error; + } + if (i as i32) == threshold { + let mut bit_count = leading_zeros % 8; + let mut bit_index = 7u8; + while bit_count > 0 { + if (sha256_le[i] & (1u8 << bit_index)) != 0 { + return Bzm2CheckResult::Error; + } + bit_count -= 1; + bit_index = bit_index.saturating_sub(1); + } + } + + for k in (1..=31).rev() { + if sha256_le[k] < target_le[k] { + return Bzm2CheckResult::Correct; + } + if sha256_le[k] > target_le[k] { + return Bzm2CheckResult::NotMeetTarget; + } + } + + Bzm2CheckResult::Correct +} + +pub(super) fn leading_zero_bits(sha256_le: &[u8; 32]) -> u16 { + let mut bits = 0u16; + for byte in sha256_le.iter().rev() { + if *byte == 0 { + bits = bits.saturating_add(8); + continue; + } + bits = bits.saturating_add(byte.leading_zeros() as u16); + return bits; + } + bits +} + +fn sha256_compress_state(initial_state: [u32; 8], block: &[u8; 64]) -> [u32; 8] { + let mut w = [0u32; 64]; + for (i, chunk) in block.chunks_exact(4).enumerate() { + w[i] = u32::from_be_bytes(chunk.try_into().expect("chunk size is 4")); + } + for i in 16..64 { + let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); + let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); + w[i] = w[i - 16] + .wrapping_add(s0) + .wrapping_add(w[i - 7]) + .wrapping_add(s1); + } + + let mut a = initial_state[0]; + let mut b = initial_state[1]; + let mut c = initial_state[2]; + let mut d = 
initial_state[3]; + let mut e = initial_state[4]; + let mut f = initial_state[5]; + let mut g = initial_state[6]; + let mut h = initial_state[7]; + + for i in 0..64 { + let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); + let ch = (e & f) ^ ((!e) & g); + let t1 = h + .wrapping_add(s1) + .wrapping_add(ch) + .wrapping_add(SHA256_K[i]) + .wrapping_add(w[i]); + let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); + let maj = (a & b) ^ (a & c) ^ (b & c); + let t2 = s0.wrapping_add(maj); + + h = g; + g = f; + f = e; + e = d.wrapping_add(t1); + d = c; + c = b; + b = a; + a = t1.wrapping_add(t2); + } + + [ + initial_state[0].wrapping_add(a), + initial_state[1].wrapping_add(b), + initial_state[2].wrapping_add(c), + initial_state[3].wrapping_add(d), + initial_state[4].wrapping_add(e), + initial_state[5].wrapping_add(f), + initial_state[6].wrapping_add(g), + initial_state[7].wrapping_add(h), + ] +} + +fn sha256_state_to_be_bytes(state: [u32; 8]) -> [u8; 32] { + let mut out = [0u8; 32]; + for (i, word) in state.iter().copied().enumerate() { + out[i * 4..i * 4 + 4].copy_from_slice(&word.to_be_bytes()); + } + out +} + +pub(super) fn bzm2_double_sha_from_midstate_and_tail( + midstate_le: &[u8; 32], + tail16: &[u8; 16], +) -> [u8; 32] { + let mut resumed_state = [0u32; 8]; + for (i, chunk) in midstate_le.chunks_exact(4).enumerate() { + resumed_state[i] = u32::from_le_bytes(chunk.try_into().expect("chunk size is 4")); + } + + let mut first_block = [0u8; 64]; + first_block[..16].copy_from_slice(tail16); + first_block[16] = 0x80; + first_block[56..64].copy_from_slice(&(80u64 * 8).to_be_bytes()); + let first_state = sha256_compress_state(resumed_state, &first_block); + let first_digest = sha256_state_to_be_bytes(first_state); + + let mut second_block = [0u8; 64]; + second_block[..32].copy_from_slice(&first_digest); + second_block[32] = 0x80; + second_block[56..64].copy_from_slice(&(32u64 * 8).to_be_bytes()); + let second_state = 
sha256_compress_state(SHA256_IV, &second_block); + sha256_state_to_be_bytes(second_state) +} + +pub(super) fn bzm2_tail16_bytes( + assigned: &AssignedTask, + ntime: u32, + nonce_submit: u32, +) -> [u8; 16] { + let merkle_root_bytes = consensus::serialize(&assigned.merkle_root); + let mut tail16 = [0u8; 16]; + tail16[0..4].copy_from_slice(&merkle_root_bytes[28..32]); + tail16[4..8].copy_from_slice(&ntime.to_le_bytes()); + tail16[8..12].copy_from_slice(&assigned.task.template.bits.to_consensus().to_le_bytes()); + tail16[12..16].copy_from_slice(&nonce_submit.to_le_bytes()); + tail16 +} + +pub(super) fn build_header_bytes( + task: &HashTask, + version: BlockVersion, + merkle_root: TxMerkleNode, +) -> Result<[u8; 80], HashThreadError> { + let template = task.template.as_ref(); + let header = BlockHeader { + version, + prev_blockhash: template.prev_blockhash, + merkle_root, + time: task.ntime, + bits: template.bits, + nonce: 0, + }; + + let bytes = consensus::serialize(&header); + let len = bytes.len(); + bytes.try_into().map_err(|_| { + HashThreadError::WorkAssignmentFailed(format!("unexpected serialized header size: {len}")) + }) +} + +pub(super) fn compute_midstate_le(header_prefix_64: &[u8; 64]) -> [u8; 32] { + let mut w = [0u32; 64]; + for (i, chunk) in header_prefix_64.chunks_exact(4).enumerate() { + w[i] = u32::from_be_bytes(chunk.try_into().expect("chunk size is 4")); + } + for i in 16..64 { + let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); + let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); + w[i] = w[i - 16] + .wrapping_add(s0) + .wrapping_add(w[i - 7]) + .wrapping_add(s1); + } + + let mut a = SHA256_IV[0]; + let mut b = SHA256_IV[1]; + let mut c = SHA256_IV[2]; + let mut d = SHA256_IV[3]; + let mut e = SHA256_IV[4]; + let mut f = SHA256_IV[5]; + let mut g = SHA256_IV[6]; + let mut h = SHA256_IV[7]; + + for i in 0..64 { + let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); + 
let ch = (e & f) ^ ((!e) & g); + let t1 = h + .wrapping_add(s1) + .wrapping_add(ch) + .wrapping_add(SHA256_K[i]) + .wrapping_add(w[i]); + let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); + let maj = (a & b) ^ (a & c) ^ (b & c); + let t2 = s0.wrapping_add(maj); + + h = g; + g = f; + f = e; + e = d.wrapping_add(t1); + d = c; + c = b; + b = a; + a = t1.wrapping_add(t2); + } + + let state = [ + SHA256_IV[0].wrapping_add(a), + SHA256_IV[1].wrapping_add(b), + SHA256_IV[2].wrapping_add(c), + SHA256_IV[3].wrapping_add(d), + SHA256_IV[4].wrapping_add(e), + SHA256_IV[5].wrapping_add(f), + SHA256_IV[6].wrapping_add(g), + SHA256_IV[7].wrapping_add(h), + ]; + + let mut out = [0u8; 32]; + for (i, word) in state.iter().copied().enumerate() { + out[i * 4..i * 4 + 4].copy_from_slice(&word.to_le_bytes()); + } + out +} + +#[cfg(test)] +pub(super) fn hash_bytes_bzm2_order(hash: &bitcoin::BlockHash) -> [u8; 32] { + *hash.as_byte_array() +} + +#[cfg(test)] +mod tests { + use bitcoin::hashes::Hash as _; + + use super::{ + Bzm2CheckResult, bzm2_double_sha_from_midstate_and_tail, check_result, + hash_bytes_bzm2_order, midstate_version_mask_variants, + }; + + #[test] + fn test_midstate_version_mask_variants_for_full_mask() { + assert_eq!( + midstate_version_mask_variants(0x1fff_e000), + [0x0000_0000, 0x0000_e000, 0x1fff_0000, 0x1fff_e000] + ); + } + + #[test] + fn test_midstate_version_mask_variants_for_zero_mask() { + assert_eq!(midstate_version_mask_variants(0), [0, 0, 0, 0]); + } + + #[test] + fn test_check_result_leading_zeros_error() { + let mut hash = [0u8; 32]; + let target = [0xffu8; 32]; + hash[31] = 0x80; + assert_eq!(check_result(&hash, &target, 32), Bzm2CheckResult::Error); + } + + #[test] + fn test_check_result_accepts_required_leading_zeros() { + let mut hash = [0u8; 32]; + let target = [0xffu8; 32]; + hash[27] = 0x3f; + assert_eq!(check_result(&hash, &target, 34), Bzm2CheckResult::Correct); + } + + #[test] + fn 
test_check_result_rejects_missing_partial_zero_bits() { + let mut hash = [0u8; 32]; + let target = [0xffu8; 32]; + hash[27] = 0x40; + assert_eq!(check_result(&hash, &target, 34), Bzm2CheckResult::Error); + } + + #[test] + fn test_check_result_target_compare() { + let mut hash = [0u8; 32]; + let mut target = [0u8; 32]; + + hash[1] = 0x10; + target[1] = 0x20; + assert_eq!(check_result(&hash, &target, 32), Bzm2CheckResult::Correct); + + hash[1] = 0x30; + target[1] = 0x20; + assert_eq!( + check_result(&hash, &target, 32), + Bzm2CheckResult::NotMeetTarget + ); + } + + #[test] + fn test_hash_bytes_bzm2_order_keeps_digest_order() { + let src = core::array::from_fn(|i| i as u8); + let hash = bitcoin::BlockHash::from_byte_array(src); + assert_eq!(hash_bytes_bzm2_order(&hash), src); + } + + #[test] + fn test_bzm2_double_sha_matches_known_trace_sample() { + let midstate = + hex::decode("07348faef527b8ec3733171cb0781bc545efb4220d71e0a5b54af23de2106bfd") + .expect("midstate hex should parse"); + let tail16 = + hex::decode("ef70e3ac38979a6903f301176467a52b").expect("tail16 hex should parse"); + let expected_double_sha = + hex::decode("25ef6a2327c5304bd263126a6a38ad16c3b27cd8b647085624a7130000000000") + .expect("double sha hex should parse"); + let midstate: [u8; 32] = midstate.try_into().expect("midstate must be 32 bytes"); + let tail16: [u8; 16] = tail16.try_into().expect("tail16 must be 16 bytes"); + let expected_double_sha: [u8; 32] = expected_double_sha + .try_into() + .expect("double sha must be 32 bytes"); + assert_eq!( + bzm2_double_sha_from_midstate_and_tail(&midstate, &tail16), + expected_double_sha + ); + } +} From 0004077c0ac0dcdd7ce4cc24433054074adbe951 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Thu, 5 Mar 2026 23:40:47 -0500 Subject: [PATCH 16/19] refactor(bzm2): extract hash thread work helpers --- mujina-miner/src/asic/bzm2/thread.rs | 280 +-------------------- mujina-miner/src/asic/bzm2/thread/work.rs | 290 ++++++++++++++++++++++ 2 files changed, 300 
insertions(+), 270 deletions(-) create mode 100644 mujina-miner/src/asic/bzm2/thread/work.rs diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index c82c4ec..58b1d87 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -17,7 +17,7 @@ use std::{ }; use async_trait::async_trait; -use bitcoin::{TxMerkleNode, block::Version as BlockVersion, hashes::Hash as _}; +use bitcoin::{block::Version as BlockVersion, hashes::Hash as _}; use futures::{SinkExt, sink::Sink, stream::Stream}; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{self, Duration, Instant}; @@ -29,18 +29,23 @@ use crate::{ BoardPeripherals, HashTask, HashThread, HashThreadCapabilities, HashThreadError, HashThreadEvent, HashThreadStatus, Share, ThreadRemovalSignal, }, - job_source::{Extranonce2, MerkleRootKind}, tracing::prelude::*, types::{Difficulty, HashRate}, }; #[cfg(test)] use hashing::hash_bytes_bzm2_order; use hashing::{ - Bzm2CheckResult, build_header_bytes, bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, - check_result, compute_midstate_le, leading_zero_bits, task_midstate_versions, + Bzm2CheckResult, bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, check_result, + leading_zero_bits, task_midstate_versions, }; +use work::{ + AssignedTask, engine_id, is_invalid_engine, logical_engine_index, send_task_to_all_engines, +}; +#[cfg(test)] +use work::{EngineAssignment, task_to_bzm2_payload}; mod hashing; +mod work; const ENGINE_ROWS: u16 = 20; const ENGINE_COLS: u16 = 12; @@ -448,79 +453,6 @@ fn calc_pll_dividers(freq_mhz: f32, post1_divider: u8) -> (u32, u32) { (post_div, fb_div) } -fn engine_id(row: u16, col: u16) -> u16 { - ((col & 0x3f) << 6) | (row & 0x3f) -} - -fn is_invalid_engine(row: u16, col: u16) -> bool { - (row == INVALID_ENGINE_0_ROW && col == INVALID_ENGINE_0_COL) - || (row == INVALID_ENGINE_1_ROW && col == INVALID_ENGINE_1_COL) - || (row == INVALID_ENGINE_2_ROW && col == 
INVALID_ENGINE_2_COL) - || (row == INVALID_ENGINE_3_ROW && col == INVALID_ENGINE_3_COL) -} - -fn logical_engine_index(row: u16, col: u16) -> Option { - if row >= ENGINE_ROWS || col >= ENGINE_COLS || is_invalid_engine(row, col) { - return None; - } - - let mut logical = 0usize; - for r in 0..ENGINE_ROWS { - for c in 0..ENGINE_COLS { - if is_invalid_engine(r, c) { - continue; - } - if r == row && c == col { - return Some(logical); - } - logical = logical.saturating_add(1); - } - } - - None -} - -fn engine_extranonce2_for_logical_engine( - task: &HashTask, - logical_engine: usize, -) -> Option { - let base = task.en2?; - let offset = (logical_engine as u64).saturating_add(ENGINE_EN2_OFFSET_START); - - if let Some(range) = task.en2_range.as_ref() - && range.size == base.size() - { - let value = if range.min == 0 && range.max == u64::MAX { - base.value().wrapping_add(offset) - } else { - let span = range.max.saturating_sub(range.min).saturating_add(1); - let base_value = if base.value() < range.min || base.value() > range.max { - range.min - } else { - base.value() - }; - let rel = base_value.saturating_sub(range.min); - range - .min - .saturating_add((rel.saturating_add(offset % span)) % span) - }; - return Extranonce2::new(value, base.size()).ok(); - } - - let width_bits = u32::from(base.size()).saturating_mul(8); - let max = if width_bits >= 64 { - u64::MAX - } else { - (1u64 << width_bits) - 1 - }; - let value = if max == u64::MAX { - base.value().wrapping_add(offset) - } else { - base.value().wrapping_add(offset) & max - }; - Extranonce2::new(value, base.size()).ok() -} - fn readresult_sequence_slot(sequence_id: u8) -> u8 { sequence_id & 0x3f } @@ -603,31 +535,6 @@ fn resolve_readresult_fields( None } -struct TaskJobPayload { - midstates: [[u8; 32]; MIDSTATE_COUNT], - merkle_residue: u32, - timestamp: u32, -} - -#[derive(Clone)] -struct EngineAssignment { - merkle_root: TxMerkleNode, - extranonce2: Option, - midstates: [[u8; 32]; MIDSTATE_COUNT], -} - 
-#[derive(Clone)] -struct AssignedTask { - task: HashTask, - merkle_root: TxMerkleNode, - engine_assignments: Arc<[EngineAssignment]>, - microjob_versions: [BlockVersion; MIDSTATE_COUNT], - sequence_id: u8, - timestamp_count: u8, - leading_zeros: u8, - nonce_minus_value: u32, -} - struct SelectedReadResultCandidate { assigned: AssignedTask, share_version: BlockVersion, @@ -641,173 +548,6 @@ struct SelectedReadResultCandidate { observed_leading_zeros: u16, } -fn compute_task_merkle_root(task: &HashTask) -> Result { - let template = task.template.as_ref(); - match &template.merkle_root { - MerkleRootKind::Computed(_) => { - let en2 = task.en2.as_ref().ok_or_else(|| { - HashThreadError::WorkAssignmentFailed( - "EN2 is required for computed merkle roots".into(), - ) - })?; - template.compute_merkle_root(en2).map_err(|e| { - HashThreadError::WorkAssignmentFailed(format!("failed to compute merkle root: {e}")) - }) - } - MerkleRootKind::Fixed(merkle_root) => Ok(*merkle_root), - } -} - -fn task_to_bzm2_payload( - task: &HashTask, - merkle_root: TxMerkleNode, - versions: [BlockVersion; MIDSTATE_COUNT], -) -> Result { - let mut midstates = [[0u8; 32]; MIDSTATE_COUNT]; - let mut merkle_residue = 0u32; - let mut timestamp = 0u32; - - for (idx, midstate) in midstates.iter_mut().enumerate() { - let header = build_header_bytes(task, versions[idx], merkle_root)?; - let header_prefix: [u8; 64] = header[..64] - .try_into() - .expect("header prefix length is fixed"); - - *midstate = compute_midstate_le(&header_prefix); - - if idx == 0 { - merkle_residue = u32::from_be_bytes( - header[64..68] - .try_into() - .expect("slice length is exactly 4 bytes"), - ); - timestamp = u32::from_be_bytes( - header[68..72] - .try_into() - .expect("slice length is exactly 4 bytes"), - ); - } - } - - Ok(TaskJobPayload { - midstates, - merkle_residue, - timestamp, - }) -} - -async fn send_task_to_all_engines( - chip_commands: &mut W, - task: &HashTask, - versions: [BlockVersion; MIDSTATE_COUNT], - 
sequence_id: u8, - zeros_to_find: u8, - timestamp_count: u8, -) -> Result, HashThreadError> -where - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - // `data[2]` comes from big-endian nbits bytes copied into - // a little-endian u32, so the numeric value is byte-swapped consensus nbits. - let target = task.template.bits.to_consensus().swap_bytes(); - let timestamp_reg_value = ((AUTO_CLOCK_UNGATE & 0x1) << 7) | (timestamp_count & 0x7f); - let mut engine_assignments = Vec::with_capacity(WORK_ENGINE_COUNT); - - for row in 0..ENGINE_ROWS { - for col in 0..ENGINE_COLS { - if is_invalid_engine(row, col) { - continue; - } - - let Some(logical_engine_id) = logical_engine_index(row, col) else { - continue; - }; - let engine = engine_id(row, col); - let mut engine_task = task.clone(); - engine_task.en2 = engine_extranonce2_for_logical_engine(task, logical_engine_id); - let merkle_root = compute_task_merkle_root(&engine_task).map_err(|e| { - HashThreadError::WorkAssignmentFailed(format!( - "failed to derive per-engine merkle root for logical engine {logical_engine_id} (row {row} col {col}): {e}" - )) - })?; - let payload = task_to_bzm2_payload(&engine_task, merkle_root, versions).map_err(|e| { - HashThreadError::WorkAssignmentFailed(format!( - "failed to derive per-engine payload for logical engine {logical_engine_id} (row {row} col {col}): {e}" - )) - })?; - - write_reg_u8( - chip_commands, - protocol::BROADCAST_ASIC, - engine, - protocol::engine_reg::ZEROS_TO_FIND, - zeros_to_find, - "task assign: ZEROS_TO_FIND", - ) - .await?; - - write_reg_u8( - chip_commands, - protocol::BROADCAST_ASIC, - engine, - protocol::engine_reg::TIMESTAMP_COUNT, - timestamp_reg_value, - "task assign: TIMESTAMP_COUNT", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - engine, - protocol::engine_reg::TARGET, - target, - "task assign: TARGET", - ) - .await?; - - let commands = protocol::Command::write_job( - protocol::BROADCAST_ASIC, - engine, - payload.midstates, - 
payload.merkle_residue, - payload.timestamp, - sequence_id, - WRITEJOB_CTL_REPLACE, - ) - .map_err(|e| { - HashThreadError::WorkAssignmentFailed(format!( - "failed to build WRITEJOB payload for engine 0x{engine:03x}: {e}" - )) - })?; - - for command in commands { - chip_commands.send(command).await.map_err(|e| { - HashThreadError::WorkAssignmentFailed(format!( - "failed to send WRITEJOB to engine 0x{engine:03x}: {e:?}" - )) - })?; - } - engine_assignments.push(EngineAssignment { - merkle_root, - extranonce2: engine_task.en2, - midstates: payload.midstates, - }); - } - } - - if engine_assignments.len() != WORK_ENGINE_COUNT { - return Err(HashThreadError::WorkAssignmentFailed(format!( - "unexpected BZM2 engine assignment count: got {}, expected {}", - engine_assignments.len(), - WORK_ENGINE_COUNT - ))); - } - - Ok(engine_assignments) -} - async fn configure_sensors( chip_responses: &mut R, chip_commands: &mut W, @@ -1937,7 +1677,7 @@ mod tests { EngineAssignment { merkle_root, extranonce2: task.en2, - midstates: payload.midstates, + midstates: payload, }; WORK_ENGINE_COUNT ]; diff --git a/mujina-miner/src/asic/bzm2/thread/work.rs b/mujina-miner/src/asic/bzm2/thread/work.rs new file mode 100644 index 0000000..2a0ac1c --- /dev/null +++ b/mujina-miner/src/asic/bzm2/thread/work.rs @@ -0,0 +1,290 @@ +use std::sync::Arc; + +use bitcoin::{TxMerkleNode, block::Version as BlockVersion}; +use futures::{SinkExt, sink::Sink}; + +use crate::{ + asic::hash_thread::{HashTask, HashThreadError}, + job_source::{Extranonce2, MerkleRootKind}, +}; + +use super::{ + AUTO_CLOCK_UNGATE, ENGINE_COLS, ENGINE_EN2_OFFSET_START, ENGINE_ROWS, INVALID_ENGINE_0_COL, + INVALID_ENGINE_0_ROW, INVALID_ENGINE_1_COL, INVALID_ENGINE_1_ROW, INVALID_ENGINE_2_COL, + INVALID_ENGINE_2_ROW, INVALID_ENGINE_3_COL, INVALID_ENGINE_3_ROW, MIDSTATE_COUNT, + WORK_ENGINE_COUNT, WRITEJOB_CTL_REPLACE, hashing::build_header_bytes, + hashing::compute_midstate_le, protocol, write_reg_u8, write_reg_u32, +}; + +struct 
TaskJobPayload { + midstates: [[u8; 32]; MIDSTATE_COUNT], + merkle_residue: u32, + timestamp: u32, +} + +#[derive(Clone)] +pub(super) struct EngineAssignment { + pub(super) merkle_root: TxMerkleNode, + pub(super) extranonce2: Option, + pub(super) midstates: [[u8; 32]; MIDSTATE_COUNT], +} + +#[derive(Clone)] +pub(super) struct AssignedTask { + pub(super) task: HashTask, + pub(super) merkle_root: TxMerkleNode, + pub(super) engine_assignments: Arc<[EngineAssignment]>, + pub(super) microjob_versions: [BlockVersion; MIDSTATE_COUNT], + pub(super) sequence_id: u8, + pub(super) timestamp_count: u8, + pub(super) leading_zeros: u8, + pub(super) nonce_minus_value: u32, +} + +pub(super) fn engine_id(row: u16, col: u16) -> u16 { + ((col & 0x3f) << 6) | (row & 0x3f) +} + +pub(super) fn is_invalid_engine(row: u16, col: u16) -> bool { + (row == INVALID_ENGINE_0_ROW && col == INVALID_ENGINE_0_COL) + || (row == INVALID_ENGINE_1_ROW && col == INVALID_ENGINE_1_COL) + || (row == INVALID_ENGINE_2_ROW && col == INVALID_ENGINE_2_COL) + || (row == INVALID_ENGINE_3_ROW && col == INVALID_ENGINE_3_COL) +} + +pub(super) fn logical_engine_index(row: u16, col: u16) -> Option { + if row >= ENGINE_ROWS || col >= ENGINE_COLS || is_invalid_engine(row, col) { + return None; + } + + let mut logical = 0usize; + for r in 0..ENGINE_ROWS { + for c in 0..ENGINE_COLS { + if is_invalid_engine(r, c) { + continue; + } + if r == row && c == col { + return Some(logical); + } + logical = logical.saturating_add(1); + } + } + + None +} + +fn engine_extranonce2_for_logical_engine( + task: &HashTask, + logical_engine: usize, +) -> Option { + let base = task.en2?; + let offset = (logical_engine as u64).saturating_add(ENGINE_EN2_OFFSET_START); + + if let Some(range) = task.en2_range.as_ref() + && range.size == base.size() + { + let value = if range.min == 0 && range.max == u64::MAX { + base.value().wrapping_add(offset) + } else { + let span = range.max.saturating_sub(range.min).saturating_add(1); + let base_value = if 
base.value() < range.min || base.value() > range.max { + range.min + } else { + base.value() + }; + let rel = base_value.saturating_sub(range.min); + range + .min + .saturating_add((rel.saturating_add(offset % span)) % span) + }; + return Extranonce2::new(value, base.size()).ok(); + } + + let width_bits = u32::from(base.size()).saturating_mul(8); + let max = if width_bits >= 64 { + u64::MAX + } else { + (1u64 << width_bits) - 1 + }; + let value = if max == u64::MAX { + base.value().wrapping_add(offset) + } else { + base.value().wrapping_add(offset) & max + }; + Extranonce2::new(value, base.size()).ok() +} + +fn compute_task_merkle_root(task: &HashTask) -> Result { + let template = task.template.as_ref(); + match &template.merkle_root { + MerkleRootKind::Computed(_) => { + let en2 = task.en2.as_ref().ok_or_else(|| { + HashThreadError::WorkAssignmentFailed( + "EN2 is required for computed merkle roots".into(), + ) + })?; + template.compute_merkle_root(en2).map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!("failed to compute merkle root: {e}")) + }) + } + MerkleRootKind::Fixed(merkle_root) => Ok(*merkle_root), + } +} + +#[cfg(test)] +pub(super) fn task_to_bzm2_payload( + task: &HashTask, + merkle_root: TxMerkleNode, + versions: [BlockVersion; MIDSTATE_COUNT], +) -> Result<[[u8; 32]; MIDSTATE_COUNT], HashThreadError> { + Ok(build_task_job_payload(task, merkle_root, versions)?.midstates) +} + +fn build_task_job_payload( + task: &HashTask, + merkle_root: TxMerkleNode, + versions: [BlockVersion; MIDSTATE_COUNT], +) -> Result { + let mut midstates = [[0u8; 32]; MIDSTATE_COUNT]; + let mut merkle_residue = 0u32; + let mut timestamp = 0u32; + + for (idx, midstate) in midstates.iter_mut().enumerate() { + let header = build_header_bytes(task, versions[idx], merkle_root)?; + let header_prefix: [u8; 64] = header[..64] + .try_into() + .expect("header prefix length is fixed"); + + *midstate = compute_midstate_le(&header_prefix); + + if idx == 0 { + merkle_residue = 
u32::from_be_bytes( + header[64..68] + .try_into() + .expect("slice length is exactly 4 bytes"), + ); + timestamp = u32::from_be_bytes( + header[68..72] + .try_into() + .expect("slice length is exactly 4 bytes"), + ); + } + } + + Ok(TaskJobPayload { + midstates, + merkle_residue, + timestamp, + }) +} + +pub(super) async fn send_task_to_all_engines( + chip_commands: &mut W, + task: &HashTask, + versions: [BlockVersion; MIDSTATE_COUNT], + sequence_id: u8, + zeros_to_find: u8, + timestamp_count: u8, +) -> Result, HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + let target = task.template.bits.to_consensus().swap_bytes(); + let timestamp_reg_value = ((AUTO_CLOCK_UNGATE & 0x1) << 7) | (timestamp_count & 0x7f); + let mut engine_assignments = Vec::with_capacity(WORK_ENGINE_COUNT); + + for row in 0..ENGINE_ROWS { + for col in 0..ENGINE_COLS { + if is_invalid_engine(row, col) { + continue; + } + + let Some(logical_engine_id) = logical_engine_index(row, col) else { + continue; + }; + let engine = engine_id(row, col); + let mut engine_task = task.clone(); + engine_task.en2 = engine_extranonce2_for_logical_engine(task, logical_engine_id); + let merkle_root = compute_task_merkle_root(&engine_task).map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!( + "failed to derive per-engine merkle root for logical engine {logical_engine_id} (row {row} col {col}): {e}" + )) + })?; + let payload = + build_task_job_payload(&engine_task, merkle_root, versions).map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!( + "failed to derive per-engine payload for logical engine {logical_engine_id} (row {row} col {col}): {e}" + )) + })?; + + write_reg_u8( + chip_commands, + protocol::BROADCAST_ASIC, + engine, + protocol::engine_reg::ZEROS_TO_FIND, + zeros_to_find, + "task assign: ZEROS_TO_FIND", + ) + .await?; + + write_reg_u8( + chip_commands, + protocol::BROADCAST_ASIC, + engine, + protocol::engine_reg::TIMESTAMP_COUNT, + timestamp_reg_value, + "task 
assign: TIMESTAMP_COUNT", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + engine, + protocol::engine_reg::TARGET, + target, + "task assign: TARGET", + ) + .await?; + + let commands = protocol::Command::write_job( + protocol::BROADCAST_ASIC, + engine, + payload.midstates, + payload.merkle_residue, + payload.timestamp, + sequence_id, + WRITEJOB_CTL_REPLACE, + ) + .map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!( + "failed to build WRITEJOB payload for engine 0x{engine:03x}: {e}" + )) + })?; + + for command in commands { + chip_commands.send(command).await.map_err(|e| { + HashThreadError::WorkAssignmentFailed(format!( + "failed to send WRITEJOB to engine 0x{engine:03x}: {e:?}" + )) + })?; + } + engine_assignments.push(EngineAssignment { + merkle_root, + extranonce2: engine_task.en2, + midstates: payload.midstates, + }); + } + } + + if engine_assignments.len() != WORK_ENGINE_COUNT { + return Err(HashThreadError::WorkAssignmentFailed(format!( + "unexpected BZM2 engine assignment count: got {}, expected {}", + engine_assignments.len(), + WORK_ENGINE_COUNT + ))); + } + + Ok(engine_assignments) +} From edf90c914299fcad0e750e2c761d65bd7d3bc425 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Thu, 5 Mar 2026 23:45:11 -0500 Subject: [PATCH 17/19] refactor(bzm2): extract hash thread assignment tracker --- mujina-miner/src/asic/bzm2/thread.rs | 248 ++---------------- mujina-miner/src/asic/bzm2/thread/tracker.rs | 256 +++++++++++++++++++ 2 files changed, 275 insertions(+), 229 deletions(-) create mode 100644 mujina-miner/src/asic/bzm2/thread/tracker.rs diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index 58b1d87..91e42d8 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -11,13 +11,11 @@ //! 
- validating returned results before forwarding shares upstream use std::{ - collections::VecDeque, io, sync::{Arc, RwLock}, }; use async_trait::async_trait; -use bitcoin::{block::Version as BlockVersion, hashes::Hash as _}; use futures::{SinkExt, sink::Sink, stream::Stream}; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{self, Duration, Instant}; @@ -32,12 +30,12 @@ use crate::{ tracing::prelude::*, types::{Difficulty, HashRate}, }; +use hashing::{Bzm2CheckResult, task_midstate_versions}; #[cfg(test)] -use hashing::hash_bytes_bzm2_order; use hashing::{ - Bzm2CheckResult, bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, check_result, - leading_zero_bits, task_midstate_versions, + bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, check_result, hash_bytes_bzm2_order, }; +use tracker::{AssignmentTracker, SelectedReadResultCandidate}; use work::{ AssignedTask, engine_id, is_invalid_engine, logical_engine_index, send_task_to_all_engines, }; @@ -45,6 +43,7 @@ use work::{ use work::{EngineAssignment, task_to_bzm2_payload}; mod hashing; +mod tracker; mod work; const ENGINE_ROWS: u16 = 20; @@ -453,101 +452,6 @@ fn calc_pll_dividers(freq_mhz: f32, post1_divider: u8) -> (u32, u32) { (post_div, fb_div) } -fn readresult_sequence_slot(sequence_id: u8) -> u8 { - sequence_id & 0x3f -} - -fn writejob_effective_sequence_id(sequence_id: u8) -> u8 { - // Keep the thread's assignment tracking in the same sequence domain as - // Command::write_job (seq_start = (sequence_id % 2) * 4). - sequence_id % 2 -} - -fn retain_assigned_task(assigned_tasks: &mut VecDeque, new_task: AssignedTask) { - let slot = readresult_sequence_slot(new_task.sequence_id); - assigned_tasks.push_back(new_task); - - // Keep a small per-slot history so delayed READRESULT frames can still be - // validated against recent predecessors in the same visible sequence slot. 
- let mut slot_count = assigned_tasks - .iter() - .filter(|task| readresult_sequence_slot(task.sequence_id) == slot) - .count(); - while slot_count > READRESULT_SLOT_HISTORY { - if let Some(index) = assigned_tasks - .iter() - .position(|task| readresult_sequence_slot(task.sequence_id) == slot) - { - let _ = assigned_tasks.remove(index); - slot_count = slot_count.saturating_sub(1); - } else { - break; - } - } - - while assigned_tasks.len() > READRESULT_ASSIGNMENT_HISTORY_LIMIT { - let _ = assigned_tasks.pop_front(); - } -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -struct ReadResultFields { - sequence: u8, - timecode: u8, - sequence_id: u8, - micro_job_id: u8, - used_masked_fields: bool, -} - -fn resolve_readresult_fields( - sequence_raw: u8, - timecode_raw: u8, - has_sequence_slot: impl Fn(u8) -> bool, -) -> Option { - let sequence_id_raw = sequence_raw / (MIDSTATE_COUNT as u8); - let sequence_slot_raw = readresult_sequence_slot(sequence_id_raw); - if has_sequence_slot(sequence_slot_raw) { - return Some(ReadResultFields { - sequence: sequence_raw, - timecode: timecode_raw, - sequence_id: sequence_id_raw, - micro_job_id: sequence_raw % (MIDSTATE_COUNT as u8), - used_masked_fields: false, - }); - } - - let sequence_masked = sequence_raw & 0x7f; - let timecode_masked = timecode_raw & 0x7f; - let sequence_id_masked = sequence_masked / (MIDSTATE_COUNT as u8); - let sequence_slot_masked = readresult_sequence_slot(sequence_id_masked); - if (sequence_masked != sequence_raw || timecode_masked != timecode_raw) - && has_sequence_slot(sequence_slot_masked) - { - return Some(ReadResultFields { - sequence: sequence_masked, - timecode: timecode_masked, - sequence_id: sequence_id_masked, - micro_job_id: sequence_masked % (MIDSTATE_COUNT as u8), - used_masked_fields: true, - }); - } - - None -} - -struct SelectedReadResultCandidate { - assigned: AssignedTask, - share_version: BlockVersion, - ntime_offset: u32, - share_ntime: u32, - nonce_adjusted: u32, - nonce_submit: u32, - 
hash_bytes: [u8; 32], - hash: bitcoin::BlockHash, - check_result: Bzm2CheckResult, - observed_leading_zeros: u16, -} - async fn configure_sensors( chip_responses: &mut R, chip_commands: &mut W, @@ -1132,9 +1036,7 @@ where let mut chip_initialized = false; let mut current_task: Option = None; - let mut assigned_tasks: VecDeque = - VecDeque::with_capacity(READRESULT_ASSIGNMENT_HISTORY_LIMIT); - let mut next_sequence_id: u8 = 0; + let mut assignment_tracker = AssignmentTracker::new(); let mut zero_lz_diagnostic_samples: u64 = 0; let mut status_ticker = time::interval(Duration::from_secs(5)); status_ticker.set_missed_tick_behavior(time::MissedTickBehavior::Skip); @@ -1183,7 +1085,7 @@ where } let microjob_versions = task_midstate_versions(&new_task); - let write_sequence_id = writejob_effective_sequence_id(next_sequence_id); + let write_sequence_id = assignment_tracker.current_write_sequence_id(); let engine_assignments = match send_task_to_all_engines( &mut chip_commands, @@ -1222,14 +1124,14 @@ where leading_zeros: ENGINE_LEADING_ZEROS, nonce_minus_value: BZM2_NONCE_MINUS, }; - retain_assigned_task(&mut assigned_tasks, new_assigned_task); + assignment_tracker.retain(new_assigned_task); debug!( job_id = %new_task.template.id, write_sequence_id, "Sent BZM2 work to chip" ); - next_sequence_id = next_sequence_id.wrapping_add(1); + assignment_tracker.advance_sequence(); let old_task = current_task.replace(new_task); { @@ -1267,7 +1169,7 @@ where } let microjob_versions = task_midstate_versions(&new_task); - let write_sequence_id = writejob_effective_sequence_id(next_sequence_id); + let write_sequence_id = assignment_tracker.current_write_sequence_id(); let engine_assignments = match send_task_to_all_engines( &mut chip_commands, @@ -1306,14 +1208,14 @@ where leading_zeros: ENGINE_LEADING_ZEROS, nonce_minus_value: BZM2_NONCE_MINUS, }; - retain_assigned_task(&mut assigned_tasks, new_assigned_task); + assignment_tracker.retain(new_assigned_task); debug!( job_id = 
%new_task.template.id, write_sequence_id, "Sent BZM2 work to chip (old work invalidated)" ); - next_sequence_id = next_sequence_id.wrapping_add(1); + assignment_tracker.advance_sequence(); let old_task = current_task.replace(new_task); { @@ -1326,7 +1228,7 @@ where debug!("Going idle"); let old_task = current_task.take(); - assigned_tasks.clear(); + assignment_tracker.clear(); { let mut s = status.write().expect("status lock poisoned"); s.is_active = false; @@ -1373,94 +1275,13 @@ where continue; }; - let Some(resolved_fields) = - resolve_readresult_fields(sequence, timecode, |slot| { - assigned_tasks.iter().rev().any(|task| { - readresult_sequence_slot(task.sequence_id) == slot - }) - }) - else { - continue; - }; - let sequence_id = resolved_fields.sequence_id; - let micro_job_id = resolved_fields.micro_job_id; - let timecode_effective = resolved_fields.timecode; - let sequence_slot = readresult_sequence_slot(sequence_id); - let slot_candidates: Vec = assigned_tasks - .iter() - .rev() - .filter(|task| readresult_sequence_slot(task.sequence_id) == sequence_slot) - .cloned() - .collect(); - let slot_candidate_count = slot_candidates.len(); - if slot_candidate_count == 0 { - continue; - } - let nonce_raw = nonce; - let mut selected_candidate: Option = None; - let mut selected_rank = 0u8; - - for mut candidate in slot_candidates { - let Some(engine_assignment) = - candidate.engine_assignments.get(logical_engine_id).cloned() - else { - continue; - }; - candidate.merkle_root = engine_assignment.merkle_root; - candidate.task.en2 = engine_assignment.extranonce2; - let share_version = candidate.microjob_versions[micro_job_id as usize]; - let selected_midstate = engine_assignment.midstates[micro_job_id as usize]; - // Result time is reverse-counted and must be - // converted into a forward ntime offset. 
- let ntime_offset = - u32::from(candidate.timestamp_count.wrapping_sub(timecode_effective)); - let share_ntime = candidate.task.ntime.wrapping_add(ntime_offset); - // READRESULT mapping: - // READRESULT nonce is first adjusted by nonce_minus, then byte-swapped - // for reconstructed header hashing and Stratum submit nonce field. - let nonce_adjusted = nonce_raw.wrapping_sub(candidate.nonce_minus_value); - let nonce_submit = nonce_adjusted.swap_bytes(); - - let tail16 = bzm2_tail16_bytes(&candidate, share_ntime, nonce_submit); - let hash_bytes = - bzm2_double_sha_from_midstate_and_tail(&selected_midstate, &tail16); - let hash = bitcoin::BlockHash::from_byte_array(hash_bytes); - let target_bytes = candidate.task.share_target.to_le_bytes(); - let check_result = check_result( - &hash_bytes, - &target_bytes, - candidate.leading_zeros, - ); - let observed_leading_zeros = leading_zero_bits(&hash_bytes); - let rank = match check_result { - Bzm2CheckResult::Correct => 3, - Bzm2CheckResult::NotMeetTarget => 2, - Bzm2CheckResult::Error => 1, - }; - - if selected_candidate.is_none() || rank > selected_rank { - selected_rank = rank; - selected_candidate = Some(SelectedReadResultCandidate { - assigned: candidate, - share_version, - ntime_offset, - share_ntime, - nonce_adjusted, - nonce_submit, - hash_bytes, - hash, - check_result, - observed_leading_zeros, - }); - if rank == 3 { - break; - } - } - } - let Some(SelectedReadResultCandidate { assigned, + sequence_id, + micro_job_id, + timecode_effective, + slot_candidate_count, share_version, ntime_offset, share_ntime, @@ -1470,7 +1291,8 @@ where hash, check_result, observed_leading_zeros, - }) = selected_candidate + }) = assignment_tracker + .resolve_candidate(logical_engine_id, sequence, timecode, nonce_raw) else { continue; }; @@ -1562,41 +1384,9 @@ mod tests { AssignedTask, BZM2_NONCE_MINUS, Bzm2CheckResult, ENGINE_LEADING_ZEROS, ENGINE_TIMESTAMP_COUNT, EngineAssignment, MIDSTATE_COUNT, WORK_ENGINE_COUNT, 
bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, check_result, - hash_bytes_bzm2_order, protocol, resolve_readresult_fields, task_midstate_versions, - task_to_bzm2_payload, + hash_bytes_bzm2_order, protocol, task_midstate_versions, task_to_bzm2_payload, }; - #[test] - fn test_resolve_readresult_fields_prefers_raw_when_slot_exists() { - let active_slots = [32u8, 0u8]; - let fields = resolve_readresult_fields(0x80, 0xbc, |slot| active_slots.contains(&slot)) - .expect("raw slot should resolve"); - assert_eq!(fields.sequence, 0x80); - assert_eq!(fields.timecode, 0xbc); - assert_eq!(fields.sequence_id, 32); - assert_eq!(fields.micro_job_id, 0); - assert!(!fields.used_masked_fields); - } - - #[test] - fn test_resolve_readresult_fields_uses_masked_fallback() { - let active_slots = [0u8]; - let fields = resolve_readresult_fields(0x82, 0xbc, |slot| active_slots.contains(&slot)) - .expect("masked slot should resolve"); - assert_eq!(fields.sequence, 0x02); - assert_eq!(fields.timecode, 0x3c); - assert_eq!(fields.sequence_id, 0); - assert_eq!(fields.micro_job_id, 2); - assert!(fields.used_masked_fields); - } - - #[test] - fn test_resolve_readresult_fields_none_when_no_slot_matches() { - let active_slots = [0u8]; - let fields = resolve_readresult_fields(0xfd, 0x7f, |slot| active_slots.contains(&slot)); - assert!(fields.is_none()); - } - #[test] fn test_readresult_hash_check_with_known_good_bzm2_share() { // Job + accepted share captured from known working messages diff --git a/mujina-miner/src/asic/bzm2/thread/tracker.rs b/mujina-miner/src/asic/bzm2/thread/tracker.rs new file mode 100644 index 0000000..32cd094 --- /dev/null +++ b/mujina-miner/src/asic/bzm2/thread/tracker.rs @@ -0,0 +1,256 @@ +use std::collections::VecDeque; + +use bitcoin::block::Version as BlockVersion; +use bitcoin::hashes::Hash as _; + +use super::{ + Bzm2CheckResult, MIDSTATE_COUNT, READRESULT_ASSIGNMENT_HISTORY_LIMIT, READRESULT_SLOT_HISTORY, + hashing::bzm2_double_sha_from_midstate_and_tail, 
hashing::bzm2_tail16_bytes, + hashing::check_result, hashing::leading_zero_bits, work::AssignedTask, +}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct ReadResultFields { + sequence: u8, + timecode: u8, + sequence_id: u8, + micro_job_id: u8, + used_masked_fields: bool, +} + +pub(super) struct SelectedReadResultCandidate { + pub(super) assigned: AssignedTask, + pub(super) sequence_id: u8, + pub(super) micro_job_id: u8, + pub(super) timecode_effective: u8, + pub(super) slot_candidate_count: usize, + pub(super) share_version: BlockVersion, + pub(super) ntime_offset: u32, + pub(super) share_ntime: u32, + pub(super) nonce_adjusted: u32, + pub(super) nonce_submit: u32, + pub(super) hash_bytes: [u8; 32], + pub(super) hash: bitcoin::BlockHash, + pub(super) check_result: Bzm2CheckResult, + pub(super) observed_leading_zeros: u16, +} + +pub(super) struct AssignmentTracker { + assignments: VecDeque, + next_sequence_id: u8, +} + +impl AssignmentTracker { + pub(super) fn new() -> Self { + Self { + assignments: VecDeque::with_capacity(READRESULT_ASSIGNMENT_HISTORY_LIMIT), + next_sequence_id: 0, + } + } + + pub(super) fn current_write_sequence_id(&self) -> u8 { + writejob_effective_sequence_id(self.next_sequence_id) + } + + pub(super) fn retain(&mut self, new_task: AssignedTask) { + let slot = readresult_sequence_slot(new_task.sequence_id); + self.assignments.push_back(new_task); + + let mut slot_count = self + .assignments + .iter() + .filter(|task| readresult_sequence_slot(task.sequence_id) == slot) + .count(); + while slot_count > READRESULT_SLOT_HISTORY { + if let Some(index) = self + .assignments + .iter() + .position(|task| readresult_sequence_slot(task.sequence_id) == slot) + { + let _ = self.assignments.remove(index); + slot_count = slot_count.saturating_sub(1); + } else { + break; + } + } + + while self.assignments.len() > READRESULT_ASSIGNMENT_HISTORY_LIMIT { + let _ = self.assignments.pop_front(); + } + } + + pub(super) fn advance_sequence(&mut self) { + 
self.next_sequence_id = self.next_sequence_id.wrapping_add(1); + } + + pub(super) fn clear(&mut self) { + self.assignments.clear(); + } + + pub(super) fn resolve_candidate( + &self, + logical_engine_id: usize, + sequence: u8, + timecode: u8, + nonce_raw: u32, + ) -> Option { + let resolved_fields = resolve_readresult_fields(sequence, timecode, |slot| { + self.assignments + .iter() + .rev() + .any(|task| readresult_sequence_slot(task.sequence_id) == slot) + })?; + let sequence_id = resolved_fields.sequence_id; + let micro_job_id = resolved_fields.micro_job_id; + let timecode_effective = resolved_fields.timecode; + let sequence_slot = readresult_sequence_slot(sequence_id); + let slot_candidates: Vec = self + .assignments + .iter() + .rev() + .filter(|task| readresult_sequence_slot(task.sequence_id) == sequence_slot) + .cloned() + .collect(); + let slot_candidate_count = slot_candidates.len(); + if slot_candidate_count == 0 { + return None; + } + + let mut selected_candidate: Option = None; + let mut selected_rank = 0u8; + + for mut candidate in slot_candidates { + let Some(engine_assignment) = + candidate.engine_assignments.get(logical_engine_id).cloned() + else { + continue; + }; + candidate.merkle_root = engine_assignment.merkle_root; + candidate.task.en2 = engine_assignment.extranonce2; + let share_version = candidate.microjob_versions[micro_job_id as usize]; + let selected_midstate = engine_assignment.midstates[micro_job_id as usize]; + let ntime_offset = + u32::from(candidate.timestamp_count.wrapping_sub(timecode_effective)); + let share_ntime = candidate.task.ntime.wrapping_add(ntime_offset); + let nonce_adjusted = nonce_raw.wrapping_sub(candidate.nonce_minus_value); + let nonce_submit = nonce_adjusted.swap_bytes(); + + let tail16 = bzm2_tail16_bytes(&candidate, share_ntime, nonce_submit); + let hash_bytes = bzm2_double_sha_from_midstate_and_tail(&selected_midstate, &tail16); + let hash = bitcoin::BlockHash::from_byte_array(hash_bytes); + let target_bytes = 
candidate.task.share_target.to_le_bytes(); + let check_result = check_result(&hash_bytes, &target_bytes, candidate.leading_zeros); + let observed_leading_zeros = leading_zero_bits(&hash_bytes); + let rank = match check_result { + Bzm2CheckResult::Correct => 3, + Bzm2CheckResult::NotMeetTarget => 2, + Bzm2CheckResult::Error => 1, + }; + + if selected_candidate.is_none() || rank > selected_rank { + selected_rank = rank; + selected_candidate = Some(SelectedReadResultCandidate { + assigned: candidate, + sequence_id, + micro_job_id, + timecode_effective, + slot_candidate_count, + share_version, + ntime_offset, + share_ntime, + nonce_adjusted, + nonce_submit, + hash_bytes, + hash, + check_result, + observed_leading_zeros, + }); + if rank == 3 { + break; + } + } + } + + selected_candidate + } +} + +fn readresult_sequence_slot(sequence_id: u8) -> u8 { + sequence_id & 0x3f +} + +fn writejob_effective_sequence_id(sequence_id: u8) -> u8 { + sequence_id % 2 +} + +fn resolve_readresult_fields( + sequence_raw: u8, + timecode_raw: u8, + has_sequence_slot: impl Fn(u8) -> bool, +) -> Option { + let sequence_id_raw = sequence_raw / (MIDSTATE_COUNT as u8); + let sequence_slot_raw = readresult_sequence_slot(sequence_id_raw); + if has_sequence_slot(sequence_slot_raw) { + return Some(ReadResultFields { + sequence: sequence_raw, + timecode: timecode_raw, + sequence_id: sequence_id_raw, + micro_job_id: sequence_raw % (MIDSTATE_COUNT as u8), + used_masked_fields: false, + }); + } + + let sequence_masked = sequence_raw & 0x7f; + let timecode_masked = timecode_raw & 0x7f; + let sequence_id_masked = sequence_masked / (MIDSTATE_COUNT as u8); + let sequence_slot_masked = readresult_sequence_slot(sequence_id_masked); + if (sequence_masked != sequence_raw || timecode_masked != timecode_raw) + && has_sequence_slot(sequence_slot_masked) + { + return Some(ReadResultFields { + sequence: sequence_masked, + timecode: timecode_masked, + sequence_id: sequence_id_masked, + micro_job_id: sequence_masked % 
(MIDSTATE_COUNT as u8), + used_masked_fields: true, + }); + } + + None +} + +#[cfg(test)] +mod tests { + use super::resolve_readresult_fields; + + #[test] + fn test_resolve_readresult_fields_prefers_raw_when_slot_exists() { + let active_slots = [32u8, 0u8]; + let fields = resolve_readresult_fields(0x80, 0xbc, |slot| active_slots.contains(&slot)) + .expect("raw slot should resolve"); + assert_eq!(fields.sequence, 0x80); + assert_eq!(fields.timecode, 0xbc); + assert_eq!(fields.sequence_id, 32); + assert_eq!(fields.micro_job_id, 0); + assert!(!fields.used_masked_fields); + } + + #[test] + fn test_resolve_readresult_fields_uses_masked_fallback() { + let active_slots = [0u8]; + let fields = resolve_readresult_fields(0x82, 0xbc, |slot| active_slots.contains(&slot)) + .expect("masked slot should resolve"); + assert_eq!(fields.sequence, 0x02); + assert_eq!(fields.timecode, 0x3c); + assert_eq!(fields.sequence_id, 0); + assert_eq!(fields.micro_job_id, 2); + assert!(fields.used_masked_fields); + } + + #[test] + fn test_resolve_readresult_fields_none_when_no_slot_matches() { + let active_slots = [0u8]; + let fields = resolve_readresult_fields(0xfd, 0x7f, |slot| active_slots.contains(&slot)); + assert!(fields.is_none()); + } +} From 41803eb28807529c4649a452887516f93ed1dc04 Mon Sep 17 00:00:00 2001 From: johnny9 Date: Thu, 5 Mar 2026 23:52:59 -0500 Subject: [PATCH 18/19] refactor(bzm2): extract hash thread bring-up flow --- mujina-miner/src/asic/bzm2/thread.rs | 1033 +++--------------- mujina-miner/src/asic/bzm2/thread/bringup.rs | 767 +++++++++++++ 2 files changed, 909 insertions(+), 891 deletions(-) create mode 100644 mujina-miner/src/asic/bzm2/thread/bringup.rs diff --git a/mujina-miner/src/asic/bzm2/thread.rs b/mujina-miner/src/asic/bzm2/thread.rs index 91e42d8..223ecd0 100644 --- a/mujina-miner/src/asic/bzm2/thread.rs +++ b/mujina-miner/src/asic/bzm2/thread.rs @@ -16,9 +16,9 @@ use std::{ }; use async_trait::async_trait; -use futures::{SinkExt, sink::Sink, stream::Stream}; 
+use futures::{sink::Sink, stream::Stream}; use tokio::sync::{mpsc, oneshot, watch}; -use tokio::time::{self, Duration, Instant}; +use tokio::time::{self, Duration}; use tokio_stream::StreamExt; use super::protocol; @@ -30,18 +30,18 @@ use crate::{ tracing::prelude::*, types::{Difficulty, HashRate}, }; +use bringup::{initialize_chip, write_reg_u8, write_reg_u32}; use hashing::{Bzm2CheckResult, task_midstate_versions}; #[cfg(test)] use hashing::{ bzm2_double_sha_from_midstate_and_tail, bzm2_tail16_bytes, check_result, hash_bytes_bzm2_order, }; use tracker::{AssignmentTracker, SelectedReadResultCandidate}; -use work::{ - AssignedTask, engine_id, is_invalid_engine, logical_engine_index, send_task_to_all_engines, -}; +use work::{AssignedTask, is_invalid_engine, logical_engine_index, send_task_to_all_engines}; #[cfg(test)] use work::{EngineAssignment, task_to_bzm2_payload}; +mod bringup; mod hashing; mod tracker; mod work; @@ -248,767 +248,154 @@ impl HashThread for Bzm2Thread { } } -fn init_failed(msg: impl Into) -> HashThreadError { - HashThreadError::InitializationFailed(msg.into()) -} - -async fn send_command( - chip_commands: &mut W, - command: protocol::Command, - context: &str, -) -> Result<(), HashThreadError> -where - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - chip_commands - .send(command) - .await - .map_err(|e| init_failed(format!("{context}: {e:?}"))) -} - -async fn drain_input(chip_responses: &mut R) -where - R: Stream> + Unpin, -{ - while let Ok(Some(_)) = time::timeout(Duration::from_millis(20), chip_responses.next()).await {} -} - -async fn wait_for_noop( - chip_responses: &mut R, - expected_asic_id: u8, - timeout: Duration, -) -> Result<(), HashThreadError> -where - R: Stream> + Unpin, -{ - let deadline = Instant::now() + timeout; - loop { - let remaining = deadline.saturating_duration_since(Instant::now()); - if remaining.is_zero() { - return Err(init_failed(format!( - "timeout waiting for NOOP response from ASIC 0x{expected_asic_id:02x}" - 
))); - } - - match time::timeout(remaining, chip_responses.next()).await { - Ok(Some(Ok(protocol::Response::Noop { asic_hw_id, .. }))) - if asic_hw_id == expected_asic_id => - { - return Ok(()); - } - Ok(Some(Ok(_))) => continue, - Ok(Some(Err(e))) => { - return Err(init_failed(format!("failed while waiting for NOOP: {e}"))); - } - Ok(None) => { - return Err(init_failed("response stream closed while waiting for NOOP")); - } - Err(_) => { - return Err(init_failed(format!( - "timeout waiting for NOOP response from ASIC 0x{expected_asic_id:02x}" - ))); - } - } - } -} - -async fn read_reg_u32( - chip_responses: &mut R, - chip_commands: &mut W, - asic_id: u8, - engine: u16, - offset: u16, - timeout: Duration, - context: &str, -) -> Result -where - R: Stream> + Unpin, - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - send_command( - chip_commands, - protocol::Command::read_reg_u32(asic_id, engine, offset), - context, - ) - .await?; - - let deadline = Instant::now() + timeout; - loop { - let remaining = deadline.saturating_duration_since(Instant::now()); - if remaining.is_zero() { - return Err(init_failed(format!( - "{context}: timeout waiting for READREG response" - ))); - } - - match time::timeout(remaining, chip_responses.next()).await { - Ok(Some(Ok(protocol::Response::ReadReg { asic_hw_id, data }))) - if asic_hw_id == asic_id => - { - return match data { - protocol::ReadRegData::U32(value) => Ok(value), - protocol::ReadRegData::U16(value) => Ok(value as u32), - protocol::ReadRegData::U8(value) => Ok(value as u32), - }; - } - Ok(Some(Ok(_))) => continue, - Ok(Some(Err(e))) => { - return Err(init_failed(format!("{context}: stream read error: {e}"))); - } - Ok(None) => { - return Err(init_failed(format!("{context}: response stream closed"))); - } - Err(_) => { - return Err(init_failed(format!( - "{context}: timeout waiting for response" - ))); - } - } - } -} - -async fn write_reg_u32( - chip_commands: &mut W, - asic_id: u8, - engine: u16, - offset: u16, - value: u32, 
- context: &str, -) -> Result<(), HashThreadError> -where - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - send_command( - chip_commands, - protocol::Command::write_reg_u32_le(asic_id, engine, offset, value), - context, - ) - .await -} - -async fn write_reg_u8( - chip_commands: &mut W, - asic_id: u8, - engine: u16, - offset: u16, - value: u8, - context: &str, -) -> Result<(), HashThreadError> -where - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - send_command( - chip_commands, - protocol::Command::write_reg_u8(asic_id, engine, offset, value), - context, - ) - .await -} - -async fn group_write_u8( - chip_commands: &mut W, - asic_id: u8, - group: u16, - offset: u16, - value: u8, - context: &str, -) -> Result<(), HashThreadError> -where - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - send_command( - chip_commands, - protocol::Command::multicast_write_u8(asic_id, group, offset, value), - context, - ) - .await -} - -fn thermal_c_to_tune_code(thermal_c: f32) -> u32 { - let tune_code = (2048.0 / 4096.0) + (4096.0 * (thermal_c + 293.8) / 631.8); - tune_code.max(0.0) as u32 -} - -fn voltage_mv_to_tune_code(voltage_mv: f32) -> u32 { - let tune_code = (16384.0 / 6.0) * (2.5 * voltage_mv / 706.7 + 3.0 / 16384.0 + 1.0); - tune_code.max(0.0) as u32 -} - -fn calc_pll_dividers(freq_mhz: f32, post1_divider: u8) -> (u32, u32) { - let fb = - REF_DIVIDER as f32 * (post1_divider as f32 + 1.0) * (POST2_DIVIDER as f32 + 1.0) * freq_mhz - / REF_CLK_MHZ; - let mut fb_div = fb as u32; - if fb - fb_div as f32 > 0.5 { - fb_div += 1; - } - - let post_div = (1 << 12) | (POST2_DIVIDER << 9) | ((post1_divider as u32) << 6) | REF_DIVIDER; - (post_div, fb_div) +struct Bzm2ThreadActor { + cmd_rx: mpsc::Receiver, + evt_tx: mpsc::Sender, + removal_rx: watch::Receiver, + status: Arc>, + chip_responses: R, + chip_commands: W, + peripherals: BoardPeripherals, + asic_count: u8, } -async fn configure_sensors( - chip_responses: &mut R, - chip_commands: &mut W, - read_asic_id: u8, -) -> 
Result<(), HashThreadError> -where - R: Stream> + Unpin, - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - let thermal_trip_code = thermal_c_to_tune_code(THERMAL_TRIP_C); - let voltage_trip_code = voltage_mv_to_tune_code(VOLTAGE_TRIP_MV); - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::UART_TX, - 0xF, - "enable sensors: UART_TX", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::SLOW_CLK_DIV, - 2, - "enable sensors: SLOW_CLK_DIV", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::SENSOR_CLK_DIV, - (8 << 5) | 8, - "enable sensors: SENSOR_CLK_DIV", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::DTS_SRST_PD, - 1 << 8, - "enable sensors: DTS_SRST_PD", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::SENS_TDM_GAP_CNT, - SENSOR_REPORT_INTERVAL, - "enable sensors: SENS_TDM_GAP_CNT", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::DTS_CFG, - 0, - "enable sensors: DTS_CFG", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::SENSOR_THRS_CNT, - (10 << 16) | 10, - "enable sensors: SENSOR_THRS_CNT", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::TEMPSENSOR_TUNE_CODE, - 0x8001 | (thermal_trip_code << 1), - "enable sensors: TEMPSENSOR_TUNE_CODE", - ) - .await?; - - let bandgap = read_reg_u32( - chip_responses, - chip_commands, - read_asic_id, - protocol::NOTCH_REG, - protocol::local_reg::BANDGAP, - INIT_READREG_TIMEOUT, - "enable sensors: read BANDGAP", - ) - .await?; - let bandgap_updated = 
(bandgap & !0xF) | 0x3; - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::BANDGAP, - bandgap_updated, - "enable sensors: write BANDGAP", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::VSENSOR_SRST_PD, - 1 << 8, - "enable sensors: VSENSOR_SRST_PD", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::VSENSOR_CFG, - (8 << 28) | (1 << 24), - "enable sensors: VSENSOR_CFG", - ) - .await?; - - let vs_enable = (voltage_trip_code << 16) | (voltage_trip_code << 1) | 1; - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::VOLTAGE_SENSOR_ENABLE, - vs_enable, - "enable sensors: VOLTAGE_SENSOR_ENABLE", - ) - .await?; - - Ok(()) +#[derive(Clone, Copy)] +enum TaskAssignmentMode { + Update, + Replace, } -async fn set_frequency( - chip_responses: &mut R, - chip_commands: &mut W, - read_asic_id: u8, -) -> Result<(), HashThreadError> -where - R: Stream> + Unpin, - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - let (post_div, fb_div) = calc_pll_dividers(TARGET_FREQ_MHZ, POST1_DIVIDER); - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::PLL_FBDIV, - fb_div, - "set frequency: PLL_FBDIV", - ) - .await?; - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::PLL_POSTDIV, - post_div, - "set frequency: PLL_POSTDIV", - ) - .await?; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::PLL1_FBDIV, - fb_div, - "set frequency: PLL1_FBDIV", - ) - .await?; - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::PLL1_POSTDIV, - post_div, - "set frequency: PLL1_POSTDIV", - ) - .await?; - - 
time::sleep(Duration::from_millis(1)).await; - - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::PLL_ENABLE, - 1, - "set frequency: PLL_ENABLE", - ) - .await?; - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::PLL1_ENABLE, - 1, - "set frequency: PLL1_ENABLE", - ) - .await?; - - let deadline = Instant::now() + PLL_LOCK_TIMEOUT; - for pll_enable_offset in [ - protocol::local_reg::PLL_ENABLE, - protocol::local_reg::PLL1_ENABLE, - ] { - loop { - let lock = read_reg_u32( - chip_responses, - chip_commands, - read_asic_id, - protocol::NOTCH_REG, - pll_enable_offset, - INIT_READREG_TIMEOUT, - "set frequency: wait PLL lock", - ) - .await?; - if (lock & PLL_LOCK_MASK) != 0 { - break; - } - - if Instant::now() >= deadline { - return Err(init_failed(format!( - "set frequency: PLL at offset 0x{pll_enable_offset:02x} failed to lock" - ))); - } - - time::sleep(PLL_POLL_DELAY).await; +impl TaskAssignmentMode { + fn transition_message(self, had_old_task: bool) -> &'static str { + match (self, had_old_task) { + (Self::Update, true) => "Updating work", + (Self::Update, false) => "Updating work from idle", + (Self::Replace, true) => "Replacing work", + (Self::Replace, false) => "Replacing work from idle", } } - Ok(()) -} - -async fn soft_reset(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> -where - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - write_reg_u32( - chip_commands, - asic_id, - protocol::NOTCH_REG, - protocol::local_reg::ENG_SOFT_RESET, - 0, - "soft reset assert", - ) - .await?; - time::sleep(SOFT_RESET_DELAY).await; - write_reg_u32( - chip_commands, - asic_id, - protocol::NOTCH_REG, - protocol::local_reg::ENG_SOFT_RESET, - 1, - "soft reset release", - ) - .await?; - time::sleep(SOFT_RESET_DELAY).await; - Ok(()) -} - -async fn set_all_clock_gates(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> -where - W: Sink + 
Unpin, - W::Error: std::fmt::Debug, -{ - for group_id in 0..ENGINE_ROWS { - group_write_u8( - chip_commands, - asic_id, - group_id, - protocol::engine_reg::CONFIG, - ENGINE_CONFIG_ENHANCED_MODE_BIT, - "set all clock gates", - ) - .await?; - } - Ok(()) -} - -async fn set_asic_nonce_range(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> -where - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - let start_nonce = BZM2_START_NONCE; - let end_nonce = BZM2_END_NONCE; - - for col in 0..ENGINE_COLS { - for row in 0..ENGINE_ROWS { - if is_invalid_engine(row, col) { - continue; - } - let engine = engine_id(row, col); - write_reg_u32( - chip_commands, - asic_id, - engine, - protocol::engine_reg::START_NONCE, - start_nonce, - "set nonce range: START_NONCE", - ) - .await?; - write_reg_u32( - chip_commands, - asic_id, - engine, - protocol::engine_reg::END_NONCE, - end_nonce, - "set nonce range: END_NONCE", - ) - .await?; + fn send_failure_context(self) -> &'static str { + match self { + Self::Update => "update_task", + Self::Replace => "replace_task", } } - Ok(()) -} - -async fn start_warm_up_jobs(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> -where - W: Sink + Unpin, - W::Error: std::fmt::Debug, -{ - for col in 0..ENGINE_COLS { - for row in 0..ENGINE_ROWS { - if is_invalid_engine(row, col) { - continue; - } - let engine = engine_id(row, col); - - write_reg_u8( - chip_commands, - asic_id, - engine, - protocol::engine_reg::TIMESTAMP_COUNT, - 0xff, - "warm-up: TIMESTAMP_COUNT", - ) - .await?; - - for seq in [0xfc, 0xfd, 0xfe, 0xff] { - write_reg_u8( - chip_commands, - asic_id, - engine, - protocol::engine_reg::SEQUENCE_ID, - seq, - "warm-up: SEQUENCE_ID", - ) - .await?; - } - - write_reg_u8( - chip_commands, - asic_id, - engine, - protocol::engine_reg::JOB_CONTROL, - 1, - "warm-up: JOB_CONTROL", - ) - .await?; + fn sent_message(self) -> &'static str { + match self { + Self::Update => "Sent BZM2 work to chip", + Self::Replace => "Sent BZM2 
work to chip (old work invalidated)", } } - Ok(()) } -async fn initialize_chip( +async fn assign_task( chip_responses: &mut R, chip_commands: &mut W, peripherals: &mut BoardPeripherals, asic_count: u8, -) -> Result, HashThreadError> + chip_initialized: &mut bool, + current_task: &mut Option, + assignment_tracker: &mut AssignmentTracker, + status: &Arc>, + new_task: HashTask, + mode: TaskAssignmentMode, +) -> Result, HashThreadError> where R: Stream> + Unpin, W: Sink + Unpin, W::Error: std::fmt::Debug, { - if asic_count == 0 { - return Err(init_failed("asic_count must be > 0")); - } - - if let Some(ref mut asic_enable) = peripherals.asic_enable { - asic_enable - .enable() - .await - .map_err(|e| init_failed(format!("failed to release reset for BZM2 bring-up: {e}")))?; + if let Some(old) = current_task.as_ref() { + debug!( + old_job = %old.template.id, + new_job = %new_task.template.id, + "{}", + mode.transition_message(true) + ); + } else { + debug!( + new_job = %new_task.template.id, + "{}", + mode.transition_message(false) + ); } - time::sleep(Duration::from_millis(200)).await; - - drain_input(chip_responses).await; - - send_command( - chip_commands, - protocol::Command::Noop { - asic_hw_id: protocol::DEFAULT_ASIC_ID, - }, - "default ping", - ) - .await?; - wait_for_noop(chip_responses, protocol::DEFAULT_ASIC_ID, INIT_NOOP_TIMEOUT).await?; - debug!("BZM2 default ASIC ID ping succeeded"); - - let mut asic_ids = Vec::with_capacity(asic_count as usize); - for index in 0..asic_count { - let asic_id = protocol::logical_to_hw_asic_id(index); - if protocol::hw_to_logical_asic_id(asic_id) != Some(index) { - return Err(init_failed(format!( - "invalid ASIC ID mapping for logical index {} -> 0x{:02x}", - index, asic_id - ))); - } - - write_reg_u32( - chip_commands, - protocol::DEFAULT_ASIC_ID, - protocol::NOTCH_REG, - protocol::local_reg::ASIC_ID, - asic_id as u32, - "program chain IDs", - ) - .await?; - time::sleep(Duration::from_millis(50)).await; - - let readback = 
read_reg_u32( - chip_responses, - chip_commands, - asic_id, - protocol::NOTCH_REG, - protocol::local_reg::ASIC_ID, - INIT_READREG_TIMEOUT, - "verify programmed ASIC ID", - ) - .await?; - if (readback & 0xff) as u8 != asic_id { - return Err(init_failed(format!( - "ASIC ID verify mismatch for 0x{asic_id:02x}: read 0x{readback:08x}" - ))); + if !*chip_initialized { + match initialize_chip(chip_responses, chip_commands, peripherals, asic_count).await { + Ok(ids) => { + *chip_initialized = true; + info!(asic_ids = ?ids, "BZM2 initialization completed"); + } + Err(e) => { + error!(error = %e, "BZM2 chip initialization failed"); + return Err(e); + } } - - asic_ids.push(asic_id); - } - debug!(asic_ids = ?asic_ids, "BZM2 chain IDs programmed"); - - drain_input(chip_responses).await; - for &asic_id in &asic_ids { - send_command( - chip_commands, - protocol::Command::Noop { - asic_hw_id: asic_id, - }, - "per-ASIC ping", - ) - .await?; - wait_for_noop(chip_responses, asic_id, INIT_NOOP_TIMEOUT).await?; } - debug!("BZM2 per-ASIC ping succeeded"); - - let first_asic = *asic_ids - .first() - .ok_or_else(|| init_failed("no ASIC IDs programmed"))?; - debug!("Configuring BZM2 sensors"); - configure_sensors(chip_responses, chip_commands, first_asic).await?; - debug!("Configuring BZM2 PLL"); - set_frequency(chip_responses, chip_commands, first_asic).await?; + let microjob_versions = task_midstate_versions(&new_task); + let write_sequence_id = assignment_tracker.current_write_sequence_id(); - write_reg_u8( + let engine_assignments = send_task_to_all_engines( chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::CKDCCR_5_0, - 0x00, - "disable DLL0", + &new_task, + microjob_versions, + write_sequence_id, + ENGINE_ZEROS_TO_FIND, + ENGINE_TIMESTAMP_COUNT, ) - .await?; - write_reg_u8( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::CKDCCR_5_1, - 0x00, - "disable DLL1", - ) - .await?; - - let uart_tdm_control = 
(0x7f << 9) | (100 << 1) | 1; - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::UART_TDM_CTL, - uart_tdm_control, - "enable UART TDM mode", - ) - .await?; + .await + .map_err(|e| { + error!( + error = %e, + command = mode.send_failure_context(), + "Failed to send BZM2 work" + ); + e + })?; + + let Some(default_assignment) = engine_assignments.first().cloned() else { + let e = HashThreadError::WorkAssignmentFailed(format!( + "no engine assignments produced for {}", + mode.send_failure_context() + )); + error!( + error = %e, + command = mode.send_failure_context(), + "Failed to send BZM2 work" + ); + return Err(e); + }; - write_reg_u32( - chip_commands, - first_asic, - protocol::NOTCH_REG, - protocol::local_reg::IO_PEPS_DS, - DRIVE_STRENGTH_STRONG, - "set drive strength", - ) - .await?; - - for &asic_id in &asic_ids { - debug!(asic_id, "BZM2 soft reset + clock gate + warm-up start"); - soft_reset(chip_commands, asic_id).await?; - set_all_clock_gates(chip_commands, asic_id).await?; - set_asic_nonce_range(chip_commands, asic_id).await?; - start_warm_up_jobs(chip_commands, asic_id).await?; - debug!(asic_id, "BZM2 warm-up complete"); - } + let new_assigned_task = AssignedTask { + task: new_task.clone(), + merkle_root: default_assignment.merkle_root, + engine_assignments: Arc::from(engine_assignments.into_boxed_slice()), + microjob_versions, + sequence_id: write_sequence_id, + timestamp_count: ENGINE_TIMESTAMP_COUNT, + leading_zeros: ENGINE_LEADING_ZEROS, + nonce_minus_value: BZM2_NONCE_MINUS, + }; + assignment_tracker.retain(new_assigned_task); - write_reg_u32( - chip_commands, - protocol::BROADCAST_ASIC, - protocol::NOTCH_REG, - protocol::local_reg::RESULT_STS_CTL, - 0x10, - "enable TDM results", - ) - .await?; + debug!( + job_id = %new_task.template.id, + write_sequence_id, + "{}", + mode.sent_message() + ); + assignment_tracker.advance_sequence(); - Ok(asic_ids) -} + let old_task = 
current_task.replace(new_task); + { + let mut s = status.write().expect("status lock poisoned"); + s.is_active = true; + } -struct Bzm2ThreadActor { - cmd_rx: mpsc::Receiver, - evt_tx: mpsc::Sender, - removal_rx: watch::Receiver, - status: Arc>, - chip_responses: R, - chip_commands: W, - peripherals: BoardPeripherals, - asic_count: u8, + Ok(old_task) } async fn bzm2_thread_actor(actor: Bzm2ThreadActor) @@ -1057,172 +444,36 @@ where Some(cmd) = cmd_rx.recv() => { match cmd { ThreadCommand::UpdateTask { new_task, response_tx } => { - if let Some(ref old) = current_task { - debug!( - old_job = %old.template.id, - new_job = %new_task.template.id, - "Updating work" - ); - } else { - debug!(new_job = %new_task.template.id, "Updating work from idle"); - } - - if !chip_initialized { - match initialize_chip(&mut chip_responses, &mut chip_commands, &mut peripherals, asic_count).await { - Ok(ids) => { - chip_initialized = true; - info!( - asic_ids = ?ids, - "BZM2 initialization completed" - ); - } - Err(e) => { - error!(error = %e, "BZM2 chip initialization failed"); - let _ = response_tx.send(Err(e)); - continue; - } - } - } - - let microjob_versions = task_midstate_versions(&new_task); - let write_sequence_id = assignment_tracker.current_write_sequence_id(); - - let engine_assignments = match send_task_to_all_engines( + let result = assign_task( + &mut chip_responses, &mut chip_commands, - &new_task, - microjob_versions, - write_sequence_id, - ENGINE_ZEROS_TO_FIND, - ENGINE_TIMESTAMP_COUNT, + &mut peripherals, + asic_count, + &mut chip_initialized, + &mut current_task, + &mut assignment_tracker, + &status, + new_task, + TaskAssignmentMode::Update, ) - .await - { - Ok(assignments) => assignments, - Err(e) => { - error!(error = %e, "Failed to send BZM2 work during update_task"); - let _ = response_tx.send(Err(e)); - continue; - } - }; - let Some(default_assignment) = engine_assignments.first().cloned() else { - let e = HashThreadError::WorkAssignmentFailed( - "no engine 
assignments produced for update_task".into(), - ); - error!(error = %e, "Failed to send BZM2 work during update_task"); - let _ = response_tx.send(Err(e)); - continue; - }; - - // `job_ctl=3` behavior: old jobs are canceled on every assign. - let new_assigned_task = AssignedTask { - task: new_task.clone(), - merkle_root: default_assignment.merkle_root, - engine_assignments: Arc::from(engine_assignments.into_boxed_slice()), - microjob_versions, - sequence_id: write_sequence_id, - timestamp_count: ENGINE_TIMESTAMP_COUNT, - leading_zeros: ENGINE_LEADING_ZEROS, - nonce_minus_value: BZM2_NONCE_MINUS, - }; - assignment_tracker.retain(new_assigned_task); - - debug!( - job_id = %new_task.template.id, - write_sequence_id, - "Sent BZM2 work to chip" - ); - assignment_tracker.advance_sequence(); - - let old_task = current_task.replace(new_task); - { - let mut s = status.write().expect("status lock poisoned"); - s.is_active = true; - } - let _ = response_tx.send(Ok(old_task)); + .await; + let _ = response_tx.send(result); } ThreadCommand::ReplaceTask { new_task, response_tx } => { - if let Some(ref old) = current_task { - debug!( - old_job = %old.template.id, - new_job = %new_task.template.id, - "Replacing work" - ); - } else { - debug!(new_job = %new_task.template.id, "Replacing work from idle"); - } - - if !chip_initialized { - match initialize_chip(&mut chip_responses, &mut chip_commands, &mut peripherals, asic_count).await { - Ok(ids) => { - chip_initialized = true; - info!( - asic_ids = ?ids, - "BZM2 initialization completed" - ); - } - Err(e) => { - error!(error = %e, "BZM2 chip initialization failed"); - let _ = response_tx.send(Err(e)); - continue; - } - } - } - - let microjob_versions = task_midstate_versions(&new_task); - let write_sequence_id = assignment_tracker.current_write_sequence_id(); - - let engine_assignments = match send_task_to_all_engines( + let result = assign_task( + &mut chip_responses, &mut chip_commands, - &new_task, - microjob_versions, - 
write_sequence_id, - ENGINE_ZEROS_TO_FIND, - ENGINE_TIMESTAMP_COUNT, + &mut peripherals, + asic_count, + &mut chip_initialized, + &mut current_task, + &mut assignment_tracker, + &status, + new_task, + TaskAssignmentMode::Replace, ) - .await - { - Ok(assignments) => assignments, - Err(e) => { - error!(error = %e, "Failed to send BZM2 work during replace_task"); - let _ = response_tx.send(Err(e)); - continue; - } - }; - let Some(default_assignment) = engine_assignments.first().cloned() else { - let e = HashThreadError::WorkAssignmentFailed( - "no engine assignments produced for replace_task".into(), - ); - error!(error = %e, "Failed to send BZM2 work during replace_task"); - let _ = response_tx.send(Err(e)); - continue; - }; - - // `job_ctl=3` behavior: old jobs are canceled on every assign. - let new_assigned_task = AssignedTask { - task: new_task.clone(), - merkle_root: default_assignment.merkle_root, - engine_assignments: Arc::from(engine_assignments.into_boxed_slice()), - microjob_versions, - sequence_id: write_sequence_id, - timestamp_count: ENGINE_TIMESTAMP_COUNT, - leading_zeros: ENGINE_LEADING_ZEROS, - nonce_minus_value: BZM2_NONCE_MINUS, - }; - assignment_tracker.retain(new_assigned_task); - - debug!( - job_id = %new_task.template.id, - write_sequence_id, - "Sent BZM2 work to chip (old work invalidated)" - ); - assignment_tracker.advance_sequence(); - - let old_task = current_task.replace(new_task); - { - let mut s = status.write().expect("status lock poisoned"); - s.is_active = true; - } - let _ = response_tx.send(Ok(old_task)); + .await; + let _ = response_tx.send(result); } ThreadCommand::GoIdle { response_tx } => { debug!("Going idle"); diff --git a/mujina-miner/src/asic/bzm2/thread/bringup.rs b/mujina-miner/src/asic/bzm2/thread/bringup.rs new file mode 100644 index 0000000..7ab0f9a --- /dev/null +++ b/mujina-miner/src/asic/bzm2/thread/bringup.rs @@ -0,0 +1,767 @@ +use std::io; + +use futures::{SinkExt, sink::Sink, stream::Stream}; +use 
tokio::time::{self, Duration, Instant}; +use tokio_stream::StreamExt; + +use crate::{ + asic::hash_thread::{BoardPeripherals, HashThreadError}, + tracing::prelude::*, +}; + +use super::{ + BZM2_END_NONCE, BZM2_START_NONCE, DRIVE_STRENGTH_STRONG, ENGINE_COLS, + ENGINE_CONFIG_ENHANCED_MODE_BIT, ENGINE_ROWS, INIT_NOOP_TIMEOUT, INIT_READREG_TIMEOUT, + PLL_LOCK_MASK, PLL_LOCK_TIMEOUT, PLL_POLL_DELAY, POST1_DIVIDER, POST2_DIVIDER, REF_CLK_MHZ, + REF_DIVIDER, SENSOR_REPORT_INTERVAL, SOFT_RESET_DELAY, TARGET_FREQ_MHZ, THERMAL_TRIP_C, + VOLTAGE_TRIP_MV, protocol, work::engine_id, work::is_invalid_engine, +}; + +fn init_failed(msg: impl Into) -> HashThreadError { + HashThreadError::InitializationFailed(msg.into()) +} + +async fn send_command( + chip_commands: &mut W, + command: protocol::Command, + context: &str, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + chip_commands + .send(command) + .await + .map_err(|e| init_failed(format!("{context}: {e:?}"))) +} + +async fn drain_input(chip_responses: &mut R) +where + R: Stream> + Unpin, +{ + while let Ok(Some(_)) = time::timeout(Duration::from_millis(20), chip_responses.next()).await {} +} + +async fn wait_for_noop( + chip_responses: &mut R, + expected_asic_id: u8, + timeout: Duration, +) -> Result<(), HashThreadError> +where + R: Stream> + Unpin, +{ + let deadline = Instant::now() + timeout; + loop { + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + return Err(init_failed(format!( + "timeout waiting for NOOP response from ASIC 0x{expected_asic_id:02x}" + ))); + } + + match time::timeout(remaining, chip_responses.next()).await { + Ok(Some(Ok(protocol::Response::Noop { asic_hw_id, .. 
}))) + if asic_hw_id == expected_asic_id => + { + return Ok(()); + } + Ok(Some(Ok(_))) => continue, + Ok(Some(Err(e))) => { + return Err(init_failed(format!("failed while waiting for NOOP: {e}"))); + } + Ok(None) => { + return Err(init_failed("response stream closed while waiting for NOOP")); + } + Err(_) => { + return Err(init_failed(format!( + "timeout waiting for NOOP response from ASIC 0x{expected_asic_id:02x}" + ))); + } + } + } +} + +async fn read_reg_u32( + chip_responses: &mut R, + chip_commands: &mut W, + asic_id: u8, + engine: u16, + offset: u16, + timeout: Duration, + context: &str, +) -> Result +where + R: Stream> + Unpin, + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + send_command( + chip_commands, + protocol::Command::read_reg_u32(asic_id, engine, offset), + context, + ) + .await?; + + let deadline = Instant::now() + timeout; + loop { + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + return Err(init_failed(format!( + "{context}: timeout waiting for READREG response" + ))); + } + + match time::timeout(remaining, chip_responses.next()).await { + Ok(Some(Ok(protocol::Response::ReadReg { asic_hw_id, data }))) + if asic_hw_id == asic_id => + { + return match data { + protocol::ReadRegData::U32(value) => Ok(value), + protocol::ReadRegData::U16(value) => Ok(value as u32), + protocol::ReadRegData::U8(value) => Ok(value as u32), + }; + } + Ok(Some(Ok(_))) => continue, + Ok(Some(Err(e))) => { + return Err(init_failed(format!("{context}: stream read error: {e}"))); + } + Ok(None) => { + return Err(init_failed(format!("{context}: response stream closed"))); + } + Err(_) => { + return Err(init_failed(format!( + "{context}: timeout waiting for response" + ))); + } + } + } +} + +pub(super) async fn write_reg_u32( + chip_commands: &mut W, + asic_id: u8, + engine: u16, + offset: u16, + value: u32, + context: &str, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + 
send_command( + chip_commands, + protocol::Command::write_reg_u32_le(asic_id, engine, offset, value), + context, + ) + .await +} + +pub(super) async fn write_reg_u8( + chip_commands: &mut W, + asic_id: u8, + engine: u16, + offset: u16, + value: u8, + context: &str, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + send_command( + chip_commands, + protocol::Command::write_reg_u8(asic_id, engine, offset, value), + context, + ) + .await +} + +async fn group_write_u8( + chip_commands: &mut W, + asic_id: u8, + group: u16, + offset: u16, + value: u8, + context: &str, +) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + send_command( + chip_commands, + protocol::Command::multicast_write_u8(asic_id, group, offset, value), + context, + ) + .await +} + +fn thermal_c_to_tune_code(thermal_c: f32) -> u32 { + let tune_code = (2048.0 / 4096.0) + (4096.0 * (thermal_c + 293.8) / 631.8); + tune_code.max(0.0) as u32 +} + +fn voltage_mv_to_tune_code(voltage_mv: f32) -> u32 { + let tune_code = (16384.0 / 6.0) * (2.5 * voltage_mv / 706.7 + 3.0 / 16384.0 + 1.0); + tune_code.max(0.0) as u32 +} + +fn calc_pll_dividers(freq_mhz: f32, post1_divider: u8) -> (u32, u32) { + let fb = + REF_DIVIDER as f32 * (post1_divider as f32 + 1.0) * (POST2_DIVIDER as f32 + 1.0) * freq_mhz + / REF_CLK_MHZ; + let mut fb_div = fb as u32; + if fb - fb_div as f32 > 0.5 { + fb_div += 1; + } + + let post_div = (1 << 12) | (POST2_DIVIDER << 9) | ((post1_divider as u32) << 6) | REF_DIVIDER; + (post_div, fb_div) +} + +async fn configure_sensors( + chip_responses: &mut R, + chip_commands: &mut W, + read_asic_id: u8, +) -> Result<(), HashThreadError> +where + R: Stream> + Unpin, + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + let thermal_trip_code = thermal_c_to_tune_code(THERMAL_TRIP_C); + let voltage_trip_code = voltage_mv_to_tune_code(VOLTAGE_TRIP_MV); + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + 
protocol::NOTCH_REG, + protocol::local_reg::UART_TX, + 0xF, + "enable sensors: UART_TX", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::SLOW_CLK_DIV, + 2, + "enable sensors: SLOW_CLK_DIV", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::SENSOR_CLK_DIV, + (8 << 5) | 8, + "enable sensors: SENSOR_CLK_DIV", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::DTS_SRST_PD, + 1 << 8, + "enable sensors: DTS_SRST_PD", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::SENS_TDM_GAP_CNT, + SENSOR_REPORT_INTERVAL, + "enable sensors: SENS_TDM_GAP_CNT", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::DTS_CFG, + 0, + "enable sensors: DTS_CFG", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::SENSOR_THRS_CNT, + (10 << 16) | 10, + "enable sensors: SENSOR_THRS_CNT", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::TEMPSENSOR_TUNE_CODE, + 0x8001 | (thermal_trip_code << 1), + "enable sensors: TEMPSENSOR_TUNE_CODE", + ) + .await?; + + let bandgap = read_reg_u32( + chip_responses, + chip_commands, + read_asic_id, + protocol::NOTCH_REG, + protocol::local_reg::BANDGAP, + INIT_READREG_TIMEOUT, + "enable sensors: read BANDGAP", + ) + .await?; + let bandgap_updated = (bandgap & !0xF) | 0x3; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::BANDGAP, + bandgap_updated, + "enable sensors: write BANDGAP", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + 
protocol::local_reg::VSENSOR_SRST_PD, + 1 << 8, + "enable sensors: VSENSOR_SRST_PD", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::VSENSOR_CFG, + (8 << 28) | (1 << 24), + "enable sensors: VSENSOR_CFG", + ) + .await?; + + let vs_enable = (voltage_trip_code << 16) | (voltage_trip_code << 1) | 1; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::VOLTAGE_SENSOR_ENABLE, + vs_enable, + "enable sensors: VOLTAGE_SENSOR_ENABLE", + ) + .await?; + + Ok(()) +} + +async fn set_frequency( + chip_responses: &mut R, + chip_commands: &mut W, + read_asic_id: u8, +) -> Result<(), HashThreadError> +where + R: Stream> + Unpin, + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + let (post_div, fb_div) = calc_pll_dividers(TARGET_FREQ_MHZ, POST1_DIVIDER); + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL_FBDIV, + fb_div, + "set frequency: PLL_FBDIV", + ) + .await?; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL_POSTDIV, + post_div, + "set frequency: PLL_POSTDIV", + ) + .await?; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL1_FBDIV, + fb_div, + "set frequency: PLL1_FBDIV", + ) + .await?; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL1_POSTDIV, + post_div, + "set frequency: PLL1_POSTDIV", + ) + .await?; + + time::sleep(Duration::from_millis(1)).await; + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL_ENABLE, + 1, + "set frequency: PLL_ENABLE", + ) + .await?; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::PLL1_ENABLE, + 1, + "set frequency: PLL1_ENABLE", + ) + .await?; + + let 
deadline = Instant::now() + PLL_LOCK_TIMEOUT; + for pll_enable_offset in [ + protocol::local_reg::PLL_ENABLE, + protocol::local_reg::PLL1_ENABLE, + ] { + loop { + let lock = read_reg_u32( + chip_responses, + chip_commands, + read_asic_id, + protocol::NOTCH_REG, + pll_enable_offset, + INIT_READREG_TIMEOUT, + "set frequency: wait PLL lock", + ) + .await?; + if (lock & PLL_LOCK_MASK) != 0 { + break; + } + + if Instant::now() >= deadline { + return Err(init_failed(format!( + "set frequency: PLL at offset 0x{pll_enable_offset:02x} failed to lock" + ))); + } + + time::sleep(PLL_POLL_DELAY).await; + } + } + + Ok(()) +} + +async fn soft_reset(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + write_reg_u32( + chip_commands, + asic_id, + protocol::NOTCH_REG, + protocol::local_reg::ENG_SOFT_RESET, + 0, + "soft reset assert", + ) + .await?; + time::sleep(SOFT_RESET_DELAY).await; + write_reg_u32( + chip_commands, + asic_id, + protocol::NOTCH_REG, + protocol::local_reg::ENG_SOFT_RESET, + 1, + "soft reset release", + ) + .await?; + time::sleep(SOFT_RESET_DELAY).await; + Ok(()) +} + +async fn set_all_clock_gates(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + for group_id in 0..ENGINE_ROWS { + group_write_u8( + chip_commands, + asic_id, + group_id, + protocol::engine_reg::CONFIG, + ENGINE_CONFIG_ENHANCED_MODE_BIT, + "set all clock gates", + ) + .await?; + } + Ok(()) +} + +async fn set_asic_nonce_range(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + for col in 0..ENGINE_COLS { + for row in 0..ENGINE_ROWS { + if is_invalid_engine(row, col) { + continue; + } + let engine = engine_id(row, col); + write_reg_u32( + chip_commands, + asic_id, + engine, + protocol::engine_reg::START_NONCE, + BZM2_START_NONCE, + "set nonce range: START_NONCE", + ) + .await?; + 
write_reg_u32( + chip_commands, + asic_id, + engine, + protocol::engine_reg::END_NONCE, + BZM2_END_NONCE, + "set nonce range: END_NONCE", + ) + .await?; + } + } + + Ok(()) +} + +async fn start_warm_up_jobs(chip_commands: &mut W, asic_id: u8) -> Result<(), HashThreadError> +where + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + for col in 0..ENGINE_COLS { + for row in 0..ENGINE_ROWS { + if is_invalid_engine(row, col) { + continue; + } + let engine = engine_id(row, col); + + write_reg_u8( + chip_commands, + asic_id, + engine, + protocol::engine_reg::TIMESTAMP_COUNT, + 0xff, + "warm-up: TIMESTAMP_COUNT", + ) + .await?; + + for seq in [0xfc, 0xfd, 0xfe, 0xff] { + write_reg_u8( + chip_commands, + asic_id, + engine, + protocol::engine_reg::SEQUENCE_ID, + seq, + "warm-up: SEQUENCE_ID", + ) + .await?; + } + + write_reg_u8( + chip_commands, + asic_id, + engine, + protocol::engine_reg::JOB_CONTROL, + 1, + "warm-up: JOB_CONTROL", + ) + .await?; + } + } + Ok(()) +} + +pub(super) async fn initialize_chip( + chip_responses: &mut R, + chip_commands: &mut W, + peripherals: &mut BoardPeripherals, + asic_count: u8, +) -> Result, HashThreadError> +where + R: Stream> + Unpin, + W: Sink + Unpin, + W::Error: std::fmt::Debug, +{ + if asic_count == 0 { + return Err(init_failed("asic_count must be > 0")); + } + + if let Some(ref mut asic_enable) = peripherals.asic_enable { + asic_enable + .enable() + .await + .map_err(|e| init_failed(format!("failed to release reset for BZM2 bring-up: {e}")))?; + } + time::sleep(Duration::from_millis(200)).await; + + drain_input(chip_responses).await; + + send_command( + chip_commands, + protocol::Command::Noop { + asic_hw_id: protocol::DEFAULT_ASIC_ID, + }, + "default ping", + ) + .await?; + wait_for_noop(chip_responses, protocol::DEFAULT_ASIC_ID, INIT_NOOP_TIMEOUT).await?; + debug!("BZM2 default ASIC ID ping succeeded"); + + let mut asic_ids = Vec::with_capacity(asic_count as usize); + for index in 0..asic_count { + let asic_id = 
protocol::logical_to_hw_asic_id(index); + if protocol::hw_to_logical_asic_id(asic_id) != Some(index) { + return Err(init_failed(format!( + "invalid ASIC ID mapping for logical index {} -> 0x{:02x}", + index, asic_id + ))); + } + + write_reg_u32( + chip_commands, + protocol::DEFAULT_ASIC_ID, + protocol::NOTCH_REG, + protocol::local_reg::ASIC_ID, + asic_id as u32, + "program chain IDs", + ) + .await?; + time::sleep(Duration::from_millis(50)).await; + + let readback = read_reg_u32( + chip_responses, + chip_commands, + asic_id, + protocol::NOTCH_REG, + protocol::local_reg::ASIC_ID, + INIT_READREG_TIMEOUT, + "verify programmed ASIC ID", + ) + .await?; + + if (readback & 0xff) as u8 != asic_id { + return Err(init_failed(format!( + "ASIC ID verify mismatch for 0x{asic_id:02x}: read 0x{readback:08x}" + ))); + } + + asic_ids.push(asic_id); + } + debug!(asic_ids = ?asic_ids, "BZM2 chain IDs programmed"); + + drain_input(chip_responses).await; + for &asic_id in &asic_ids { + send_command( + chip_commands, + protocol::Command::Noop { + asic_hw_id: asic_id, + }, + "per-ASIC ping", + ) + .await?; + wait_for_noop(chip_responses, asic_id, INIT_NOOP_TIMEOUT).await?; + } + debug!("BZM2 per-ASIC ping succeeded"); + + let first_asic = *asic_ids + .first() + .ok_or_else(|| init_failed("no ASIC IDs programmed"))?; + + debug!("Configuring BZM2 sensors"); + configure_sensors(chip_responses, chip_commands, first_asic).await?; + debug!("Configuring BZM2 PLL"); + set_frequency(chip_responses, chip_commands, first_asic).await?; + + write_reg_u8( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::CKDCCR_5_0, + 0x00, + "disable DLL0", + ) + .await?; + write_reg_u8( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::CKDCCR_5_1, + 0x00, + "disable DLL1", + ) + .await?; + + let uart_tdm_control = (0x7f << 9) | (100 << 1) | 1; + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + 
protocol::local_reg::UART_TDM_CTL, + uart_tdm_control, + "enable UART TDM mode", + ) + .await?; + + write_reg_u32( + chip_commands, + first_asic, + protocol::NOTCH_REG, + protocol::local_reg::IO_PEPS_DS, + DRIVE_STRENGTH_STRONG, + "set drive strength", + ) + .await?; + + for &asic_id in &asic_ids { + debug!(asic_id, "BZM2 soft reset + clock gate + warm-up start"); + soft_reset(chip_commands, asic_id).await?; + set_all_clock_gates(chip_commands, asic_id).await?; + set_asic_nonce_range(chip_commands, asic_id).await?; + start_warm_up_jobs(chip_commands, asic_id).await?; + debug!(asic_id, "BZM2 warm-up complete"); + } + + write_reg_u32( + chip_commands, + protocol::BROADCAST_ASIC, + protocol::NOTCH_REG, + protocol::local_reg::RESULT_STS_CTL, + 0x10, + "enable TDM results", + ) + .await?; + + Ok(asic_ids) +} From d3097dd8baee2c4c4da2c54d606fafc740a2914d Mon Sep 17 00:00:00 2001 From: johnny9 Date: Fri, 6 Mar 2026 09:16:15 -0500 Subject: [PATCH 19/19] refactor(bzm2): reuse bitcoin SHA-256 engine --- mujina-miner/src/asic/bzm2/thread/hashing.rs | 232 +++++++------------ 1 file changed, 78 insertions(+), 154 deletions(-) diff --git a/mujina-miner/src/asic/bzm2/thread/hashing.rs b/mujina-miner/src/asic/bzm2/thread/hashing.rs index dcbe240..aa47bd2 100644 --- a/mujina-miner/src/asic/bzm2/thread/hashing.rs +++ b/mujina-miner/src/asic/bzm2/thread/hashing.rs @@ -1,29 +1,14 @@ -#[cfg(test)] -use bitcoin::hashes::Hash as _; use bitcoin::{ TxMerkleNode, block::{Header as BlockHeader, Version as BlockVersion}, consensus, + hashes::{Hash as _, HashEngine as _, sha256}, }; use crate::asic::hash_thread::{HashTask, HashThreadError}; use super::{AssignedTask, MIDSTATE_COUNT}; -const SHA256_IV: [u32; 8] = [ - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, -]; -const SHA256_K: [u32; 64] = [ - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, -]; - #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(super) enum Bzm2CheckResult { Correct, @@ -127,93 +112,44 @@ pub(super) fn leading_zero_bits(sha256_le: &[u8; 32]) -> u16 { bits } -fn sha256_compress_state(initial_state: [u32; 8], block: &[u8; 64]) -> [u32; 8] { - let mut w = [0u32; 64]; - for (i, chunk) in block.chunks_exact(4).enumerate() { - w[i] = u32::from_be_bytes(chunk.try_into().expect("chunk size is 4")); - } - for i in 16..64 { - let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); - let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); - w[i] = w[i - 16] - .wrapping_add(s0) - .wrapping_add(w[i - 7]) - .wrapping_add(s1); - } - - let mut a = initial_state[0]; - let mut b = initial_state[1]; - let mut c = initial_state[2]; - let mut d = initial_state[3]; - let mut e = initial_state[4]; - let mut f = initial_state[5]; - let mut g = initial_state[6]; - let mut h = initial_state[7]; - - for i in 0..64 { - let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); - let ch = (e & f) ^ ((!e) & g); - let t1 = h - .wrapping_add(s1) - .wrapping_add(ch) - .wrapping_add(SHA256_K[i]) - .wrapping_add(w[i]); - let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); - let maj = (a & b) ^ (a & c) ^ (b & c); - let t2 = s0.wrapping_add(maj); - - h = g; - g = f; 
- f = e; - e = d.wrapping_add(t1); - d = c; - c = b; - b = a; - a = t1.wrapping_add(t2); +fn bzm2_midstate_to_sha256_midstate(midstate_le: &[u8; 32]) -> sha256::Midstate { + let mut midstate_be = [0u8; 32]; + for (be_chunk, le_chunk) in midstate_be + .chunks_exact_mut(4) + .zip(midstate_le.chunks_exact(4)) + { + let word = u32::from_le_bytes(le_chunk.try_into().expect("chunk size is 4")); + be_chunk.copy_from_slice(&word.to_be_bytes()); } - [ - initial_state[0].wrapping_add(a), - initial_state[1].wrapping_add(b), - initial_state[2].wrapping_add(c), - initial_state[3].wrapping_add(d), - initial_state[4].wrapping_add(e), - initial_state[5].wrapping_add(f), - initial_state[6].wrapping_add(g), - initial_state[7].wrapping_add(h), - ] + sha256::Midstate::from_byte_array(midstate_be) } -fn sha256_state_to_be_bytes(state: [u32; 8]) -> [u8; 32] { - let mut out = [0u8; 32]; - for (i, word) in state.iter().copied().enumerate() { - out[i * 4..i * 4 + 4].copy_from_slice(&word.to_be_bytes()); +fn sha256_midstate_to_bzm2_le(midstate: sha256::Midstate) -> [u8; 32] { + let midstate_be = midstate.to_byte_array(); + let mut midstate_le = [0u8; 32]; + for (le_chunk, be_chunk) in midstate_le + .chunks_exact_mut(4) + .zip(midstate_be.chunks_exact(4)) + { + let word = u32::from_be_bytes(be_chunk.try_into().expect("chunk size is 4")); + le_chunk.copy_from_slice(&word.to_le_bytes()); } - out + + midstate_le } pub(super) fn bzm2_double_sha_from_midstate_and_tail( midstate_le: &[u8; 32], tail16: &[u8; 16], ) -> [u8; 32] { - let mut resumed_state = [0u32; 8]; - for (i, chunk) in midstate_le.chunks_exact(4).enumerate() { - resumed_state[i] = u32::from_le_bytes(chunk.try_into().expect("chunk size is 4")); - } + let mut engine = + sha256::HashEngine::from_midstate(bzm2_midstate_to_sha256_midstate(midstate_le), 64); + engine.input(tail16); - let mut first_block = [0u8; 64]; - first_block[..16].copy_from_slice(tail16); - first_block[16] = 0x80; - first_block[56..64].copy_from_slice(&(80u64 * 
8).to_be_bytes()); - let first_state = sha256_compress_state(resumed_state, &first_block); - let first_digest = sha256_state_to_be_bytes(first_state); - - let mut second_block = [0u8; 64]; - second_block[..32].copy_from_slice(&first_digest); - second_block[32] = 0x80; - second_block[56..64].copy_from_slice(&(32u64 * 8).to_be_bytes()); - let second_state = sha256_compress_state(SHA256_IV, &second_block); - sha256_state_to_be_bytes(second_state) + sha256::Hash::from_engine(engine) + .hash_again() + .to_byte_array() } pub(super) fn bzm2_tail16_bytes( @@ -253,66 +189,9 @@ pub(super) fn build_header_bytes( } pub(super) fn compute_midstate_le(header_prefix_64: &[u8; 64]) -> [u8; 32] { - let mut w = [0u32; 64]; - for (i, chunk) in header_prefix_64.chunks_exact(4).enumerate() { - w[i] = u32::from_be_bytes(chunk.try_into().expect("chunk size is 4")); - } - for i in 16..64 { - let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); - let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); - w[i] = w[i - 16] - .wrapping_add(s0) - .wrapping_add(w[i - 7]) - .wrapping_add(s1); - } - - let mut a = SHA256_IV[0]; - let mut b = SHA256_IV[1]; - let mut c = SHA256_IV[2]; - let mut d = SHA256_IV[3]; - let mut e = SHA256_IV[4]; - let mut f = SHA256_IV[5]; - let mut g = SHA256_IV[6]; - let mut h = SHA256_IV[7]; - - for i in 0..64 { - let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); - let ch = (e & f) ^ ((!e) & g); - let t1 = h - .wrapping_add(s1) - .wrapping_add(ch) - .wrapping_add(SHA256_K[i]) - .wrapping_add(w[i]); - let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); - let maj = (a & b) ^ (a & c) ^ (b & c); - let t2 = s0.wrapping_add(maj); - - h = g; - g = f; - f = e; - e = d.wrapping_add(t1); - d = c; - c = b; - b = a; - a = t1.wrapping_add(t2); - } - - let state = [ - SHA256_IV[0].wrapping_add(a), - SHA256_IV[1].wrapping_add(b), - SHA256_IV[2].wrapping_add(c), - 
SHA256_IV[3].wrapping_add(d), - SHA256_IV[4].wrapping_add(e), - SHA256_IV[5].wrapping_add(f), - SHA256_IV[6].wrapping_add(g), - SHA256_IV[7].wrapping_add(h), - ]; - - let mut out = [0u8; 32]; - for (i, word) in state.iter().copied().enumerate() { - out[i * 4..i * 4 + 4].copy_from_slice(&word.to_le_bytes()); - } - out + let mut engine = sha256::HashEngine::default(); + engine.input(header_prefix_64); + sha256_midstate_to_bzm2_le(engine.midstate()) } #[cfg(test)] @@ -322,11 +201,12 @@ pub(super) fn hash_bytes_bzm2_order(hash: &bitcoin::BlockHash) -> [u8; 32] { #[cfg(test)] mod tests { - use bitcoin::hashes::Hash as _; + use bitcoin::hashes::{Hash as _, HashEngine as _, sha256d}; use super::{ - Bzm2CheckResult, bzm2_double_sha_from_midstate_and_tail, check_result, - hash_bytes_bzm2_order, midstate_version_mask_variants, + Bzm2CheckResult, bzm2_double_sha_from_midstate_and_tail, + bzm2_midstate_to_sha256_midstate, check_result, compute_midstate_le, + hash_bytes_bzm2_order, midstate_version_mask_variants, sha256_midstate_to_bzm2_le, }; #[test] @@ -390,6 +270,50 @@ mod tests { assert_eq!(hash_bytes_bzm2_order(&hash), src); } + #[test] + fn test_midstate_conversion_round_trip_preserves_words() { + let sha256_midstate = bitcoin::hashes::sha256::Midstate::from_byte_array( + core::array::from_fn(|i| i as u8), + ); + let bzm2_midstate = sha256_midstate_to_bzm2_le(sha256_midstate); + + assert_eq!( + bzm2_midstate_to_sha256_midstate(&bzm2_midstate).to_byte_array(), + sha256_midstate.to_byte_array() + ); + } + + #[test] + fn test_compute_midstate_le_matches_bitcoin_sha256_engine() { + let header_prefix = core::array::from_fn(|i| i as u8); + let mut engine = bitcoin::hashes::sha256::HashEngine::default(); + engine.input(&header_prefix); + + assert_eq!( + compute_midstate_le(&header_prefix), + sha256_midstate_to_bzm2_le(engine.midstate()) + ); + } + + #[test] + fn test_bzm2_double_sha_matches_bitcoin_double_sha_for_full_header() { + let header_bytes: [u8; 80] = 
core::array::from_fn(|i| i as u8); + let header_prefix: [u8; 64] = header_bytes[..64] + .try_into() + .expect("header prefix must be 64 bytes"); + let header_tail: [u8; 16] = header_bytes[64..] + .try_into() + .expect("header tail must be 16 bytes"); + + assert_eq!( + bzm2_double_sha_from_midstate_and_tail( + &compute_midstate_le(&header_prefix), + &header_tail, + ), + sha256d::Hash::hash(&header_bytes).to_byte_array() + ); + } + #[test] fn test_bzm2_double_sha_matches_known_trace_sample() { let midstate =