|
1 | 1 | // src/collector/nvme.rs |
2 | 2 | //! NVMe SMART and Identify Controller collection via nvme_cli_sys. |
3 | 3 |
|
4 | | -use nvme_cli_sys::{nvme_admin_cmd, nvme_admin_opcode::nvme_admin_get_log_page, nvme_smart_log}; |
| 4 | +use nvme_cli_sys::{ |
| 5 | + nvme_admin_cmd, nvme_admin_opcode::nvme_admin_get_log_page, |
| 6 | + nvme_admin_opcode::nvme_admin_identify, nvme_id_ctrl, nvme_smart_log,nvme_id_power_state |
| 7 | +}; |
5 | 8 | use serde::Serialize; |
6 | 9 | use std::fs::{self, OpenOptions}; |
7 | 10 | use std::io; |
8 | 11 | use std::mem::{size_of, zeroed}; |
9 | 12 | use std::os::unix::io::AsRawFd; |
10 | 13 |
|
| 14 | +#[derive(Debug, Serialize)] |
| 15 | +pub struct NvmesIdCtrl { |
| 16 | + /// NVMe device name (e.g., "nvme0") |
| 17 | + pub nvme_name: String, |
| 18 | + |
| 19 | + /// PCI Vendor ID of the controller. |
| 20 | + pub vid: u16, |
| 21 | + |
| 22 | + /// PCI Subsystem Vendor ID. |
| 23 | + pub ssvid: u16, |
| 24 | + |
| 25 | + /// Serial Number (ASCII, space padded). |
| 26 | + pub sn: String, |
| 27 | + |
| 28 | + /// Model Number (ASCII, space padded). |
| 29 | + pub mn: String, |
| 30 | + |
| 31 | + /// Firmware Revision (ASCII, space padded). |
| 32 | + pub fr: String, |
| 33 | + |
| 34 | + /// Recommended Arbitration Burst. |
| 35 | + /// Hint to host for arbitration burst size when using weighted round-robin arbitration. |
| 36 | + pub rab: u8, |
| 37 | + |
| 38 | + /// IEEE OUI Identifier (3 bytes) for the vendor (Organizationally Unique Identifier). |
| 39 | + pub ieee: [u8; 3], |
| 40 | + |
| 41 | + /// Controller Multi-Path I/O and Namespace Sharing Capabilities (bitfield). |
| 42 | + /// Indicates multipath / shared namespaces capabilities. |
| 43 | + pub cmic: u8, |
| 44 | + |
| 45 | + /// Maximum Data Transfer Size (MDTS). |
| 46 | + /// Expressed as a power-of-two multiple of the minimum memory page size (MPSMIN). |
| 47 | + /// Effective max transfer = (2^mdts) * (minimum page size). |
| 48 | + pub mdts: u8, |
| 49 | + |
| 50 | + /// Controller ID (CNTLID) assigned by the controller. |
| 51 | + pub cntlid: u16, |
| 52 | + |
| 53 | + /// Version (VER) of the NVMe specification the controller complies with. |
| 54 | + pub ver: u32, |
| 55 | + |
| 56 | + /// RTD3 Resume Latency (microseconds). |
| 57 | + pub rtd3r_us: u32, |
| 58 | + |
| 59 | + /// RTD3 Entry Latency (microseconds). |
| 60 | + pub rtd3e_us: u32, |
| 61 | + |
| 62 | + /// Optional Asynchronous Events Supported (bitfield). |
| 63 | + pub oaes: u32, |
| 64 | + |
| 65 | + /// Controller Attributes (bitfield). |
| 66 | + pub ctratt: u32, |
| 67 | + |
| 68 | + /// Read Recovery Levels Supported (bitfield / encoded). |
| 69 | + pub rrls: u16, |
| 70 | + |
| 71 | + /// Controller Type (encoded). |
| 72 | + pub cntrltype: u8, |
| 73 | + |
| 74 | + /// FRU GUID / Field Replaceable Unit GUID. |
| 75 | + pub fguid: [u8; 16], |
| 76 | + |
| 77 | + /// Command Retry Delay Time 1. |
| 78 | + pub crdt1: u16, |
| 79 | + |
| 80 | + /// Command Retry Delay Time 2. |
| 81 | + pub crdt2: u16, |
| 82 | + |
| 83 | + /// Command Retry Delay Time 3. |
| 84 | + pub crdt3: u16, |
| 85 | + |
| 86 | + /// NVM Subsystem Report (bitfield/encoded). |
| 87 | + pub nvmsr: u8, |
| 88 | + |
| 89 | + /// VPD Write Cycle Information (bitfield/encoded). |
| 90 | + pub vwci: u8, |
| 91 | + |
| 92 | + /// Management Endpoint Capabilities (bitfield/encoded). |
| 93 | + pub mec: u8, |
| 94 | + |
| 95 | + /// Optional Admin Command Support (bitfield). |
| 96 | + pub oacs: u16, |
| 97 | + |
| 98 | + /// Abort Command Limit. |
| 99 | + pub acl: u8, |
| 100 | + |
| 101 | + /// Asynchronous Event Request Limit. |
| 102 | + pub aerl: u8, |
| 103 | + |
| 104 | + /// Firmware Updates (bitfield). |
| 105 | + pub frmw: u8, |
| 106 | + |
| 107 | + /// Log Page Attributes (bitfield). |
| 108 | + pub lpa: u8, |
| 109 | + |
| 110 | + /// Error Log Page Entries (0-based). |
| 111 | + pub elpe: u8, |
| 112 | + |
| 113 | + /// Number of Power States Supported minus 1. |
| 114 | + pub npss: u8, |
| 115 | + |
| 116 | + /// Admin Vendor Specific Command Configuration. |
| 117 | + pub avscc: u8, |
| 118 | + |
| 119 | + /// Autonomous Power State Transition Attributes. |
| 120 | + pub apsta: u8, |
| 121 | + |
| 122 | + /// Warning Composite Temperature Threshold (Kelvin). |
| 123 | + pub wctemp_k: u16, |
| 124 | + |
| 125 | + /// Critical Composite Temperature Threshold (Kelvin). |
| 126 | + pub cctemp_k: u16, |
| 127 | + |
| 128 | + /// Maximum Time for Firmware Activation. |
| 129 | + pub mtfa: u16, |
| 130 | + |
| 131 | + /// Host Memory Buffer Preferred Size (bytes). |
| 132 | + pub hmpre: u32, |
| 133 | + |
| 134 | + /// Host Memory Buffer Minimum Size (bytes). |
| 135 | + pub hmmin: u32, |
| 136 | + |
| 137 | + /// Total NVM Capacity (bytes). |
| 138 | + pub tnvmcap_bytes: u128, |
| 139 | + |
| 140 | + /// Unallocated NVM Capacity (bytes). |
| 141 | + pub unvmcap_bytes: u128, |
| 142 | + |
| 143 | + /// Replay Protected Memory Block Support (bitfield). |
| 144 | + pub rpmbs: u32, |
| 145 | + |
| 146 | + /// Extended Device Self-test Time (minutes). |
| 147 | + pub edstt: u16, |
| 148 | + |
| 149 | + /// Device Self-test Options (bitfield). |
| 150 | + pub dsto: u8, |
| 151 | + |
| 152 | + /// Firmware Update Granularity. |
| 153 | + pub fwug: u8, |
| 154 | + |
| 155 | + /// Keep Alive Support. |
| 156 | + pub kas: u16, |
| 157 | + |
| 158 | + /// Host Controlled Thermal Management Attributes. |
| 159 | + pub hctma: u16, |
| 160 | + |
| 161 | + /// Minimum Thermal Management Temperature (Kelvin). |
| 162 | + pub mntmt_k: u16, |
| 163 | + |
| 164 | + /// Maximum Thermal Management Temperature (Kelvin). |
| 165 | + pub mxtmt_k: u16, |
| 166 | + |
| 167 | + /// Sanitize Capabilities (bitfield). |
| 168 | + pub sanicap: u32, |
| 169 | + |
| 170 | + /// Host Memory Buffer Minimum Descriptor Entry Size. |
| 171 | + pub hmminds: u32, |
| 172 | + |
| 173 | + /// Host Memory Buffer Maximum Descriptor Entries. |
| 174 | + pub hmmaxd: u16, |
| 175 | + |
| 176 | + /// NVM Set Identifier Maximum. |
| 177 | + pub nsetidmax: u16, |
| 178 | + |
| 179 | + /// Endurance Group Identifier Maximum. |
| 180 | + pub endgidmax: u16, |
| 181 | + |
| 182 | + /// ANA Transition Time. |
| 183 | + pub anatt: u8, |
| 184 | + |
| 185 | + /// ANA Capabilities. |
| 186 | + pub anacap: u8, |
| 187 | + |
| 188 | + /// ANA Group Identifier Maximum. |
| 189 | + pub anagrpmax: u32, |
| 190 | + |
| 191 | + /// Number of ANA Group Identifiers. |
| 192 | + pub nanagrpid: u32, |
| 193 | + |
| 194 | + /// Persistent Event Log Size (bytes). |
| 195 | + pub pels: u32, |
| 196 | + |
| 197 | + /// Domain Identifier. |
| 198 | + pub domainid: u16, |
| 199 | + |
| 200 | + /// Maximum Endurance Group Capacity (bytes). |
| 201 | + pub megcap_bytes: u128, |
| 202 | + |
| 203 | + /// Submission Queue Entry Size encoding. |
| 204 | + pub sqes: u8, |
| 205 | + |
| 206 | + /// Completion Queue Entry Size encoding. |
| 207 | + pub cqes: u8, |
| 208 | + |
| 209 | + /// Maximum Outstanding Commands. |
| 210 | + pub maxcmd: u16, |
| 211 | + |
| 212 | + /// Number of Namespaces. |
| 213 | + pub nn: u32, |
| 214 | + |
| 215 | + /// Optional NVM Command Support. |
| 216 | + pub oncs: u16, |
| 217 | + |
| 218 | + /// Fused Operation Support. |
| 219 | + pub fuses: u16, |
| 220 | + |
| 221 | + /// Format NVM Attributes. |
| 222 | + pub fna: u8, |
| 223 | + |
| 224 | + /// Volatile Write Cache. |
| 225 | + pub vwc: u8, |
| 226 | + |
| 227 | + /// Atomic Write Unit Normal (logical blocks). |
| 228 | + pub awun: u16, |
| 229 | + |
| 230 | + /// Atomic Write Unit Power Fail (logical blocks). |
| 231 | + pub awupf: u16, |
| 232 | + |
| 233 | + /// Vendor Specific Command Configuration. |
| 234 | + pub icsvscc: u8, |
| 235 | + |
| 236 | + /// Namespace Write Protection Capabilities. |
| 237 | + pub nwpc: u8, |
| 238 | + |
| 239 | + /// Atomic Compare & Write Unit (logical blocks). |
| 240 | + pub acwu: u16, |
| 241 | + |
| 242 | + /// Optional Copy Formats Supported. |
| 243 | + pub ocfs: u16, |
| 244 | + |
| 245 | + /// SGL Support. |
| 246 | + pub sgls: u32, |
| 247 | + |
| 248 | + /// Maximum Number of Allowed Namespaces. |
| 249 | + pub mnan: u32, |
| 250 | + |
| 251 | + /// Maximum Capacity of NVM Area. |
| 252 | + pub maxcna: u32, |
| 253 | + |
| 254 | + /// Subsystem NQN (ASCII). |
| 255 | + pub subnqn: String, |
| 256 | + |
| 257 | + /// I/O Command Capsule Supported Size. |
| 258 | + pub ioccsz: u32, |
| 259 | + |
| 260 | + /// I/O Response Capsule Supported Size. |
| 261 | + pub iorcsz: u32, |
| 262 | + |
| 263 | + /// In Capsule Data Offset. |
| 264 | + pub icdoff: u16, |
| 265 | + |
| 266 | + /// Fabric Controller Attributes. |
| 267 | + pub fcatt: u8, |
| 268 | + |
| 269 | + /// Management Service Data Block Descriptor. |
| 270 | + pub msdbd: u8, |
| 271 | + |
| 272 | + /// Optional Fabric Commands Support. |
| 273 | + pub ofcs: u16, |
| 274 | + |
| 275 | + /// Power State Descriptors. |
| 276 | + pub psd: [nvme_id_power_state; 32], |
| 277 | + |
| 278 | + /// Vendor Specific area (1024 bytes). |
| 279 | + pub vs: [u8; 1024], |
| 280 | +} |
| 281 | + |
| 282 | + |
| 283 | +/// Constructor for NvmesIdCtrl |
| 284 | +impl NvmesIdCtrl { |
| 285 | + pub fn new(nvme_name: String, raw: &nvme_id_ctrl) -> Self { |
| 286 | + Self {} |
| 287 | + } |
| 288 | +} |
| 289 | + |
| 290 | +/// Function to extract raw nvme_id_ctrl using the Identify admin command |
| 291 | +pub fn get_nvme_id_ctrl_raw(dev_path: &str) -> io::Result<nvme_id_ctrl> { |
| 292 | + let file = OpenOptions::new() |
| 293 | + .read(true) |
| 294 | + .write(true) // Here we need admin permission to send write commands |
| 295 | + .open(dev_path)?; // path would be something like /dev/nvme0 |
| 296 | + |
| 297 | + let fd = file.as_raw_fd(); |
| 298 | + |
| 299 | + // Identify Controller payload is 4096 bytes based on the C bindings in the nvme_cli_sys crate. |
| 300 | + // If nvme_id_ctrl from your crate is exactly 4096, great. |
| 301 | + // If it's smaller, you should use a [u8; 4096] buffer instead. |
| 302 | + let mut id: nvme_id_ctrl = unsafe { zeroed() }; |
| 303 | + |
| 304 | + let id_ptr = &mut id as *mut nvme_id_ctrl as u64; |
| 305 | + let id_len = size_of::<nvme_id_ctrl>() as u32; |
| 306 | + |
| 307 | + let cns: u8 = 0x01; // Identify Controller |
| 308 | + let cntlid: u16 = 0x0000; // usually 0 |
| 309 | + let cdw10: u32 = (cns as u32) | ((cntlid as u32) << 16); |
| 310 | + |
| 311 | + let mut cmd: nvme_admin_cmd = unsafe { zeroed() }; |
| 312 | + cmd.opcode = nvme_admin_identify as u8; // Identify (0x06) |
| 313 | + cmd.nsid = 0x0000_0000; |
| 314 | + cmd.addr = id_ptr; |
| 315 | + cmd.data_len = id_len; |
| 316 | + cmd.cdw10 = cdw10; |
| 317 | + cmd.cdw11 = 0; |
| 318 | + cmd.timeout_ms = 1000; |
| 319 | + |
| 320 | + let ret = unsafe { nvme_cli_sys::nvme_ioctl_admin_cmd(fd, &mut cmd) }; |
| 321 | + |
| 322 | + match ret { |
| 323 | + Ok(status) if status == 0 => Ok(id), |
| 324 | + Ok(status) => Err(io::Error::new( |
| 325 | + io::ErrorKind::Other, |
| 326 | + format!("NVMe admin command failed, status={:#x}", status), |
| 327 | + )), |
| 328 | + Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())), |
| 329 | + } |
| 330 | +} |
| 331 | + |
11 | 332 | #[derive(Debug, Serialize)] |
12 | 333 | pub struct NvmesSmartLog { |
13 | 334 | /// NVMe device name (e.g., "nvme0") |
@@ -174,7 +495,6 @@ pub struct NvmesSmartLog { |
174 | 495 | // Constructor for NvmesSmartLog |
175 | 496 | impl NvmesSmartLog { |
176 | 497 | pub fn new(nvme_name: String, raw: &nvme_smart_log) -> Self { |
177 | | - // TODO: Add validation for values from unsafe crate |
178 | 498 | Self { |
179 | 499 | nvme_name, |
180 | 500 | critical_warning: Some(raw.critical_warning as u64), |
@@ -228,7 +548,7 @@ pub fn list_nvme_controllers() -> Vec<String> { |
228 | 548 | names |
229 | 549 | } |
230 | 550 |
|
231 | | -/// Function to extract raw nvme_smart_log from a controller. |
| 551 | +/// Function to extract raw nvme_smart_log. |
232 | 552 | /// NOTE - This function is heavily annotated because I was struggling to understand how data is extracted. |
233 | 553 | pub fn get_nvme_smart_log_raw(dev_path: &str) -> io::Result<nvme_smart_log> { |
234 | 554 | let file = OpenOptions::new() |
|
0 commit comments