Skip to content

Commit 57ea700

Browse files
committed
what a depressing friday night
1 parent 4108d66 commit 57ea700

1 file changed

Lines changed: 323 additions & 3 deletions

File tree

src/collector/nvme.rs

Lines changed: 323 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,334 @@
11
// src/collector/nvme.rs
22
//! NVMe SMART and Identify Controller collection via nvme_cli_sys.
33
4-
use nvme_cli_sys::{nvme_admin_cmd, nvme_admin_opcode::nvme_admin_get_log_page, nvme_smart_log};
4+
use nvme_cli_sys::{
5+
nvme_admin_cmd, nvme_admin_opcode::nvme_admin_get_log_page,
6+
nvme_admin_opcode::nvme_admin_identify, nvme_id_ctrl, nvme_smart_log,nvme_id_power_state
7+
};
58
use serde::Serialize;
69
use std::fs::{self, OpenOptions};
710
use std::io;
811
use std::mem::{size_of, zeroed};
912
use std::os::unix::io::AsRawFd;
1013

14+
#[derive(Debug, Serialize)]
15+
pub struct NvmesIdCtrl {
16+
/// NVMe device name (e.g., "nvme0")
17+
pub nvme_name: String,
18+
19+
/// PCI Vendor ID of the controller.
20+
pub vid: u16,
21+
22+
/// PCI Subsystem Vendor ID.
23+
pub ssvid: u16,
24+
25+
/// Serial Number (ASCII, space padded).
26+
pub sn: String,
27+
28+
/// Model Number (ASCII, space padded).
29+
pub mn: String,
30+
31+
/// Firmware Revision (ASCII, space padded).
32+
pub fr: String,
33+
34+
/// Recommended Arbitration Burst.
35+
/// Hint to host for arbitration burst size when using weighted round-robin arbitration.
36+
pub rab: u8,
37+
38+
/// IEEE OUI Identifier (3 bytes) for the vendor (Organizationally Unique Identifier).
39+
pub ieee: [u8; 3],
40+
41+
/// Controller Multi-Path I/O and Namespace Sharing Capabilities (bitfield).
42+
/// Indicates multipath / shared namespaces capabilities.
43+
pub cmic: u8,
44+
45+
/// Maximum Data Transfer Size (MDTS).
46+
/// Expressed as a power-of-two multiple of the minimum memory page size (MPSMIN).
47+
/// Effective max transfer = (2^mdts) * (minimum page size).
48+
pub mdts: u8,
49+
50+
/// Controller ID (CNTLID) assigned by the controller.
51+
pub cntlid: u16,
52+
53+
/// Version (VER) of the NVMe specification the controller complies with.
54+
pub ver: u32,
55+
56+
/// RTD3 Resume Latency (microseconds).
57+
pub rtd3r_us: u32,
58+
59+
/// RTD3 Entry Latency (microseconds).
60+
pub rtd3e_us: u32,
61+
62+
/// Optional Asynchronous Events Supported (bitfield).
63+
pub oaes: u32,
64+
65+
/// Controller Attributes (bitfield).
66+
pub ctratt: u32,
67+
68+
/// Read Recovery Levels Supported (bitfield / encoded).
69+
pub rrls: u16,
70+
71+
/// Controller Type (encoded).
72+
pub cntrltype: u8,
73+
74+
/// FRU GUID / Field Replaceable Unit GUID.
75+
pub fguid: [u8; 16],
76+
77+
/// Command Retry Delay Time 1.
78+
pub crdt1: u16,
79+
80+
/// Command Retry Delay Time 2.
81+
pub crdt2: u16,
82+
83+
/// Command Retry Delay Time 3.
84+
pub crdt3: u16,
85+
86+
/// NVM Subsystem Report (bitfield/encoded).
87+
pub nvmsr: u8,
88+
89+
/// VPD Write Cycle Information (bitfield/encoded).
90+
pub vwci: u8,
91+
92+
/// Management Endpoint Capabilities (bitfield/encoded).
93+
pub mec: u8,
94+
95+
/// Optional Admin Command Support (bitfield).
96+
pub oacs: u16,
97+
98+
/// Abort Command Limit.
99+
pub acl: u8,
100+
101+
/// Asynchronous Event Request Limit.
102+
pub aerl: u8,
103+
104+
/// Firmware Updates (bitfield).
105+
pub frmw: u8,
106+
107+
/// Log Page Attributes (bitfield).
108+
pub lpa: u8,
109+
110+
/// Error Log Page Entries (0-based).
111+
pub elpe: u8,
112+
113+
/// Number of Power States Supported minus 1.
114+
pub npss: u8,
115+
116+
/// Admin Vendor Specific Command Configuration.
117+
pub avscc: u8,
118+
119+
/// Autonomous Power State Transition Attributes.
120+
pub apsta: u8,
121+
122+
/// Warning Composite Temperature Threshold (Kelvin).
123+
pub wctemp_k: u16,
124+
125+
/// Critical Composite Temperature Threshold (Kelvin).
126+
pub cctemp_k: u16,
127+
128+
/// Maximum Time for Firmware Activation.
129+
pub mtfa: u16,
130+
131+
/// Host Memory Buffer Preferred Size (bytes).
132+
pub hmpre: u32,
133+
134+
/// Host Memory Buffer Minimum Size (bytes).
135+
pub hmmin: u32,
136+
137+
/// Total NVM Capacity (bytes).
138+
pub tnvmcap_bytes: u128,
139+
140+
/// Unallocated NVM Capacity (bytes).
141+
pub unvmcap_bytes: u128,
142+
143+
/// Replay Protected Memory Block Support (bitfield).
144+
pub rpmbs: u32,
145+
146+
/// Extended Device Self-test Time (minutes).
147+
pub edstt: u16,
148+
149+
/// Device Self-test Options (bitfield).
150+
pub dsto: u8,
151+
152+
/// Firmware Update Granularity.
153+
pub fwug: u8,
154+
155+
/// Keep Alive Support.
156+
pub kas: u16,
157+
158+
/// Host Controlled Thermal Management Attributes.
159+
pub hctma: u16,
160+
161+
/// Minimum Thermal Management Temperature (Kelvin).
162+
pub mntmt_k: u16,
163+
164+
/// Maximum Thermal Management Temperature (Kelvin).
165+
pub mxtmt_k: u16,
166+
167+
/// Sanitize Capabilities (bitfield).
168+
pub sanicap: u32,
169+
170+
/// Host Memory Buffer Minimum Descriptor Entry Size.
171+
pub hmminds: u32,
172+
173+
/// Host Memory Buffer Maximum Descriptor Entries.
174+
pub hmmaxd: u16,
175+
176+
/// NVM Set Identifier Maximum.
177+
pub nsetidmax: u16,
178+
179+
/// Endurance Group Identifier Maximum.
180+
pub endgidmax: u16,
181+
182+
/// ANA Transition Time.
183+
pub anatt: u8,
184+
185+
/// ANA Capabilities.
186+
pub anacap: u8,
187+
188+
/// ANA Group Identifier Maximum.
189+
pub anagrpmax: u32,
190+
191+
/// Number of ANA Group Identifiers.
192+
pub nanagrpid: u32,
193+
194+
/// Persistent Event Log Size (bytes).
195+
pub pels: u32,
196+
197+
/// Domain Identifier.
198+
pub domainid: u16,
199+
200+
/// Maximum Endurance Group Capacity (bytes).
201+
pub megcap_bytes: u128,
202+
203+
/// Submission Queue Entry Size encoding.
204+
pub sqes: u8,
205+
206+
/// Completion Queue Entry Size encoding.
207+
pub cqes: u8,
208+
209+
/// Maximum Outstanding Commands.
210+
pub maxcmd: u16,
211+
212+
/// Number of Namespaces.
213+
pub nn: u32,
214+
215+
/// Optional NVM Command Support.
216+
pub oncs: u16,
217+
218+
/// Fused Operation Support.
219+
pub fuses: u16,
220+
221+
/// Format NVM Attributes.
222+
pub fna: u8,
223+
224+
/// Volatile Write Cache.
225+
pub vwc: u8,
226+
227+
/// Atomic Write Unit Normal (logical blocks).
228+
pub awun: u16,
229+
230+
/// Atomic Write Unit Power Fail (logical blocks).
231+
pub awupf: u16,
232+
233+
/// Vendor Specific Command Configuration.
234+
pub icsvscc: u8,
235+
236+
/// Namespace Write Protection Capabilities.
237+
pub nwpc: u8,
238+
239+
/// Atomic Compare & Write Unit (logical blocks).
240+
pub acwu: u16,
241+
242+
/// Optional Copy Formats Supported.
243+
pub ocfs: u16,
244+
245+
/// SGL Support.
246+
pub sgls: u32,
247+
248+
/// Maximum Number of Allowed Namespaces.
249+
pub mnan: u32,
250+
251+
/// Maximum Capacity of NVM Area.
252+
pub maxcna: u32,
253+
254+
/// Subsystem NQN (ASCII).
255+
pub subnqn: String,
256+
257+
/// I/O Command Capsule Supported Size.
258+
pub ioccsz: u32,
259+
260+
/// I/O Response Capsule Supported Size.
261+
pub iorcsz: u32,
262+
263+
/// In Capsule Data Offset.
264+
pub icdoff: u16,
265+
266+
/// Fabric Controller Attributes.
267+
pub fcatt: u8,
268+
269+
/// Management Service Data Block Descriptor.
270+
pub msdbd: u8,
271+
272+
/// Optional Fabric Commands Support.
273+
pub ofcs: u16,
274+
275+
/// Power State Descriptors.
276+
pub psd: [nvme_id_power_state; 32],
277+
278+
/// Vendor Specific area (1024 bytes).
279+
pub vs: [u8; 1024],
280+
}
281+
282+
283+
/// Constructor for NvmesIdCtrl
284+
impl NvmesIdCtrl {
285+
pub fn new(nvme_name: String, raw: &nvme_id_ctrl) -> Self {
286+
Self {}
287+
}
288+
}
289+
290+
/// Function to extract raw nvme_id_ctrl using the Identify admin command
291+
pub fn get_nvme_id_ctrl_raw(dev_path: &str) -> io::Result<nvme_id_ctrl> {
292+
let file = OpenOptions::new()
293+
.read(true)
294+
.write(true) // Here we need admin permission to send write commands
295+
.open(dev_path)?; // path would be something like /dev/nvme0
296+
297+
let fd = file.as_raw_fd();
298+
299+
// Identify Controller payload is 4096 bytes based on the C bindings in the nvme_cli_sys crate.
300+
// If nvme_id_ctrl from your crate is exactly 4096, great.
301+
// If it's smaller, you should use a [u8; 4096] buffer instead.
302+
let mut id: nvme_id_ctrl = unsafe { zeroed() };
303+
304+
let id_ptr = &mut id as *mut nvme_id_ctrl as u64;
305+
let id_len = size_of::<nvme_id_ctrl>() as u32;
306+
307+
let cns: u8 = 0x01; // Identify Controller
308+
let cntlid: u16 = 0x0000; // usually 0
309+
let cdw10: u32 = (cns as u32) | ((cntlid as u32) << 16);
310+
311+
let mut cmd: nvme_admin_cmd = unsafe { zeroed() };
312+
cmd.opcode = nvme_admin_identify as u8; // Identify (0x06)
313+
cmd.nsid = 0x0000_0000;
314+
cmd.addr = id_ptr;
315+
cmd.data_len = id_len;
316+
cmd.cdw10 = cdw10;
317+
cmd.cdw11 = 0;
318+
cmd.timeout_ms = 1000;
319+
320+
let ret = unsafe { nvme_cli_sys::nvme_ioctl_admin_cmd(fd, &mut cmd) };
321+
322+
match ret {
323+
Ok(status) if status == 0 => Ok(id),
324+
Ok(status) => Err(io::Error::new(
325+
io::ErrorKind::Other,
326+
format!("NVMe admin command failed, status={:#x}", status),
327+
)),
328+
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())),
329+
}
330+
}
331+
11332
#[derive(Debug, Serialize)]
12333
pub struct NvmesSmartLog {
13334
/// NVMe device name (e.g., "nvme0")
@@ -174,7 +495,6 @@ pub struct NvmesSmartLog {
174495
// Constructor for NvmesSmartLog
175496
impl NvmesSmartLog {
176497
pub fn new(nvme_name: String, raw: &nvme_smart_log) -> Self {
177-
// TODO: Add validation for values from unsafe crate
178498
Self {
179499
nvme_name,
180500
critical_warning: Some(raw.critical_warning as u64),
@@ -228,7 +548,7 @@ pub fn list_nvme_controllers() -> Vec<String> {
228548
names
229549
}
230550

231-
/// Function to extract raw nvme_smart_log from a controller.
551+
/// Function to extract raw nvme_smart_log.
232552
/// NOTE - This function is heavily annotated because I was struggling to understand how data is extracted.
233553
pub fn get_nvme_smart_log_raw(dev_path: &str) -> io::Result<nvme_smart_log> {
234554
let file = OpenOptions::new()

0 commit comments

Comments
 (0)