Skip to content

Commit cfee1d7

Browse files
author
Lars T Hansen
committed
Fix #450 - compute numa nodes and distances properly
1 parent 58ad3fa commit cfee1d7

File tree

12 files changed

+125
-40
lines changed

12 files changed

+125
-40
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "sonar"
3-
version = "0.16.0"
3+
version = "0.16.1-devel"
44
edition = "2021"
55
repository = "https://github.com/NordicHPC/sonar"
66

doc/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ relied upon. Consumers should expect to see the "new" JSON format for new data,
1414
should ask for that format.
1515

1616

17+
## Changes in v0.16.1 (on `release_0_16`)
18+
19+
* Bug 450 - compute distances correctly to avoid erroring out when NUMA nodes are fewer than sockets;
20+
expose `numa_nodes` field on sysinfo for good measure.
21+
1722
## Changes in v0.16.0 (on `release_0_16`)
1823

1924
* Bug 335 / Bug 415 - **BREAKING CHANGE.** The `cpu_util` field was not emitted properly, it should

doc/NEW-FORMAT.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,11 @@ Operating system version (the `release` field of `struct utsname`)
314314

315315
Architecture name (the `machine` field of `struct utsname`)
316316

317+
#### **`numa_nodes`** uint64
318+
319+
Number of NUMA nodes (frequently smaller than the number of sockets), zero if we can't
320+
compute it
321+
317322
#### **`sockets`** NonzeroUint
318323

319324
Number of CPU sockets
@@ -337,20 +342,21 @@ Primary memory in kilobytes
337342
#### **`topo_svg`** string
338343

339344
Base64-encoded text output (SVG source text) of `lstopo` or similar, using the base64
340-
alphabet from RFC4648.
345+
alphabet from RFC4648
341346

342347
#### **`topo_text`** string
343348

344349
Base64-encoded text output (human-readable) of `hwloc-ls` or similar, using the base64
345-
alphabet from RFC4648.
350+
alphabet from RFC4648
346351

347352
#### **`cards`** []SysinfoGpuCard
348353

349354
Per-card information
350355

351356
#### **`distances`** [][]uint64
352357

353-
Square matrix of standard NUMA node-to-node distances (normalized to 10 for self distance)
358+
Square matrix of standard NUMA node-to-node distances (normalized to 10 for self distance),
359+
empty if we can't compute it
354360

355361
### Type: `SysinfoGpuCard`
356362

doc/types.spec.yaml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ SysinfoAttributes:
101101
type: 'NonemptyString'
102102
doc: >
103103
Architecture name (the `machine` field of `struct utsname`)
104+
numa_nodes:
105+
type: 'uint64'
106+
doc: >
107+
Number of NUMA nodes (frequently smaller than the number of sockets), zero if we can't compute it
104108
sockets:
105109
type: 'NonzeroUint'
106110
doc: >
@@ -124,19 +128,19 @@ SysinfoAttributes:
124128
topo_svg:
125129
type: 'string'
126130
doc: >
127-
Base64-encoded text output (SVG source text) of `lstopo` or similar, using the base64 alphabet from RFC4648.
131+
Base64-encoded text output (SVG source text) of `lstopo` or similar, using the base64 alphabet from RFC4648
128132
topo_text:
129133
type: 'string'
130134
doc: >
131-
Base64-encoded text output (human-readable) of `hwloc-ls` or similar, using the base64 alphabet from RFC4648.
135+
Base64-encoded text output (human-readable) of `hwloc-ls` or similar, using the base64 alphabet from RFC4648
132136
cards:
133137
type: '[]SysinfoGpuCard'
134138
doc: >
135139
Per-card information
136140
distances:
137141
type: '[][]uint64'
138142
doc: >
139-
Square matrix of standard NUMA node-to-node distances (normalized to 10 for self distance)
143+
Square matrix of standard NUMA node-to-node distances (normalized to 10 for self distance), empty if we can't compute it
140144
SysinfoGpuCard:
141145
fields:
142146
index:

src/json_tags.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ pub const SYSINFO_ATTRIBUTES_NODE: &str = "node"; // Hostname
3131
pub const SYSINFO_ATTRIBUTES_OS_NAME: &str = "os_name"; // NonemptyString
3232
pub const SYSINFO_ATTRIBUTES_OS_RELEASE: &str = "os_release"; // NonemptyString
3333
pub const SYSINFO_ATTRIBUTES_ARCHITECTURE: &str = "architecture"; // NonemptyString
34+
pub const SYSINFO_ATTRIBUTES_NUMA_NODES: &str = "numa_nodes"; // uint64
3435
pub const SYSINFO_ATTRIBUTES_SOCKETS: &str = "sockets"; // NonzeroUint
3536
pub const SYSINFO_ATTRIBUTES_CORES_PER_SOCKET: &str = "cores_per_socket"; // NonzeroUint
3637
pub const SYSINFO_ATTRIBUTES_THREADS_PER_CORE: &str = "threads_per_core"; // NonzeroUint

src/linux/mocksystem.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::gpu;
22
use crate::gpu::mockgpu;
33
use crate::jobsapi;
44
use crate::json_tags;
5-
use crate::linux::procfs;
5+
use crate::linux::{procfs, system};
66
use crate::systemapi;
77
use crate::time;
88

@@ -15,6 +15,7 @@ use std::path;
1515
#[derive(Default)]
1616
pub struct Builder {
1717
proc_files: Option<HashMap<String, String>>,
18+
node_files: Option<HashMap<String, String>>,
1819
pids: Option<Vec<(usize, u32)>>,
1920
threads: Option<HashMap<usize, Vec<(usize, u32)>>>,
2021
users: Option<HashMap<u32, String>>,
@@ -39,13 +40,22 @@ impl Builder {
3940
}
4041
}
4142

43+
// Files below /proc
4244
pub fn with_proc_files(self, files: HashMap<String, String>) -> Builder {
4345
Builder {
4446
proc_files: Some(files),
4547
..self
4648
}
4749
}
4850

51+
// Files below /sys/devices/system/node
52+
pub fn with_node_files(self, files: HashMap<String, String>) -> Builder {
53+
Builder {
54+
node_files: Some(files),
55+
..self
56+
}
57+
}
58+
4959
pub fn with_pids(self, pids: Vec<(usize, u32)>) -> Builder {
5060
Builder {
5161
pids: Some(pids),
@@ -188,6 +198,7 @@ impl Builder {
188198
threads,
189199
}
190200
},
201+
node_files: self.node_files.unwrap_or_default(),
191202
now: if let Some(x) = self.now {
192203
x
193204
} else {
@@ -216,6 +227,7 @@ pub struct MockSystem {
216227
os_release: String,
217228
architecture: String,
218229
fs: MockFS,
230+
node_files: HashMap<String, String>,
219231
gpus: mockgpu::MockGpuAPI,
220232
users: HashMap<u32, String>,
221233
pid: u32,
@@ -298,7 +310,7 @@ impl systemapi::SystemAPI for MockSystem {
298310
}
299311

300312
fn get_numa_distances(&self) -> Result<Vec<Vec<u32>>, String> {
301-
Ok(vec![vec![10u32]])
313+
system::get_numa_distances(self)
302314
}
303315

304316
fn compute_node_information(&self) -> Result<(u64, Vec<u64>), String> {
@@ -337,6 +349,13 @@ impl systemapi::SystemAPI for MockSystem {
337349
panic!("Not in use yet");
338350
}
339351

352+
fn read_node_file_to_string(&self, filename: &str) -> io::Result<String> {
353+
match self.node_files.get(filename) {
354+
Some(contents) => Ok(contents.clone()),
355+
None => Err(io::Error::from(io::ErrorKind::NotFound)),
356+
}
357+
}
358+
340359
fn run_sacct(
341360
&self,
342361
_job_states: &[&str],

src/linux/system.rs

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -207,28 +207,7 @@ impl systemapi::SystemAPI for System {
207207
}
208208

209209
fn get_numa_distances(&self) -> Result<Vec<Vec<u32>>, String> {
210-
let mut m = vec![];
211-
for n in 0..self.get_cpu_info()?.sockets {
212-
let filename = format!("/sys/devices/system/node/node{n}/distance");
213-
match fs::read_to_string(path::Path::new(&filename)) {
214-
Ok(s) => {
215-
let mut r = vec![];
216-
for v in s.trim().split(" ") {
217-
match v.parse::<u32>() {
218-
Ok(n) => r.push(n),
219-
Err(_) => {
220-
return Err(format!("Unable to parse {s}"));
221-
}
222-
}
223-
}
224-
m.push(r);
225-
}
226-
Err(_) => {
227-
return Err(format!("Unable to read {filename}"));
228-
}
229-
}
230-
}
231-
Ok(m)
210+
get_numa_distances(self)
232211
}
233212

234213
fn compute_node_information(&self) -> Result<(u64, Vec<u64>), String> {
@@ -267,6 +246,11 @@ impl systemapi::SystemAPI for System {
267246
fs::remove_file(p)
268247
}
269248

249+
fn read_node_file_to_string(&self, f: &str) -> io::Result<String> {
250+
let filename = format!("/sys/devices/system/node/{f}");
251+
fs::read_to_string(path::Path::new(&filename))
252+
}
253+
270254
fn run_sacct(
271255
&self,
272256
job_states: &[&str],
@@ -497,3 +481,44 @@ impl procfs::ProcfsAPI for RealProcFS {
497481
Ok(pids)
498482
}
499483
}
484+
485+
pub fn get_numa_distances(system: &dyn systemapi::SystemAPI) -> Result<Vec<Vec<u32>>, String> {
486+
let mut m = vec![];
487+
// The number of NUMA nodes is no greater than the number of sockets but it is frequently
488+
// smaller. The rule here is that we enumerate the files and we will expect a dense range
489+
// (even though numactl currently handles a sparse range), but if there's any kind of hiccup
490+
// we fall back to returning an empty matrix, it is only if we can't parse a file that we
491+
// could read that we return an error.
492+
for n in 0..system.get_cpu_info()?.sockets {
493+
match system.read_node_file_to_string(&format!("node{n}/distance")) {
494+
Ok(s) => {
495+
let mut r = vec![];
496+
for v in s.trim().split(" ") {
497+
match v.parse::<u32>() {
498+
Ok(n) => r.push(n),
499+
Err(_) => {
500+
return Err(format!("Unable to parse {s}"));
501+
}
502+
}
503+
}
504+
m.push(r);
505+
}
506+
Err(_) => {
507+
break
508+
}
509+
}
510+
}
511+
// Check that we have a square matrix.
512+
if m.len() > 0 {
513+
let n = m[0].len();
514+
if m.len() != n {
515+
return Ok(vec![]);
516+
}
517+
for i in 1..m.len() {
518+
if m[i].len() != n {
519+
return Ok(vec![]);
520+
}
521+
}
522+
}
523+
Ok(m)
524+
}

src/sysinfo.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ fn layout_sysinfo_newfmt(
122122
attrs.push_s(SYSINFO_ATTRIBUTES_NODE, node_info.node.clone());
123123
attrs.push_s(SYSINFO_ATTRIBUTES_OS_NAME, system.get_os_name());
124124
attrs.push_s(SYSINFO_ATTRIBUTES_OS_RELEASE, system.get_os_release());
125+
attrs.push_u(SYSINFO_ATTRIBUTES_NUMA_NODES, node_info.numa_nodes);
125126
attrs.push_u(SYSINFO_ATTRIBUTES_SOCKETS, node_info.sockets);
126127
attrs.push_u(
127128
SYSINFO_ATTRIBUTES_CORES_PER_SOCKET,
@@ -326,6 +327,7 @@ struct NodeInfo {
326327
node: String,
327328
description: String,
328329
logical_cores: u64,
330+
numa_nodes: u64,
329331
sockets: u64,
330332
cores_per_socket: u64,
331333
threads_per_core: u64,
@@ -409,6 +411,7 @@ fn compute_nodeinfo(system: &dyn systemapi::SystemAPI) -> Result<NodeInfo, Strin
409411
"{sockets}x{cores_per_socket}{ht} {model_name}, {mem_gb} GiB{gpu_desc}"
410412
),
411413
logical_cores: (sockets * cores_per_socket * threads_per_core) as u64,
414+
numa_nodes: distances.len() as u64,
412415
sockets: sockets as u64,
413416
cores_per_socket: cores_per_socket as u64,
414417
threads_per_core: threads_per_core as u64,

src/sysinfo_test.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,36 @@ use std::collections::HashMap;
1414
#[test]
1515
pub fn sysinfo_output_test() {
1616
// FIXME: Information leakage!!
17-
let mut files = HashMap::new();
18-
files.insert(
17+
let mut proc_files = HashMap::new();
18+
proc_files.insert(
1919
"cpuinfo".to_string(),
2020
std::include_str!("linux/testdata/cpuinfo-x86_64.txt").to_string(),
2121
);
22-
files.insert(
22+
proc_files.insert(
2323
"meminfo".to_string(),
2424
"MemTotal: 16093776 kB".to_string(),
2525
);
26+
// Really we want something here where we have multiple nodes, but we *also* want sockets>nodes
27+
// to test that bit. However I don't want to change the test data on release_0_16 too much so
28+
// for now let's be happy with sockets==nodes.
29+
let mut node_files = HashMap::new();
30+
node_files.insert(
31+
"node0/distance".to_string(),
32+
"10 21".to_string(),
33+
);
34+
node_files.insert(
35+
"node1/distance".to_string(),
36+
"21 10".to_string(),
37+
);
2638
let system = mocksystem::Builder::new()
2739
.with_version("0.13.100")
2840
.with_timestamp("2025-02-11T08:47+01:00")
2941
.with_hostname("yes.no")
3042
.with_cluster("kain.uio.no")
3143
.with_os("CP/M", "2.2")
3244
.with_architecture("Z80")
33-
.with_proc_files(files)
45+
.with_proc_files(proc_files)
46+
.with_node_files(node_files)
3447
.with_card(gpu::Card {
3548
bus_addr: "12:14:16".to_string(),
3649
device: gpu::Name {
@@ -97,13 +110,14 @@ pub fn sysinfo_output_test() {
97110
"node":"yes.no",
98111
"os_name":"CP/M",
99112
"os_release":"2.2",
113+
"numa_nodes":2,
100114
"sockets":2,
101115
"cores_per_socket":4,
102116
"threads_per_core":2,
103117
"cpu_model":"Intel(R) Xeon(R) CPU E5-2637 v4 @ 3.50GHz",
104118
"architecture":"Z80",
105119
"memory":16093776,
106-
"distances":[[10]],
120+
"distances":[[10,21],[21,10]],
107121
"cards":[
108122
{
109123
"index":0,

0 commit comments

Comments
 (0)