update hotfix branch #40

Merged
merged 20 commits into from
Nov 28, 2024
8 changes: 4 additions & 4 deletions examples/gpt-demo/01-GPT-Training.ipynb
@@ -184,14 +184,14 @@
"lastKernelId": null
},
"kernelspec": {
"display_name": "scorep-python3",
"language": "python3",
"name": "scorep-python3"
"display_name": "JUmPER",
"language": "python",
"name": "jumper"
},
"language_info": {
"file_extension": ".py",
"mimetype": "text/plain",
"name": "Any text"
"name": "python"
}
},
"nbformat": 4,
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta:__legacy__"

 [project]
 name='jumper-kernel'
-version='1.0.0'
+version='1.1.0'
 authors=[
 {name='Elias Werner',email='[email protected]'},
 ]
@@ -21,13 +21,14 @@ classifiers=[
 
 dependencies = [
 "ipykernel",
+"ipywidgets",
+"ipympl",
 "jupyter-client",
 "astunparse",
 "dill",
 "itables",
 "matplotlib",
 "pandas",
-"ipywidgets",
 "pynvml" # we need that only for Nvidia GPU systems
 ]
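
The trailing comment notes that pynvml is only needed on NVIDIA GPU systems. Below is a minimal sketch of how such an optional dependency might be guarded at import time; this is an assumption for illustration, not the kernel's actual code:

# Hypothetical guard: fall back gracefully when pynvml or an NVIDIA driver
# is missing, so non-GPU systems can still import the monitoring module.
try:
    import pynvml
    pynvml.nvmlInit()
    NVML_AVAILABLE = True
except Exception:  # ImportError, or NVMLError when no NVIDIA driver is present
    NVML_AVAILABLE = False

def gpu_utilization():
    """Return per-GPU utilization in percent, or an empty list without NVML."""
    if not NVML_AVAILABLE:
        return []
    return [
        pynvml.nvmlDeviceGetUtilizationRates(
            pynvml.nvmlDeviceGetHandleByIndex(i)
        ).gpu
        for i in range(pynvml.nvmlDeviceGetCount())
    ]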

231 changes: 174 additions & 57 deletions src/jumper/kernel.py

Large diffs are not rendered by default.

23 changes: 15 additions & 8 deletions src/jumper/perfdatahandler.py
@@ -3,7 +3,6 @@
 import pickle
 import codecs
 import time
-from datetime import datetime
 import os
 import subprocess
 import sys
@@ -28,8 +27,9 @@ class PerformanceDataHandler:
 def __init__(self):
 self.code_history = []
 self.performance_data_history = []
+self.time_indices = []
 self.nodelist = None
-# for local it's none, otherwise points to slurm/ssh/... monitor
+# for local, it's none, otherwise points to slurm/ssh/... monitor
 self.monitor_module = None
 # the object from the monitor module
 self.monitor = None
@@ -78,8 +78,12 @@ def get_perfdata_history(self):
 def get_code_history(self):
 return self.code_history
 
-def append_code(self, time, code):
-self.code_history.append([time, code])
+def get_time_indices(self):
+return self.time_indices
+
+def append_code(self, time_, code, time_indices=None):
+self.code_history.append([time_, code])
+self.time_indices.append(time_indices)
 
 def get_perfdata_aggregated(self):
 perfdata_aggregated = []
@@ -157,7 +161,12 @@ def get_perfdata_aggregated(self):
 # add cell index and the number of measurements
 # we will use that in the visualization to generate
 # a color transition in the graphs and add the cell index
-time_indices[node].append((idx, len(perfdata[node][2])))
+if self.time_indices[idx]:
+# for cells tracked in multi cell mode, we can use the sub
+# indices created
+time_indices[node].extend(self.time_indices[idx][node])
+else:
+time_indices[node].append((idx, len(perfdata[node][2])))
 
 return perfdata_aggregated, time_indices
 
@@ -326,7 +335,7 @@ def start_perfmonitor(self, pid):
 
 self.starttime = time.perf_counter()
 
-def end_perfmonitor(self, code):
+def end_perfmonitor(self):
 duration = time.perf_counter() - self.starttime
 
 if self.monitor_module:
@@ -352,6 +361,4 @@ def end_perfmonitor(self, code):
 performance_data_nodes = self.parse_perfdata_from_stdout(
 stdout_data_node
 )
-if performance_data_nodes:
-self.append_code(datetime.now(), code)
 return performance_data_nodes, duration
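
The new branch in get_perfdata_aggregated() reuses the per-cell sub indices recorded by append_code() when a cell ran in multi-cell mode, and otherwise falls back to one (cell index, measurement count) tuple per cell. A standalone sketch of that bookkeeping with made-up data (names mirror the diff, the perfdata structure is simplified):

# Sub indices recorded per executed cell: None for a normal cell, a dict of
# node -> [(sub_cell_idx, n_measurements), ...] for a multi-cell run.
cell_sub_indices = [
    None,
    {"node0": [(1, 4), (2, 6)]},
]
# Simplified stand-in for perfdata: measurements recorded per cell on node0.
measurements_per_cell = [10, 10]

time_indices = {"node0": []}
for idx, n_measurements in enumerate(measurements_per_cell):
    if cell_sub_indices[idx]:
        # cells tracked in multi-cell mode already carry sub indices
        time_indices["node0"].extend(cell_sub_indices[idx]["node0"])
    else:
        # otherwise record (cell index, number of measurements)
        time_indices["node0"].append((idx, n_measurements))

print(time_indices)  # {'node0': [(0, 10), (1, 4), (2, 6)]}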
3 changes: 2 additions & 1 deletion src/jumper/userpersistence.py
@@ -101,6 +101,7 @@ def jupyter_dump(self):
 Generate code for kernel ghost cell to dump notebook persistence for
 subprocess.
 """
+
 jupyter_dump_ = (
 "import sys\n"
 "import os\n"
@@ -111,7 +112,7 @@ def jupyter_dump(self):
 f"'{self.paths['jupyter']['sys_path']}',{self.marshaller})\n"
 f"dump_variables({str(self.jupyter_variables)},globals(),"
 f"'{self.paths['jupyter']['var']}',"
-f"{self.marshaller})"
+f"{self.marshaller})\n"
 )
 
 return jupyter_dump_
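
The only functional change in the second hunk is the trailing "\n" appended to the last generated statement. A tiny illustration of why that matters when generated snippets are concatenated into one ghost-cell source (the snippet strings here are made up):

dump_snippet = "dump_variables(['a'], globals(), '/tmp/vars', marshaller)"
next_snippet = "print('done')"

broken = dump_snippet + next_snippet        # both statements fused onto one line
fixed = dump_snippet + "\n" + next_snippet  # each statement on its own line

compile(fixed, "<ghost-cell>", "exec")      # parses fine
# compile(broken, "<ghost-cell>", "exec")   # would raise SyntaxError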
27 changes: 15 additions & 12 deletions src/jumper/visualization.py
@@ -32,6 +32,7 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None):
 int(os.environ.get("JUMPER_REPORT_FREQUENCY", 2)),
 )
 ]
+
 if metric == perfmetrics["cpu_agg"]:
 ax.plot(
 x_scale, perfdata[0][0][-3], label="Mean", color=(0.20, 0.47, 1.00)
@@ -147,24 +148,26 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None):
 ax.legend()
 ax.grid(True)
 
-# in multi node case, we have to iterate over the indices (time_indices)
-# and not only 0 here
-current_index = 0
-target_index = -1
-transition_offset = (x_scale[1] - x_scale[0]) / 2
-start_offset = 0
+# colorization of the plot in case of multiple cells
 if time_indices:
+# in multi node case, we have to iterate over the indices (
+# time_indices) and not only 0 here
+current_index = 0
+target_index = 0
+transition_offset = (x_scale[1] - x_scale[0]) / 2
+start_offset = 0
 last_idx = time_indices[0][-1][0]
 
-for cell_idx, n_ms in time_indices[0]:
-
+# for multi cell mode, we might have sub indices
+for cell_idx, values in enumerate(time_indices[0]):
+sub_idx, n_ms = values
 target_index = target_index + n_ms
 # don't use offset for last cell
-if cell_idx == last_idx:
+if sub_idx == last_idx:
 transition_offset = 0
 ax.axvspan(
 x_scale[current_index] + start_offset,
-x_scale[target_index] + transition_offset,
+x_scale[min(target_index, len(x_scale)-1)] + transition_offset,
 facecolor=color[cell_idx],
 alpha=0.3,
 )
@@ -174,7 +177,7 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None):
 + start_offset
 + (
 (
-x_scale[target_index]
+x_scale[min(target_index, len(x_scale)-1)]
 + transition_offset
 - x_scale[current_index]
 + start_offset
@@ -188,7 +191,7 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None):
 ax.text(
 text_x_pos,
 text_y_pos,
-"#" + str(cell_idx),
+"#" + str(sub_idx),
 style="italic",
 bbox={"facecolor": "lightgrey", "alpha": 0.5, "pad": 2},
 )
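
The colorization loop now iterates over (sub_idx, n_ms) pairs and clamps the span end with min(target_index, len(x_scale) - 1). A self-contained matplotlib sketch of the same idea with dummy data (variable names mirror the diff, the numbers are invented):

import matplotlib.pyplot as plt

x_scale = [i * 0.5 for i in range(12)]       # 12 samples, 0.5 s apart
values = [v ** 0.5 for v in range(12)]       # dummy metric values
time_indices = [(0, 5), (1, 4), (2, 3)]      # (cell index, number of samples)
colors = ["tab:blue", "tab:orange", "tab:green"]

fig, ax = plt.subplots()
ax.plot(x_scale, values, color="black")

current_index = 0
target_index = 0
transition_offset = (x_scale[1] - x_scale[0]) / 2
start_offset = 0
last_idx = time_indices[-1][0]

for cell_idx, (sub_idx, n_ms) in enumerate(time_indices):
    target_index += n_ms
    if sub_idx == last_idx:                  # no offset past the final sample
        transition_offset = 0
    clamped = min(target_index, len(x_scale) - 1)
    ax.axvspan(
        x_scale[current_index] + start_offset,
        x_scale[clamped] + transition_offset,
        facecolor=colors[cell_idx],
        alpha=0.3,
    )
    ax.text(
        (x_scale[current_index] + x_scale[clamped]) / 2,
        max(values),
        "#" + str(sub_idx),
        style="italic",
        bbox={"facecolor": "lightgrey", "alpha": 0.5, "pad": 2},
    )
    current_index = clamped
    start_offset = transition_offset

plt.show()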
10 changes: 5 additions & 5 deletions tests/kernel/multicell.yaml
@@ -3,7 +3,7 @@
- - "Multicell mode enabled. The following cells will be marked for instrumented execution."
-
- "c = np.sum(c_mtx)"
- - "Cell marked for multicell mode. It will be executed at position 1"
- - "Cell marked for multicell mode. It will be executed at position 0"
-
- "%%abort_multicellmode"
- - "Multicell mode aborted."
@@ -15,23 +15,23 @@
with scorep.instrumenter.enable():
c = np.sum(c_mtx)
c_vec = np.arange(b, c)
- - "Cell marked for multicell mode. It will be executed at position 1"
- - "Cell marked for multicell mode. It will be executed at position 0"
-
- |-
print('c =', c)
print('Sum(c_vec) =', c_vec.sum())
- - "Cell marked for multicell mode. It will be executed at position 2"
- - "Cell marked for multicell mode. It will be executed at position 1"
-
- "%%finalize_multicellmode"
- - "\0"
- "Executing cell 1\n"
- "Executing cell 0\n"
- "with scorep.instrumenter.enable():\n"
- " c = np.sum(c_mtx)\n"
- "c_vec = np.arange(b, c)\n"
- "----------------------------------\n"
- "\n"
- "\n"
- "Executing cell 2\n"
- "Executing cell 1\n"
- "print('c =', c)\n"
- "print('Sum(c_vec) =', c_vec.sum())\n"
- "----------------------------------\n"
2 changes: 2 additions & 0 deletions tests/test_kernel.py
@@ -34,6 +34,7 @@ def check_stream_output(self, code, expected_output, stream="stdout"):
 # some messages can be of type 'execute_result'
 # type instead of stdout
 # self.assertEqual(msg["content"]["name"], stream)
+
 if msg["header"]["msg_type"] == "stream":
 self.assertEqual(msg["content"]["name"], stream)
 self.assertEqual(msg["content"]["text"], expected_msg)
@@ -42,6 +43,7 @@ def check_stream_output(self, code, expected_output, stream="stdout"):
 msg["content"]["data"]["text/plain"], expected_msg
 )
 
+
 def check_from_file(self, filename):
 
 with open(filename, "r") as file:
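
The assertions in check_stream_output above handle the fact that kernel output can arrive either as a 'stream' message or as an 'execute_result'. A small sketch of that dispatch, assuming msg is a message dict as returned by jupyter_client:

def extract_text(msg):
    """Pull the displayable text out of a kernel IOPub message."""
    msg_type = msg["header"]["msg_type"]
    if msg_type == "stream":
        # stdout/stderr text lives directly in content["text"]
        return msg["content"]["text"]
    if msg_type == "execute_result":
        # results carry a mimebundle; use the plain-text representation
        return msg["content"]["data"]["text/plain"]
    return None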