Skip to content

Commit 8ce46e9

Browse files
author
amesar
committed
Issue #62: added 'script' attribute to 'system' stanza in JSON export file format
1 parent 126c03b commit 8ce46e9

File tree

9 files changed

+32
-33
lines changed

9 files changed

+32
-33
lines changed

mlflow_export_import/bulk/export_all.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,14 +32,14 @@ def export_all(output_dir, notebook_formats=None, use_threads=False):
3232
use_threads=use_threads)
3333
duration = round(time.time() - start_time, 1)
3434

35-
content = {
35+
mlflow_attr = {
3636
"summary": {
3737
"stages": ALL_STAGES,
3838
"notebook_formats": notebook_formats,
3939
"duration": duration
4040
}
4141
}
42-
io_utils.write_export_file(output_dir, "all_manifest.json", content)
42+
io_utils.write_export_file(output_dir, "all_manifest.json", __file__, mlflow_attr)
4343
print(f"Duraton for entire tracking server export: {duration} seconds")
4444

4545

mlflow_export_import/bulk/export_experiments.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -85,15 +85,15 @@ def export_experiments(client, experiments, output_dir, notebook_formats=None, u
8585
total_runs = ok_runs + failed_runs
8686
duration = round(time.time() - start_time, 1)
8787

88-
custom_info = {
88+
info_attr = {
8989
"duration": duration,
9090
"experiments": len(experiments),
9191
"total_runs": total_runs,
9292
"ok_runs": ok_runs,
9393
"failed_runs": failed_runs
9494
}
95-
content = { "experiments": export_results }
96-
io_utils.write_export_file(output_dir, "experiments.json", content, custom_info)
95+
mlflow_attr = { "experiments": export_results }
96+
io_utils.write_export_file(output_dir, "experiments.json", __file__, mlflow_attr, info_attr)
9797

9898
print(f"{len(experiments)} experiments exported")
9999
print(f"{ok_runs}/{total_runs} runs succesfully exported")

mlflow_export_import/bulk/export_models.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def _export_models(client, model_names, output_dir, notebook_formats, stages, ex
4141
else: failed_models.append(result[1])
4242
duration = round(time.time()-start_time, 1)
4343

44-
custom_info = {
44+
info_attr = {
4545
"stages": stages,
4646
"notebook_formats": notebook_formats,
4747
"num_total_models": len(model_names),
@@ -50,10 +50,10 @@ def _export_models(client, model_names, output_dir, notebook_formats, stages, ex
5050
"duration": duration,
5151
"failed_models": failed_models
5252
}
53-
content = {
53+
mlflow_attr = {
5454
"models": ok_models,
5555
}
56-
io_utils.write_export_file(output_dir, "models.json", content, custom_info)
56+
io_utils.write_export_file(output_dir, "models.json", __file__, mlflow_attr, info_attr)
5757

5858
print(f"{len(model_names)} models exported")
5959
print(f"Duration for registered models export: {duration} seconds")
@@ -69,11 +69,11 @@ def export_models(client, model_names, output_dir, notebook_formats=None, stages
6969
_export_models(client, model_names, os.path.join(output_dir,"models"), notebook_formats, stages, export_run=False, use_threads=use_threads)
7070
duration = round(time.time()-start_time, 1)
7171

72-
custom_info = {
72+
info_attr = {
7373
"stages": stages,
7474
"notebook_formats": notebook_formats
7575
}
76-
io_utils.write_export_file(output_dir, "models.json", {}, custom_info)
76+
io_utils.write_export_file(output_dir, "models.json", __file__, {}, info_attr)
7777

7878
print(f"Duration for total registered models and versions' runs export: {duration} seconds")
7979

mlflow_export_import/common/io_utils.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,15 @@
66
from mlflow_export_import.common.source_tags import ExportFields
77

88

9-
def _mk_export_info():
9+
def _mk_system_attr(script):
1010
"""
1111
Create common standard JSON stanza containing internal export information.
1212
"""
1313
import mlflow
1414
import platform
1515
return {
1616
ExportFields.SYSTEM: {
17+
"script": os.path.basename(script),
1718
"export_time": ts_now_seconds,
1819
"_export_time": ts_now_fmt_utc,
1920
"mlflow_version": mlflow.__version__,
@@ -27,16 +28,16 @@ def _mk_export_info():
2728
}
2829

2930

30-
def write_export_file(dir, file, content, custom_info=None):
31+
def write_export_file(dir, file, script, mlflow_attr, info_attr=None):
3132
"""
3233
Write standard formatted JSON file.
3334
"""
3435
path = os.path.join(dir, file)
35-
custom_info = { ExportFields.INFO: custom_info} if custom_info else {}
36-
content = { ExportFields.MLFLOW: content}
37-
content = { **_mk_export_info(), **custom_info, **content }
36+
info_attr = { ExportFields.INFO: info_attr} if info_attr else {}
37+
mlflow_attr = { ExportFields.MLFLOW: mlflow_attr}
38+
mlflow_attr = { **_mk_system_attr(script), **info_attr, **mlflow_attr }
3839
os.makedirs(dir, exist_ok=True)
39-
write_file(path, content)
40+
write_file(path, mlflow_attr)
4041

4142

4243
def write_file(path, content):
@@ -64,7 +65,7 @@ def read_file(path):
6465
return json.loads(f.read())
6566

6667

67-
def get_custom(export_dct):
68+
def get_info(export_dct):
6869
return export_dct[ExportFields.INFO]
6970

7071

mlflow_export_import/common/source_tags.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ class ExportFields:
88

99
class ExportTags:
1010
""" Tags source export tags. """
11-
#PREFIX_ROOT = "mlflow_export_import"
1211
PREFIX_ROOT = "mlflow_exim"
1312
PREFIX_RUN_INFO = f"{PREFIX_ROOT}.run_info"
1413
PREFIX_MLFLOW = f"{PREFIX_ROOT}.mlflow"

mlflow_export_import/experiment/export_experiment.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def export_experiment(self, exp_id_or_name, output_dir, run_ids=None):
4444
for j,run in enumerate(SearchRunsIterator(self.mlflow_client, exp.experiment_id)):
4545
self._export_run(j, run, output_dir, ok_run_ids, failed_run_ids)
4646

47-
custom_info = {
47+
info_attr = {
4848
"num_total_runs": (j+1),
4949
"num_ok_runs": len(ok_run_ids),
5050
"ok_runs": ok_run_ids,
@@ -53,9 +53,9 @@ def export_experiment(self, exp_id_or_name, output_dir, run_ids=None):
5353
}
5454
exp_dct = utils.strip_underscores(exp)
5555
exp_dct["tags"] = dict(sorted(exp_dct["tags"].items()))
56-
content = { "experiment": exp_dct }
5756

58-
io_utils.write_export_file(output_dir, "experiment.json", content, custom_info)
57+
mlflow_attr = { "experiment": exp_dct }
58+
io_utils.write_export_file(output_dir, "experiment.json", __file__, mlflow_attr, info_attr)
5959

6060
msg = f"for experiment '{exp.name}' (ID: {exp.experiment_id})"
6161
if len(failed_run_ids) == 0:

mlflow_export_import/experiment/import_experiment.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from mlflow_export_import.common import mlflow_utils
1212
from mlflow_export_import.common.http_client import DatabricksHttpClient
1313
from mlflow_export_import.run.import_run import RunImporter
14-
from mlflow_export_import.common.source_tags import ExportFields, ExportTags
14+
from mlflow_export_import.common.source_tags import ExportTags
1515

1616

1717
def _peek_at_experiments(exp_dir):
@@ -48,7 +48,7 @@ def import_experiment(self, exp_name, input_dir, dst_notebook_dir=None):
4848

4949
path = io_utils.mk_manifest_json_path(input_dir, "experiment.json")
5050
exp_dct = io_utils.read_file(path)
51-
custom_info = io_utils.get_custom(exp_dct)
51+
info = io_utils.get_info(exp_dct)
5252
exp_dct = io_utils.get_mlflow(exp_dct)
5353

5454
tags = exp_dct["experiment"]["tags"]
@@ -58,8 +58,8 @@ def import_experiment(self, exp_name, input_dir, dst_notebook_dir=None):
5858

5959
mlflow_utils.set_experiment(self.mlflow_client, self.dbx_client, exp_name, tags)
6060

61-
run_ids = custom_info["ok_runs"]
62-
failed_run_ids = custom_info["failed_runs"]
61+
run_ids = info["ok_runs"]
62+
failed_run_ids = info["failed_runs"]
6363

6464
print(f"Importing {len(run_ids)} runs into experiment '{exp_name}' from {input_dir}")
6565
run_ids_map = {}

mlflow_export_import/model/export_model.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,20 +17,19 @@ class ModelExporter():
1717

1818
def __init__(self, mlflow_client, notebook_formats=None, stages=None, versions=None, export_run=True):
1919
"""
20-
:param mlflow_client: MLflow client or if None create default client.
20+
:param mlflow_client: MlflowClient
2121
:param notebook_formats: List of notebook formats to export. Values are SOURCE, HTML, JUPYTER or DBC.
2222
:param stages: Stages to export. Default is all stages. Values are Production, Staging, Archived and None.
2323
:param export_run: Export the run that generated a registered model's version.
2424
"""
2525
self.mlflow_client = mlflow_client
2626
self.http_client = MlflowHttpClient()
2727
self.run_exporter = RunExporter(self.mlflow_client, notebook_formats=notebook_formats)
28-
self.stages = self._normalize_stages(stages)
2928
self.export_run = export_run
29+
self.stages = self._normalize_stages(stages)
3030
self.versions = versions if versions else []
3131
if len(self.stages) > 0 and len(self.versions) > 0:
3232
raise MlflowExportImportException(f"Both stages {self.stages} and versions {self.versions} cannot be set")
33-
self.export_run = export_run
3433

3534

3635
def export_model(self, model_name, output_dir):
@@ -62,7 +61,7 @@ def _export_model(self, model_name, output_dir):
6261
opath = os.path.join(output_dir,run_id)
6362
opath = opath.replace("dbfs:", "/dbfs")
6463
dct = { "version": vr.version, "stage": vr.current_stage, "run_id": run_id, "description": vr.description, "tags": vr.tags }
65-
print(f"Exporting version: {dct}")
64+
print(f"Exporting verions {vr.version} to '{opath}'")
6665
manifest.append(dct)
6766
try:
6867
if self.export_run:
@@ -85,13 +84,13 @@ def _export_model(self, model_name, output_dir):
8584
model = self.http_client.get(f"registered-models/get", {"name": model_name})
8685
model["registered_model"]["latest_versions"] = output_versions
8786

88-
custom_info = {
87+
info_attr = {
8988
"num_target_stages": len(self.stages),
9089
"num_target_versions": len(self.versions),
9190
"num_src_versions": len(versions),
9291
"num_dst_versions": len(output_versions)
9392
}
94-
io_utils.write_export_file(output_dir, "model.json", model, custom_info)
93+
io_utils.write_export_file(output_dir, "model.json", __file__, model, info_attr)
9594

9695
print(f"Exported {exported_versions}/{len(output_versions)} versions for model '{model_name}'")
9796
return manifest

mlflow_export_import/run/export_run.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,13 +53,13 @@ def export_run(self, run_id, output_dir):
5353
tags = run.data.tags
5454
tags = dict(sorted(tags.items()))
5555

56-
content = {
56+
mlflow_attr = {
5757
"info": utils.strip_underscores(run.info),
5858
"params": run.data.params,
5959
"metrics": self._get_metrics_with_steps(run),
6060
"tags": tags
6161
}
62-
io_utils.write_export_file(output_dir, "run.json", content)
62+
io_utils.write_export_file(output_dir, "run.json", __file__, mlflow_attr)
6363
fs = _filesystem.get_filesystem(".")
6464

6565
# copy artifacts

0 commit comments

Comments
 (0)