Skip to content

Commit 52a92e5

Browse files
blaiszik and claude authored
Patch/restore mdf client (#471)
* Restore missing mdf_client.py from design-renaissance branch This file was part of PR #469 but was not included in the merge, causing ModuleNotFoundError when importing foundry. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Fix DOI search to return correct dataset The forge DOI search can return multiple results where only one actually has the matching DOI. Previously, get_metadata_by_doi() blindly returned the first result, which often didn't have the requested DOI. Now it iterates through results to find the one with the exact DOI match, fixing test_dataframe_search_by_doi and test_dataframe_download_by_doi tests. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Move torch/tensorflow to optional extras to fix CI disk space The combined size of torch, tensorflow, and NVIDIA CUDA dependencies exceeded GitHub Actions runner disk space (~4GB+). These ML frameworks are now available as optional extras via pip install .[torch] or pip install .[tensorflow]. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Fix flake8 linting errors - Remove unused imports (sys, rprint, Optional, pandas, numpy) - Fix unused exception variable - Remove f-string without placeholders - Split long line in MCP server description - Add noqa comment for intentional re-export Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Replace mdf_forge with internal MDFClient in tests Update test imports to use foundry.mdf_client.MDFClient instead of mdf_forge.Forge, which is no longer a required dependency. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 6e5c387 commit 52a92e5

7 files changed

Lines changed: 25 additions & 31 deletions

File tree

foundry/__main__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,13 @@
1010
"""
1111

1212
import json
13-
import sys
1413
from typing import Optional
1514

1615
import typer
1716
from rich.console import Console
1817
from rich.table import Table
1918
from rich.panel import Panel
2019
from rich.progress import Progress, SpinnerColumn, TextColumn
21-
from rich import print as rprint
2220

2321
app = typer.Typer(
2422
name="foundry",
@@ -189,7 +187,7 @@ def schema(
189187
f = get_foundry()
190188
try:
191189
dataset = f.get_dataset(doi)
192-
except Exception as e:
190+
except Exception:
193191
console.print(f"[red]Error: Could not find dataset '{doi}'[/red]")
194192
raise typer.Exit(1)
195193

@@ -340,7 +338,7 @@ def push_to_hf(
340338
console.print(f"[dim]{e}[/dim]")
341339
raise typer.Exit(1)
342340

343-
progress.update(task, description=f"Exporting to HuggingFace Hub...")
341+
progress.update(task, description="Exporting to HuggingFace Hub...")
344342
try:
345343
url = push_to_hub(dataset, repo, token=token, private=private)
346344
except Exception as e:

foundry/integrations/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
"""
1313

1414
try:
15-
from .huggingface import push_to_hub
15+
from .huggingface import push_to_hub # noqa: F401
1616
__all__ = ["push_to_hub"]
1717
except ImportError:
1818
# huggingface extras not installed

foundry/integrations/huggingface.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,6 @@ def push_to_hub(
112112

113113
def _combine_inputs_targets(inputs: Dict, targets: Dict) -> Dict[str, Any]:
114114
"""Combine input and target dictionaries into a single flat dict."""
115-
import pandas as pd
116-
import numpy as np
117-
118115
combined = {}
119116

120117
for key, value in inputs.items():
@@ -131,7 +128,6 @@ def _combine_inputs_targets(inputs: Dict, targets: Dict) -> Dict[str, Any]:
131128
def _flatten_data(data: Any) -> Dict[str, Any]:
132129
"""Flatten nested data structure to a dict suitable for HF Dataset."""
133130
import pandas as pd
134-
import numpy as np
135131

136132
if isinstance(data, pd.DataFrame):
137133
return {col: data[col].tolist() for col in data.columns}

foundry/mcp/server.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
import json
1414
import logging
15-
from typing import Any, Dict, List, Optional
15+
from typing import Any, Dict, List
1616

1717
logger = logging.getLogger(__name__)
1818

@@ -178,7 +178,8 @@ def _serialize_data(data: Any) -> Any:
178178
},
179179
{
180180
"name": "get_dataset_schema",
181-
"description": "Get the schema of a dataset - what fields it contains, their descriptions, and units. Use this to understand the data structure before loading.",
181+
"description": "Get the schema of a dataset - what fields it contains, their descriptions, "
182+
"and units. Use this to understand the data structure before loading.",
182183
"inputSchema": {
183184
"type": "object",
184185
"properties": {

requirements.txt

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
1+
# Core runtime dependencies (see setup.py)
2+
mdf_toolbox>=0.7.1
13
globus-sdk>=3,<4
2-
dlhub_sdk>=2.1.0
3-
requests>=2.18.4
4-
tqdm>=4.19.4
5-
six>=1.11.0
6-
h5py>=2.10.0
7-
numpy>=1.15.4
8-
pandas>=0.23.4
9-
scikit-learn>=1.0
10-
pydantic>=2.7.2
11-
mdf_forge>=0.8.0
124
mdf-connect-client>=0.5.0
5+
requests>=2.31.0
6+
tqdm>=4.66.0
7+
numpy>=2.0.0
8+
pandas>=2.2.2
9+
h5py>=3.11.0
10+
pydantic>=2.7.2
1311
json2table>=1.1.5
14-
torch>=1.8.0
15-
tensorflow>=2
16-
tqdm>=4.64
17-
openpyxl>=3.1.0
12+
typer>=0.12.0
13+
rich>=13.7.0
14+
15+
# Optional extras are available via pip install .[name]
16+
# torch, tensorflow, excel, huggingface, examples, dev

tests/test_foundry.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
import builtins
1616

1717
import mdf_toolbox
18-
from mdf_forge import Forge
1918
from foundry import foundry
19+
from foundry.mdf_client import MDFClient
2020
from foundry.foundry_dataset import FoundryDataset
2121
from foundry.auth import PubAuths
2222
from foundry.https_upload import upload_to_endpoint
@@ -93,17 +93,17 @@ def _delete_test_data(dataset):
9393

9494
def test_foundry_init():
9595
f = foundry.Foundry(authorizers=auths)
96-
assert isinstance(f.forge_client, Forge)
96+
assert isinstance(f.forge_client, MDFClient)
9797
assert isinstance(f.connect_client, MDFConnectClient)
9898

9999
if not is_gha:
100100

101101
f2 = foundry.Foundry(download=False, authorizers=auths, no_browser=False, no_local_server=True)
102-
assert isinstance(f2.forge_client, Forge)
102+
assert isinstance(f2.forge_client, MDFClient)
103103
assert isinstance(f2.connect_client, MDFConnectClient)
104104

105105
f3 = foundry.Foundry(download=False, authorizers=auths, no_browser=True, no_local_server=False)
106-
assert isinstance(f3.forge_client, Forge)
106+
assert isinstance(f3.forge_client, MDFClient)
107107
assert isinstance(f3.connect_client, MDFConnectClient)
108108

109109

tests/test_foundry_components.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
import os
44

55
from mdf_connect_client import MDFConnectClient
6-
from mdf_forge import Forge
76
import mdf_toolbox
87
import pandas as pd
98

109
from foundry import foundry
10+
from foundry.mdf_client import MDFClient
1111

1212
is_gha = os.getenv("GITHUB_ACTIONS")
1313
client_id = os.getenv("CLIENT_ID")
@@ -87,7 +87,7 @@ def test_foundry_init(auths, elwood_data):
8787
test_dataset_name, test_doi, expected_title = elwood_data
8888

8989
f = foundry.Foundry(authorizers=auths)
90-
assert isinstance(f.forge_client, Forge)
90+
assert isinstance(f.forge_client, MDFClient)
9191
assert isinstance(f.connect_client, MDFConnectClient)
9292

9393
def test_search(auths, elwood_data):

0 commit comments

Comments (0)