Skip to content

Commit c11fee7

Browse files
committed
Fix minio/parquet url to reflect the latest changes
1 parent edacd12 commit c11fee7

File tree

5 files changed: +8 -18 lines changed

src/core/formatting.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def _format_parquet_url(dataset: Row) -> str | None:
 
     minio_base_url = load_configuration()["minio_base_url"]
     prefix = dataset.did // 10_000
-    return f"{minio_base_url}/{prefix:04d}/{dataset.did:04d}/dataset_{dataset.did}.pq"
+    return f"{minio_base_url}/datasets/{prefix:04d}/{dataset.did:04d}/dataset_{dataset.did}.pq"
 
 
 def _format_dataset_url(dataset: Row) -> str:

src/routers/openml/datasets.py

-1
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,6 @@ def get_dataset(
         row_id_attribute=row_id_attribute,
         url=dataset_url,
         parquet_url=parquet_url,
-        minio_url=parquet_url,
         file_id=dataset.file_id,
         format=dataset.format.lower(),
         paper_url=dataset.paper_url or None,

src/schemas/datasets/openml.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,12 @@ class DatasetMetadata(BaseModel):
             "description": "URL of the parquet dataset data file.",
         },
     )
-    minio_url: HttpUrl | None = Field(
-        json_schema_extra={
-            "example": "http://openml1.win.tue.nl/dataset2/dataset_2.pq",
-            "description": "Deprecated, I think.",
-        },
-    )
+    # minio_url: HttpUrl | None = Field(
+    #     json_schema_extra={
+    #         "example": "http://openml1.win.tue.nl/dataset2/dataset_2.pq",
+    #         "description": "Deprecated, I think.",
+    #     },
+    # )
     file_id: int = Field(json_schema_extra={"example": 1})
     format_: DatasetFileFormat = Field(
         json_schema_extra={"example": DatasetFileFormat.ARFF},

tests/routers/openml/datasets_test.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,12 @@ def test_get_dataset(py_api: TestClient) -> None:
         "upload_date": "2014-04-06T23:19:24",
         "licence": "Public",
         "url": "https://test.openml.org/data/v1/download/1/anneal.arff",
-        "parquet_url": "https://openml1.win.tue.nl/0000/0001/dataset_1.pq",
+        "parquet_url": "https://openml1.win.tue.nl/datasets/0000/0001/dataset_1.pq",
         "file_id": 1,
         "default_target_attribute": ["class"],
         "version_label": "1",
         "tag": ["study_14"],
         "visibility": "public",
-        "minio_url": "https://openml1.win.tue.nl/0000/0001/dataset_1.pq",
         "status": "in_preparation",
         "processing_date": "2024-01-04T10:13:59",
         "md5_checksum": "4eaed8b6ec9d8211024b6c089b064761",

tests/routers/openml/migration/datasets_migration_test.py

-8
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,6 @@ def test_dataset_response_is_identical(
     new_body = new.json()
     if processing_data := new_body.get("processing_date"):
         new_body["processing_date"] = str(processing_data).replace("T", " ")
-    if parquet_url := new_body.get("parquet_url"):
-        bucket, prefix, did, filename = parquet_url.rsplit("/", 3)
-        new_body["parquet_url"] = f"{bucket}/dataset{did.lstrip('0')}/{filename}"
-        new_body["parquet_url"] = new_body["parquet_url"].replace("https", "http")
-    if minio_url := new_body.get("minio_url"):
-        bucket, prefix, did, filename = minio_url.rsplit("/", 3)
-        new_body["minio_url"] = f"{bucket}/dataset{did.lstrip('0')}/{filename}"
-        new_body["minio_url"] = new_body["minio_url"].replace("https", "http")
 
     manual = []
     # ref test.openml.org/d/33 (contributor) and d/34 (creator)

0 commit comments

Comments
 (0)