[CADL-41] Removing typing due to incompatibility with the CI
angelo-romano committed Jan 28, 2025
1 parent 7ad8ab7 commit 80ed2a3
Showing 5 changed files with 106 additions and 93 deletions.
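In short, the commit makes py311/test_dataset.py importable under Python 2 for the CI: type annotations come off the function signatures, f-strings become %-formatting, and the pyspssio import (which appears to be Python 3 only) moves behind a version guard. A minimal standalone sketch of the pattern, illustrative rather than taken verbatim from the commit:

import sys

IS_PYTHON_2 = bool(sys.version_info.major == 2)
if not IS_PYTHON_2:
    # Imported only on Python 3; the SPSS export test below is skipped
    # on Python 2 for the same reason.
    import pyspssio


def ensure_binary(s, encoding="utf-8", errors="strict"):
    # Annotation-free signature; the typed form was
    # "def ensure_binary(s: Union[str, bytes], ...) -> bytes:".
    if isinstance(s, bytes):
        return s
    if isinstance(s, str):
        return s.encode(encoding, errors)
    # %-formatting rather than an f-string, which Python 2 cannot parse.
    raise TypeError("not expecting type '%s'" % type(s))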
Empty file added integration/__init__.py
86 changes: 47 additions & 39 deletions py311/test_dataset.py
@@ -2,13 +2,13 @@
 import codecs
 import csv
 import os
+import sys
 import tempfile
 import uuid
 from datetime import datetime
-from typing import Any, Dict, Optional, Union

 import numpy
-import pyspssio
 import pytest
 from numpy.testing import assert_equal as numpy_assert_equal
 from pycrunch.shoji import Entity, as_entity, wait_progress

@@ -17,6 +17,9 @@
 from scrunch.datasets import Project
 from scrunch.streaming_dataset import StreamingDataset

+IS_PYTHON_2 = bool(sys.version_info.major == 2)
+if not IS_PYTHON_2:
+    import pyspssio
 PROJECT_ID = os.environ.get("SCRUNCH_PROJECT_ID")
 PROJECT_311_ID = os.environ.get("SCRUNCH_PROJECT_311_ID")
 TEST_DATASET_ID = os.environ.get("SCRUNCH_TEST_DATASET_ID")
@@ -60,9 +63,7 @@ class ST:
source_mimetypes["txt"] = ["text/csv"] # Backward compatibility


def ensure_binary(
s: Union[str, bytes], encoding: str = "utf-8", errors: str = "strict"
) -> bytes:
def ensure_binary(s, encoding="utf-8", errors="strict"):
"""Coerce **s** to bytes.
- `str` -> encoded to `bytes`
- `bytes` -> `bytes`
@@ -75,7 +76,7 @@ def ensure_binary(
         return s
     if isinstance(s, str):
         return s.encode(encoding, errors)
-    raise TypeError(f"not expecting type '{type(s)}'")
+    raise TypeError("not expecting type '%s'" % type(s))


 BOUNDARY = "________ThIs_Is_tHe_bouNdaRY_$"
@@ -90,21 +91,23 @@ def encode_multipart_formdata(files):
     for key, filename, value in files:
         lines.append("--" + BOUNDARY)
         if filename is None:
-            lines.append(f'Content-Disposition: form-data; name="{key}"\r\n\r\n{value}')
+            lines.append(
+                'Content-Disposition: form-data; name="%s"\r\n\r\n%s' % (key, value)
+            )
             continue
         lines.append(
-            f'Content-Disposition: form-data; name="{key}"; filename="{filename}"'
+            'Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename)
         )
         ct = source_mimetypes.get(
             filename.rsplit(".", 1)[-1], ["application/octet-stream"]
         )[0]
-        lines.append(f"Content-Type: {ct}")
+        lines.append("Content-Type: %s" % ct)
         lines.append("")
         lines.append(value)
     lines.append("--" + BOUNDARY + "--")
     lines.append("")
     body = "\r\n".join(lines)
-    content_type = f"multipart/form-data; charset=UTF-8; boundary={BOUNDARY}"
+    content_type = "multipart/form-data; charset=UTF-8; boundary=%s" % BOUNDARY
     return content_type, body

@@ -155,11 +158,11 @@ def _encode_file_as_multipart(self, field_name, filename, content_type, contents
         )
         body += "--%s--\r\n" % BOUNDARY

-        content_type = f"multipart/form-data; charset=UTF-8; boundary={BOUNDARY}"
+        content_type = "multipart/form-data; charset=UTF-8; boundary=%s" % BOUNDARY

         return content_type, body

-    def _parse_on_311(self, on_311: Union[None, bool]) -> bool:
+    def _parse_on_311(self, on_311):
         """
         Based on the value of the parameters, returns True or False, based on whether we are
         meant to run this on a Python 3.11 factory or not.
@@ -174,10 +177,10 @@ def _parse_on_311(self, on_311: Union[None, bool]) -> bool:

     def _import_dataset(
         self,
-        metadata: Dict[str, Any],
-        input_file: str,
-        on_311: Optional[bool] = None,
-        format_: str = "csv",
+        metadata,
+        input_file,
+        on_311=None,
+        format_="csv",
     ):
         """
         :param metadata: The metadata fields associated to the dataset we are creating.
@@ -194,12 +197,14 @@ def _import_dataset(
"Weighed imported test dataset" if self.weight else "Imported test dataset"
)
ds_data = {k: v for k, v in metadata.items()}
ds_data["name"] = (
f"{name} {uuid.uuid4().hex[:16]} [{datetime.now().isoformat()}]"
ds_data["name"] = "%s %s [%s]" % (
name,
uuid.uuid4().hex[:16],
datetime.now().isoformat(),
)
project_id = PROJECT_311_ID if on_311 else PROJECT_ID
if project_id:
ds_data["project"] = f"/projects/{project_id}/"
ds_data["project"] = "/projects/%s" % project_id
# server/tests/controllers/test_sources.py
# streaming dataset
# steps
@@ -240,7 +245,7 @@ def _import_dataset(
         wait_progress(resp, self.site.session)
         return ds.refresh()

-    def _export_dataset(self, ds, format_: str = "csv") -> Dict[str, Any]:
+    def _export_dataset(self, ds, format_="csv"):
         """
         Runs a dataset export.
@@ -251,7 +256,7 @@ def _export_dataset(self, ds, format_: str = "csv") -> Dict[str, Any]:
         ds.export(output.name, format=format_)
         return self._parse_dataset_export(output, format_)

-    def _run_script(self, ds, payload: dict):
+    def _run_script(self, ds, payload):
         """
         Runs an automation script against a dataset.
"""
Expand All @@ -260,7 +265,7 @@ def _run_script(self, ds, payload: dict):
         wait_progress(resp, self.site.session)
         return ds.refresh()

-    def _parse_dataset_export(self, output: str, format_: str = "csv"):
+    def _parse_dataset_export(self, output, format_="csv"):
         """
         Given an output file, parses it and returns the values for it.
"""
Expand Down Expand Up @@ -289,23 +294,21 @@ def tearDown(self):

         super().tearDown()

-    def _project(self, id: str) -> Project:
+    def _project(self, id):
         """
         Returns the scrunch project instance for a specific project ID.
         """
         project = Project(
             Entity(
                 self.site.session,
-                **{
-                    "self": f"{self.site.self}projects/{id}/",
-                    "element": "shoji:entity",
-                    "body": {"name": "Target project"},
-                },
+                self="%sprojects/%s" % (self.site.self, id),
+                element="shoji:entity",
+                body={"name": "Target project"},
             )
         )
         return project

-    def _log(self, msg: str):
+    def _log(self, msg):
         print(msg)

     def _change_dataset_version(self, ds):
@@ -335,20 +338,22 @@ def _create_view(self, ds, on_311=None, **values):
ds_data["view_of"] = ds.self
if not name:
name = "Weighed test view dataset" if self.weight else "Test view dataset"
ds_data["name"] = (
f"{name} {uuid.uuid4().hex[:16]} [{datetime.now().isoformat()}]"
ds_data["name"] = "%s %s [%s]" % (
name,
uuid.uuid4().hex[:16],
datetime.now().isoformat(),
)
project_id = PROJECT_311_ID if on_311 else PROJECT_ID
if project_id:
ds_data["project"] = f"/projects/{project_id}/"
ds_data["project"] = "/projects/%s" % project_id
view = self.site.datasets.create(as_entity(ds_data)).refresh()
self._created_datasets[ds.self][1][view.self] = view
if self.weight:
view.settings.patch(
{"weight": view.variables.by("alias")[self.weight].entity.self}
)
streaming_view = StreamingDataset(view)
self._log(f"[{streaming_view.id}] {name} [project={project_id}]")
self._log("[%s] %s [project=%s]" % (streaming_view.id, name, project_id))
return streaming_view, view

def _create_dataset(self, on_311=None, pk=None, **values):
@@ -361,12 +366,14 @@ def _create_dataset(self, on_311=None, pk=None, **values):
         ds_data = {k: v for k, v in values.items()}
         if not name:
             name = "Weighed test dataset" if self.weight else "Test dataset"
-        ds_data["name"] = (
-            f"{name} {uuid.uuid4().hex[:16]} [{datetime.now().isoformat()}]"
+        ds_data["name"] = "%s %s [%s]" % (
+            name,
+            uuid.uuid4().hex[:16],
+            datetime.now().isoformat(),
         )
         project_id = PROJECT_311_ID if on_311 else PROJECT_ID
         if project_id:
-            ds_data["project"] = f"/projects/{project_id}/"
+            ds_data["project"] = "/projects/%s" % project_id
         ds = self.site.datasets.create(as_entity(ds_data)).refresh()
         if pk:
             ds.variables.create(
@@ -528,10 +535,10 @@ def _create_dataset(self, on_311=None, pk=None, **values):
{"weight": ds.variables.by("alias")[self.weight].entity.self}
)
streaming_ds = StreamingDataset(ds)
self._log(f"[{streaming_ds.id}] {name} [project={project_id}]")
self._log("[%s] %s [project=%s]" % (streaming_ds.id, name, project_id))
return streaming_ds, ds

def _get_var_values(self, var) -> Dict[str, Any]:
def _get_var_values(self, var):
"""
Given a variable, runs a /dataset/DID/variable/VID/values/ call to get the data values
associated to it and parses them to return them.
@@ -542,7 +549,7 @@ def __new__(cls, *args, **kwargs):
         for fn_name in cls.TEST_FUNCTIONS:
             if hasattr(cls, fn_name):
                 continue
-            orig_fn = getattr(cls, f"_{fn_name}", None)
+            orig_fn = getattr(cls, "_" + fn_name, None)
             if not orig_fn:
                 continue
             setattr(cls, fn_name, orig_fn)
@@ -1117,6 +1124,7 @@ def _test_export_dataset_as_csv(self):
         }
         self._test_export_dataset("csv", EXPECTED)

+    @pytest.mark.skipif(IS_PYTHON_2, reason="Requires Python 3")
     def _test_export_dataset_as_spss(self):
         nan = float("nan")
         EXPECTED = {
2 changes: 1 addition & 1 deletion pytest.ini
@@ -1,4 +1,4 @@
 [pytest]
 norecursedirs=dist build .tox .eggs examples
-addopts=--doctest-modules -p no:sugar
+addopts=--doctest-modules -p no:sugar --ignore py311
 doctest_optionflags=ALLOW_UNICODE ELLIPSIS
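Because --doctest-modules makes pytest import every module it collects, --ignore py311 keeps the py311 suite out of the default run; it can still be targeted explicitly from a suitable environment. A hypothetical invocation, not part of this commit (the -o override clears the ini-level addopts, including the --ignore):

import pytest

# Run the ignored suite directly; assumes a Python 3.11 environment
# with the SCRUNCH_* variables set, as the tests expect.
exit_code = pytest.main(["-o", "addopts=", "py311/test_dataset.py"])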