Skip to content

Commit dabf10a

Browse files
authored
Add tests for the multi-table synthesizer code (#69)
Adding multi-table synthesizer tests. Better way of checking if a test is being run in GitHub Actions: checking an environment variable that only exists there. A few fixes to the current synthesizer code. Fixing the test data to include a missing table relation that is important for the synthesizer, and re-generating and uploading the assertion data from GitHub because it affects the results of existing tests.
1 parent d9bc173 commit dabf10a

File tree

13 files changed

+194
-85
lines changed

13 files changed

+194
-85
lines changed

.github/workflows/integration_tests.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ jobs:
5858
- name: Install the project
5959
run: uv sync --all-extras --dev
6060

61+
- name: Is running on CI environment (GitHub Actions)?
62+
run: |
63+
python -c "import os; print('Result: ', os.getenv('GITHUB_ACTIONS', 'Not set'))"
64+
6165
- name: Install dependencies and check code
6266
run: |
63-
uv run pytest -m "integration_test"
67+
uv run pytest -m "integration_test" --log-cli-level=WARNING

.github/workflows/unit_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,4 @@ jobs:
6060

6161
- name: Install dependencies and check code
6262
run: |
63-
uv run pytest -m "not integration_test"
63+
uv run pytest -m "not integration_test" --log-cli-level=WARNING

src/midst_toolkit/models/clavaddpm/synthesizer.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,10 @@ def clava_synthesizing( # noqa: PLR0915, PLR0912
786786
"df": child_final_df,
787787
"keys": child_primary_keys_arr.flatten().tolist(),
788788
}
789-
with open(os.path.join(save_dir, "before_matching/synthetic_tables.pkl"), "wb") as file:
789+
790+
before_matching_dir = save_dir / "before_matching"
791+
before_matching_dir.mkdir(parents=True, exist_ok=True)
792+
with open(before_matching_dir / "synthetic_tables.pkl", "wb") as file:
790793
pickle.dump(synthetic_tables, file)
791794

792795
synthesizing_end_time = time.time()
@@ -800,12 +803,8 @@ def clava_synthesizing( # noqa: PLR0915, PLR0912
800803

801804
cleaned_tables: dict[str, pd.DataFrame] = {}
802805
for table_key, table_val in final_tables.items():
803-
if "account_id" in tables[table_key]["original_cols"]:
804-
cols = tables[table_key]["original_cols"]
805-
cols.remove("account_id")
806-
else:
807-
cols = tables[table_key]["original_cols"]
808-
cleaned_tables[table_key] = pd.DataFrame(table_val[cols])
806+
column_names = [column_name for column_name in tables[table_key]["original_cols"] if "_id" not in column_name]
807+
cleaned_tables[table_key] = pd.DataFrame(table_val[column_names])
809808

810809
for cleaned_key, cleaned_val in cleaned_tables.items():
811810
table_dir = os.path.join(
Binary file not shown.
Binary file not shown.
Binary file not shown.

tests/integration/assets/multi_table/assertion_data/syntetic_data.json

Lines changed: 0 additions & 61 deletions
This file was deleted.
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"X_gen": [
3+
[
4+
-89.64320001648818,
5+
2.329031172032132,
6+
-122.97271923749832,
7+
552.3706152861826,
8+
353.47951217405426,
9+
-63.164915493559306,
10+
-42.27259013378604,
11+
244.21392290993887
12+
],
13+
[-0.4694302555020733,
14+
15.336690906361277,
15+
-48.59970780139716,
16+
-358.65097509895173,
17+
411.39200743280094,
18+
415.9651477725036,
19+
-12.980662539762594,
20+
-370.11192775534397
21+
],
22+
[5.009930498133295,
23+
-220.79264470424582,
24+
-4.129379545636459,
25+
-188.011555249935,
26+
218.10979023918082,
27+
221.16927688555808,
28+
49.89701474616661,
29+
-194.37953943919408
30+
],
31+
[-1.3109146973467711,
32+
-73.2679936874503,
33+
-9.218660554989645,
34+
-389.99286808084486,
35+
-490.3925197112697,
36+
-423.00630661809424,
37+
432.9884292987812,
38+
-397.6777786014056
39+
],
40+
[8.342572127948289,
41+
9.36842404400312,
42+
-72.28739585181947,
43+
-489.4411862829012,
44+
563.4325829362252,
45+
568.3398615720979,
46+
-16.894123940346486,
47+
-504.9528775096839
48+
]
49+
],
50+
"y_gen": [1, 0, 1, 0, 0]
51+
}

tests/integration/assets/multi_table/dataset_meta.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"relation_order": [
3+
[null, "account"],
34
["account", "trans"]
45
],
56
"tables": {

0 commit comments

Comments
 (0)