Skip to content

Commit

Permalink
Generate special svg names. (#308)
Browse files Browse the repository at this point in the history
  • Loading branch information
keyurva authored May 22, 2024
1 parent 14c3dd8 commit 0daedbf
Show file tree
Hide file tree
Showing 9 changed files with 243 additions and 194 deletions.
1 change: 1 addition & 0 deletions simple/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ idna==3.4
importlib-metadata==6.8.0
numpy==1.25.2
pandas==2.1.0
parameterized==0.9.0
platformdirs==3.10.0
PyMySQL==1.1.0
python-dateutil==2.8.2
Expand Down
30 changes: 26 additions & 4 deletions simple/stats/stat_var_hierarchy_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,27 @@ def gen_svg_name(self):
svg_name = f"{svg_name} With {pvs_str}"
return svg_name

# Returns the name of this (child) object relative to parent_pvs: the child's
# PV parts that are absent from the parent, each passed through
# _capitalize_and_split and joined with ", ".
def gen_specialized_name(self, parent_pvs: Self) -> str:
    inherited = parent_pvs._get_pv_parts()
    own = self._get_pv_parts()
    # Iterating the dict keeps the child's insertion order of parts.
    return ", ".join(
        _capitalize_and_split(part) for part in own if part not in inherited)

# Returns a new SVPropVals object whose PVs are the specified list; every
# other field is carried over from this object unchanged.
def with_pvs(self, pvs: list[PropVal]) -> Self:
    updated = replace(self, pvs=pvs)
    return updated

# Collects the non-empty props and vals of this object's PVs into an ordered
# set, represented as a dict (sets don't maintain order). For each PV the prop
# is visited before the val; a repeated part keeps its first position.
def _get_pv_parts(self) -> dict[str, bool]:
    ordered: dict[str, bool] = {}
    for pv in self.pvs:
        for part in (pv.prop, pv.val):
            if part:
                ordered[part] = True
    return ordered


class SVG:

Expand All @@ -136,7 +152,7 @@ def __init__(self, svg_id: str, svg_name: str) -> None:
# Maintaining order maintains results consistency and helps with tests.
self.sv_ids: dict[str, bool] = {}
self.parent_svg_ids: dict[str, bool] = {}
self.child_svg_ids: dict[str, bool] = {}
self.child_svg_id_2_specialized_name: dict[str, str] = {}
self.measured_properties: dict[str, bool] = {}

self.parent_svgs_processed: bool = False
Expand Down Expand Up @@ -171,14 +187,19 @@ def triples(self) -> list[Triple]:

return triples

def gen_specialized_name(self, parent_svg: Self) -> str:
if self.sample_sv and parent_svg.sample_sv:
return self.sample_sv.gen_specialized_name(parent_svg.sample_sv)
return ""

# For testing.
# Returns a plain-dict snapshot of this SVG: its id and name, the ids of its
# SVs, parents and children, and its measured properties.
# NOTE(review): this span comes from a rendered diff with no +/- markers. The
# "child_svg_ids" entry reads self.child_svg_ids while the next entry reads
# self.child_svg_id_2_specialized_name; presumably one is the pre-change line
# and the other its replacement -- confirm against the committed file.
def json(self) -> dict:
return {
"svg_id": self.svg_id,
"svg_name": self.svg_name,
# Dict keys are emitted as lists, in insertion order.
"sv_ids": list(self.sv_ids.keys()),
"parent_svg_ids": list(self.parent_svg_ids.keys()),
"child_svg_ids": list(self.child_svg_ids.keys()),
# Child SVG id -> specialized name. The dict itself is returned, not a copy.
"child_svg_specialized_names": self.child_svg_id_2_specialized_name,
# Unlike the id lists above, measured properties are sorted.
"mprops": sorted(list(self.measured_properties.keys()))
}

Expand Down Expand Up @@ -210,7 +231,7 @@ def _attach_verticals(poptype2svg: dict[str, SVG],
for vertical in vertical_spec.verticals:
vertical_svg = _get_or_create_vertical_svg(vertical, vertical_svgs)
vertical_svgs[vertical_svg.svg_id] = vertical_svg
vertical_svg.child_svg_ids[pop_type_svg.svg_id] = True
vertical_svg.child_svg_id_2_specialized_name[pop_type_svg.svg_id] = ""
pop_type_svg.parent_svg_ids[vertical_svg.svg_id] = True
for mprop in vertical_spec.measured_properties:
vertical_svg.measured_properties[mprop] = True
Expand Down Expand Up @@ -298,7 +319,8 @@ def _create_parent_svg(parent_sv: SVPropVals, svg: SVG, svgs: dict[str, SVG],

# Add parent child relationships.
svg.parent_svg_ids[parent_svg.svg_id] = True
parent_svg.child_svg_ids[svg.svg_id] = True
parent_svg.child_svg_id_2_specialized_name[
svg.svg_id] = svg.gen_specialized_name(parent_svg)

# Add child mprops to all parents recursively.
_add_measured_properties_to_parent_svgs(svg.measured_properties,
Expand Down
26 changes: 26 additions & 0 deletions simple/tests/stats/stat_var_hierarchy_generator_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from kg_util import mcf_parser
import pandas as pd
from parameterized import parameterized
from stats.data import Triple
from stats.stat_var_hierarchy_generator import *
from stats.stat_var_hierarchy_generator import _extract_svs
Expand Down Expand Up @@ -179,3 +180,28 @@ def test_extract_svs(self):
svs = _extract_svs(input_triples)

self.assertListEqual(svs, expected_svs)

# Each case is (parent, child, expected): expected is the specialized name the
# child should generate relative to the parent.
@parameterized.expand([
    # The child fills in a value ("Female") for a property the parent
    # already lists without a value.
    (
        SVPropVals(sv_id="",
                   population_type="",
                   pvs=[PropVal("gender", ""),
                        PropVal("race", "Asian")],
                   measured_property=""),
        SVPropVals(sv_id="",
                   population_type="",
                   pvs=[PropVal("gender", "Female"),
                        PropVal("race", "Asian")],
                   measured_property=""),
        "Female",
    ),
    # The child adds a property ("race") that the parent does not have.
    (
        SVPropVals(sv_id="",
                   population_type="",
                   pvs=[PropVal("gender", "Female")],
                   measured_property=""),
        SVPropVals(sv_id="",
                   population_type="",
                   pvs=[PropVal("gender", "Female"),
                        PropVal("race", "")],
                   measured_property=""),
        "Race",
    ),
])
def test_gen_specialized_name(self, parent: SVPropVals, child: SVPropVals,
                              expected: str):
    actual = child.gen_specialized_name(parent)
    self.assertEqual(actual, expected)
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
"parent_svg_ids": [
"c/g/Root"
],
"child_svg_ids": [
"c/g/Person_Race",
"c/g/Person_Gender"
],
"child_svg_specialized_names": {
"c/g/Person_Race": "Race",
"c/g/Person_Gender": "Gender"
},
"mprops": []
},
{
Expand All @@ -19,9 +19,9 @@
"parent_svg_ids": [
"c/g/Person"
],
"child_svg_ids": [
"c/g/Person_Gender-Female"
],
"child_svg_specialized_names": {
"c/g/Person_Gender-Female": "Female"
},
"mprops": []
},
{
Expand All @@ -31,9 +31,9 @@
"parent_svg_ids": [
"c/g/Person_Gender"
],
"child_svg_ids": [
"c/g/Person_Gender-Female_Race"
],
"child_svg_specialized_names": {
"c/g/Person_Gender-Female_Race": "Race"
},
"mprops": []
},
{
Expand All @@ -43,9 +43,9 @@
"parent_svg_ids": [
"c/g/Person_Gender-Female"
],
"child_svg_ids": [
"c/g/Person_Gender-Female_Race-Asian"
],
"child_svg_specialized_names": {
"c/g/Person_Gender-Female_Race-Asian": "Asian"
},
"mprops": []
},
{
Expand All @@ -58,7 +58,7 @@
"c/g/Person_Gender_Race-Asian",
"c/g/Person_Gender-Female_Race"
],
"child_svg_ids": [],
"child_svg_specialized_names": {},
"mprops": []
},
{
Expand All @@ -68,9 +68,9 @@
"parent_svg_ids": [
"c/g/Person_Race-Asian"
],
"child_svg_ids": [
"c/g/Person_Gender-Female_Race-Asian"
],
"child_svg_specialized_names": {
"c/g/Person_Gender-Female_Race-Asian": "Female"
},
"mprops": []
},
{
Expand All @@ -80,9 +80,9 @@
"parent_svg_ids": [
"c/g/Person"
],
"child_svg_ids": [
"c/g/Person_Race-Asian"
],
"child_svg_specialized_names": {
"c/g/Person_Race-Asian": "Asian"
},
"mprops": []
},
{
Expand All @@ -92,9 +92,9 @@
"parent_svg_ids": [
"c/g/Person_Race"
],
"child_svg_ids": [
"c/g/Person_Gender_Race-Asian"
],
"child_svg_specialized_names": {
"c/g/Person_Gender_Race-Asian": "Gender"
},
"mprops": []
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
"parent_svg_ids": [
"c/g/Root"
],
"child_svg_ids": [
"c/g/Person_Race",
"c/g/Person_PovertyStatus"
],
"child_svg_specialized_names": {
"c/g/Person_Race": "Race",
"c/g/Person_PovertyStatus": "Poverty Status"
},
"mprops": [
"count",
"unknownMProp"
Expand All @@ -25,10 +25,10 @@
"parent_svg_ids": [
"c/g/Person"
],
"child_svg_ids": [
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months",
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months"
],
"child_svg_specialized_names": {
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months": "Below Poverty Level In The Past12 Months",
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months": "Above Poverty Level In The Past12 Months"
},
"mprops": [
"count",
"unknownMProp"
Expand All @@ -41,9 +41,9 @@
"parent_svg_ids": [
"c/g/Person_PovertyStatus"
],
"child_svg_ids": [
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months_Race"
],
"child_svg_specialized_names": {
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months_Race": "Race"
},
"mprops": [
"count"
]
Expand All @@ -55,9 +55,9 @@
"parent_svg_ids": [
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months"
],
"child_svg_ids": [
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months_Race-WhiteAlone"
],
"child_svg_specialized_names": {
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months_Race-WhiteAlone": "White Alone"
},
"mprops": [
"count"
]
Expand All @@ -72,7 +72,7 @@
"c/g/Person_PovertyStatus_Race-WhiteAlone",
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months_Race"
],
"child_svg_ids": [],
"child_svg_specialized_names": {},
"mprops": [
"count"
]
Expand All @@ -84,9 +84,9 @@
"parent_svg_ids": [
"c/g/Person_PovertyStatus"
],
"child_svg_ids": [
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months_Race"
],
"child_svg_specialized_names": {
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months_Race": "Race"
},
"mprops": [
"count",
"unknownMProp"
Expand All @@ -99,9 +99,9 @@
"parent_svg_ids": [
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months"
],
"child_svg_ids": [
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months_Race-WhiteAlone"
],
"child_svg_specialized_names": {
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months_Race-WhiteAlone": "White Alone"
},
"mprops": [
"count",
"unknownMProp"
Expand All @@ -118,7 +118,7 @@
"c/g/Person_PovertyStatus_Race-WhiteAlone",
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months_Race"
],
"child_svg_ids": [],
"child_svg_specialized_names": {},
"mprops": [
"count",
"unknownMProp"
Expand All @@ -131,10 +131,10 @@
"parent_svg_ids": [
"c/g/Person_Race-WhiteAlone"
],
"child_svg_ids": [
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months_Race-WhiteAlone",
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months_Race-WhiteAlone"
],
"child_svg_specialized_names": {
"c/g/Person_PovertyStatus-BelowPovertyLevelInThePast12Months_Race-WhiteAlone": "Below Poverty Level In The Past12 Months",
"c/g/Person_PovertyStatus-AbovePovertyLevelInThePast12Months_Race-WhiteAlone": "Above Poverty Level In The Past12 Months"
},
"mprops": [
"count",
"unknownMProp"
Expand All @@ -147,9 +147,9 @@
"parent_svg_ids": [
"c/g/Person"
],
"child_svg_ids": [
"c/g/Person_Race-WhiteAlone"
],
"child_svg_specialized_names": {
"c/g/Person_Race-WhiteAlone": "White Alone"
},
"mprops": [
"count",
"unknownMProp"
Expand All @@ -164,9 +164,9 @@
"parent_svg_ids": [
"c/g/Person_Race"
],
"child_svg_ids": [
"c/g/Person_PovertyStatus_Race-WhiteAlone"
],
"child_svg_specialized_names": {
"c/g/Person_PovertyStatus_Race-WhiteAlone": "Poverty Status"
},
"mprops": [
"count",
"unknownMProp"
Expand Down
Loading

0 comments on commit 0daedbf

Please sign in to comment.