Skip to content

Commit

Permalink
Add count anonymity method
Browse files Browse the repository at this point in the history
  • Loading branch information
glassonion1 committed Oct 26, 2021
1 parent 958f1f2 commit cc30b8e
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 4 deletions.
40 changes: 38 additions & 2 deletions anonypy/anonypy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@ def anonymize_l_diversity(self, k, l):
def anonymize_t_closeness(self, k, p):
    """Anonymize using both ``k`` and ``p``.

    Forwards ``k`` positionally and ``p`` by keyword to the private
    ``__anonymize`` helper and returns whatever that helper returns.
    ``p`` is presumably the t-closeness threshold -- confirm against the
    partitioning code.
    """
    anonymized = self.__anonymize(k, p=p)
    return anonymized

def __count_anonymity(self, k, l=0, p=0.0):
    """Partition the data and summarise each partition with a count.

    Asks ``self.modrian`` for partitions built from ``k``, ``l`` and ``p``,
    then hands them to the module-level ``count_anonymity`` together with
    the frame and the feature/sensitive column names stored on
    ``self.modrian``. Returns the result of ``count_anonymity`` unchanged.
    """
    model = self.modrian
    partitions = model.partition(k, l, p)
    return count_anonymity(
        model.df, partitions, model.feature_columns, model.sensitive_column
    )

def count_k_anonymity(self, k):
    """Return the per-partition counts obtained with ``k`` only.

    Thin wrapper around the private ``__count_anonymity`` helper; ``l`` and
    ``p`` are left at that helper's defaults.
    """
    counts = self.__count_anonymity(k)
    return counts

def count_l_diversity(self, k, l):
    """Return the per-partition counts obtained with ``k`` and ``l``.

    Thin wrapper around the private ``__count_anonymity`` helper; ``l`` is
    forwarded by keyword and ``p`` is left at that helper's default.
    """
    counts = self.__count_anonymity(k, l=l)
    return counts

def count_t_closeness(self, k, p):
    """Return the per-partition counts obtained with ``k`` and ``p``.

    Thin wrapper around the private ``__count_anonymity`` helper; ``p`` is
    forwarded by keyword and ``l`` is left at that helper's default.
    """
    counts = self.__count_anonymity(k, p=p)
    return counts


def agg_categorical_column(series):
# this is workaround for dtype bug of series
Expand Down Expand Up @@ -50,8 +68,6 @@ def anonymize(df, partitions, feature_columns, sensitive_column, max_partitions=
aggregations[column] = agg_numerical_column
rows = []
for i, partition in enumerate(partitions):
if i % 100 == 1:
print("Finished {} partitions...".format(i))
if max_partitions is not None and i > max_partitions:
break
grouped_columns = df.loc[partition].agg(aggregations, squeeze=False)
Expand All @@ -70,3 +86,23 @@ def anonymize(df, partitions, feature_columns, sensitive_column, max_partitions=
)
rows.append(values.copy())
return rows


def count_anonymity(
    df, partitions, feature_columns, sensitive_column, max_partitions=None
):
    """Aggregate every partition of ``df`` and count its sensitive values.

    Each feature column is reduced with ``agg_categorical_column`` when its
    dtype name is ``"category"`` and with ``agg_numerical_column`` otherwise;
    the sensitive column is reduced with pandas' ``"count"`` aggregation.

    Args:
        df: Frame holding the data; rows are selected with
            ``df.loc[partition]``.
        partitions: Iterable of row selections, one per partition.
        feature_columns: Names of the columns to summarise.
        sensitive_column: Name of the column to count.
        max_partitions: Optional cap. The loop stops as soon as the
            zero-based partition index exceeds this value, so up to
            ``max_partitions + 1`` partitions are processed. This mirrors
            the check used by ``anonymize`` so both functions cover the
            same partitions.

    Returns:
        A list with one dict per processed partition: the first row of the
        aggregated result converted with ``to_dict()``.
    """
    aggregations = {
        column: (
            agg_categorical_column
            if df[column].dtype.name == "category"
            else agg_numerical_column
        )
        for column in feature_columns
    }
    aggregations[sensitive_column] = "count"

    rows = []
    for index, partition in enumerate(partitions):
        if max_partitions is not None and index > max_partitions:
            break
        # No extra keyword arguments here: DataFrame.agg forwards **kwargs
        # to the aggregation functions, and none of them accept ``squeeze``.
        aggregated = df.loc[partition].agg(aggregations)
        # to_dict() already builds a fresh dict, so no defensive copy needed.
        rows.append(aggregated.iloc[0].to_dict())
    return rows
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@

setup(
name="anonypy",
version="0.1.1",
version="0.1.4",
packages=find_packages(),
author="glassonion1",
author_email="[email protected]",
url="https://github.com/glassonion1/anonypy",
description="Anonymization library for python",
long_description=long_description,
long_description_content_type="text/markdown",
keywords="k-anonymity l-diversity t-closeness",
keywords="k-anonymity l-diversity t-closeness mondrian",
classifiers=[
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
Expand Down
17 changes: 17 additions & 0 deletions tests/preserver_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,23 @@ def test_k_anonymity():
print(dfn)


def test_count_k_anonymity():
    """Smoke-test ``Preserver.count_k_anonymity`` with ``k=2``.

    Builds a frame from the module-level ``data``/``columns`` fixtures,
    casts the columns listed in ``categorical`` to the category dtype, and
    prints the returned rows as a DataFrame. Nothing is asserted; the test
    only checks that the call completes.
    """
    frame = pd.DataFrame(data=data, columns=columns)
    print(frame)

    for column_name in categorical:
        frame[column_name] = frame[column_name].astype("category")

    preserver = anonypy.Preserver(
        frame,
        ["col1", "col2", "col3"],  # feature columns
        "col4",  # sensitive column
    )
    counted_rows = preserver.count_k_anonymity(k=2)

    print(pd.DataFrame(counted_rows))


def test_l_diversity():
df = pd.DataFrame(data=data, columns=columns)

Expand Down

0 comments on commit cc30b8e

Please sign in to comment.