diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0e4a84d..79ad397 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -64,3 +64,5 @@ repos:
       - id: docformatter
         additional_dependencies: [tomli]
         args: [--in-place, --config, ./pyproject.toml]
+
+exclude: ^(tests/test_data/|notebooks/)
diff --git a/news/amd_pdd.rst b/news/amd_pdd.rst
new file mode 100644
index 0000000..ebf566a
--- /dev/null
+++ b/news/amd_pdd.rst
@@ -0,0 +1,24 @@
+**Added:**
+
+* Function to compare structures with AMD in src/diffpy/similarity/metrics/amd.py
+* Function to compare structures with PDD in src/diffpy/similarity/metrics/pdd.py
+
+**Changed:**
+
+*
+
+**Deprecated:**
+
+*
+
+**Removed:**
+
+*
+
+**Fixed:**
+
+*
+
+**Security:**
+
+*
diff --git a/src/diffpy/similarity/metrics/__init__.py b/src/diffpy/similarity/metrics/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/diffpy/similarity/metrics/amd.py b/src/diffpy/similarity/metrics/amd.py
new file mode 100644
index 0000000..aa836a2
--- /dev/null
+++ b/src/diffpy/similarity/metrics/amd.py
@@ -0,0 +1,33 @@
+import amd
+
+
+def amd_compare(cif1, cif2, k=100):
+    """Compare two CIF files or two lists of CIF files using the AMD metric.
+
+    Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF files
+    in cif_list.
+
+    Parameters
+    ----------
+    cif1 : str or list of str
+        Path to the first CIF file (or list of CIF files)
+    cif2 : str or list of str
+        Path to the second CIF file (or list of CIF files)
+    k : int
+        Number of nearest neighbors to consider.
+        Default is 100.
+
+    Returns
+    -------
+    dm : float or pandas.DataFrame
+        For single files cif1 and cif2, return the AMD distance value (float).
+        If either cif1 or cif2 is a list, return a distance matrix
+        of shape (len(cif1), len(cif2)).
+        Each element represents the AMD distance between two structures.
+    """
+    dm = amd.compare(cif1, cif2, by="AMD", k=k)
+    if isinstance(cif1, list) or isinstance(cif2, list):
+        # if at least one input is a list, return the distance matrix
+        return dm
+    # two single files: return the first matrix entry as a plain float
+    return float(dm.iloc[0, 0])
diff --git a/src/diffpy/similarity/metrics/pdd.py b/src/diffpy/similarity/metrics/pdd.py
new file mode 100644
index 0000000..a59b84e
--- /dev/null
+++ b/src/diffpy/similarity/metrics/pdd.py
@@ -0,0 +1,33 @@
+import amd
+
+
+def pdd_compare(cif1, cif2, k=100):
+    """Compare two CIF files or two lists of CIF files using the PDD metric.
+
+    Use cif1 = cif2 = cif_list = [cif_0, cif_1, ...] to compare all CIF files
+    in cif_list.
+
+    Parameters
+    ----------
+    cif1 : str or list of str
+        Path to the first CIF file (or list of CIF files)
+    cif2 : str or list of str
+        Path to the second CIF file (or list of CIF files)
+    k : int
+        Number of nearest neighbors to consider.
+        Default is 100.
+
+    Returns
+    -------
+    dm : float or pandas.DataFrame
+        For single files cif1 and cif2, return the PDD distance value (float).
+        If either cif1 or cif2 is a list, return a distance matrix
+        of shape (len(cif1), len(cif2)).
+        Each element represents the PDD distance between two structures.
+    """
+    dm = amd.compare(cif1, cif2, by="PDD", k=k)
+    if isinstance(cif1, list) or isinstance(cif2, list):
+        # if at least one input is a list, return the distance matrix
+        return dm
+    # two single files: return the first matrix entry as a plain float
+    return float(dm.iloc[0, 0])
diff --git a/tests/test_amd.py b/tests/test_amd.py
new file mode 100644
index 0000000..09fb478
--- /dev/null
+++ b/tests/test_amd.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from diffpy.similarity.metrics.amd import amd_compare
+
+# locate the test data relative to this file so that the tests pass
+# regardless of the directory pytest is invoked from
+test_data = Path(__file__).parent / "test_data"
+cif1 = test_data / "mp-390.cif"
+cif2 = test_data / "mp-458.cif"
+dm_amd = pd.read_pickle(test_data / "dm_amd.pkl")
+
+# set up input and output for tests
+# for each case: (input1, input2, expected_output)
+amd_test_datasets = [
+    # test identical cif files
+    (cif1, cif1, 0.0),
+    # test different cif files
+    (cif1, cif2, 0.5725326132426836),
+    # test generate a distance matrix from two lists
+    ([cif1, cif2], [cif1, cif2], dm_amd),
+]
+
+
+@pytest.mark.parametrize("input1, input2, expected_output", amd_test_datasets)
+def test_amd_compare(input1, input2, expected_output):
+    """Test the amd_compare function."""
+    result = amd_compare(input1, input2, k=100)
+    if isinstance(input1, list) or isinstance(input2, list):
+        # compare dataframes with a floating-point tolerance
+        pd.testing.assert_frame_equal(result, expected_output)
+    else:
+        # compare floats with a tolerance instead of exact equality
+        assert result == pytest.approx(expected_output)
diff --git a/tests/test_data/dm_amd.pkl b/tests/test_data/dm_amd.pkl
new file mode 100644
index 0000000..a0e8a29
Binary files /dev/null and b/tests/test_data/dm_amd.pkl differ
diff --git a/tests/test_data/dm_pdd.pkl b/tests/test_data/dm_pdd.pkl
new file mode 100644
index 0000000..e7c5ae3
Binary files /dev/null and b/tests/test_data/dm_pdd.pkl differ
diff --git a/tests/test_data/mp-390.cif b/tests/test_data/mp-390.cif
new file mode 100644
index 0000000..974aa00
--- /dev/null
+++ b/tests/test_data/mp-390.cif
@@ -0,0 +1,32 @@
+# generated using pymatgen
+data_TiO2
+_symmetry_space_group_name_H-M   'P 1'
+_cell_length_a   3.78253951
+_cell_length_b   3.78253984
+_cell_length_c   5.50145140
+_cell_angle_alpha   110.10714408
+_cell_angle_beta   110.10713258
+_cell_angle_gamma   89.99997885
+_symmetry_Int_Tables_number   1
+_chemical_formula_structural   TiO2
+_chemical_formula_sum   'Ti2 O4'
+_cell_volume   68.78397225
+_cell_formula_units_Z   2
+loop_
+ _symmetry_equiv_pos_site_id
+ _symmetry_equiv_pos_as_xyz
+  1  'x, y, z'
+loop_
+ _atom_site_type_symbol
+ _atom_site_label
+ _atom_site_symmetry_multiplicity
+ _atom_site_fract_x
+ _atom_site_fract_y
+ _atom_site_fract_z
+ _atom_site_occupancy
+  Ti  Ti0  1  0.87500000  0.62500000  0.25000000  1
+  Ti  Ti1  1  0.12500000  0.37500000  0.75000000  1
+  O  O2  1  0.33215263  0.58215263  0.16430425  1
+  O  O3  1  0.08215263  0.83215263  0.66430425  1
+  O  O4  1  0.91784737  0.16784737  0.33569575  1
+  O  O5  1  0.66784737  0.41784737  0.83569575  1
diff --git a/tests/test_data/mp-458.cif b/tests/test_data/mp-458.cif
new file mode 100644
index 0000000..6b6730b
--- /dev/null
+++ b/tests/test_data/mp-458.cif
@@ -0,0 +1,36 @@
+# generated using pymatgen
+data_Ti2O3
+_symmetry_space_group_name_H-M   'P 1'
+_cell_length_a   5.45736044
+_cell_length_b   5.45736193
+_cell_length_c   5.45736145
+_cell_angle_alpha   55.79413805
+_cell_angle_beta   55.79412997
+_cell_angle_gamma   55.79413435
+_symmetry_Int_Tables_number   1
+_chemical_formula_structural   Ti2O3
+_chemical_formula_sum   'Ti4 O6'
+_cell_volume   103.72111553
+_cell_formula_units_Z   2
+loop_
+ _symmetry_equiv_pos_site_id
+ _symmetry_equiv_pos_as_xyz
+  1  'x, y, z'
+loop_
+ _atom_site_type_symbol
+ _atom_site_label
+ _atom_site_symmetry_multiplicity
+ _atom_site_fract_x
+ _atom_site_fract_y
+ _atom_site_fract_z
+ _atom_site_occupancy
+  Ti  Ti0  1  0.34453747  0.34453747  0.34453747  1
+  Ti  Ti1  1  0.15546253  0.15546253  0.15546253  1
+  Ti  Ti2  1  0.65546253  0.65546253  0.65546253  1
+  Ti  Ti3  1  0.84453747  0.84453747  0.84453747  1
+  O  O4  1  0.56439446  0.93560554  0.25000000  1
+  O  O5  1  0.25000000  0.56439446  0.93560554  1
+  O  O6  1  0.93560554  0.25000000  0.56439446  1
+  O  O7  1  0.43560554  0.06439446  0.75000000  1
+  O  O8  1  0.75000000  0.43560554  0.06439446  1
+  O  O9  1  0.06439446  0.75000000  0.43560554  1
diff --git a/tests/test_pdd.py b/tests/test_pdd.py
new file mode 100644
index 0000000..770ada5
--- /dev/null
+++ b/tests/test_pdd.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from diffpy.similarity.metrics.pdd import pdd_compare
+
+# locate the test data relative to this file so that the tests pass
+# regardless of the directory pytest is invoked from
+test_data = Path(__file__).parent / "test_data"
+cif1 = test_data / "mp-390.cif"
+cif2 = test_data / "mp-458.cif"
+dm_pdd = pd.read_pickle(test_data / "dm_pdd.pkl")
+
+# set up input and output for tests
+# for each case: (input1, input2, expected_output)
+pdd_test_datasets = [
+    # test identical cif files
+    (cif1, cif1, 0.0),
+    # test different cif files
+    (cif1, cif2, 0.6675364987310654),
+    # test generate a distance matrix from two lists
+    ([cif1, cif2], [cif1, cif2], dm_pdd),
+]
+
+
+@pytest.mark.parametrize("input1, input2, expected_output", pdd_test_datasets)
+def test_pdd_compare(input1, input2, expected_output):
+    """Test the pdd_compare function."""
+    result = pdd_compare(input1, input2, k=100)
+    if isinstance(input1, list) or isinstance(input2, list):
+        # compare dataframes with a floating-point tolerance
+        pd.testing.assert_frame_equal(result, expected_output)
+    else:
+        # compare floats with a tolerance instead of exact equality
+        assert result == pytest.approx(expected_output)