From b11066a8e3b27eae81f7b99f5c6976cd12332e05 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 1 Dec 2022 17:30:16 -0800 Subject: [PATCH 01/14] Require unifrac-binaries>=1.2, since we are about to change default precision --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 878019dd..818ff6b9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -59,7 +59,7 @@ jobs: else conda install --yes -c conda-forge -c bioconda clangxx_osx-64 fi - conda install --yes -c conda-forge -c bioconda unifrac-binaries + conda install --yes -c conda-forge -c bioconda "unifrac-binaries>=1.2" # TEMP HACK: Use older version of scipy to work around scikit-bio problem conda install --yes -c conda-forge -c bioconda cython "scipy<1.9" "hdf5<1.12.1" biom-format numpy "h5py<3.0.0 | >3.3.0" "scikit-bio>=0.5.7" nose echo "$(uname -s)" From dba72622217b2aeb7874f460ab6a83c5922c251b Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 1 Dec 2022 17:31:13 -0800 Subject: [PATCH 02/14] Change default precision to fp32 and add explicit fp64 functions --- unifrac/__init__.py | 18 +- unifrac/_api.pyx | 12 +- unifrac/_methods.py | 727 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 718 insertions(+), 39 deletions(-) diff --git a/unifrac/__init__.py b/unifrac/__init__.py index 411b2df1..e0d1929f 100644 --- a/unifrac/__init__.py +++ b/unifrac/__init__.py @@ -12,6 +12,10 @@ weighted_normalized, weighted_unnormalized, generalized, + unweighted_fp64, + weighted_normalized_fp64, + weighted_unnormalized_fp64, + generalized_fp64, unweighted_fp32, weighted_normalized_fp32, weighted_unnormalized_fp32, @@ -20,6 +24,10 @@ weighted_normalized_to_file, weighted_unnormalized_to_file, generalized_to_file, + unweighted_fp64_to_file, + weighted_normalized_fp64_to_file, + weighted_unnormalized_fp64_to_file, + generalized_fp64_to_file, unweighted_fp32_to_file, 
weighted_normalized_fp32_to_file, weighted_unnormalized_fp32_to_file, @@ -32,12 +40,18 @@ __version__ = pkg_resources.get_distribution('unifrac').version __all__ = ['unweighted', 'weighted_normalized', 'weighted_unnormalized', - 'generalized', 'unweighted_fp32', 'weighted_normalized_fp32', + 'generalized', 'unweighted_fp64', 'weighted_normalized_fp64', + 'weighted_unnormalized_fp64', 'generalized_fp64', + 'unweighted_fp32', 'weighted_normalized_fp32', 'weighted_unnormalized_fp32', 'generalized_fp32', 'meta', 'unweighted_to_file', 'weighted_normalized_to_file', 'weighted_unnormalized_to_file', - 'generalized_to_file', 'unweighted_fp32_to_file', + 'generalized_to_file', 'unweighted_fp64_to_file', + 'weighted_normalized_fp64_to_file', + 'weighted_unnormalized_fp64_to_file', + 'generalized_fp64_to_file', + 'unweighted_fp32_to_file', 'weighted_normalized_fp32_to_file', 'weighted_unnormalized_fp32_to_file', 'generalized_fp32_to_file', diff --git a/unifrac/_api.pyx b/unifrac/_api.pyx index ca10f6e6..d14dd06a 100644 --- a/unifrac/_api.pyx +++ b/unifrac/_api.pyx @@ -41,6 +41,8 @@ def ssu_inmem(object table, object tree, unifrac_method : str The requested UniFrac method, one of {unweighted, weighted_normalized, weighted_unnormalized, generalized, + unweighted_fp64, weighted_normalized_fp64, + weighted_unnormalized_fp64, generalized_fp64, unweighted_fp32, weighted_normalized_fp32, weighted_unnormalized_fp32, generalized_fp32} variance_adjust : bool @@ -83,7 +85,7 @@ def ssu_inmem(object table, object tree, met_py_bytes = unifrac_method.encode() met_c_string = met_py_bytes - if '_fp32' in unifrac_method: + if '_fp64' not in unifrac_method: numpy_arr_fp32 = _ssu_inmem_fp32(inmem_biom, inmem_tree, met_c_string, variance_adjust, alpha, bypass_tips, n_substeps) @@ -196,6 +198,8 @@ def ssu_fast(str biom_filename, str tree_filename, object ids, unifrac_method : str The requested UniFrac method, one of {unweighted, weighted_normalized, weighted_unnormalized, generalized, + 
unweighted_fp64, weighted_normalized_fp64, + weighted_unnormalized_fp64, generalized_fp64, unweighted_fp32, weighted_normalized_fp32, weighted_unnormalized_fp32, generalized_fp32} variance_adjust : bool @@ -241,7 +245,7 @@ def ssu_fast(str biom_filename, str tree_filename, object ids, tree_c_string = tree_py_bytes met_c_string = met_py_bytes - if '_fp32' in unifrac_method: + if '_fp64' not in unifrac_method: numpy_arr_fp32 = _ssu_fast_fp32(biom_c_string, tree_c_string, ids.__len__(), met_c_string, variance_adjust, alpha, bypass_tips, @@ -365,6 +369,8 @@ def ssu(str biom_filename, str tree_filename, unifrac_method : str The requested UniFrac method, one of {unweighted, weighted_normalized, weighted_unnormalized, generalized, + unweighted_fp64, weighted_normalized_fp64, + weighted_unnormalized_fp64, generalized_fp64, unweighted_fp32, weighted_normalized_fp32, weighted_unnormalized_fp32, generalized_fp32} variance_adjust : bool @@ -529,6 +535,8 @@ def ssu_to_file(str biom_filename, str tree_filename, str out_filename, unifrac_method : str The requested UniFrac method, one of {unweighted, weighted_normalized, weighted_unnormalized, generalized, + unweighted_fp64, weighted_normalized_fp64, + weighted_unnormalized_fp64, generalized_fp64, unweighted_fp32, weighted_normalized_fp32, weighted_unnormalized_fp32, generalized_fp32} variance_adjust : bool diff --git a/unifrac/_methods.py b/unifrac/_methods.py index 1ac10af3..4fa8307f 100644 --- a/unifrac/_methods.py +++ b/unifrac/_methods.py @@ -158,6 +158,73 @@ def unweighted(table: Union[str, Table], bypass_tips, n_substeps) +def unweighted_fp64(table: Union[str, Table], + phylogeny: Union[str, TreeNode, BP], + threads: int = 1, + variance_adjusted: bool = False, + bypass_tips: bool = False, + n_substeps: int = 1) -> skbio.DistanceMatrix: + """Compute Unweighted UniFrac using fp64 math + + Parameters + ---------- + table : str + A filepath to a BIOM-Format 2.1 file. + phylogeny : str + A filepath to a Newick formatted tree. 
+ threads : int, optional + Deprecated, no-op. + variance_adjusted : bool, optional + Adjust for variance or not. Default is False. + bypass_tips : bool, optional + Bypass the tips of the tree in the computation. This reduces compute + by about 50%, but is an approximation. + n_substeps : int, optional + Internally split the problem in substeps for reduced memory footprint. + + Returns + ------- + skbio.DistanceMatrix + The resulting distance matrix. + + Raises + ------ + IOError + If the tree file is not found + If the table is not found + ValueError + If the table does not appear to be BIOM-Format v2.1. + If the phylogeny does not appear to be in Newick format. + + Environment variables + --------------------- + OMP_NUM_THREADS + Number of CPU cores to use. If not defined, use all detected cores. + UNIFRAC_USE_GPU + Enable or disable GPU offload. If not defined, autodetect. + ACC_DEVICE_NUM + The GPU to use. If not defined, the first GPU will be used. + + Notes + ----- + Unweighted UniFrac was originally described in [1]_. Variance Adjusted + UniFrac was originally described in [2]_, and while its application to + Unweighted UniFrac was not described, factoring in the variance adjustment + is still feasible and so it is exposed. + + References + ---------- + .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for + comparing microbial communities. Appl. Environ. Microbiol. 71, 8228-8235 + (2005). + .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a + powerful beta diversity measure for comparing communities based on + phylogeny. BMC Bioinformatics 12:118 (2011).
+ """ + return _call_ssu(table, phylogeny, 'unweighted_fp64', variance_adjusted, + 1.0, bypass_tips, n_substeps) + + def unweighted_fp32(table: Union[str, Table], phylogeny: Union[str, TreeNode, BP], threads: int = 1, @@ -291,6 +358,73 @@ def weighted_normalized(table: Union[str, Table], variance_adjusted, 1.0, bypass_tips, n_substeps) +def weighted_normalized_fp64(table: Union[str, Table], + phylogeny: Union[str, TreeNode, BP], + threads: int = 1, + variance_adjusted: bool = False, + bypass_tips: bool = False, + n_substeps: int = 1 + ) -> skbio.DistanceMatrix: + """Compute weighted normalized UniFrac using fp64 math + + Parameters + ---------- + table : str + A filepath to a BIOM-Format 2.1 file. + phylogeny : str + A filepath to a Newick formatted tree. + threads : int, optional + Deprecated, no-op. + variance_adjusted : bool, optional + Adjust for varianace or not. Default is False. + bypass_tips : bool, optional + Bypass the tips of the tree in the computation. This reduces compute + by about 50%, but is an approximation. + n_substeps : int, optional + Internally split the problem in substeps for reduced memory footprint. + + Returns + ------- + skbio.DistanceMatrix + The resulting distance matrix. + + Raises + ------ + IOError + If the tree file is not found + If the table is not found + ValueError + If the table does not appear to be BIOM-Format v2.1. + If the phylogeny does not appear to be in Newick format. + + Environment variables + --------------------- + OMP_NUM_THREADS + Number of CPU cores to use. If not defined, use all detected cores. + UNIFRAC_USE_GPU + Enable or disable GPU offload. If not defined, autodetect. + ACC_DEVICE_NUM + The GPU to use. If not defined, the first GPU will be used. + + Notes + ----- + Weighted UniFrac was originally described in [1]_. Variance Adjusted + Weighted UniFrac was originally described in [2]_. + + References + ---------- + .. [1] Lozupone, C. A., Hamady, M., Kelley, S. T. & Knight, R. 
Quantitative + and qualitative beta diversity measures lead to different insights into + factors that structure microbial communities. Appl. Environ. Microbiol. + 73, 1576-1585 (2007). + .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a + powerful beta diversity measure for comparing communities based on + phylogeny. BMC Bioinformatics 12:118 (2011). + """ + return _call_ssu(str(table), str(phylogeny), 'weighted_normalized_fp64', + variance_adjusted, 1.0, bypass_tips, n_substeps) + + def weighted_normalized_fp32(table: Union[str, Table], phylogeny: Union[str, TreeNode, BP], threads: int = 1, @@ -425,6 +559,74 @@ def weighted_unnormalized(table: Union[str, Table], variance_adjusted, 1.0, bypass_tips, n_substeps) +def weighted_unnormalized_fp64(table: Union[str, Table], + phylogeny: Union[str, TreeNode, BP], + threads: int = 1, + variance_adjusted: bool = False, + bypass_tips: bool = False, + n_substeps: int = 1 + ) -> skbio.DistanceMatrix: + # noqa + """Compute weighted unnormalized UniFrac using fp64 math + + Parameters + ---------- + table : str + A filepath to a BIOM-Format 2.1 file. + phylogeny : str + A filepath to a Newick formatted tree. + threads : int, optional + Deprecated, no-op. + variance_adjusted : bool, optional + Adjust for variance or not. Default is False. + bypass_tips : bool, optional + Bypass the tips of the tree in the computation. This reduces compute + by about 50%, but is an approximation. + n_substeps : int, optional + Internally split the problem in substeps for reduced memory footprint. + + Returns + ------- + skbio.DistanceMatrix + The resulting distance matrix. + + Raises + ------ + IOError + If the tree file is not found + If the table is not found + ValueError + If the table does not appear to be BIOM-Format v2.1. + If the phylogeny does not appear to be in Newick format. + + Environment variables + --------------------- + OMP_NUM_THREADS + Number of CPU cores to use. If not defined, use all detected cores.
+ UNIFRAC_USE_GPU + Enable or disable GPU offload. If not defined, autodetect. + ACC_DEVICE_NUM + The GPU to use. If not defined, the first GPU will be used. + + Notes + ----- + Weighted UniFrac was originally described in [1]_. Variance Adjusted + Weighted UniFrac was originally described in [2]_. + + References + ---------- + .. [1] Lozupone, C. A., Hamady, M., Kelley, S. T. & Knight, R. Quantitative + and qualitative beta diversity measures lead to different insights into + factors that structure microbial communities. Appl. Environ. Microbiol. + 73, 1576-1585 (2007). + .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a + powerful beta diversity measure for comparing communities based on + phylogeny. BMC Bioinformatics 12:118 (2011). + """ + return _call_ssu(str(table), str(phylogeny), 'weighted_unnormalized_fp64', + variance_adjusted, 1.0, bypass_tips, n_substeps) + + def weighted_unnormalized_fp32(table: Union[str, Table], phylogeny: Union[str, TreeNode, BP], threads: int = 1, @@ -579,6 +781,93 @@ def generalized(table: Union[str, Table], variance_adjusted, alpha, bypass_tips, n_substeps) +def generalized_fp64(table: Union[str, Table], + phylogeny: Union[str, TreeNode, BP], + threads: int = 1, + alpha: float = 1.0, + variance_adjusted: bool = False, + bypass_tips: bool = False, + n_substeps: int = 1) -> skbio.DistanceMatrix: + """Compute Generalized UniFrac using fp64 math + + Parameters + ---------- + table : str + A filepath to a BIOM-Format 2.1 file. + phylogeny : str + A filepath to a Newick formatted tree. + threads : int, optional + Deprecated, no-op. + alpha : float, optional + The level of contribution of high abundance branches. Higher alpha + increases the contribution from high abundance branches while lower + alpha reduces the contribution. Alpha was originally defined over the + range [0, 1]. Default is 1.0. + variance_adjusted : bool, optional + Adjust for variance or not. Default is False.
+ bypass_tips : bool, optional + Bypass the tips of the tree in the computation. This reduces compute + by about 50%, but is an approximation. + n_substeps : int, optional + Internally split the problem in substeps for reduced memory footprint. + + Returns + ------- + skbio.DistanceMatrix + The resulting distance matrix. + + Raises + ------ + IOError + If the tree file is not found + If the table is not found + ValueError + If the table does not appear to be BIOM-Format v2.1. + If the phylogeny does not appear to be in Newick format. + + Environment variables + --------------------- + OMP_NUM_THREADS + Number of CPU cores to use. If not defined, use all detected cores. + UNIFRAC_USE_GPU + Enable or disable GPU offload. If not defined, autodetect. + ACC_DEVICE_NUM + The GPU to use. If not defined, the first GPU will be used. + + Notes + ----- + Generalized UniFrac was originally described in [1]_. Variance Adjusted + UniFrac was originally described in [2]_, but was not described in as + applied to Generalized UniFrac. It is feasible to do, so it is exposed + here. + + An alpha of 1.0 is Weighted normalized UniFrac. An alpha of 0.0 is + approximately Unweighted UniFrac, and is if the proportions are + dichotomized. + + References + ---------- + .. [1] Chen, J., Bittinger, K., Charlson, E. S., Hoffmann C., Lewis, J., + Wu, G. D., Collman R. G., Bushman, F. D. & Hongzhe L. Associating + microbiome composition with environmental covariates using generalized + UniFrac distances. Bioinformatics 28(16), 2106–2113 (2012). + .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a + powerful beta diversity measure for comparing communities based on + phylogeny. BMC Bioinformatics 12:118 (2011). + """ + if alpha == 1.0: + warn("alpha of 1.0 is weighted-normalized UniFrac. 
" + "Weighted-normalized is being used instead as it is more " + "optimized.", + Warning) + return weighted_normalized_fp64(table, phylogeny, threads, + variance_adjusted, bypass_tips, + n_substeps) + else: + return _call_ssu(str(table), str(phylogeny), 'generalized_fp64', + variance_adjusted, alpha, bypass_tips, n_substeps) + + def generalized_fp32(table: Union[str, Table], phylogeny: Union[str, TreeNode, BP], threads: int = 1, @@ -670,6 +959,10 @@ def generalized_fp32(table: Union[str, Table], 'weighted_normalized': weighted_normalized, 'weighted_unnormalized': weighted_unnormalized, 'generalized': generalized, + 'unweighted_fp64': unweighted_fp64, + 'weighted_normalized_fp64': weighted_normalized_fp64, + 'weighted_unnormalized_fp64': weighted_unnormalized_fp64, + 'generalized_fp64': generalized_fp64, 'unweighted_fp32': unweighted_fp32, 'weighted_normalized_fp32': weighted_normalized_fp32, 'weighted_unnormalized_fp32': weighted_unnormalized_fp32, @@ -702,9 +995,12 @@ def meta(tables: tuple, phylogenies: tuple, weights: tuple = None, 'skipping_missing_values'. The default is 'skipping_missing_values'. method : str The UniFrac method to use. The available choices are: - 'unweighted', 'unweighted_fp32', 'weighted_unnormalized', - 'weighted_unnormalized_fp32', 'weighted_normalized', - 'weighted_normalized_fp32', 'generalized' and 'generalized_fp32'. + 'unweighted', 'unweighted_fp64', 'unweighted_fp32', + 'weighted_unnormalized', 'weighted_unnormalized_fp64', + 'weighted_unnormalized_fp32', + 'weighted_normalized', 'weighted_normalized_fp64', + 'weighted_normalized_fp32', + 'generalized', 'generalized_fp64' and 'generalized_fp32'. threads : int, optional TDeprecated, no-op. bypass_tips : bool, optional @@ -902,22 +1198,192 @@ def unweighted_to_file(table: str, phylogeny. BMC Bioinformatics 12:118 (2011). 
""" return _call_ssu_to_file(table, phylogeny, out_filename, - 'unweighted', + 'unweighted', + variance_adjusted, 1.0, bypass_tips, n_substeps, + format, pcoa_dims, buf_dirname) + + +def unweighted_fp64_to_file(table: str, + phylogeny: str, + out_filename: str, + pcoa_dims: int = 10, + threads: int = 1, + variance_adjusted: bool = False, + bypass_tips: bool = False, + format: str = "hdf5", + buf_dirname: str = "", + n_substeps: int = 1) -> str: + """Compute Unweighted UniFrac using fp64 math and write to file + + Parameters + ---------- + table : str + A filepath to a BIOM-Format 2.1 file. + phylogeny : str + A filepath to a Newick formatted tree. + out_filename : str + A filepath to the output file. + pcoa_dims : int, optional + Number of dimensions to use for PCoA compute. + if set to 0, no PCoA is computed. + Defaults of 10. + threads : int, optional + Deprecated, no-op. + variance_adjusted : bool, optional + Adjust for varianace or not. Default is False. + bypass_tips : bool, optional + Bypass the tips of the tree in the computation. This reduces compute + by about 50%, but is an approximation. + format : str, optional + Output format to use. Defaults to "hdf5". + buf_dirname : str, optional + If set, the directory where the disk buffer is hosted, + can be used to reduce the amount of memory needed. + n_substeps : int, optional + Internally split the problem in substeps for reduced memory footprint. + + Returns + ------- + str + A filepath to the output file. + + Raises + ------ + IOError + If the tree file is not found + If the table is not found + If the output file cannot be created + ValueError + If the table does not appear to be BIOM-Format v2.1. + If the phylogeny does not appear to be in Newick format. + + Environment variables + --------------------- + OMP_NUM_THREADS + Number of CPU cores to use. If not defined, use all detected cores. + UNIFRAC_USE_GPU + Enable or disable GPU offload. If not defined, autodetect. + ACC_DEVICE_NUM + The GPU to use. 
If not defined, the first GPU will be used. + + Notes + ----- + Unweighted UniFrac was originally described in [1]_. Variance Adjusted + UniFrac was originally described in [2]_, and while its application to + Unweighted UniFrac was not described, factoring in the variance adjustment + is still feasible and so it is exposed. + + References + ---------- + .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for + comparing microbial communities. Appl. Environ. Microbiol. 71, 8228-8235 + (2005). + .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a + powerful beta diversity measure for comparing communities based on + phylogeny. BMC Bioinformatics 12:118 (2011). + """ + return _call_ssu_to_file(table, phylogeny, out_filename, + 'unweighted_fp64', + variance_adjusted, 1.0, bypass_tips, n_substeps, + format, pcoa_dims, buf_dirname) + + +def unweighted_fp32_to_file(table: str, + phylogeny: str, + out_filename: str, + pcoa_dims: int = 10, + threads: int = 1, + variance_adjusted: bool = False, + bypass_tips: bool = False, + format: str = "hdf5", + buf_dirname: str = "", + n_substeps: int = 1) -> str: + """Compute Unweighted UniFrac using fp32 math and write to file + + Parameters + ---------- + table : str + A filepath to a BIOM-Format 2.1 file. + phylogeny : str + A filepath to a Newick formatted tree. + out_filename : str + A filepath to the output file. + pcoa_dims : int, optional + Number of dimensions to use for PCoA compute. + if set to 0, no PCoA is computed. + Defaults of 10. + threads : int, optional + Deprecated, no-op. + variance_adjusted : bool, optional + Adjust for varianace or not. Default is False. + bypass_tips : bool, optional + Bypass the tips of the tree in the computation. This reduces compute + by about 50%, but is an approximation. + format : str, optional + Output format to use. Defaults to "hdf5". 
+ buf_dirname : str, optional + If set, the directory where the disk buffer is hosted, + can be used to reduce the amount of memory needed. + n_substeps : int, optional + Internally split the problem in substeps for reduced memory footprint. + + Returns + ------- + str + A filepath to the output file. + + Raises + ------ + IOError + If the tree file is not found + If the table is not found + If the output file cannot be created + ValueError + If the table does not appear to be BIOM-Format v2.1. + If the phylogeny does not appear to be in Newick format. + + Environment variables + --------------------- + OMP_NUM_THREADS + Number of CPU cores to use. If not defined, use all detected cores. + UNIFRAC_USE_GPU + Enable or disable GPU offload. If not defined, autodetect. + ACC_DEVICE_NUM + The GPU to use. If not defined, the first GPU will be used. + + Notes + ----- + Unweighted UniFrac was originally described in [1]_. Variance Adjusted + UniFrac was originally described in [2]_, and while its application to + Unweighted UniFrac was not described, factoring in the variance adjustment + is still feasible and so it is exposed. + + References + ---------- + .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for + comparing microbial communities. Appl. Environ. Microbiol. 71, 8228-8235 + (2005). + .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a + powerful beta diversity measure for comparing communities based on + phylogeny. BMC Bioinformatics 12:118 (2011). 
+ """ + return _call_ssu_to_file(table, phylogeny, out_filename, + 'unweighted_fp32', variance_adjusted, 1.0, bypass_tips, n_substeps, format, pcoa_dims, buf_dirname) -def unweighted_fp32_to_file(table: str, - phylogeny: str, - out_filename: str, - pcoa_dims: int = 10, - threads: int = 1, - variance_adjusted: bool = False, - bypass_tips: bool = False, - format: str = "hdf5", - buf_dirname: str = "", - n_substeps: int = 1) -> str: - """Compute Unweighted UniFrac using fp32 math and write to file +def weighted_normalized_to_file(table: str, + phylogeny: str, + out_filename: str, + pcoa_dims: int = 10, + threads: int = 1, + variance_adjusted: bool = False, + bypass_tips: bool = False, + format: str = "hdf5", + buf_dirname: str = "", + n_substeps: int = 1) -> str: + """Compute weighted normalized UniFrac and write to file Parameters ---------- @@ -972,37 +1438,36 @@ def unweighted_fp32_to_file(table: str, Notes ----- - Unweighted UniFrac was originally described in [1]_. Variance Adjusted - UniFrac was originally described in [2]_, and while its application to - Unweighted UniFrac was not described, factoring in the variance adjustment - is still feasible and so it is exposed. + Weighted UniFrac was originally described in [1]_. Variance Adjusted + Weighted UniFrac was originally described in [2]_. References ---------- - .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for - comparing microbial communities. Appl. Environ. Microbiol. 71, 8228-8235 - (2005). + .. [1] Lozupone, C. A., Hamady, M., Kelley, S. T. & Knight, R. Quantitative + and qualitative beta diversity measures lead to different insights into + factors that structure microbial communities. Appl. Environ. Microbiol. + 73, 1576-1585 (2007). .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a powerful beta diversity measure for comparing communities based on phylogeny. BMC Bioinformatics 12:118 (2011). 
""" return _call_ssu_to_file(table, phylogeny, out_filename, - 'unweighted_fp32', + 'weighted_normalized', variance_adjusted, 1.0, bypass_tips, n_substeps, format, pcoa_dims, buf_dirname) -def weighted_normalized_to_file(table: str, - phylogeny: str, - out_filename: str, - pcoa_dims: int = 10, - threads: int = 1, - variance_adjusted: bool = False, - bypass_tips: bool = False, - format: str = "hdf5", - buf_dirname: str = "", - n_substeps: int = 1) -> str: - """Compute weighted normalized UniFrac and write to file +def weighted_normalized_fp64_to_file(table: str, + phylogeny: str, + out_filename: str, + pcoa_dims: int = 10, + threads: int = 1, + variance_adjusted: bool = False, + bypass_tips: bool = False, + format: str = "hdf5", + buf_dirname: str = "", + n_substeps: int = 1) -> str: + """Compute weighted normalized UniFrac using fp64 math and write to file Parameters ---------- @@ -1071,7 +1536,7 @@ def weighted_normalized_to_file(table: str, phylogeny. BMC Bioinformatics 12:118 (2011). """ return _call_ssu_to_file(table, phylogeny, out_filename, - 'weighted_normalized', + 'weighted_normalized_fp64', variance_adjusted, 1.0, bypass_tips, n_substeps, format, pcoa_dims, buf_dirname) @@ -1244,6 +1709,90 @@ def weighted_unnormalized_to_file(table: str, format, pcoa_dims, buf_dirname) +def weighted_unnormalized_fp64_to_file(table: str, + phylogeny: str, + out_filename: str, + pcoa_dims: int = 10, + threads: int = 1, + variance_adjusted: bool = False, + bypass_tips: bool = False, + format: str = "hdf5", + buf_dirname: str = "", + n_substeps: int = 1) -> str: + """Compute weighted unnormalized UniFrac using fp64 math and write to file + + Parameters + ---------- + table : str + A filepath to a BIOM-Format 2.1 file. + phylogeny : str + A filepath to a Newick formatted tree. + out_filename : str + A filepath to the output file. + pcoa_dims : int, optional + Number of dimensions to use for PCoA compute. + if set to 0, no PCoA is computed. + Defaults of 10. 
+ threads : int, optional + Deprecated, no-op. + variance_adjusted : bool, optional + Adjust for variance or not. Default is False. + bypass_tips : bool, optional + Bypass the tips of the tree in the computation. This reduces compute + by about 50%, but is an approximation. + format : str, optional + Output format to use. Defaults to "hdf5". + buf_dirname : str, optional + If set, the directory where the disk buffer is hosted, + can be used to reduce the amount of memory needed. + n_substeps : int, optional + Internally split the problem in substeps for reduced memory footprint. + + Returns + ------- + str + A filepath to the output file. + + Raises + ------ + IOError + If the tree file is not found + If the table is not found + If the output file cannot be created + ValueError + If the table does not appear to be BIOM-Format v2.1. + If the phylogeny does not appear to be in Newick format. + + Environment variables + --------------------- + OMP_NUM_THREADS + Number of CPU cores to use. If not defined, use all detected cores. + UNIFRAC_USE_GPU + Enable or disable GPU offload. If not defined, autodetect. + ACC_DEVICE_NUM + The GPU to use. If not defined, the first GPU will be used. + + Notes + ----- + Weighted UniFrac was originally described in [1]_. Variance Adjusted + Weighted UniFrac was originally described in [2]_. + + References + ---------- + .. [1] Lozupone, C. A., Hamady, M., Kelley, S. T. & Knight, R. Quantitative + and qualitative beta diversity measures lead to different insights into + factors that structure microbial communities. Appl. Environ. Microbiol. + 73, 1576-1585 (2007). + .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a + powerful beta diversity measure for comparing communities based on + phylogeny. BMC Bioinformatics 12:118 (2011).
+ """ + return _call_ssu_to_file(table, phylogeny, out_filename, + 'weighted_unnormalized_fp64', + variance_adjusted, 1.0, bypass_tips, n_substeps, + format, pcoa_dims, buf_dirname) + + def weighted_unnormalized_fp32_to_file(table: str, phylogeny: str, out_filename: str, @@ -1436,6 +1985,114 @@ def generalized_to_file(table: str, format, pcoa_dims, buf_dirname) +def generalized_fp64_to_file(table: str, + phylogeny: str, + out_filename: str, + pcoa_dims: int = 10, + threads: int = 1, + alpha: float = 1.0, + variance_adjusted: bool = False, + bypass_tips: bool = False, + format: str = "hdf5", + buf_dirname: str = "", + n_substeps: int = 1) -> str: + """Compute Generalized UniFrac using fp64 math and write to file + + Parameters + ---------- + table : str + A filepath to a BIOM-Format 2.1 file. + phylogeny : str + A filepath to a Newick formatted tree. + out_filename : str + A filepath to the output file. + pcoa_dims : int, optional + Number of dimensions to use for PCoA compute. + If set to 0, no PCoA is computed. + Defaults to 10. + threads : int, optional + Deprecated, no-op. + alpha : float, optional + The level of contribution of high abundance branches. Higher alpha + increases the contribution from high abundance branches while lower + alpha reduces the contribution. Alpha was originally defined over the + range [0, 1]. Default is 1.0. + variance_adjusted : bool, optional + Adjust for variance or not. Default is False. + bypass_tips : bool, optional + Bypass the tips of the tree in the computation. This reduces compute + by about 50%, but is an approximation. + format : str, optional + Output format to use. Defaults to "hdf5". + buf_dirname : str, optional + If set, the directory where the disk buffer is hosted, + can be used to reduce the amount of memory needed. + n_substeps : int, optional + Internally split the problem in substeps for reduced memory footprint. + + Returns + ------- + str + A filepath to the output file.
+ + Raises + ------ + IOError + If the tree file is not found + If the table is not found + If the output file cannot be created + ValueError + If the table does not appear to be BIOM-Format v2.1. + If the phylogeny does not appear to be in Newick format. + + Environment variables + --------------------- + OMP_NUM_THREADS + Number of CPU cores to use. If not defined, use all detected cores. + UNIFRAC_USE_GPU + Enable or disable GPU offload. If not defined, autodetect. + ACC_DEVICE_NUM + The GPU to use. If not defined, the first GPU will be used. + + Notes + ----- + Generalized UniFrac was originally described in [1]_. Variance Adjusted + UniFrac was originally described in [2]_, but was not described in as + applied to Generalized UniFrac. It is feasible to do, so it is exposed + here. + + An alpha of 1.0 is Weighted normalized UniFrac. An alpha of 0.0 is + approximately Unweighted UniFrac, and is if the proportions are + dichotomized. + + References + ---------- + .. [1] Chen, J., Bittinger, K., Charlson, E. S., Hoffmann C., Lewis, J., + Wu, G. D., Collman R. G., Bushman, F. D. & Hongzhe L. Associating + microbiome composition with environmental covariates using generalized + UniFrac distances. Bioinformatics 28(16), 2106–2113 (2012). + .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a + powerful beta diversity measure for comparing communities based on + phylogeny. BMC Bioinformatics 12:118 (2011). + """ + if alpha == 1.0: + warn("alpha of 1.0 is weighted-normalized UniFrac. 
" + "Weighted-normalized is being used instead as it is more " + "optimized.", + Warning) + return _call_ssu_to_file(table, phylogeny, out_filename, + 'weighted_normalized_fp64', + variance_adjusted, alpha, + bypass_tips, n_substeps, + format, pcoa_dims, buf_dirname) + else: + return _call_ssu_to_file(table, phylogeny, out_filename, + 'generalized_fp64', + variance_adjusted, alpha, + bypass_tips, n_substeps, + format, pcoa_dims, buf_dirname) + + def generalized_fp32_to_file(table: str, phylogeny: str, out_filename: str, From aac017822998b4356838d2ddc8f3c11545f8ac99 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 12:29:18 -0800 Subject: [PATCH 03/14] Update README --- README.md | 104 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 7d86b261..a03ddb4c 100644 --- a/README.md +++ b/README.md @@ -135,22 +135,22 @@ To use Stacked Faith through QIIME2, given similar artifacts, you can use: The library can be accessed directly from within Python. If operating in this mode, the API methods are expecting a filepath to a BIOM-Format V2.1.0 table, and a filepath to a Newick formatted phylogeny. $ python - Python 3.7.8 | packaged by conda-forge | (default, Nov 27 2020, 19:24:58) - [GCC 9.3.0] on linux + Python 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:23:14) [GCC 10.4.0] on linux Type "help", "copyright", "credits" or "license" for more information. 
>>> import unifrac >>> dir(unifrac) - ['__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', - '__package__', '__path__', '__spec__', '__version__', '_api', '_meta', '_methods', - 'faith_pd', - 'generalized', 'generalized_fp32', 'generalized_fp32_to_file', 'generalized_to_file', - 'h5pcoa', 'h5unifrac', 'meta', 'pkg_resources', 'ssu', 'ssu_to_file', - 'unweighted', 'unweighted_fp32', 'unweighted_fp32_to_file', 'unweighted_to_file', - 'weighted_normalized', 'weighted_normalized_fp32', 'weighted_normalized_fp32_to_file', 'weighted_normalized_to_file', - 'weighted_unnormalized', 'weighted_unnormalized_fp32', 'weighted_unnormalized_fp32_to_file', 'weighted_unnormalized_to_file'] - >>> print(unifrac.unweighted_fp32.__doc__) - Compute Unweighted UniFrac using fp32 math - + ['__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', + '__path__', '__spec__', '__version__', '_api', '_meta', '_methods', 'faith_pd', + 'generalized', 'generalized_fp32', 'generalized_fp32_to_file', 'generalized_fp64', 'generalized_fp64_to_file', 'generalized_to_file', + 'h5pcoa', 'h5unifrac', 'meta', 'pkg_resources', 'ssu', 'ssu_fast', 'ssu_inmem', 'ssu_to_file', + 'unweighted', 'unweighted_fp32', 'unweighted_fp32_to_file', 'unweighted_fp64', 'unweighted_fp64_to_file', 'unweighted_to_file', + 'weighted_normalized', 'weighted_normalized_fp32', 'weighted_normalized_fp32_to_file', + 'weighted_normalized_fp64', 'weighted_normalized_fp64_to_file', 'weighted_normalized_to_file', + 'weighted_unnormalized', 'weighted_unnormalized_fp32', 'weighted_unnormalized_fp32_to_file', + 'weighted_unnormalized_fp64', 'weighted_unnormalized_fp64_to_file', 'weighted_unnormalized_to_file'] + >>> print(unifrac.unweighted.__doc__) + Compute Unweighted UniFrac + Parameters ---------- table : str @@ -166,12 +166,12 @@ The library can be accessed directly from within Python. If operating in this mo by about 50%, but is an approximation. 
n_substeps : int, optional Internally split the problem in substeps for reduced memory footprint. - + Returns ------- skbio.DistanceMatrix The resulting distance matrix. - + Raises ------ IOError @@ -180,7 +180,7 @@ The library can be accessed directly from within Python. If operating in this mo ValueError If the table does not appear to be BIOM-Format v2.1. If the phylogeny does not appear to be in Newick format. - + Environment variables --------------------- OMP_NUM_THREADS @@ -189,14 +189,14 @@ The library can be accessed directly from within Python. If operating in this mo Enable or disable GPU offload. If not defined, autodetect. ACC_DEVICE_NUM The GPU to use. If not defined, the first GPU will be used. - + Notes ----- Unweighted UniFrac was originally described in [1]_. Variance Adjusted UniFrac was originally described in [2]_, and while its application to Unweighted UniFrac was not described, factoring in the variance adjustment is still feasible and so it is exposed. - + References ---------- .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for @@ -205,10 +205,10 @@ The library can be accessed directly from within Python. If operating in this mo .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a powerful beta diversity measure for comparing communities based on phylogeny. BMC Bioinformatics 12:118 (2011). - - >>> print(unifrac.unweighted_fp32_to_file.__doc__) - Compute Unweighted UniFrac using fp32 math and write to file - + + >>> print(unifrac.unweighted_to_file.__doc__) + Compute Unweighted UniFrac and write to file + Parameters ---------- table : str @@ -235,12 +235,12 @@ The library can be accessed directly from within Python. If operating in this mo can be used to reduce the amount of memory needed. n_substeps : int, optional Internally split the problem in substeps for reduced memory footprint. - + Returns ------- str A filepath to the output file. 
- + Raises ------ IOError @@ -250,7 +250,7 @@ The library can be accessed directly from within Python. If operating in this mo ValueError If the table does not appear to be BIOM-Format v2.1. If the phylogeny does not appear to be in Newick format. - + Environment variables --------------------- OMP_NUM_THREADS @@ -259,14 +259,14 @@ The library can be accessed directly from within Python. If operating in this mo Enable or disable GPU offload. If not defined, autodetect. ACC_DEVICE_NUM The GPU to use. If not defined, the first GPU will be used. - + Notes ----- Unweighted UniFrac was originally described in [1]_. Variance Adjusted UniFrac was originally described in [2]_, and while its application to Unweighted UniFrac was not described, factoring in the variance adjustment is still feasible and so it is exposed. - + References ---------- .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for @@ -275,27 +275,27 @@ The library can be accessed directly from within Python. If operating in this mo .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a powerful beta diversity measure for comparing communities based on phylogeny. BMC Bioinformatics 12:118 (2011). - + >>> print(unifrac.h5unifrac.__doc__) Read UniFrac from a hdf5 file - + Parameters ---------- h5file : str A filepath to a hdf5 file. - + Returns ------- skbio.DistanceMatrix The distance matrix. - + Raises ------ OSError If the hdf5 file is not found KeyError If the hdf5 does not have the necessary fields - + References ---------- .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for @@ -304,7 +304,7 @@ The library can be accessed directly from within Python. If operating in this mo .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a powerful beta diversity measure for comparing communities based on phylogeny. BMC Bioinformatics 12:118 (2011). 
- + >>> print(unifrac.faith_pd.__doc__) Execute a call to the Stacked Faith API in the UniFrac package @@ -402,14 +402,30 @@ The methods can also be used directly through the command line after install: ## Minor test dataset -A small test `.biom` and `.tre` can be found in `sucpp/`. An example with expected output is below, and should execute in 10s of milliseconds: - - $ ssu -i sucpp/test.biom -t sucpp/test.tre -m unweighted -o test.out - $ cat test.out - Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 - Sample1 0 0.2 0.5714285714285714 0.6 0.5 0.2 - Sample2 0.2 0 0.4285714285714285 0.6666666666666666 0.6 0.3333333333333333 - Sample3 0.5714285714285714 0.4285714285714285 0 0.7142857142857143 0.8571428571428571 0.4285714285714285 - Sample4 0.6 0.6666666666666666 0.7142857142857143 0 0.3333333333333333 0.4 - Sample5 0.5 0.6 0.8571428571428571 0.3333333333333333 0 0.6 - Sample6 0.2 0.3333333333333333 0.4285714285714285 0.4 0.6 0 +A small test `.biom` and `.tre` can be found in `unifrac/tests/data/`. An example with expected output is below, and should execute in 10s of milliseconds: + + $ python + Python 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:23:14) [GCC 10.4.0] on linux + Type "help", "copyright", "credits" or "license" for more information. + >>> import unifrac + >>> d=unifrac.unweighted('unifrac/tests/data/crawford.biom','unifrac/tests/data/crawford.tre') + >>> d.data + array([[0. , 0.71836066, 0.7131736 , 0.6974604 , 0.6258721 , + 0.7282667 , 0.72065896, 0.7264058 , 0.7360605 ], + [0.71836066, 0. , 0.7030297 , 0.734073 , 0.6548042 , + 0.71547383, 0.7839781 , 0.723184 , 0.7613893 ], + [0.7131736 , 0.7030297 , 0. , 0.6104128 , 0.623313 , + 0.71848303, 0.7041634 , 0.75258476, 0.7924903 ], + [0.6974604 , 0.734073 , 0.6104128 , 0. , 0.6439278 , + 0.7005273 , 0.6983272 , 0.77818936, 0.72959894], + [0.6258721 , 0.6548042 , 0.623313 , 0.6439278 , 0. 
, + 0.75782686, 0.7100514 , 0.75065047, 0.7894437 ], + [0.7282667 , 0.71547383, 0.71848303, 0.7005273 , 0.75782686, + 0. , 0.63593644, 0.71283615, 0.5831464 ], + [0.72065896, 0.7839781 , 0.7041634 , 0.6983272 , 0.7100514 , + 0.63593644, 0. , 0.6920076 , 0.6897206 ], + [0.7264058 , 0.723184 , 0.75258476, 0.77818936, 0.75065047, + 0.71283615, 0.6920076 , 0. , 0.7151408 ], + [0.7360605 , 0.7613893 , 0.7924903 , 0.72959894, 0.7894437 , + 0.5831464 , 0.6897206 , 0.7151408 , 0. ]], dtype=float32) + From c9c6a27ed9280797151cf7c36f66f290716c293e Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 13:48:56 -0800 Subject: [PATCH 04/14] Request scikit-bio>=0.5.8 and remove artificial scipy and hdf5 version limits --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 818ff6b9..afde83ec 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -60,8 +60,7 @@ jobs: conda install --yes -c conda-forge -c bioconda clangxx_osx-64 fi conda install --yes -c conda-forge -c bioconda "unifrac-binaries>=1.2" - # TEMP HACK: Use older version of scipy to work around scikit-bio problem - conda install --yes -c conda-forge -c bioconda cython "scipy<1.9" "hdf5<1.12.1" biom-format numpy "h5py<3.0.0 | >3.3.0" "scikit-bio>=0.5.7" nose + conda install --yes -c conda-forge -c bioconda cython scipy hdf5 biom-format numpy "h5py>3.3.0" "scikit-bio>=0.5.8" nose echo "$(uname -s)" if [[ "$(uname -s)" == "Linux" ]]; then From f150a398ca60a928de44b52bee310510a01eac1f Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 13:59:31 -0800 Subject: [PATCH 05/14] Add explicit fp64 test and update precision on the default test --- unifrac/tests/test_api.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/unifrac/tests/test_api.py b/unifrac/tests/test_api.py index e92d6c81..d41767cc 100644 --- a/unifrac/tests/test_api.py 
+++ b/unifrac/tests/test_api.py @@ -40,7 +40,7 @@ def test_unweighted_inmem(self): tree=tree) obs = ssu_inmem(table, tree, 'unweighted', False, 1.0, False, 1) - npt.assert_almost_equal(obs.data, exp.data) + npt.assert_almost_equal(obs.data, exp.data, decimal=6) obs2 = unweighted(table_fp, tree_fp) npt.assert_almost_equal(obs2.data, exp.data) @@ -65,6 +65,26 @@ def test_unweighted_fp32_inmem(self): obs2 = unweighted(table_fp, tree_fp) npt.assert_almost_equal(obs2.data, exp.data) + def test_unweighted_fp64_inmem(self): + tree_fp = self.get_data_path('crawford.tre') + table_fp = self.get_data_path('crawford.biom') + + table = load_table(table_fp) + tree = skbio.TreeNode.read(tree_fp) + + ids = table.ids() + otu_ids = table.ids(axis='observation') + cnts = table.matrix_data.astype(int).toarray().T + exp = skbio.diversity.beta_diversity('unweighted_unifrac', cnts, + ids=ids, otu_ids=otu_ids, + tree=tree) + obs = ssu_inmem(table, tree, 'unweighted_fp64', False, 1.0, + False, 1) + npt.assert_almost_equal(obs.data, exp.data) + + obs2 = unweighted(table_fp, tree_fp) + npt.assert_almost_equal(obs2.data, exp.data) + def get_data_path(self, filename): # adapted from qiime2.plugin.testing.TestPluginBase return pkg_resources.resource_filename(self.package, From d9e17cb85a8e505afecf91789ed7646c8e173e63 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 14:10:12 -0800 Subject: [PATCH 06/14] Relax almost_equal precision, since we are defaulting to fp32 --- unifrac/tests/test_api.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/unifrac/tests/test_api.py b/unifrac/tests/test_api.py index d41767cc..8e8c3221 100644 --- a/unifrac/tests/test_api.py +++ b/unifrac/tests/test_api.py @@ -43,7 +43,7 @@ def test_unweighted_inmem(self): npt.assert_almost_equal(obs.data, exp.data, decimal=6) obs2 = unweighted(table_fp, tree_fp) - npt.assert_almost_equal(obs2.data, exp.data) + npt.assert_almost_equal(obs2.data, exp.data, decimal=6) def
test_unweighted_fp32_inmem(self): tree_fp = self.get_data_path('crawford.tre') @@ -62,8 +62,8 @@ def test_unweighted_fp32_inmem(self): False, 1) npt.assert_almost_equal(obs.data, exp.data, decimal=6) - obs2 = unweighted(table_fp, tree_fp) - npt.assert_almost_equal(obs2.data, exp.data) + obs2 = unweighted_fp32(table_fp, tree_fp) + npt.assert_almost_equal(obs2.data, exp.data, decimal=6) def test_unweighted_fp64_inmem(self): tree_fp = self.get_data_path('crawford.tre') @@ -82,7 +82,7 @@ def test_unweighted_fp64_inmem(self): False, 1) npt.assert_almost_equal(obs.data, exp.data) - obs2 = unweighted(table_fp, tree_fp) + obs2 = unweighted_fp64(table_fp, tree_fp) npt.assert_almost_equal(obs2.data, exp.data) def get_data_path(self, filename): @@ -104,17 +104,17 @@ def test_unweighted_root_eval_issue_46(self): ids=ids, otu_ids=otu_ids, tree=tree_inmem) obs = ssu(table, tree, 'unweighted', False, 1.0, False, 1) - npt.assert_almost_equal(obs.data, exp.data) + npt.assert_almost_equal(obs.data, exp.data, decimal=6) obs2 = unweighted(table, tree) - npt.assert_almost_equal(obs2.data, exp.data) + npt.assert_almost_equal(obs2.data, exp.data, decimal=6) tmpfile = '/tmp/uf_ta_1.md5' unweighted_to_file(table, tree, tmpfile, pcoa_dims=0) try: obs3 = h5unifrac(tmpfile) - npt.assert_almost_equal(obs3.data, exp.data) + npt.assert_almost_equal(obs3.data, exp.data, decimal=6) finally: os.unlink(tmpfile) @@ -128,7 +128,7 @@ def test_meta_unifrac(self): [10 / 16., 0, 8 / 17.], [8 / 13., 8 / 17., 0]]) - npt.assert_almost_equal(u1_distances, result.data) + npt.assert_almost_equal(u1_distances, result.data, decimal=6) self.assertEqual(tuple('ABC'), result.ids) def test_ssu_bad_tree(self): From efe4ba476ca3cc17f1ce28beb5cef9b02b2765e0 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 14:15:00 -0800 Subject: [PATCH 07/14] Add missing import --- unifrac/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unifrac/tests/test_api.py 
b/unifrac/tests/test_api.py index 8e8c3221..f64d9131 100644 --- a/unifrac/tests/test_api.py +++ b/unifrac/tests/test_api.py @@ -19,7 +19,7 @@ import skbio.diversity from unifrac import ssu, faith_pd, ssu_inmem -from unifrac import unweighted, unweighted_to_file, h5unifrac +from unifrac import unweighted, unweighted_to_file, h5unifrac, unweighted_fp32, unweighted_fp64 class UnifracAPITests(unittest.TestCase): From 126d93fad8f93b55a1a3de94a670130402d6dd93 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 15:17:08 -0800 Subject: [PATCH 08/14] Shorten line --- unifrac/tests/test_api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unifrac/tests/test_api.py b/unifrac/tests/test_api.py index f64d9131..e4f61c7e 100644 --- a/unifrac/tests/test_api.py +++ b/unifrac/tests/test_api.py @@ -19,7 +19,8 @@ import skbio.diversity from unifrac import ssu, faith_pd, ssu_inmem -from unifrac import unweighted, unweighted_to_file, h5unifrac, unweighted_fp32, unweighted_fp64 +from unifrac import unweighted, unweighted_to_file, h5unifrac +from unifrac import unweighted_fp32, unweighted_fp64 class UnifracAPITests(unittest.TestCase): From 1a0636b580a14f6be4a5770e252cbee63589d6b0 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 15:31:26 -0800 Subject: [PATCH 09/14] Relax almostEqual precision, since we are defaulting to fp32 --- unifrac/tests/test_api.py | 132 +++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/unifrac/tests/test_api.py b/unifrac/tests/test_api.py index e4f61c7e..25eab211 100644 --- a/unifrac/tests/test_api.py +++ b/unifrac/tests/test_api.py @@ -248,7 +248,7 @@ def test_unweighted_otus_out_of_order(self): self.b1[i], self.b1[j], self.oids1, self.t1) expected = self.unweighted_unifrac( shuffled_b1[i], shuffled_b1[j], shuffled_ids, self.t1) - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def
test_weighted_otus_out_of_order(self): # UniFrac API does not assert the observations are in tip order of the @@ -265,7 +265,7 @@ def test_weighted_otus_out_of_order(self): self.b1[i], self.b1[j], self.oids1, self.t1) expected = self.weighted_unifrac( shuffled_b1[i], shuffled_b1[j], shuffled_ids, self.t1) - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_unweighted_extra_tips(self): # UniFrac values are the same despite unobserved tips in the tree @@ -275,7 +275,7 @@ def test_unweighted_extra_tips(self): self.b1[i], self.b1[j], self.oids1, self.t1_w_extra_tips) expected = self.unweighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1) - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_extra_tips(self): # UniFrac values are the same despite unobserved tips in the tree @@ -285,7 +285,7 @@ def test_weighted_extra_tips(self): self.b1[i], self.b1[j], self.oids1, self.t1_w_extra_tips) expected = self.weighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1) - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_unweighted_minimal_trees(self): # two tips @@ -306,7 +306,7 @@ def test_unweighted_root_not_observed(self): # a point of confusion for me here, so leaving these in for # future reference expected = 0.2 / (0.1 + 0.2 + 0.3) # 0.3333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # root node not observed, but branch between (OTU3, OTU4) and root # is considered shared @@ -317,7 +317,7 @@ def test_unweighted_root_not_observed(self): # a point of confusion for me here, so leaving these in for # future reference expected = 0.7 / (1.1 + 0.5 + 0.7) # 0.3043478261 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_root_not_observed(self): # expected values computed by hand, these 
disagree with QIIME 1.9.1 @@ -326,14 +326,14 @@ def test_weighted_root_not_observed(self): actual = self.weighted_unifrac([1, 0, 0, 0], [1, 1, 0, 0], self.oids2, self.t2) expected = 0.15 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # root node not observed, but branch between (OTU3, OTU4) and root # is considered shared actual = self.weighted_unifrac([0, 0, 1, 1], [0, 0, 1, 0], self.oids2, self.t2) expected = 0.6 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_normalized_root_not_observed(self): # expected values computed by hand, these disagree with QIIME 1.9.1 @@ -342,21 +342,21 @@ def test_weighted_normalized_root_not_observed(self): actual = self.weighted_unifrac([1, 0, 0, 0], [1, 1, 0, 0], self.oids2, self.t2, normalized=True) expected = 0.1764705882 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # root node not observed, but branch between (OTU3, OTU4) and root # is considered shared actual = self.weighted_unifrac([0, 0, 1, 1], [0, 0, 1, 0], self.oids2, self.t2, normalized=True) expected = 0.1818181818 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_unweighted_unifrac_identity(self): for i in range(len(self.b1)): actual = self.unweighted_unifrac( self.b1[i], self.b1[i], self.oids1, self.t1) expected = 0.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_unweighted_unifrac_symmetry(self): for i in range(len(self.b1)): @@ -365,18 +365,18 @@ def test_unweighted_unifrac_symmetry(self): self.b1[i], self.b1[j], self.oids1, self.t1) expected = self.unweighted_unifrac( self.b1[j], self.b1[i], self.oids1, self.t1) - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_unweighted_unifrac_non_overlapping(self): # these communities only share the 
root node actual = self.unweighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1) expected = 1.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( [1, 1, 1, 0, 0], [0, 0, 0, 1, 1], self.oids1, self.t1) expected = 1.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_unweighted_unifrac(self): # expected results derived from QIIME 1.9.1, which @@ -386,74 +386,74 @@ def test_unweighted_unifrac(self): actual = self.unweighted_unifrac( self.b1[0], self.b1[1], self.oids1, self.t1) expected = 0.238095238095 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[0], self.b1[2], self.oids1, self.t1) expected = 0.52 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[0], self.b1[3], self.oids1, self.t1) expected = 0.52 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[0], self.b1[4], self.oids1, self.t1) expected = 0.545454545455 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[0], self.b1[5], self.oids1, self.t1) expected = 0.619047619048 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample B versus remaining actual = self.unweighted_unifrac( self.b1[1], self.b1[2], self.oids1, self.t1) expected = 0.347826086957 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[1], self.b1[3], self.oids1, self.t1) expected = 0.347826086957 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( 
self.b1[1], self.b1[4], self.oids1, self.t1) expected = 0.68 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[1], self.b1[5], self.oids1, self.t1) expected = 0.421052631579 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample C versus remaining actual = self.unweighted_unifrac( self.b1[2], self.b1[3], self.oids1, self.t1) expected = 0.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[2], self.b1[4], self.oids1, self.t1) expected = 0.68 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[2], self.b1[5], self.oids1, self.t1) expected = 0.421052631579 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample D versus remaining actual = self.unweighted_unifrac( self.b1[3], self.b1[4], self.oids1, self.t1) expected = 0.68 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.unweighted_unifrac( self.b1[3], self.b1[5], self.oids1, self.t1) expected = 0.421052631579 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample E versus remaining actual = self.unweighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1) expected = 1.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_unifrac_identity(self): for i in range(len(self.b1)): actual = self.weighted_unifrac( self.b1[i], self.b1[i], self.oids1, self.t1) expected = 0.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_unifrac_symmetry(self): for i in range(len(self.b1)): @@ -462,7 +462,7 @@ def test_weighted_unifrac_symmetry(self): 
self.b1[i], self.b1[j], self.oids1, self.t1) expected = self.weighted_unifrac( self.b1[j], self.b1[i], self.oids1, self.t1) - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_unifrac_non_overlapping(self): # expected results derived from QIIME 1.9.1, which @@ -472,7 +472,7 @@ def test_weighted_unifrac_non_overlapping(self): actual = self.weighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1) expected = 4.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_unifrac(self): # expected results derived from QIIME 1.9.1, which @@ -481,74 +481,74 @@ def test_weighted_unifrac(self): actual = self.weighted_unifrac( self.b1[0], self.b1[1], self.oids1, self.t1) expected = 2.4 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[0], self.b1[2], self.oids1, self.t1) expected = 1.86666666667 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[0], self.b1[3], self.oids1, self.t1) expected = 2.53333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[0], self.b1[4], self.oids1, self.t1) expected = 1.35384615385 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[0], self.b1[5], self.oids1, self.t1) expected = 3.2 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample B versus remaining actual = self.weighted_unifrac( self.b1[1], self.b1[2], self.oids1, self.t1) expected = 2.26666666667 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[1], self.b1[3], self.oids1, self.t1) expected = 
0.933333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[1], self.b1[4], self.oids1, self.t1) expected = 3.2 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[1], self.b1[5], self.oids1, self.t1) expected = 0.8375 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample C versus remaining actual = self.weighted_unifrac( self.b1[2], self.b1[3], self.oids1, self.t1) expected = 1.33333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[2], self.b1[4], self.oids1, self.t1) expected = 1.89743589744 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[2], self.b1[5], self.oids1, self.t1) expected = 2.66666666667 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample D versus remaining actual = self.weighted_unifrac( self.b1[3], self.b1[4], self.oids1, self.t1) expected = 2.66666666667 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[3], self.b1[5], self.oids1, self.t1) expected = 1.33333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample E versus remaining actual = self.weighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1) expected = 4.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_unifrac_identity_normalized(self): for i in range(len(self.b1)): actual = self.weighted_unifrac( self.b1[i], self.b1[i], self.oids1, self.t1, normalized=True) expected = 0.0 - self.assertAlmostEqual(actual, expected) + 
self.assertAlmostEqual(actual, expected, places=6) def test_weighted_unifrac_symmetry_normalized(self): for i in range(len(self.b1)): @@ -559,19 +559,19 @@ def test_weighted_unifrac_symmetry_normalized(self): expected = self.weighted_unifrac( self.b1[j], self.b1[i], self.oids1, self.t1, normalized=True) - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_unifrac_non_overlapping_normalized(self): # these communities only share the root node actual = self.weighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1, normalized=True) expected = 1.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( [1, 1, 1, 0, 0], [0, 0, 0, 1, 1], self.oids1, self.t1, normalized=True) expected = 1.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) def test_weighted_unifrac_normalized(self): # expected results derived from QIIME 1.9.1, which @@ -580,67 +580,67 @@ def test_weighted_unifrac_normalized(self): actual = self.weighted_unifrac( self.b1[0], self.b1[1], self.oids1, self.t1, normalized=True) expected = 0.6 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[0], self.b1[2], self.oids1, self.t1, normalized=True) expected = 0.466666666667 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[0], self.b1[3], self.oids1, self.t1, normalized=True) expected = 0.633333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[0], self.b1[4], self.oids1, self.t1, normalized=True) expected = 0.338461538462 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[0], self.b1[5], 
self.oids1, self.t1, normalized=True) expected = 0.8 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample B versus remaining actual = self.weighted_unifrac( self.b1[1], self.b1[2], self.oids1, self.t1, normalized=True) expected = 0.566666666667 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[1], self.b1[3], self.oids1, self.t1, normalized=True) expected = 0.233333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[1], self.b1[4], self.oids1, self.t1, normalized=True) expected = 0.8 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[1], self.b1[5], self.oids1, self.t1, normalized=True) expected = 0.209375 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample C versus remaining actual = self.weighted_unifrac( self.b1[2], self.b1[3], self.oids1, self.t1, normalized=True) expected = 0.333333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[2], self.b1[4], self.oids1, self.t1, normalized=True) expected = 0.474358974359 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[2], self.b1[5], self.oids1, self.t1, normalized=True) expected = 0.666666666667 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample D versus remaining actual = self.weighted_unifrac( self.b1[3], self.b1[4], self.oids1, self.t1, normalized=True) expected = 0.666666666667 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) actual = self.weighted_unifrac( self.b1[3], self.b1[5], 
self.oids1, self.t1, normalized=True) expected = 0.333333333333 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) # sample E versus remaining actual = self.weighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1, normalized=True) expected = 1.0 - self.assertAlmostEqual(actual, expected) + self.assertAlmostEqual(actual, expected, places=6) class FaithPDEdgeCasesTests(unittest.TestCase): From ed232d7a24a3f2aa02bfad14fd405374ff94a0fc Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 15:47:57 -0800 Subject: [PATCH 10/14] Deprecate python 3.7 and add 3.11 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index afde83ec..97af9313 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,7 +29,7 @@ jobs: needs: lint strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10', '3.11'] os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: From 60b5bb544dec04a1e1171ad53719e18a50405de1 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 15:52:59 -0800 Subject: [PATCH 11/14] Remove python 3.11 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 97af9313..9fd92bba 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,7 +29,7 @@ jobs: needs: lint strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10'] os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: From 43a0c85bd11e6acc9bb359162fb883e9a3553c04 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 16:26:18 -0800 Subject: [PATCH 12/14] Add GPU tests --- .github/workflows/main.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/main.yml b/.github/workflows/main.yml index 9fd92bba..70f408bc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -30,7 +30,7 @@ jobs: strategy: matrix: python-version: ['3.8', '3.9', '3.10'] - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, linux-gpu-cuda] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 @@ -79,6 +79,7 @@ jobs: shell: bash -l {0} run: | conda activate unifrac + export UNIFRAC_GPU_INFO=Y ls -lrt $CONDA_PREFIX/lib/libhdf5_cpp* nosetests @@ -86,6 +87,7 @@ jobs: shell: bash -l {0} run: | conda activate unifrac + export UNIFRAC_GPU_INFO=Y set -e ssu -i unifrac/tests/data/crawford.biom -t unifrac/tests/data/crawford.tre -o ci/test.dm -m unweighted python -c "import skbio; dm = skbio.DistanceMatrix.read('ci/test.dm')" From c30cecbea92b9f82db074a6650bdfedaff94c347 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 16:35:51 -0800 Subject: [PATCH 13/14] Request miniconda for self-runner --- .github/workflows/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 70f408bc..c81dcb7a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -35,7 +35,8 @@ jobs: steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2 - with: + with: + miniconda-version: "latest" auto-update-conda: true python-version: ${{ matrix.python-version }} - name: Install From 61945262683e426d46963159525e2836c6fc6ebf Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Dec 2022 17:25:09 -0800 Subject: [PATCH 14/14] Update CI and pip files --- ci/linux-64.txt | 2 +- setup.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/linux-64.txt b/ci/linux-64.txt index 9b758361..04b5661a 100644 --- a/ci/linux-64.txt +++ b/ci/linux-64.txt @@ -3,4 +3,4 @@ flake8 nose scikit-bio biom-format -h5py==2.7.0 +h5py diff --git a/setup.py b/setup.py index 
ce5cb28c..52fb3107 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,8 @@ PREFIX = os.environ.get('PREFIX', "") -base = ["cython >= 0.26", "biom-format", "numpy", "h5py >= 2.7.0", - "scikit-bio >= 0.5.1", "iow"] +base = ["cython >= 0.26", "biom-format", "numpy", "h5py >= 3.3.0", + "scikit-bio >= 0.5.8", "iow"] test = ["nose", "flake8"] @@ -92,7 +92,7 @@ def run_compile_ssu(self): setup( name="unifrac", - version="1.0.0", + version="1.2.0", packages=find_packages(), author="Daniel McDonald", license='BSD-3-Clause',