def add_roi_omero_parser():
    """Build the command-line parser for ``add_roi_omero``.

    Returns:
        argparse.ArgumentParser: parser with two positional inputs
        (``adata``, ``roi``) and one option per keyword argument that is
        forwarded to ``add_roi_omero``, plus ``--output`` for the result file.
    """
    # Imported locally so building the parser does not depend on a
    # module-level import being present.
    import argparse

    parser = argparse.ArgumentParser(description='Add ROI to AnnData object from Omero')

    parser.add_argument('adata', help='Path to the AnnData object')
    parser.add_argument('roi', help='Path to the ROI DataFrame')
    parser.add_argument('--x_coordinate', default='X_centroid', help='Column name for x-coordinates')
    parser.add_argument('--y_coordinate', default='Y_centroid', help='Column name for y-coordinates')
    parser.add_argument('--imageid', default='imageid', help='Column name for image IDs')
    parser.add_argument('--subset', help='List of image names to add ROIs')
    parser.add_argument('--overwrite', action='store_true', help='Overwrite the label column')
    parser.add_argument('--label', default='ROI', help='Key for the returned data')
    parser.add_argument('--n_jobs', type=int, default=-1, help='Number of cores to use')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose mode')
    # The default keeps the file name that used to be hard-coded.
    parser.add_argument('--output', default='modified_anndata.h5ad',
                        help='File the modified AnnData object is written to')
    return parser


def add_roi_omero_command_line(argv=None):
    """Run ``add_roi_omero`` from the command line and save its result.

    Nothing is parsed or executed at import time; all work happens when this
    function is called.

    Parameters:
        argv (list, optional):
            Argument strings to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The object returned by ``add_roi_omero`` after it has been written
        to the ``--output`` path.
    """
    args = add_roi_omero_parser().parse_args(argv)

    # NOTE(review): `adata` and `roi` are forwarded exactly as typed on the
    # command line (path strings) -- confirm add_roi_omero accepts paths.
    result = add_roi_omero(args.adata, args.roi,
                           x_coordinate=args.x_coordinate,
                           y_coordinate=args.y_coordinate,
                           imageid=args.imageid,
                           subset=args.subset,
                           overwrite=args.overwrite,
                           label=args.label,
                           n_jobs=args.n_jobs,
                           verbose=args.verbose)

    # Save the modified AnnData object
    result.write(args.output)
    return result


if __name__ == '__main__':
    add_roi_omero_command_line()
12f738a4..922c1e39 100755 --- a/scimap/tools/_spatial_aggregate.py +++ b/scimap/tools/_spatial_aggregate.py @@ -13,6 +13,7 @@ # Import library import pandas as pd import numpy as np +import argparse from sklearn.neighbors import BallTree # Function @@ -24,40 +25,40 @@ def spatial_aggregate (adata, x_coordinate='X_centroid',y_coordinate='Y_centroid Parameters: adata : AnnData object - x_coordinate : float, required + x_coordinate (float): Column name containing the x-coordinates values. - y_coordinate : float, required + y_coordinate (float): Column name containing the y-coordinates values. - purity : int, optional + purity (int): Supply a value between 1 to 100. It is the percent purity of neighbouring cells. For e.g. if 60 is chosen, every neighbourhood is tested such that if a particular phenotype makes up greater than 60% of the total population it is annotated to be an aggregate of that particular phenotype. - phenotype : string, required + phenotype (string): Column name of the column containing the phenotype information. It could also be any categorical assignment given to single cells. - method : string, optional + method (string): Two options are available: a) 'radius', b) 'knn'. a) radius - Identifies the neighbours within a given radius for every cell. b) knn - Identifies the K nearest neigbours for every cell. - radius : int, optional + radius (int): The radius used to define a local neighbhourhood. - knn : int, optional + knn (int): Number of cells considered for defining the local neighbhourhood. - imageid : string, optional + imageid (string): Column name of the column containing the image id. - subset : string, optional + subset (string): imageid of a single image to be subsetted for analyis. - label : string, optional + label (string): Key for the returned data, stored in `adata.obs`. 
def spatial_aggregate_parser():
    """Build the command-line parser for ``spatial_aggregate``.

    ``--method`` is restricted to the two documented options and ``--purity``
    to the documented 1-100 range, so a mistyped value is reported by the
    parser instead of being forwarded to the analysis.

    Returns:
        argparse.ArgumentParser: parser with a positional ``adata`` path and
        one option per remaining ``spatial_aggregate`` parameter.
    """
    # Imported locally so building the parser does not depend on a
    # module-level import being present.
    import argparse

    def purity_percent(text):
        """Convert ``--purity`` to int and reject values outside 1-100."""
        value = int(text)
        if not 1 <= value <= 100:
            raise argparse.ArgumentTypeError(
                'purity must be between 1 and 100, got %d' % value)
        return value

    parser = argparse.ArgumentParser(description='Perform spatial aggregation analysis.')

    parser.add_argument('adata', help='Path to the AnnData object file.')
    parser.add_argument('--x_coordinate', default='X_centroid', help='Column name for x-coordinates.')
    parser.add_argument('--y_coordinate', default='Y_centroid', help='Column name for y-coordinates.')
    parser.add_argument('--purity', type=purity_percent, default=60, help='Percent purity of neighboring cells.')
    parser.add_argument('--phenotype', default='phenotype', help='Column name for phenotype information.')
    parser.add_argument('--method', default='radius', choices=['radius', 'knn'], help='Method for identifying neighbors.')
    parser.add_argument('--radius', type=int, default=30, help='Radius used to define a local neighborhood.')
    parser.add_argument('--knn', type=int, default=10, help='Number of cells considered for defining the local neighborhood.')
    parser.add_argument('--imageid', type=str, default='imageid', help='Column name for image ID.')
    parser.add_argument('--subset', type=str, help='Image ID of a single image to be subsetted for analysis.')
    parser.add_argument('--label', type=str, default='spatial_aggregate', help='Key for the returned data.')
    return parser


if __name__ == '__main__':
    # Parse the command line arguments
    args = spatial_aggregate_parser().parse_args()

    # Forward by keyword so the call does not depend on parameter order.
    # NOTE(review): `adata` is forwarded as the path string typed on the
    # command line and the returned object is not saved -- confirm both.
    spatial_aggregate(args.adata,
                      x_coordinate=args.x_coordinate,
                      y_coordinate=args.y_coordinate,
                      purity=args.purity,
                      phenotype=args.phenotype,
                      method=args.method,
                      radius=args.radius,
                      knn=args.knn,
                      imageid=args.imageid,
                      subset=args.subset,
                      label=args.label)
argparse from sklearn.neighbors import BallTree # Function @@ -39,34 +40,34 @@ def spatial_count (adata, Parameters: adata : anndata object - x_coordinate : float, required + x_coordinate (float): Column name containing the x-coordinates values. - y_coordinate : float, required + y_coordinate (float): Column name containing the y-coordinates values. - phenotype : string, required + phenotype (string): Column name of the column containing the phenotype information. It could also be any categorical assignment given to single cells. - method : string, optional + method (string): Two options are available: a) `radius`, b) `knn`. a) radius - Identifies the neighbours within a given radius for every cell. b) knn - Identifies the K nearest neigbours for every cell. - radius : int, optional + radius (int): The radius used to define a local neighbhourhood. - knn : int, optional + knn (int): Number of cells considered for defining the local neighbhourhood. - imageid : string, optional + imageid (string): Column name of the column containing the image id. - subset : string, optional + subset (string): imageid of a single image to be subsetted for analyis. - label : string, optional + label (string): Key for the returned data, stored in `adata.uns`. 
def spatial_count_parser():
    """Build the command-line parser for ``spatial_count``.

    ``--adata`` is mandatory (it has no usable default) and ``--method`` is
    restricted to the two documented options.

    Returns:
        argparse.ArgumentParser: parser with one option per
        ``spatial_count`` parameter.
    """
    # Imported locally so building the parser does not depend on a
    # module-level import being present.
    import argparse

    parser = argparse.ArgumentParser(description='Perform spatial counting analysis.')

    # Without `required=True` a missing --adata would silently forward None.
    parser.add_argument('--adata', type=str, required=True, help='Path to the AnnData object file.')
    parser.add_argument('--x_coordinate', type=str, default='X_centroid', help='Column name for x-coordinates.')
    parser.add_argument('--y_coordinate', type=str, default='Y_centroid', help='Column name for y-coordinates.')
    parser.add_argument('--phenotype', type=str, default='phenotype', help='Column name for phenotype information.')
    parser.add_argument('--method', type=str, default='radius', choices=['radius', 'knn'], help='Method for identifying neighbors.')
    parser.add_argument('--radius', type=int, default=30, help='Radius used to define a local neighborhood.')
    parser.add_argument('--knn', type=int, default=10, help='Number of cells considered for defining the local neighborhood.')
    parser.add_argument('--imageid', type=str, default='imageid', help='Column name for image ID.')
    parser.add_argument('--subset', type=str, help='Image ID of a single image to be subsetted for analysis.')
    parser.add_argument('--label', type=str, default='spatial_count', help='Key for the returned data.')
    return parser


if __name__ == '__main__':
    # Parse the command line arguments
    args = spatial_count_parser().parse_args()

    # Forward by keyword so the call does not depend on parameter order.
    # NOTE(review): `adata` is forwarded as the path string typed on the
    # command line and the returned object is not saved -- confirm both.
    spatial_count(args.adata,
                  x_coordinate=args.x_coordinate,
                  y_coordinate=args.y_coordinate,
                  phenotype=args.phenotype,
                  method=args.method,
                  radius=args.radius,
                  knn=args.knn,
                  imageid=args.imageid,
                  subset=args.subset,
                  label=args.label)
pd from sklearn.neighbors import BallTree +import argparse from joblib import Parallel, delayed import itertools @@ -27,26 +28,26 @@ def spatial_distance (adata,x_coordinate='X_centroid',y_coordinate='Y_centroid', adata : AnnData object - x_coordinate : float, required + x_coordinate (float): Column name containing the x-coordinates values. - y_coordinate : float, required + y_coordinate (float): Column name containing the y-coordinates values. - z_coordinate : float, optional + z_coordinate (float, optional): Column name containing the z-coordinates values. - phenotype : string, required + phenotype (string): Column name of the column containing the phenotype information. It could also be any categorical assignment given to single cells. - subset : string, optional + subset (string): imageid of a single image to be subsetted for analyis. - imageid : string, optional + imageid (string): Column name of the column containing the image id. - label : string, optional + label (string): Key for the returned data, stored in `adata.obs`. 
def spatial_distance_parser():
    """Build the command-line parser for ``spatial_distance``.

    The coordinate options hold *column names*, so they are parsed as
    strings. Parsing them as floats rejects every column name a user can
    supply (for example ``--x_coordinate X_position``).

    Returns:
        argparse.ArgumentParser: parser with one option per
        ``spatial_distance`` parameter.
    """
    # Imported locally so building the parser does not depend on a
    # module-level import being present.
    import argparse

    parser = argparse.ArgumentParser(description='Compute spatial distances.')

    # Without `required=True` a missing --adata would silently forward None.
    parser.add_argument('--adata', type=str, required=True, help='Path to the AnnData object file.')
    parser.add_argument('--x_coordinate', type=str, default='X_centroid', help='Column name for x-coordinates.')
    parser.add_argument('--y_coordinate', type=str, default='Y_centroid', help='Column name for y-coordinates.')
    parser.add_argument('--z_coordinate', type=str, help='Column name for z-coordinates.')
    parser.add_argument('--phenotype', type=str, default='phenotype', help='Column name for phenotype information.')
    parser.add_argument('--subset', type=str, help='Image ID of a single image to be subsetted for analysis.')
    parser.add_argument('--imageid', type=str, default='imageid', help='Column name for image ID.')
    parser.add_argument('--label', type=str, default='spatial_distance', help='Key for the returned data.')
    return parser


if __name__ == '__main__':
    # Parse the command line arguments
    args = spatial_distance_parser().parse_args()

    # Forward by keyword so the call does not depend on parameter order.
    # NOTE(review): `adata` is forwarded as the path string typed on the
    # command line and the returned object is not saved -- confirm both.
    spatial_distance(args.adata,
                     x_coordinate=args.x_coordinate,
                     y_coordinate=args.y_coordinate,
                     z_coordinate=args.z_coordinate,
                     phenotype=args.phenotype,
                     subset=args.subset,
                     imageid=args.imageid,
                     label=args.label)
- y_coordinate : float, required + y_coordinate (float): Column name containing the y-coordinates values. - z_coordinate : float, optional + z_coordinate (float): Column name containing the z-coordinates values. - phenotype : string, required + phenotype (string): Column name of the column containing the phenotype information. It could also be any categorical assignment given to single cells. - method : string, optional + method (string): Two options are available: a) 'radius', b) 'knn'. a) radius - Identifies the neighbours within a given radius for every cell. b) knn - Identifies the K nearest neigbours for every cell. - radius : int, optional + radius (int): The radius used to define a local neighbhourhood. - knn : int, optional + knn (int): Number of cells considered for defining the local neighbhourhood. - permutation : int, optional + permutation (int): The number of permutations to be performed for calculating the P-Value. - imageid : string, optional + imageid (string): Column name of the column containing the image id. - subset : string, optional + subset (string): imageid of a single image to be subsetted for analyis. - pval_method : string, optional + pval_method (string): Two options are available: a) 'histocat', b) 'zscore'. a) P-values are calculated by subtracting the permuted mean from the observed mean divided by the number of permutations as described in the histoCAT manuscript (Denis et.al, Nature Methods 2017) b) zscores are calculated from the mean and standard deviation and further p-values are derived by fitting the observed values to a normal distribution. The default is 'histocat'. - label : string, optional + label (string): Key for the returned data, stored in `adata.obs`. The default is 'spatial_interaction'. 
def spatial_interaction_parser():
    """Build the command-line parser for ``spatial_interaction``.

    The coordinate options hold *column names*, so they are parsed as
    strings. Parsing them as floats rejects every column name a user can
    supply.

    Returns:
        argparse.ArgumentParser: parser with one option per
        ``spatial_interaction`` parameter.
    """
    # Imported locally so building the parser does not depend on a
    # module-level import being present.
    import argparse

    parser = argparse.ArgumentParser(description='Compute spatial interaction.')

    # Without `required=True` a missing --adata would silently forward None.
    parser.add_argument('--adata', type=str, required=True, help='Path to the AnnData object file.')
    parser.add_argument('--x_coordinate', type=str, default='X_centroid', help='Column name for x-coordinates.')
    parser.add_argument('--y_coordinate', type=str, default='Y_centroid', help='Column name for y-coordinates.')
    parser.add_argument('--z_coordinate', type=str, help='Column name for z-coordinates.')
    parser.add_argument('--phenotype', type=str, default='phenotype', help='Column name for phenotype information.')
    parser.add_argument('--method', type=str, default='radius', choices=['radius', 'knn'], help='Method for identifying neighbors.')
    parser.add_argument('--radius', type=int, default=30, help='Radius used to define a local neighborhood.')
    parser.add_argument('--knn', type=int, default=10, help='Number of cells considered for defining the local neighborhood.')
    parser.add_argument('--permutation', type=int, default=1000, help='Number of permutations for calculating p-value.')
    parser.add_argument('--imageid', type=str, default='imageid', help='Column name for image ID.')
    parser.add_argument('--subset', type=str, help='Image ID of a single image to be subsetted for analysis.')
    parser.add_argument('--pval_method', type=str, default='histocat', choices=['histocat', 'zscore'], help='Method for calculating p-values.')
    parser.add_argument('--label', type=str, default='spatial_interaction', help='Key for the returned data.')
    return parser


if __name__ == '__main__':
    # Parse the command-line arguments
    args = spatial_interaction_parser().parse_args()

    # Forward by keyword so the call does not depend on parameter order.
    # NOTE(review): `adata` is forwarded as the path string typed on the
    # command line and the returned object is not saved -- confirm both.
    spatial_interaction(args.adata,
                        x_coordinate=args.x_coordinate,
                        y_coordinate=args.y_coordinate,
                        z_coordinate=args.z_coordinate,
                        phenotype=args.phenotype,
                        method=args.method,
                        radius=args.radius,
                        knn=args.knn,
                        permutation=args.permutation,
                        imageid=args.imageid,
                        subset=args.subset,
                        pval_method=args.pval_method,
                        label=args.label)
def spatial_lda_parser():
    """Build the command-line parser for ``spatial_lda``.

    ``--x_coordinate``, ``--y_coordinate`` and ``--phenotype`` hold *column
    names*, so they are parsed as strings. Parsing them as floats rejects
    every column name a user can supply.

    Returns:
        argparse.ArgumentParser: parser with one option per ``spatial_lda``
        parameter.
    """
    # Imported locally so building the parser does not depend on a
    # module-level import being present.
    import argparse

    parser = argparse.ArgumentParser(description='Perform spatial LDA.')

    # Without `required=True` a missing --adata would silently forward None.
    parser.add_argument('--adata', type=str, required=True, help='Path to the AnnData object file.')
    parser.add_argument('--x_coordinate', type=str, default='X_centroid', help='Column name for x-coordinates.')
    parser.add_argument('--y_coordinate', type=str, default='Y_centroid', help='Column name for y-coordinates.')
    parser.add_argument('--phenotype', type=str, default='phenotype', help='Column name for phenotype information.')
    parser.add_argument('--method', type=str, default='radius', choices=['radius', 'knn'], help='Method for identifying neighbors.')
    parser.add_argument('--radius', type=int, default=30, help='Radius used to define a local neighborhood.')
    parser.add_argument('--knn', type=int, default=10, help='Number of cells considered for defining the local neighborhood.')
    parser.add_argument('--imageid', default='imageid', help='Column name for image ID.')
    parser.add_argument('--num_motifs', type=int, default=10, help='Number of requested latent motifs.')
    parser.add_argument('--random_state', type=int, default=0, help='Random state for reproducibility.')
    parser.add_argument('--subset', type=str, help='Image ID of a single image to be subsetted for analysis.')
    parser.add_argument('--label', type=str, default='spatial_lda', help='Key for the returned data.')
    return parser


if __name__ == '__main__':
    # Parse the command-line arguments
    args = spatial_lda_parser().parse_args()

    # Forward by keyword so the call does not depend on parameter order.
    # NOTE(review): `adata` is forwarded as the path string typed on the
    # command line and the returned object is not saved -- confirm both.
    spatial_lda(args.adata,
                x_coordinate=args.x_coordinate,
                y_coordinate=args.y_coordinate,
                phenotype=args.phenotype,
                method=args.method,
                radius=args.radius,
                knn=args.knn,
                imageid=args.imageid,
                num_motifs=args.num_motifs,
                random_state=args.random_state,
                subset=args.subset,
                label=args.label)
index 34dda10e..ef8150f9 100644 --- a/scimap/tools/_spatial_pscore.py +++ b/scimap/tools/_spatial_pscore.py @@ -18,6 +18,7 @@ # Import library import pandas as pd from sklearn.neighbors import BallTree +import argparse import numpy as np # Function @@ -28,41 +29,41 @@ def spatial_pscore (adata,proximity, score_by='imageid', x_coordinate='X_centroi Parameters: adata : AnnData object - proximity : list + proximity (list): Pass a list of cell-types for which the proximity score needs to calculated. e.g. ['CellType-A', 'CellType-B'] - score_by : string, optional + score_by (string): If the scores need to compared across region's of interest, the column name containing the ROI's should be passed. By default the score is calculated across the entire image. - x_coordinate : float, required + x_coordinate (float): Column name containing the x-coordinates values. - y_coordinate : float, required + y_coordinate (float): Column name containing the y-coordinates values. - phenotype : string, required + phenotype (string): Column name of the column containing the phenotype information. It could also be any categorical assignment given to single cells. - method : string, optional + method (string): Two options are available: a) 'radius', b) 'knn'. a) radius - Identifies the neighbours within a given radius for every cell. b) knn - Identifies the K nearest neigbours for every cell. - radius : int, optional + radius (int): The radius used to define a local neighbhourhood. - knn : int, optional + knn (int): Number of cells considered for defining the local neighbhourhood. - imageid : string, optional + imageid (string): Column name of the column containing the image id. - subset : string, optional + subset (string): imageid of a single image to be subsetted for analyis. - label : string, optional + label (string): Key for the returned data, stored in `adata.obs` and `adata.uns`. 
def spatial_pscore_parser():
    """Build the command-line parser for ``spatial_pscore``.

    ``--proximity`` collects one string per cell type. With ``type=list``
    each name would be split into a list of single characters. The coordinate
    options hold *column names*, so they are parsed as strings rather than
    floats.

    Returns:
        argparse.ArgumentParser: parser with one option per
        ``spatial_pscore`` parameter.
    """
    # Imported locally so building the parser does not depend on a
    # module-level import being present.
    import argparse

    parser = argparse.ArgumentParser(description='Calculate spatial proximity scores.')

    # --adata and --proximity have no usable default, so they are mandatory.
    parser.add_argument('--adata', type=str, required=True, help='Path to the AnnData object file.')
    parser.add_argument('--proximity', type=str, nargs='+', required=True, help='List of cell types for proximity scores.')
    parser.add_argument('--score_by', type=str, default='imageid', help='Column name for comparing scores by ROIs.')
    parser.add_argument('--x_coordinate', type=str, default='X_centroid', help='Column name for x-coordinates.')
    parser.add_argument('--y_coordinate', type=str, default='Y_centroid', help='Column name for y-coordinates.')
    parser.add_argument('--phenotype', type=str, default='phenotype', help='Column name for phenotype information.')
    parser.add_argument('--method', type=str, default='radius', choices=['radius', 'knn'], help='Method for identifying neighbors.')
    parser.add_argument('--radius', type=int, default=20, help='Radius used to define a local neighborhood.')
    parser.add_argument('--knn', type=int, default=3, help='Number of cells considered for defining the local neighborhood.')
    parser.add_argument('--imageid', type=str, default='imageid', help='Column name for image ID.')
    parser.add_argument('--subset', type=str, help='Image ID of a single image to be subsetted for analysis.')
    parser.add_argument('--label', type=str, default='spatial_pscore', help='Key for the returned data.')
    return parser


if __name__ == '__main__':
    # Parse the command-line arguments
    args = spatial_pscore_parser().parse_args()

    # Forward by keyword so the call does not depend on parameter order.
    # NOTE(review): `adata` is forwarded as the path string typed on the
    # command line and the returned object is not saved -- confirm both.
    spatial_pscore(args.adata, args.proximity,
                   score_by=args.score_by,
                   x_coordinate=args.x_coordinate,
                   y_coordinate=args.y_coordinate,
                   phenotype=args.phenotype,
                   method=args.method,
                   radius=args.radius,
                   knn=args.knn,
                   imageid=args.imageid,
                   subset=args.subset,
                   label=args.label)
a/scimap/tools/_spatial_similarity_search.py b/scimap/tools/_spatial_similarity_search.py index 97ba5f1c..4bc1fce3 100644 --- a/scimap/tools/_spatial_similarity_search.py +++ b/scimap/tools/_spatial_similarity_search.py @@ -23,6 +23,7 @@ import numba from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import RobustScaler +import argparse # Function @@ -44,70 +45,70 @@ def spatial_similarity_search (adata,ROI_column, Parameters: adata : AnnData object loaded into memory or path to AnnData object. - ROI_column : string, required + ROI_column (string): Column name containing the ROI or region for which the similarity is sorted. This should be a small region in the image that the user is interested in. The ROI can be added by using the `sm.pl.addROI_image` function. - ROI_subset : list, optional + ROI_subset (list): A list of ROI's within the `ROI_column` for which similarity is sorted. By default similarity is calculated for every ROI within the `ROI_column`. The user can also restrict it to one or fewer ROI's by passing its name through this parameter. The default is None. - similarity_threshold : float, optional + similarity_threshold (float): This threshold can be changed to adjust for the strictness of similarity. Often the user would need to run this function with multiple `thresholds` to identify the best fit (based on visual interpretation of the results. To decrease compute time during this process the similarity vectors are saved and hence this parameter can be coupled with `reuse_similarity_matrix` parameter for optimal run time efficiency. The default is 0.5. - x_coordinate : float, required + x_coordinate (float): Column name containing the x-coordinates values. - y_coordinate : float, required + y_coordinate (float): Column name containing the y-coordinates values. - method : string, optional + method (string): Two options are available: a) `radius`, b) `knn`. 
a) `radius` - Identifies the neighbours within a given radius for every cell. b) `knn` - Identifies the K nearest neigbours for every cell. - radius : int, optional + radius (int): The radius used to define a local neighbhourhood. - knn : int, optional + knn (int): Number of cells considered for defining the local neighbhourhood. - imageid : string, optional + imageid (string): Column name of the column containing the image id. - use_raw : boolian, optional + use_raw (bool): Argument to denote whether to use the raw data or scaled data after applying `sm.pp.rescale`. - subset : string, optional + subset (string): imageid of a single image to be subsetted for analyis. Note, if this is used, the similarity will not be computed for other images in the dataset. This is often used for quick look at a single image. - label : string, optional + label (string): Key for the returned data, stored in `adata.obs`. The results will be stored as [label]_ROIname - reuse_similarity_matrix : string, optional + reuse_similarity_matrix (string): In order to save compute time for large datasets, this function can be run once and the `similarity_threshold` can be adjusted multiple times to identify the regions that best resemble the input ROI. In order to use this parameter, pass the `label` used when running this function for the first time. The defaul label is `spatial_similarity_search`. The default is None. - morphological_features : list, optional + morphological_features (list): For calculating the similarity between regions, in addition to the molecular/marker inforamtion, any additional information such as morphological features pertaining to individual cells can be passed into the algorithm. If the data was generated using the `mcmicro` pipeline these ['Area', 'MajorAxisLength','MinorAxisLength', 'Eccentricity', 'Solidity', 'Extent', 'Orientation'] are the usual morphological features that are captured. These can be passed into this parameter. 
Note one can use any additional feature that is stored in `adata.obs`. The default is None. - use_only_morphological_features : bool, optional + use_only_morphological_features (bool): If the user passes data through `morphological_features`, one also has an option to identify regions of similarity just using the morphological features. If `morphological_features` is included and `use_only_morphological_features` is set to `False`, both the morphological features and molecular features will be used. The default is False. - output_dir : string, optional + output_dir (string): Path to output directory. Returns: