|
1 | 1 | #' Find Clusters
|
2 | 2 | #'
|
3 |
| -#' Search for clusters in the scCNA data by |
4 |
| -#' using a graph based approach. \code{findClusters()} |
5 |
| -#' builds an SNN graph of the k-nearest neighbors and |
6 |
| -#' attempts to find two different configuration of clusters. |
7 |
| -#' Major and minor subpopulations. |
8 |
| -#' Major clusters are found by looking at the graph connected components, |
9 |
| -#' whereas the minor clusters use the hdbscan or leiden |
10 |
| -#' algorithm to detect connected communities within the major clusters. |
11 |
| -#' \code{findClusters()} generates the graph by using the |
12 |
| -#' UMAP embedding that can be obtained after running \code{runUmap()}. |
13 |
| -#' |
| 3 | +#' Search for clusters in the scCNA data. |
14 | 4 | #'
|
15 | 5 | #' @author Darlan Conterno Minussi
|
16 | 6 | #'
|
17 | 7 | #' @param scCNA scCNA object.
|
18 | 8 | #' @param embedding String with the name of the reducedDim to pull data from.
|
19 |
| -#' @param method Which method should be used for clustering, |
20 |
| -#' options are "hdbscan" or "leiden". Defaults to "hdbscan". |
21 |
| -#' @param k_superclones k-nearest-neighbor value. |
22 |
| -#' Used to find the major clusters. |
23 |
| -#' @param k_subclones k-nearest-neighbor value. |
24 |
| -#' Used to find the minor clusters |
25 |
| -#' @param seed Seed passed on to pseudorandom dependent functions (Defaults to 17). |
| 9 | +#' @param method A string with the method used for clustering, either "hdbscan" or "leiden". |
| 10 | +#' @param k_superclones A numeric scalar k-nearest-neighbor value. |
| 11 | +#' Used to find the superclones. |
| 12 | +#' @param k_subclones A numeric scalar k-nearest-neighbor value. |
| 13 | +#' Used to find the subclones. |
| 14 | +#' @param seed A numeric scalar seed passed on to pseudo-random dependent functions. |
| 15 | +#' |
| 16 | +#' @details \code{findClusters} uses the reduced dimensional embedding resulting |
| 17 | +#' from \code{\link{runUmap}} to perform clustering at two levels, hereby referred |
| 18 | +#' to as superclones, and subclones. When clustering for superclones, \code{findClusters} |
| 19 | +#' creates a graph representation of the dataset reduced dimension embedding |
| 20 | +#' using a shared nearest neighbor algorithm (SNN) \code{\link[scran]{buildSNNGraph}}, |
| 21 | +#' from this graph the connected components are extracted and generally |
| 22 | +#' represent high-level structures that share large, lineage defining copy |
| 23 | +#' number events. At a more fine-grained resolution, CopyKit can also be |
| 24 | +#' used to detect subclones, i.e., groups of cells containing a unique |
| 25 | +#' copy number event per cluster. To do so, the umap embedding is again |
| 26 | +#' used as the pre-processing step, this time to perform a density-based |
| 27 | +#' clustering with hdbscan \code{\link[dbscan]{hdbscan}}. Network clustering |
| 28 | +#' algorithms on top of the SNN graph, such as the leiden algorithm |
| 29 | +#' \code{\link[leidenbase]{leiden_find_partition}}, can also be used. |
| 30 | +#' |
| 31 | +#' \itemize{ |
| 32 | +#' \item{hdbscan}: hdbscan is an outlier aware clustering algorithm, since |
| 33 | +#' extensive filtering of the dataset can be applied before clustering with |
| 34 | +#' \code{\link{filterCells}}, any cell classified as an outlier is inferred |
| 35 | +#' to the same cluster group as its closest, non-outlier, nearest-neighbor |
| 36 | +#' according to Euclidean distance. |
| 37 | +#' } |
| 38 | +#' |
| 39 | +#' @return Cluster information is added to \code{\link[SummarizedExperiment]{colData}} |
| 40 | +#' in columns superclones or subclones. Superclones are prefixed by 's' whereas subclones |
| 41 | +#' are prefixed by 'c'. |
| 42 | +#' |
| 43 | +#' @seealso \code{\link{findSuggestedK}} to obtain suggestions of k_subclones values. |
26 | 44 | #'
|
27 |
| -#' @return Metadata cluster information that can be found in |
28 |
| -#' \code{SummarizedExperiment::colData(scCNA)$superclones} |
29 |
| -#' for the major clusters and \code{SummarizedExperiment::colData(scCNA)$subclones} |
30 |
| -#' for the minor clusters. |
| 45 | +#' @references Laks, E., McPherson, A., Zahn, H., et al. (2019). Clonal Decomposition |
| 46 | +#' and DNA Replication States Defined by Scaled Single-Cell Genome Sequencing. |
| 47 | +#' Cell, 179(5), 1207–1221.e22. https://doi.org/10.1016/j.cell.2019.10.026 |
| 48 | +#' |
| 49 | +#' Leland McInnes and John Healy and James Melville. UMAP: Uniform Manifold |
| 50 | +#' Approximation and Projection for Dimension Reduction. arXiv:1802.03426 |
| 51 | +#' |
| 52 | +#' Lun ATL, McCarthy DJ, Marioni JC (2016). “A step-by-step workflow for low-level |
| 53 | +#' analysis of single-cell RNA-seq data with Bioconductor.” |
| 54 | +#' F1000Res., 5, 2122. doi: 10.12688/f1000research.9501.2. |
| 55 | +#' |
| 56 | +#' @seealso \code{\link[dbscan]{hdbscan}} For hdbscan clustering. |
31 | 57 | #'
|
32 | 58 | #' @export
|
33 | 59 | #' @import leidenbase
|
|
45 | 71 |
|
46 | 72 | findClusters <- function(scCNA,
|
47 | 73 | embedding = "umap",
|
48 |
| - method = "hdbscan", |
| 74 | + method = c("hdbscan", "leiden"), |
49 | 75 | k_superclones = NULL,
|
50 | 76 | k_subclones = NULL,
|
51 | 77 | seed = 17) {
|
52 | 78 |
|
| 79 | + method <- match.arg(method) |
| 80 | + |
53 | 81 | # obtaining data from reducedDim slot
|
54 | 82 | if (!is.null(SingleCellExperiment::reducedDim(scCNA, embedding))) {
|
55 | 83 |
|
|
0 commit comments