Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose Eigensolver's init and method arguments in UMAP class #1143

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 28 additions & 84 deletions umap/spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ def component_layout(
metric="euclidean",
metric_kwds={},
):
"""Provide a layout relating the separate connected components. This is done
by taking the centroid of each component and then performing a spectral embedding
of the centroids.
"""Provide a layout relating the separate connected components. This is done by
taking the centroid of each component and then performing a spectral embedding of
the centroids.

Parameters
----------
Expand Down Expand Up @@ -153,13 +153,14 @@ def multi_component_layout(
metric_kwds={},
init="random",
tol=0.0,
maxiter=0
maxiter=0,
):
"""Specialised layout algorithm for dealing with graphs with many connected components.
This will first find relative positions for the components by spectrally embedding
their centroids, then spectrally embed each individual connected component positioning
them according to the centroid embeddings. This provides a decent embedding of each
component while placing the components in good relative positions to one another.
"""Specialised layout algorithm for dealing with graphs with many connected
components. This will first find relative positions for the components by spectrally
embedding their centroids, then spectrally embed each individual connected component
positioning them according to the centroid embeddings. This provides a decent
embedding of each component while placing the components in good relative positions
to one another.

Parameters
----------
Expand Down Expand Up @@ -240,7 +241,7 @@ def multi_component_layout(
+ meta_embedding[label]
)
else:
component_embedding = _spectral_layout(
component_embedding = spectral_layout(
data=None,
graph=component_graph,
dim=dim,
Expand All @@ -249,7 +250,7 @@ def multi_component_layout(
metric_kwds=metric_kwds,
init=init,
tol=tol,
maxiter=maxiter
maxiter=maxiter,
)
expansion = data_range / np.max(np.abs(component_embedding))
component_embedding *= expansion
Expand All @@ -260,60 +261,6 @@ def multi_component_layout(
return result


def spectral_layout(
data,
graph,
dim,
random_state,
metric="euclidean",
metric_kwds={},
tol=0.0,
maxiter=0
):
"""
Given a graph, compute the spectral embedding of the graph. This is
simply the eigenvectors of the laplacian of the graph. Here we use the
normalized laplacian.

Parameters
----------
data: array of shape (n_samples, n_features)
The source data

graph: sparse matrix
The (weighted) adjacency matrix of the graph as a sparse matrix.

dim: int
The dimension of the space into which to embed.

random_state: numpy RandomState or equivalent
A state capable of being used as a numpy random state.

tol: float, default chosen by implementation
Stopping tolerance for the numerical algorithm computing the embedding.

maxiter: int, default chosen by implementation
Number of iterations the numerical algorithm will go through at most as it
attempts to compute the embedding.

Returns
-------
embedding: array of shape (n_vertices, dim)
The spectral embedding of the graph.
"""
return _spectral_layout(
data=data,
graph=graph,
dim=dim,
random_state=random_state,
metric=metric,
metric_kwds=metric_kwds,
init="random",
tol=tol,
maxiter=maxiter
)


def tswspectral_layout(
data,
graph,
Expand All @@ -323,15 +270,14 @@ def tswspectral_layout(
metric_kwds={},
method=None,
tol=0.0,
maxiter=0
maxiter=0,
):
"""Given a graph, compute the spectral embedding of the graph. This is
simply the eigenvectors of the Laplacian of the graph. Here we use the
normalized laplacian and a truncated SVD-based guess of the
eigenvectors to "warm" up the eigensolver. This function should
give results of similar accuracy to the spectral_layout function, but
may converge more quickly for graph Laplacians that cause
spectral_layout to take an excessive amount of time to complete.
"""Given a graph, compute the spectral embedding of the graph. This is simply the
eigenvectors of the Laplacian of the graph. Here we use the normalized laplacian and
a truncated SVD-based guess of the eigenvectors to "warm" up the eigensolver. This
function should give results of similar accuracy to the spectral_layout function,
but may converge more quickly for graph Laplacians that cause spectral_layout to
take an excessive amount of time to complete.

Parameters
----------
Expand Down Expand Up @@ -378,7 +324,7 @@ def tswspectral_layout(
embedding: array of shape (n_vertices, dim)
The spectral embedding of the graph.
"""
return _spectral_layout(
return spectral_layout(
data=data,
graph=graph,
dim=dim,
Expand All @@ -388,11 +334,11 @@ def tswspectral_layout(
init="tsvd",
method=method,
tol=tol,
maxiter=maxiter
maxiter=maxiter,
)


def _spectral_layout(
def spectral_layout(
data,
graph,
dim,
Expand All @@ -402,10 +348,10 @@ def _spectral_layout(
init="random",
method=None,
tol=0.0,
maxiter=0
maxiter=0,
):
"""General implementation of the spectral embedding of the graph, derived as
a subset of the eigenvectors of the normalized Laplacian of the graph. The numerical
"""General implementation of the spectral embedding of the graph, derived as a
subset of the eigenvectors of the normalized Laplacian of the graph. The numerical
method for computing the eigendecomposition is chosen through heuristics.

Parameters
Expand Down Expand Up @@ -481,9 +427,7 @@ def _spectral_layout(
# L = D - graph
# Normalized Laplacian
I = scipy.sparse.identity(graph.shape[0], dtype=np.float64)
D = scipy.sparse.spdiags(
1.0 / sqrt_deg, 0, graph.shape[0], graph.shape[0]
)
D = scipy.sparse.spdiags(1.0 / sqrt_deg, 0, graph.shape[0], graph.shape[0])
L = I - D * graph * D
if not scipy.sparse.issparse(L):
L = np.asarray(L)
Expand Down Expand Up @@ -532,14 +476,14 @@ def _spectral_layout(
warnings.filterwarnings(
category=UserWarning,
message=r"(?ms).*not reaching the requested tolerance",
action="error"
action="error",
)
eigenvalues, eigenvectors = scipy.sparse.linalg.lobpcg(
L,
np.asarray(X),
largest=False,
tol=tol or 1e-4,
maxiter=maxiter or 5 * graph.shape[0]
maxiter=maxiter or 5 * graph.shape[0],
)
else:
raise ValueError("Method should either be None, 'eigsh' or 'lobpcg'")
Expand Down
Loading