
Commit a3016f1

Address comment
1 parent 4253dd1 commit a3016f1

File tree

1 file changed (+34 -30 lines changed)

chapter_3/2_spectral_clustering.tex

@@ -129,7 +129,7 @@ \section{Spectral Clustering}
\item let $A \subset V$. $\mathbbm 1_A^T L \mathbbm 1_A = \sum_{(i,j) \in E} w_{ij} (\mathbbm 1_A(i) - \mathbbm 1_A(j))^2 = \mathrm{weight}(E(A, \bar{A}))$
\end{enumerate}

-\subsection{Review of Spectral Theory: Geometric \& the collection of all Borel sets on Characterization of Eigenvalues}
+\subsection{Review of Spectral Theory: Geometric \& Variational Characterization of Eigenvalues}

\begin{definition}
Let $M \in \mathbb R^{n \times n}$. An eigenvector $v \in \mathbb R^n$ satisfies $v \neq 0$ and $\exists\lambda \in \mathbb R$ s.t. $Mv = \lambda v$.
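As an illustration (not part of the LaTeX source), here is a small NumPy check of the identity $\mathbbm 1_A^T L \mathbbm 1_A = \mathrm{weight}(E(A, \bar{A}))$ from the first context line of this hunk; the graph weights and the subset $A$ are arbitrary choices.

import numpy as np

# Small weighted, undirected graph on 4 vertices (weights chosen arbitrarily).
W = np.array([[0.0, 2.0, 0.5, 0.0],
              [2.0, 0.0, 1.0, 0.0],
              [0.5, 1.0, 0.0, 3.0],
              [0.0, 0.0, 3.0, 0.0]])
D = np.diag(W.sum(axis=1))   # degree matrix
L = D - W                    # unnormalized graph Laplacian

A = [0, 1]                   # a subset A of the vertex set
ind = np.zeros(4)
ind[A] = 1.0                 # indicator vector 1_A

quad_form = ind @ L @ ind    # 1_A^T L 1_A
# weight(E(A, A-bar)): total weight of edges with exactly one endpoint in A
cut = sum(W[i, j] for i in A for j in range(4) if j not in A)
print(quad_form, cut)        # both equal 1.5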
@@ -167,18 +167,21 @@ \section{Spectral Clustering}

After introducing the graph Laplacian, we present the spectral clustering algorithm, which projects the data onto a lower-dimensional space. The algorithm requires a predefined number of clusters and a similarity matrix.

-\textbf{Input:} $S$: $n\times n$ similarity matrix (on $n$ datapoints), $k$: number of clusters.
+\textbf{Input:} $S$: $n\times n$ similarity matrix (on $n$ data points), $k$: number of clusters.

-\textbf{Output:} the partition of $n$ datapoints returned by $k$-means as the clustering.
+\textbf{Output:} the partition of $n$ data points returned by $k$-means as the clustering.

-\begin{enumerate}
-\item Compute the degree matrix $D$ and adjacency matrix $W$ from the weighted graph induced by $S$.
-\item Compute the graph Laplacian $L = D - W$.
-\item Compute the bottom $k$ eigenvectors $u_1,\ldots,u_k$ of the generalized eigensystem $\mathbf{Lu} = \lambda \mathbf{Du}$.
-\item Let $U$ be the $n \times k$ matrix containing vectors $u_1,\ldots,u_k$ as columns.
-\item Let $y_i$ be the $i$-th row of $U$; it corresponds to the $k$ dimensional representation of the datapoint $x_i$.
-\item Cluster points $y_1,\ldots,y_n$ into $k$ clusters via a centroid-based algorithm like $k$-means.
-\end{enumerate}
+\begin{algorithm}
+\caption{Spectral Clustering}
+\begin{algorithmic}
+\STATE Compute the degree matrix $D$ and adjacency matrix $W$ from the weighted graph induced by $S$.
+\STATE Compute the graph Laplacian $L = D - W$.
+\STATE Compute the bottom $k$ eigenvectors $u_1,\ldots,u_k$ of the generalized eigensystem $\mathbf{Lu} = \lambda \mathbf{Du}$.
+\STATE Let $U$ be the $n \times k$ matrix containing vectors $u_1,\ldots,u_k$ as columns.
+\STATE Let $y_i$ be the $i$-th row of $U$; it corresponds to the $k$-dimensional representation of the data point $x_i$.
+\STATE Cluster points $y_1,\ldots,y_n$ into $k$ clusters via a centroid-based algorithm such as $k$-means.
+\end{algorithmic}
+\end{algorithm}

One can calculate similarity from distance using a drop-off function $e^{-\mathrm{dist}^2/\sigma^2}$.
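As a companion to the algorithm block above (an illustrative sketch, not taken from the source), the following Python code builds the similarity matrix with the Gaussian drop-off $e^{-\mathrm{dist}^2/\sigma^2}$ just mentioned, solves the generalized eigensystem $Lu = \lambda Du$ for the bottom $k$ eigenvectors, and runs $k$-means on the rows of $U$. It assumes NumPy, SciPy, and scikit-learn; the names spectral_clustering and sigma are illustrative, not from the text.

import numpy as np
from scipy.linalg import eigh
from sklearn.cluster import KMeans

def spectral_clustering(X, k, sigma=1.0):
    """Cluster the rows of X into k groups via the generalized eigenproblem L u = lambda D u."""
    # Similarity from distance with the Gaussian drop-off exp(-dist^2 / sigma^2).
    sq_dists = np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=-1)
    W = np.exp(-sq_dists / sigma**2)
    np.fill_diagonal(W, 0.0)            # no self-loops in the induced weighted graph

    D = np.diag(W.sum(axis=1))          # degree matrix
    L = D - W                           # graph Laplacian

    # Bottom k eigenvectors of L u = lambda D u; U holds them as columns.
    _, U = eigh(L, D, subset_by_index=[0, k - 1])

    # Row y_i of U is the k-dimensional representation of data point x_i.
    return KMeans(n_clusters=k, n_init=10).fit_predict(U)

# Toy usage: two well-separated blobs should come back as two clusters.
rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 0.1, (20, 2)), rng.normal(3, 0.1, (20, 2))])
print(spectral_clustering(X, k=2))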

@@ -192,24 +195,25 @@ \section{Spectral Clustering}

For $p \geq 1/2$, the following Subsquare algorithm described in \cite{bsh10} finds, with probability $1-1/\mathrm{poly}(n)$, a partition $U_1, \ldots, U_s$ such that for each input cluster $C_i$ of size $\Omega(\max \{qn, \log n \})$ there is a cluster $U_j$ with $C_i=U_j$. The algorithm takes three parameters: $c_0$, $c_1$, and $\delta$.

-\begin{itemize}
-\item Randomly order the the vertices with a bijection $\pi : V \rightarrow \{1, \ldots, n\}$.
-\item Run two passes of the following for each $v$:
-\begin{enumerate}
-\item If $|N(G,v)|<c_0 \log(n/\delta)$, assign $v$ to its own cluster and continue to the next $v$.
-\item Let $R_{temp}$ be the neighbors of $v$ that have been clustered.
-\item For each element of $R_{temp}$, include it in $R$ with probability $\min \left\{\frac{c_{0} \log (n / \delta)}{\left|R_{t e m p}\right|}, 1\right\}$.
-\item For each element of $N(G,v)$, include it in $S$ with probability $\frac{c_{0} \log (n / \delta)}{|N(G, v)|}$. Similarly for each element of $N(G,w)$, include it in $S_w$ with probability $\frac{c_{0} \log (n / \delta)}{|N(G, w)|}$.
-\item Initialize a candidate cluster set $\mathcal{D}$ for $v$ to empty set.
-\item For each $w \in R$, if
-\begin{enumerate}
-\item $|S \cap N(G, w)| \geq c_{1} \log (n / \delta)$,
-\item $|N(G, w)| \geq c_{0} \log (n / \delta)$ and $\left|S_{w} \cap N(G, v)\right| \geq c_{1} \log (n / \delta)$,
-\end{enumerate}
-add $w$'s cluster $\hat{C}(w)$ to $\mathcal{D}$.
-\item If $\mathcal{D}$ is not empty, set $\hat{C}(v)=\hat{C}\left(\operatorname{argmin}_{w^{\prime} \in \cup_{C \in \mathcal{D}}} \pi\left(w^{\prime}\right)\right)$. Else, assign $v$ to its own cluster.
-\end{enumerate}
-\end{itemize}
+\begin{algorithm}
+\caption{Planted Partition Clustering}
+\begin{algorithmic}
+\STATE Randomly order the vertices with a bijection $\pi : V \rightarrow \{1, \ldots, n\}$.
+\FORALL{$v$, for two passes}
+\STATE If $|N(G,v)|<c_0 \log(n/\delta)$, assign $v$ to its own cluster and continue to the next $v$.
+\STATE Let $R_{temp}$ be the neighbors of $v$ that have been clustered.
+\STATE For each element of $R_{temp}$, include it in $R$ with probability $\min \left\{\frac{c_{0} \log (n / \delta)}{\left|R_{temp}\right|}, 1\right\}$.
+\STATE For each element of $N(G,v)$, include it in $S$ with probability $\frac{c_{0} \log (n / \delta)}{|N(G, v)|}$. Similarly, for each $w \in R$ below and each element of $N(G,w)$, include it in $S_w$ with probability $\frac{c_{0} \log (n / \delta)}{|N(G, w)|}$.
+\STATE Initialize a candidate cluster set $\mathcal{D}$ for $v$ to the empty set.
+\FORALL{$w \in R$}
+\IF{$|S \cap N(G, w)| \geq c_{1} \log (n / \delta)$, $|N(G, w)| \geq c_{0} \log (n / \delta)$, and $\left|S_{w} \cap N(G, v)\right| \geq c_{1} \log (n / \delta)$}
+\STATE add $w$'s cluster $\hat{C}(w)$ to $\mathcal{D}$.
+\ENDIF
+\ENDFOR
+\STATE If $\mathcal{D}$ is not empty, set $\hat{C}(v)=\hat{C}\left(\operatorname{argmin}_{w^{\prime} \in \bigcup_{C \in \mathcal{D}} C} \pi\left(w^{\prime}\right)\right)$. Else, assign $v$ to its own cluster.
+\ENDFOR
+\end{algorithmic}
+\end{algorithm}
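The following rough Python sketch mirrors the algorithm block above as written (it is not the reference implementation from \cite{bsh10}); the graph is assumed to be given as a dict mapping each vertex to its set of neighbors, the three threshold tests in the \IF line are treated as a conjunction, and the names subsquare_cluster, adj, and cluster are illustrative.

import math
import random

def subsquare_cluster(adj, c0, c1, delta, seed=0):
    """Sketch of the clustering pass above; adj maps vertex -> set of neighbors N(G, v)."""
    rng = random.Random(seed)
    n = len(adj)
    thr0 = c0 * math.log(n / delta)     # c0 log(n / delta)
    thr1 = c1 * math.log(n / delta)     # c1 log(n / delta)

    order = list(adj)                   # random ordering pi of the vertices
    rng.shuffle(order)
    pi = {v: i for i, v in enumerate(order)}

    cluster = {}                        # C_hat: vertex -> cluster label (a representative vertex)
    for _ in range(2):                  # two passes
        for v in order:
            if len(adj[v]) < thr0:
                cluster[v] = v          # low-degree vertex: its own cluster
                continue
            R_temp = [w for w in adj[v] if w in cluster]
            p_R = min(thr0 / len(R_temp), 1.0) if R_temp else 0.0
            R = [w for w in R_temp if rng.random() < p_R]
            S = {u for u in adj[v] if rng.random() < thr0 / len(adj[v])}

            D = set()                   # candidate clusters for v
            for w in R:
                # S_w is sampled from N(G, w) at the same rate as S.
                S_w = {u for u in adj[w] if rng.random() < thr0 / len(adj[w])}
                if (len(S & adj[w]) >= thr1
                        and len(adj[w]) >= thr0
                        and len(S_w & adj[v]) >= thr1):
                    D.add(cluster[w])
            if D:
                # Join the cluster of the earliest-ordered vertex in the union of candidate clusters.
                members = [u for u, c in cluster.items() if c in D]
                cluster[v] = cluster[min(members, key=pi.get)]
            else:
                cluster[v] = v
    return cluster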

The correctness and runtime are bounded by the following theorems:

@@ -383,7 +387,7 @@ \section{Spectral Clustering}

The paper proves that $d_n$ converges to $d$ uniformly on the sample, and that $\left\|T_n'-T_n\right\|_{L_{2}\left(P_{n}\right)}$ converges to 0. With a sufficiently large sample size $n$, we can then get $d_n(x)$ arbitrarily close to $d(x)$. Thus $\left\|T_{n}-T_{n}'\right\|$ and $\left\|H_{n}-H_{n}'\right\|$ converge to 0 almost surely.

-They relate the operators $T_n$ defined on $L_2 (P_n)$, to some operators $S_n$ on the space $L_2 (P)$ such that their spectra are preserved, so that it could be used as a middle ground to prove $T_n$ converges to $T$. The problem that the operators $T_n$ and $T$ are not defined on the same space has been circumvented by considering bilinear forms instead of the operators themselves.
+They relate the operators $T_n$ defined on $L_2 (P_n)$ to operators $S_n$ on the space $L_2 (P)$ such that their spectra are preserved, so that the $S_n$ can be used as a middle ground to prove that $T_n$ converges to $T$. The problem that the operators $T_n$ and $T$ are not defined on the same space is circumvented by considering bilinear forms instead of the operators themselves.

Then it is proven that the second eigenvector of $H_n'$ converges to the second eigenfunction of the limit operator almost surely.
