Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing a feature to return random principal components. User re… #736

Merged
merged 1 commit into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions hicexplorer/hicPCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ def parse_arguments():

parserOpt = parser.add_argument_group('Optional arguments')

parserOpt.add_argument('--numberOfEigenvectors', '-noe',
help='The number of eigenvectors that the PCA '
'should compute'
parserOpt.add_argument('--whichEigenvectors', '-we',
help='The list of eigenvectors that the PCA '
'should compute e.g. 1 2 5 will return the first, second and fifth eigenvector.'
' (Default: %(default)s).',
default=2,
type=int,
default='1 2',
nargs='+',
required=False)

parserOpt.add_argument('--format', '-f',
Expand Down Expand Up @@ -239,12 +239,12 @@ def correlateEigenvectorWithHistonMarkTrack(pEigenvector, bwTrack, chromosome,

def main(args=None):
args = parse_arguments().parse_args(args)
if int(args.numberOfEigenvectors) != len(args.outputFileName):
if len(args.whichEigenvectors) != len(args.outputFileName):
log.error("Number of output file names and number of eigenvectors"
" does not match. Please"
"provide the name of each file.\nFiles: {}\nNumber of "
"eigenvectors: {}".format(args.outputFileName,
args.numberOfEigenvectors))
len(args.whichEigenvectors)))
exit(1)

ma = hm.hiCMatrix(args.matrix)
Expand Down Expand Up @@ -303,21 +303,27 @@ def main(args=None):
corrmatrix = convertNansToZeros(csr_matrix(corrmatrix)).todense()
corrmatrix = convertInfsToZeros(csr_matrix(corrmatrix)).todense()
evals, eigs = linalg.eig(corrmatrix)
k = args.numberOfEigenvectors
# k = len(rgs.numberOfEigenvectors

chrom, start, end, _ = zip(*ma.cut_intervals[chr_range[0]:chr_range[1]])

chrom_list += chrom
start_list += start
end_list += end
eigenvectors_correlate = None
for id in args.whichEigenvectors:
if eigenvectors_correlate is None:
eigenvectors_correlate = eigs[:, int(id) - 1:int(id)]
else:
eigenvectors_correlate = np.hstack((eigenvectors_correlate, eigs[:, int(id) - 1:int(id)]))

if args.extraTrack and (args.extraTrack.endswith('.bw') or args.extraTrack.endswith('.bigwig')):
assert(len(end) == len(start))
correlateEigenvectorWithHistonMarkTrack(eigs[:, :k].transpose(),
correlateEigenvectorWithHistonMarkTrack(eigenvectors_correlate.transpose(),
bwTrack, chrname, start,
end, args.extraTrack,
args.histonMarkType)

vecs_list += eigs[:, :k].tolist()
vecs_list += eigenvectors_correlate.tolist()

if args.pearsonMatrix:
file_type = 'cool'
Expand Down Expand Up @@ -350,11 +356,12 @@ def main(args=None):

if args.format == 'bedgraph':
for idx, outfile in enumerate(args.outputFileName):

assert(len(vecs_list) == len(chrom_list))

with open(outfile, 'w') as fh:
for i, value in enumerate(vecs_list):
if len(value) == args.numberOfEigenvectors:
if len(value) == len(args.whichEigenvectors):
if isinstance(value[idx], np.complex):
value[idx] = value[idx].real
fh.write("{}\t{}\t{}\t{:.12f}\n".format(toString(chrom_list[i]), start_list[i], end_list[i], value[idx]))
Expand Down Expand Up @@ -388,7 +395,7 @@ def main(args=None):
# create entry lists
for i, value in enumerate(vecs_list):
# it can happen that some 'value' is having less dimensions than it should
if len(value) == args.numberOfEigenvectors:
if len(value) == len(args.whichEigenvectors):
if isinstance(value[idx], np.complex):
value[idx] = value[idx].real
values.append(value[idx])
Expand Down
24 changes: 12 additions & 12 deletions hicexplorer/test/general/test_hicPCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ def test_pca_bedgraph_lieberman():
pca1.close()
pca2.close()
matrix = ROOT + "small_test_matrix_50kb_res.h5"
args = "--matrix {} --outputFileName {} {} -f bedgraph -noe 2 --method lieberman"\
args = "--matrix {} --outputFileName {} {} -f bedgraph --whichEigenvectors 1 2 --method lieberman"\
.format(matrix, pca1.name, pca2.name).split()
# hicPCA.main(args)
compute(hicPCA.main, args, 5)
hicPCA.main(args)
# compute(hicPCA.main, args, 5)
assert are_files_equal(ROOT + "hicPCA/pca1.bedgraph", pca1.name)
assert are_files_equal(ROOT + "hicPCA/pca2.bedgraph", pca2.name)

Expand All @@ -92,7 +92,7 @@ def test_pca_bedgraph_lieberman_ignore_masked_bins():
pca1.close()
pca2.close()
matrix = ROOT + "small_test_matrix_50kb_res.h5"
args = "--matrix {} --outputFileName {} {} -f bedgraph -noe 2 \
args = "--matrix {} --outputFileName {} {} -f bedgraph --whichEigenvectors 1 2 \
--ignoreMaskedBins --method lieberman".format(matrix, pca1.name, pca2.name).split()
# hicPCA.main(args)
compute(hicPCA.main, args, 5)
Expand All @@ -111,7 +111,7 @@ def test_pca_bigwig_lieberman():
pca1.close()
pca2.close()
matrix = ROOT + "small_test_matrix_50kb_res.h5"
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman"\
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman"\
.format(matrix, pca1.name, pca2.name).split()
# hicPCA.main(args)
compute(hicPCA.main, args, 5)
Expand All @@ -137,7 +137,7 @@ def test_pca_bedgraph_lieberman_gene_density():
matrix = ROOT + "small_test_matrix.h5"
gene_track = ROOT + 'dm3_genes.bed.gz'
chromosomes = 'chrX chrXHet'
args = "--matrix {} --outputFileName {} {} -f bedgraph -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bedgraph --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {}"\
.format(matrix, pca1.name, pca2.name, gene_track, chromosomes).split()
# hicPCA.main(args)
Expand All @@ -159,7 +159,7 @@ def test_pca_bigwig_lieberman_gene_density():
matrix = ROOT + "small_test_matrix.h5"
gene_track = ROOT + 'dm3_genes.bed.gz'
chromosomes = 'chrX chrXHet'
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {}"\
.format(matrix, pca1.name, pca2.name, gene_track, chromosomes).split()
# hicPCA.main(args)
Expand Down Expand Up @@ -187,7 +187,7 @@ def test_pca_bigwig_lieberman_gene_density_intermediate_matrices():
matrix = ROOT + "small_test_matrix.h5"
gene_track = ROOT + 'dm3_genes.bed.gz'
chromosomes = 'chrX chrXHet'
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {} --pearsonMatrix {} --obsexpMatrix {}"\
.format(matrix, pca1.name, pca2.name, gene_track, chromosomes,
pearson_matrix.name, obs_exp_matrix.name).split()
Expand Down Expand Up @@ -235,7 +235,7 @@ def test_pca_bigwig_gene_density_intermediate_matrices_norm():
matrix = ROOT + "small_test_matrix.h5"
gene_track = ROOT + 'dm3_genes.bed.gz'
chromosomes = 'chrX chrXHet'
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 \
--extraTrack {} --chromosomes {} --pearsonMatrix {} --obsexpMatrix {} \
--method dist_norm --ligation_factor"\
.format(matrix, pca1.name, pca2.name, gene_track, chromosomes,
Expand Down Expand Up @@ -291,7 +291,7 @@ def test_pca_bigwig_lieberman_histoneMark_track():
matrix = ROOT + "small_test_matrix.h5"
extra_track = ROOT + 'bigwig_chrx_2e6_5e6.bw'
chromosomes = 'chrX '
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {}"\
.format(matrix, pca1.name, pca2.name, extra_track, chromosomes).split()
# hicPCA.main(args)
Expand All @@ -315,7 +315,7 @@ def test_pca_bedgraph_lieberman_histoneMark_track():
matrix = ROOT + "small_test_matrix.h5"
extra_track = ROOT + 'bigwig_chrx_2e6_5e6.bw'
chromosomes = 'chrX '
args = "--matrix {} --outputFileName {} {} -f bedgraph -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bedgraph --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {}"\
.format(matrix, pca1.name, pca2.name, extra_track, chromosomes).split()
# hicPCA.main(args)
Expand All @@ -338,7 +338,7 @@ def test_pca_extratrack_extends_chromosomesize():
matrix = ROOT + "hicPCA/mm9_reduced_chr1.cool"
extra_track = ROOT + 'hicPCA/mm9_genes_sorted.bed12'

args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman \
--extraTrack {}"\
.format(matrix, pca1.name, pca2.name, extra_track).split()
# hicPCA.main(args)
Expand Down