Skip to content

Commit

Permalink
Merge branch 'master' into repair_vrids
Browse files Browse the repository at this point in the history
  • Loading branch information
melissacline authored Nov 9, 2023
2 parents 249dbaa + cc010d3 commit 9c34f23
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def main(vcf_parts, built_tsv, output):
df_victor = victor_results_as_df(vcf_parts)
df = pd.read_csv(built_tsv, sep='\t', keep_default_na=False)

df_merged = df.merge(df_victor, left_on='Genomic_Coordinate_hg38', right_on=coord_col, how='left')
df_merged = df.merge(df_victor, left_on='pyhgvs_Genomic_Coordinate_38', right_on=coord_col, how='left')

# drop join key and write
(df_merged.drop(columns=[coord_col]).
Expand Down
2 changes: 1 addition & 1 deletion pipeline/splice_ai/add_splice_scores_to_built_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def main(vcf, built_tsv, output):
df_spliceai = spliceai_results_as_df(vcf)
df = pd.read_csv(built_tsv, sep='\t', keep_default_na=False)

df_merged = df.merge(df_spliceai, left_on='Genomic_Coordinate_hg38', right_on=coord_col, how='left')
df_merged = df.merge(df_spliceai, left_on='pyhgvs_Genomic_Coordinate_38', right_on=coord_col, how='left')

# drop join key and write
(df_merged.drop(columns=[coord_col]).
Expand Down
12 changes: 6 additions & 6 deletions pipeline/workflow/CompileVCFFiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,13 +389,13 @@ def run(self):

class DownloadStaticG1KData(DefaultPipelineTask):
def output(self):
return luigi.LocalTarget(f"{self.g1k_file_dir}/1000G.sorted.hg38.vcf")
return luigi.LocalTarget(f"{self.esp_file_dir}/1000G.sorted.hg38.vcf")

def run(self):
os.chdir(self.g1k_file_dir)
os.chdir(self.esp_file_dir)

g1k_vcf_url = "https://brcaexchange.org/backend/downloads/1000G.sorted.hg38.vcf"
pipeline_utils.download_file_and_display_progress(g1k_vcf_url)
esp_vcf_url = "https://brcaexchange.org/backend/downloads/ESP.sorted.hg38.vcf"
pipeline_utils.download_file_and_display_progress(esp_vcf_url)


@requires(DownloadStaticG1KData)
Expand Down Expand Up @@ -566,7 +566,7 @@ def run(self):
class MergeVCFsIntoTSVFile(DefaultPipelineTask):
def requires(self):
yield pipeline_common.CopyOutputToOutputDir(self.cfg.output_dir,
esp_processing.SortConcatenatedESPData())
esp_processing.DownloadStaticESPData())
yield pipeline_common.CopyOutputToOutputDir(self.cfg.output_dir,
gnomad_processing.DownloadStaticGnomADVCF())
yield CopyClinvarVCFToOutputDir()
Expand Down Expand Up @@ -722,7 +722,7 @@ def run(self):
self.input().path,
self.output().path)

os.chdir(artifacts_dir_host)
os.chdir(self.artifacts_dir)
os.remove("ready_for_priors.tsv")


Expand Down
14 changes: 13 additions & 1 deletion pipeline/workflow/esp_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,18 @@ def __init__(self, *args, **kwargs):
pipeline_utils.create_path_if_nonexistent(self.esp_file_dir)


class DownloadStaticESPData(DefaultPipelineTask):
    """Pipeline task that downloads the pre-sorted hg38 ESP VCF.

    The file is fetched from brcaexchange.org and its output target lives
    under ``self.esp_file_dir``.

    NOTE(review): ``esp_file_dir`` is not assigned in this class; it is
    presumably provided by the base class -- confirm.
    """

    def output(self):
        """Return the local target for the downloaded ESP VCF."""
        vcf_path = f"{self.esp_file_dir}/esp.sorted.hg38.vcf"
        return luigi.LocalTarget(vcf_path)

    def run(self):
        """Switch into the ESP directory and download the static ESP VCF."""
        # The working directory is changed before the download; presumably
        # the helper writes the file into the current directory -- confirm.
        os.chdir(self.esp_file_dir)
        pipeline_utils.download_file_and_display_progress(
            "https://brcaexchange.org/backend/downloads/esp.sorted.hg38.vcf"
        )



class DownloadLatestESPData(ESPTask):
def output(self):
return luigi.LocalTarget(
Expand Down Expand Up @@ -107,7 +119,7 @@ def run(self):


@requires(ConcatenateESPData)
class SortConcatenatedESPData(ESPTask):
class DownloadSortedESPData(ESPTask):
def output(self):
return luigi.LocalTarget(self.esp_file_dir + "/esp.sorted.hg38.vcf")

Expand Down
1 change: 0 additions & 1 deletion pipeline/workflow/tarball_files_discard_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
./release/artifacts/bayesdel.vcf
./release/artifacts/BICready.vcf
./release/artifacts/BIC.vcf
./release/artifacts/variants_with_splice_ai.vcf
./release/artifacts/built.tsv
./release/artifacts/built_with_bayesdel.tsv
./release/artifacts/built_with_spliceai.tsv
Expand Down
1 change: 1 addition & 0 deletions pipeline/workflow/tarball_files_keep_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
./release/artifacts/missing_reports.log
./release/artifacts/reports.tsv
./release/artifacts/variant_merging.log
./release/artifacts/variants_with_splice_ai.vcf
./release/artifacts/victor_annotation.log
./release/artifacts/wrong_genome_coors/1000_Genomes_wrong_genome_coor.vcf
./release/artifacts/wrong_genome_coors/BIC_wrong_genome_coor.vcf
Expand Down

0 comments on commit 9c34f23

Please sign in to comment.