@@ -2,10 +2,122 @@ from snakemake.utils import min_version
22
33min_version ("8.0" )
44
5-
65include : "rules/common.smk"
6+ import math
7+ import random
8+
9+ #repo dir--direct snakemake to our scripts
10+ REPO = "/nas/longleaf/home/adaigle/fpos_estimation/workflow"
11+
12+ # ─── Simulation settings ───
13+ N_REPS = 5
14+
15+ # log-uniform prior for s; linear-uniform prior for fpos
16+ S_MIN , S_MAX = 1e-4 , 5e-2
17+ FPOS_MIN , FPOS_MAX = 0.0 , 0.05
18+
19+ REPS = list (range (1 , N_REPS + 1 ))
20+ rep_params = {}
21+ for rep in REPS :
22+ # draw s on the log scale
23+ log_s = random .uniform (math .log10 (S_MIN ), math .log10 (S_MAX ))
24+ s = 10 ** log_s
25+
26+ # draw fpos uniformly between 0 and 0.05
27+ fpos = random .uniform (FPOS_MIN , FPOS_MAX )
728
29+ rep_params [rep ] = {
30+ "s" : s ,
31+ "fpos" : fpos ,
32+ }
833
34+ # ─── Targets ───
935rule all :
1036 input :
11- [],
37+ #expand("simulations/rep_{rep}.vcf", rep=REPS),
38+ #expand("simulations/rep_{rep}.vcf.fixed", rep=REPS),
39+ expand ("stats/rep_{rep}.stats.tsv" , rep = REPS ),
40+ #"filename.txt"
41+ "mapped_reads/A.bam"
42+
43+ # ─── Run SLiM for each replicate ───
44+ rule simulate :
45+ output :
46+ vcf = "simulations/rep_{rep}.vcf.gz" ,
47+ fixed = "simulations/rep_{rep}.vcf.fixed.gz"
48+ params :
49+ script = REPO + "/scripts/fpos_sims.slim" ,
50+ s = lambda wc : rep_params [int (wc .rep )]["s" ],
51+ fpos = lambda wc : rep_params [int (wc .rep )]["fpos" ],
52+ vcf_base = lambda wc : f"simulations/rep_{ wc .rep } .vcf"
53+ shell :
54+ r"""
55+ mkdir -p simulations
56+
57+ # 1) Run SLiM, passing VCF_OUT as an Eidos string literal:
58+ slim \
59+ -d FPOS={params.fpos} \
60+ -d SBEN={params.s} \
61+ -d 'VCF_OUT="{params.vcf_base}"' \
62+ {params.script}
63+
64+ # 2) Compress outputs
65+ gzip -c {params.vcf_base} > {output.vcf}
66+ gzip -c {params.vcf_base}.fixed > {output.fixed}
67+
68+ # 3) Clean up intermediates
69+ rm {params.vcf_base} {params.vcf_base}.fixed
70+ """
71+
72+
73+ # ─── 2) Compute summary stats ───
74+ rule summarize :
75+ priority : 100
76+ input :
77+ vcf = "simulations/rep_{rep}.vcf.gz" ,
78+ fixed = "simulations/rep_{rep}.vcf.fixed.gz"
79+ output :
80+ stats = "stats/rep_{rep}.stats.tsv"
81+ params :
82+ script = REPO + "/scripts/richer_summary_stats_fromvcf.py" ,
83+ window_length = 10000 ,
84+ max_dist_ld = 1000 ,
85+ bin_size_ld = 100 ,
86+ seq_length = 1e8 ,
87+ seed = 42 ,
88+ s = lambda wc : rep_params [int (wc .rep )]["s" ],
89+ fpos = lambda wc : rep_params [int (wc .rep )]["fpos" ]
90+ shell :
91+ r"""
92+ mkdir -p stats
93+ python {params.script} \
94+ --vcf {input.vcf} \
95+ --window_length {params.window_length} \
96+ --output {output.stats} \
97+ --max_dist_ld {params.max_dist_ld} \
98+ --bin_size_ld {params.bin_size_ld} \
99+ --sequence_length {params.seq_length} \
100+ --n_individuals 50 \
101+ --seed {params.seed}
102+
103+ # prepend the simulation parameters as the first two columns
104+ mv {output.stats} {output.stats}.tmp
105+ awk -v s={params.s} -v f={params.fpos} 'BEGIN {{OFS="\t"}} NR==1 {{print "s","fpos",$0; next}} {{print s,f,$0}}' \
106+ {output.stats}.tmp > {output.stats}
107+ rm {output.stats}.tmp
108+ """
109+
110+
111+
112+ rule bwa_map :
113+ input :
114+ "data/genome.fa" ,
115+ "data/samples/A.fastq"
116+ output :
117+ "mapped_reads/A.bam"
118+ shell :
119+ "echo hello > {output}"
120+
121+
122+ rule new :
123+ input :
0 commit comments