Skip to content

Commit 1065a5c

Browse files
committed
stuff
1 parent cd8712c commit 1065a5c

7 files changed

Lines changed: 833 additions & 4 deletions

File tree

data/genome.fa

Whitespace-only changes.

data/samples/A.fastq

Whitespace-only changes.

mapped_reads/A.bam

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
hello

workflow/Snakefile

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,122 @@ from snakemake.utils import min_version
22

33
min_version("8.0")
44

5-
65
include: "rules/common.smk"
6+
import math
7+
import random
8+
9+
#repo dir--direct snakemake to our scripts
10+
REPO = "/nas/longleaf/home/adaigle/fpos_estimation/workflow"
11+
12+
# ─── Simulation settings ───
13+
N_REPS = 5
14+
15+
# log-uniform prior for s; linear-uniform prior for fpos
16+
S_MIN, S_MAX = 1e-4, 5e-2
17+
FPOS_MIN, FPOS_MAX = 0.0, 0.05
18+
19+
REPS = list(range(1, N_REPS + 1))
20+
rep_params = {}
21+
for rep in REPS:
22+
# draw s on the log scale
23+
log_s = random.uniform(math.log10(S_MIN), math.log10(S_MAX))
24+
s = 10 ** log_s
25+
26+
# draw fpos uniformly between 0 and 0.05
27+
fpos = random.uniform(FPOS_MIN, FPOS_MAX)
728

29+
rep_params[rep] = {
30+
"s": s,
31+
"fpos": fpos,
32+
}
833

34+
# ─── Targets ───
935
rule all:
1036
input:
11-
[],
37+
#expand("simulations/rep_{rep}.vcf", rep=REPS),
38+
#expand("simulations/rep_{rep}.vcf.fixed", rep=REPS),
39+
expand("stats/rep_{rep}.stats.tsv", rep=REPS),
40+
#"filename.txt"
41+
"mapped_reads/A.bam"
42+
43+
# ─── Run SLiM for each replicate ───
44+
rule simulate:
45+
output:
46+
vcf = "simulations/rep_{rep}.vcf.gz",
47+
fixed = "simulations/rep_{rep}.vcf.fixed.gz"
48+
params:
49+
script = REPO + "/scripts/fpos_sims.slim",
50+
s = lambda wc: rep_params[int(wc.rep)]["s"],
51+
fpos = lambda wc: rep_params[int(wc.rep)]["fpos"],
52+
vcf_base = lambda wc: f"simulations/rep_{wc.rep}.vcf"
53+
shell:
54+
r"""
55+
mkdir -p simulations
56+
57+
# 1) Run SLiM, passing VCF_OUT as an Eidos string literal:
58+
slim \
59+
-d FPOS={params.fpos} \
60+
-d SBEN={params.s} \
61+
-d 'VCF_OUT="{params.vcf_base}"' \
62+
{params.script}
63+
64+
# 2) Compress outputs
65+
gzip -c {params.vcf_base} > {output.vcf}
66+
gzip -c {params.vcf_base}.fixed > {output.fixed}
67+
68+
# 3) Clean up intermediates
69+
rm {params.vcf_base} {params.vcf_base}.fixed
70+
"""
71+
72+
73+
# ─── 2) Compute summary stats ───
74+
rule summarize:
75+
priority: 100
76+
input:
77+
vcf = "simulations/rep_{rep}.vcf.gz",
78+
fixed = "simulations/rep_{rep}.vcf.fixed.gz"
79+
output:
80+
stats = "stats/rep_{rep}.stats.tsv"
81+
params:
82+
script = REPO + "/scripts/richer_summary_stats_fromvcf.py",
83+
window_length = 10000,
84+
max_dist_ld = 1000,
85+
bin_size_ld = 100,
86+
seq_length = 1e8,
87+
seed = 42,
88+
s = lambda wc: rep_params[int(wc.rep)]["s"],
89+
fpos = lambda wc: rep_params[int(wc.rep)]["fpos"]
90+
shell:
91+
r"""
92+
mkdir -p stats
93+
python {params.script} \
94+
--vcf {input.vcf} \
95+
--window_length {params.window_length} \
96+
--output {output.stats} \
97+
--max_dist_ld {params.max_dist_ld} \
98+
--bin_size_ld {params.bin_size_ld} \
99+
--sequence_length {params.seq_length} \
100+
--n_individuals 50 \
101+
--seed {params.seed}
102+
103+
# prepend the simulation parameters as the first two columns
104+
mv {output.stats} {output.stats}.tmp
105+
awk -v s={params.s} -v f={params.fpos} 'BEGIN {{OFS="\t"}} NR==1 {{print "s","fpos",$0; next}} {{print s,f,$0}}' \
106+
{output.stats}.tmp > {output.stats}
107+
rm {output.stats}.tmp
108+
"""
109+
110+
111+
112+
rule bwa_map:
113+
input:
114+
"data/genome.fa",
115+
"data/samples/A.fastq"
116+
output:
117+
"mapped_reads/A.bam"
118+
shell:
119+
"echo hello > {output}"
120+
121+
122+
rule new:
123+
input:

workflow/profiles/default/config.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,3 @@ software-deployment-method:
77
- conda
88
printshellcmds: True
99
show-failed-logs: True
10-
cores: 32
11-
local-cores: 4

workflow/scripts/fpos_sims.slim

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*******************************************************************
2+
* Simple one-population burn-in with parametric beneficial DFE
3+
* - One pop p1 of size N (10,000)
4+
* - Burn-in for 10N generations, then output a polymorphism VCF
5+
* - Neutral fraction fixed at 0.5
6+
* - Beneficial fraction = FPOS; mean s of exponential = SBEN
7+
* - Deleterious DFE unchanged (gamma mean -0.05, shape 0.3)
8+
*******************************************************************/
9+
10+
initialize() {
11+
// --- constants (all defined here) ---
12+
// comment out and include in command line if you want to
13+
// jointly infer them
14+
defineConstant("GENOME_SIZE", 2000000);
15+
defineConstant("MU", 3e-9);
16+
defineConstant("RHO", 3e-8);
17+
18+
defineConstant("N", 10000);
19+
// defineConstant("FPOS", 0.0002); // fraction beneficial among all new muts
20+
// defineConstant("SBEN", 0.0125); // mean s (>0) for exponential beneficial DFE
21+
defineConstant("SAMPLE_N", 200); // diploids to write to VCF
22+
defineConstant("NEU_FRAC", 0.5);
23+
defineConstant("DEL_FRAC", NEU_FRAC - FPOS); // = 0.5 - FPOS
24+
25+
26+
// rates and mutation types
27+
initializeMutationRate(MU);
28+
initializeMutationType("m1", 0.5, "f", 0.0); // neutral
29+
initializeMutationType("m2", 0.5, "g", -0.05, 0.3); // deleterious γ
30+
initializeMutationType("m3", 0.5, "e", SBEN); // beneficial exp (mean s = SBEN)
31+
32+
// mixed genome: 50% m1, (0.5 - FPOS) m2, FPOS m3
33+
initializeGenomicElementType("g1",
34+
c(m1, m2, m3),
35+
c(0.50, DEL_FRAC, FPOS)
36+
);
37+
initializeGenomicElement(g1, 0, GENOME_SIZE - 1);
38+
39+
initializeRecombinationRate(RHO);
40+
}
41+
42+
/*** generation 1: create the single population ***/
43+
1 early() {
44+
sim.addSubpop("p1", N);
45+
catn("Start: N=" + N + " burn-in=" + 100000 +
46+
" fractions: neu=0.5 del=" + DEL_FRAC + " ben=" + FPOS +
47+
" SBEN=" + SBEN + " MU=" + MU + " RHO=" + RHO);
48+
}
49+
50+
/*** end of burn-in: sample and write VCF, then finish ***/
51+
100000 late() {
52+
nOut = min(SAMPLE_N, p1.individualCount);
53+
inds = p1.sampleIndividuals(nOut);
54+
inds.genomes.outputVCF(VCF_OUT, F); // polymorphism-only VCF
55+
sim.outputFixedMutations(filePath=VCF_OUT + ".fixed", append=F);
56+
catn("DONE at gen " + sim.cycle +
57+
" | wrote VCF for " + nOut + " diploids -> " + VCF_OUT);
58+
sim.simulationFinished();
59+
}

0 commit comments

Comments
 (0)