austintdaigle
diff --git a/‎data/genome.fa‎ b/‎data/genome.fa‎
diff --git a/‎data/samples/A.fastq‎ b/‎data/samples/A.fastq‎
diff --git a/‎mapped_reads/A.bam‎
Lines changed: 1 addition & 0 deletions b/‎mapped_reads/A.bam‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎workflow/Snakefile‎
Lines changed: 114 additions & 2 deletions b/‎workflow/Snakefile‎
Lines changed: 114 additions & 2 deletions
diff --git a/‎workflow/profiles/default/config.yaml‎
Lines changed: 0 additions & 2 deletions b/‎workflow/profiles/default/config.yaml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎workflow/scripts/fpos_sims.slim‎
Lines changed: 59 additions & 0 deletions b/‎workflow/scripts/fpos_sims.slim‎
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1 @@
+hello
@@ -2,10 +2,122 @@ from snakemake.utils import min_version
 
 min_version("8.0")
 
-
 include: "rules/common.smk"
+import math
+import random
+
+# Directory that contains this Snakefile; used to build absolute paths to the
+# helper scripts under scripts/. `workflow.basedir` is provided by Snakemake,
+# so the workflow is not tied to one user's checkout location.
+REPO = str(workflow.basedir)
+
+# ─── Simulation settings ───
+N_REPS      = 5
+
+# Fixed seed for the prior draws below. This Snakefile is re-evaluated on every
+# Snakemake invocation, so unseeded draws would hand `summarize` different
+# (s, fpos) values than the ones `simulate` was actually run with.
+PRIOR_SEED  = 42
+
+# log-uniform prior for s; linear-uniform prior for fpos
+S_MIN, S_MAX       = 1e-4, 5e-2
+FPOS_MIN, FPOS_MAX = 0.0,  0.05
+
+REPS = list(range(1, N_REPS + 1))
+
+# Private generator: reproducible, and leaves the global `random` state alone.
+# Draws are sequential, so raising N_REPS keeps the earlier replicates' values.
+_prior_rng = random.Random(PRIOR_SEED)
+rep_params = {}
+for rep in REPS:
+    # draw s on the log scale
+    log_s = _prior_rng.uniform(math.log10(S_MIN), math.log10(S_MAX))
+    s     = 10 ** log_s
+
+    # draw fpos uniformly between FPOS_MIN and FPOS_MAX
+    fpos  = _prior_rng.uniform(FPOS_MIN, FPOS_MAX)
 
+    rep_params[rep] = {
+        "s":    s,
+        "fpos": fpos,
+    }
 
+# ─── Targets ───
+# Default target rule: requests one summary-statistics table per replicate in
+# REPS (produced by `summarize`) plus the output file of `bwa_map`.
 rule all:
     input:
-        [],
+        #expand("simulations/rep_{rep}.vcf",       rep=REPS),
+        #expand("simulations/rep_{rep}.vcf.fixed", rep=REPS),
+        expand("stats/rep_{rep}.stats.tsv",    rep=REPS),
+        #"filename.txt"
+        "mapped_reads/A.bam"
+
+# ─── Run SLiM for each replicate ───
+# Runs fpos_sims.slim once per {rep}, passing that replicate's (s, fpos) draw
+# from rep_params, then gzips the two files SLiM wrote and removes the
+# uncompressed copies.
+rule simulate:
+    output:
+        vcf   = "simulations/rep_{rep}.vcf.gz",
+        fixed = "simulations/rep_{rep}.vcf.fixed.gz"
+    params:
+        script   = REPO + "/scripts/fpos_sims.slim",
+        # wildcards arrive as strings; rep_params is keyed by int
+        s        = lambda wc: rep_params[int(wc.rep)]["s"],
+        fpos     = lambda wc: rep_params[int(wc.rep)]["fpos"],
+        # uncompressed path handed to SLiM as VCF_OUT; the script appends
+        # ".fixed" to it for the fixed-mutation output
+        vcf_base = lambda wc: f"simulations/rep_{wc.rep}.vcf"
+    shell:
+        r"""
+        mkdir -p simulations
+
+        # 1) Run SLiM, passing VCF_OUT as an Eidos string literal:
+        slim \
+          -d FPOS={params.fpos} \
+          -d SBEN={params.s} \
+          -d 'VCF_OUT="{params.vcf_base}"' \
+          {params.script}
+
+        # 2) Compress outputs
+        gzip -c {params.vcf_base}        > {output.vcf}
+        gzip -c {params.vcf_base}.fixed  > {output.fixed}
+
+        # 3) Clean up intermediates
+        rm {params.vcf_base} {params.vcf_base}.fixed
+        """
+
+
+# ─── 2) Compute summary stats ───
+# Runs the summary-statistics script on one replicate's VCF, then prepends that
+# replicate's simulation parameters (s, fpos) as the first two columns of the
+# resulting table.
+rule summarize:
+    priority: 100
+    input:
+        vcf   = "simulations/rep_{rep}.vcf.gz",
+        # not read by the shell command below; declared as a dependency only
+        fixed = "simulations/rep_{rep}.vcf.fixed.gz"
+    output:
+        stats = "stats/rep_{rep}.stats.tsv"
+    params:
+        script        = REPO + "/scripts/richer_summary_stats_fromvcf.py",
+        window_length = 10000,
+        max_dist_ld   = 1000,
+        bin_size_ld   = 100,
+        # NOTE(review): fpos_sims.slim sets GENOME_SIZE = 2000000, not 1e8 — confirm
+        seq_length    = 1e8,
+        # NOTE(review): fpos_sims.slim sets SAMPLE_N = 200 — confirm 50 is intended
+        n_individuals = 50,
+        seed          = 42,
+        # wildcards arrive as strings; rep_params is keyed by int
+        s             = lambda wc: rep_params[int(wc.rep)]["s"],
+        fpos          = lambda wc: rep_params[int(wc.rep)]["fpos"]
+    shell:
+        r"""
+        mkdir -p stats
+        python {params.script} \
+            --vcf {input.vcf} \
+            --window_length {params.window_length} \
+            --output {output.stats} \
+            --max_dist_ld {params.max_dist_ld} \
+            --bin_size_ld {params.bin_size_ld} \
+            --sequence_length {params.seq_length} \
+            --n_individuals {params.n_individuals} \
+            --seed {params.seed}
+
+        # prepend the simulation parameters as the first two columns
+        mv {output.stats} {output.stats}.tmp
+        awk -v s={params.s} -v f={params.fpos} 'BEGIN {{OFS="\t"}} NR==1 {{print "s","fpos",$0; next}} {{print s,f,$0}}' \
+            {output.stats}.tmp > {output.stats}
+        rm {output.stats}.tmp
+        """
+
+
+
+# Placeholder rule: despite its name, the command does not run bwa. It only
+# writes the text "hello" to the output path; the two inputs are declared but
+# never read by the command.
+rule bwa_map:
+    input:
+        "data/genome.fa",
+        "data/samples/A.fastq"
+    output:
+        "mapped_reads/A.bam"
+    shell:
+        "echo hello > {output}"
+
+
+rule new: 
+    input:
@@ -7,5 +7,3 @@ software-deployment-method:
   - conda
 printshellcmds: True
 show-failed-logs: True
-cores: 32
-local-cores: 4
@@ -0,0 +1,59 @@
+/*******************************************************************
+ * Simple one-population burn-in with parametric beneficial DFE
+ * - One pop p1 of size N (10,000)
+ * - Burn-in for 10N generations, then output a polymorphism VCF
+ * - Neutral fraction fixed at 0.5
+ * - Beneficial fraction = FPOS; mean s of exponential = SBEN
+ * - Deleterious DFE unchanged (gamma mean -0.05, shape 0.3)
+ *******************************************************************/
+
+// Defines the model constants, the three mutation types and a single genomic
+// element spanning the whole chromosome. FPOS, SBEN and VCF_OUT are expected
+// on the command line (slim -d ...); FPOS and SBEN fall back to defaults.
+initialize() {
+    // --- constants (all defined here) ---
+    // comment out and include in command line if you want to
+    // jointly infer them
+    defineConstant("GENOME_SIZE", 2000000);
+    defineConstant("MU",          3e-9);
+    defineConstant("RHO",         3e-8);
+
+    defineConstant("N",        10000);
+
+    // Defaults apply only when the value was not passed with -d, so the
+    // script also runs stand-alone.
+    if (!exists("FPOS"))
+        defineConstant("FPOS", 0.0002);         // fraction beneficial among all new muts
+    if (!exists("SBEN"))
+        defineConstant("SBEN", 0.0125);         // mean s (>0) for exponential beneficial DFE
+
+    // Fail here rather than at the end of the burn-in, when the output
+    // path is first needed.
+    if (!exists("VCF_OUT"))
+        stop("VCF_OUT is not defined; pass the output path on the command line with -d.");
+
+    defineConstant("SAMPLE_N", 200);            // diploids to write to VCF
+    defineConstant("NEU_FRAC",   0.5);
+    // Whatever is neither neutral nor beneficial is deleterious; with
+    // NEU_FRAC = 0.5 this is 0.5 - FPOS.
+    defineConstant("DEL_FRAC",   1.0 - NEU_FRAC - FPOS);
+
+
+    // rates and mutation types
+    initializeMutationRate(MU);
+    initializeMutationType("m1", 0.5, "f",  0.0);          // neutral
+    initializeMutationType("m2", 0.5, "g", -0.05, 0.3);    // deleterious γ
+    initializeMutationType("m3", 0.5, "e",  SBEN);         // beneficial exp (mean s = SBEN)
+
+    // mixed genome: NEU_FRAC m1, DEL_FRAC m2, FPOS m3
+    initializeGenomicElementType("g1",
+        c(m1,       m2,       m3),
+        c(NEU_FRAC, DEL_FRAC, FPOS)
+    );
+    initializeGenomicElement(g1, 0, GENOME_SIZE - 1);
+
+    initializeRecombinationRate(RHO);
+}
+
+/*** tick 1: found the single population and log the run's settings ***/
+1 early() {
+    sim.addSubpop("p1", N);
+
+    // assemble the banner in pieces; the printed text is unchanged
+    sizes     = "Start: N=" + N + "  burn-in=" + 100000;
+    fractions = "  fractions: neu=0.5 del=" + DEL_FRAC + " ben=" + FPOS;
+    rates     = "  SBEN=" + SBEN + "  MU=" + MU + "  RHO=" + RHO;
+    catn(sizes + fractions + rates);
+}
+
+/*** end of burn-in: sample individuals, write both output files, then stop ***/
+100000 late() {
+    // never request more individuals than the population holds
+    sampleSize = min(SAMPLE_N, p1.individualCount);
+    sampled    = p1.sampleIndividuals(sampleSize);
+
+    // VCF of the sampled individuals' genomes, written to VCF_OUT
+    sampled.genomes.outputVCF(VCF_OUT, F);
+
+    // fixed mutations go to a sibling file named VCF_OUT + ".fixed"
+    fixedPath = VCF_OUT + ".fixed";
+    sim.outputFixedMutations(filePath=fixedPath, append=F);
+
+    doneMsg = "DONE at gen " + sim.cycle +
+              " | wrote VCF for " + sampleSize + " diploids -> " + VCF_OUT;
+    catn(doneMsg);
+    sim.simulationFinished();
+}