-
Notifications
You must be signed in to change notification settings - Fork 5
/
extract_guides.py
68 lines (56 loc) · 2.04 KB
/
extract_guides.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import argparse
import build
import common
from padding import (
get_gene_to_padding,
set_to_none_if_padding_not_provided
)
def parse_args():
    """Parse the command-line options of the guide-extraction script.

    Options:
        --library   (required) file opened for reading.
        --genes     (required) file opened for reading.
        --padding   (optional) file opened for reading; ``None`` if omitted.
        --max-cuts  (optional) integer; max number of cuts to keep per gene.
        --output    (required) file opened for writing.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    # One (flag, keyword-arguments) entry per option, registered in order.
    option_specs = (
        ("--library", {"metavar": "file",
                       "type": argparse.FileType("r"),
                       "required": True}),
        ("--genes", {"metavar": "file",
                     "type": argparse.FileType("r"),
                     "required": True}),
        ("--padding", {"metavar": "file",
                       "type": argparse.FileType("r")}),
        ("--max-cuts", {"help": "Max number of cuts to keep per gene.",
                        "type": int}),
        ("--output", {"help": "Path to the output file.",
                      "type": argparse.FileType("w"),
                      "required": True}),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def main():
    """Write the library guides selected for the requested genes.

    Reads gene names and library guides (one stripped entry per line) from
    the files given on the command line, builds one ``common.Gene`` per gene
    name (with its padding entry when the padding map has one), loads targets
    and cuts each gene with the library.  The guides selected for every gene
    that are also present in the library are de-duplicated and written to
    ``--output`` in sorted order, one per line.
    """
    def read_file(f):
        # One stripped entry per line of the open file.
        return [e.strip() for e in f.readlines()]

    args = parse_args()
    gene_names = read_file(args.genes)
    library = read_file(args.library)
    padding_file = set_to_none_if_padding_not_provided(args.padding)
    gene_to_padding = get_gene_to_padding(padding_file)

    # Set up Gene objects.
    genes = [
        common.Gene(gname, gene_to_padding.get(gname))
        for gname in gene_names
    ]
    # Plain loops: these calls are made for their side effects only.  They
    # stay as two passes so every gene has its targets loaded before any
    # gene is cut, exactly as before.
    for gene in genes:
        gene.load_targets("dna_good_5_9_18.txt")
    for gene in genes:
        gene.cut_with_library(library)

    # Build the membership set once instead of scanning the list per guide.
    library_guides = set(library)

    # Select all guides that are part of the cuts.
    guides = set()
    for gene in genes:
        # NOTE: a --max-cuts of 0 is falsy and therefore treated the same
        # as "not provided" (all cuts are kept).
        if args.max_cuts:
            # Trim only the current gene.  Previously every gene was
            # re-trimmed on each iteration of this loop, which was
            # redundant, quadratic work.
            # presumably trim_library returns the guides kept for the gene
            # -- verify against common.Gene.
            gene_guides = gene.trim_library(args.max_cuts)
        else:
            gene_guides = [t.guide for t in gene.cuts]
        guides.update(
            guide for guide in gene_guides if guide in library_guides)

    for guide in sorted(guides):
        args.output.write("{}\n".format(guide))
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()