-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathti-tv-ratio.sql
More file actions
40 lines (40 loc) · 1.17 KB
/
ti-tv-ratio.sql
File metadata and controls
40 lines (40 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Ti/Tv ratio (transitions divided by transversions) of single-base
# substitutions, reported per fixed-size genomic window on each reference.
SELECT
  reference_name,
  window_index * _WINDOW_SIZE_ AS window_start,
  transitions,
  transversions,
  transitions / transversions AS titv,
  num_variants_in_window
FROM (
  # Tally each substitution class for every (reference, window) pair.
  SELECT
    reference_name,
    window_index,
    # Transitions: A<->G and C<->T.
    SUM(IF(substitution IN ('A->G', 'G->A', 'C->T', 'T->C'), 1, 0)) AS transitions,
    # Transversions: the eight remaining single-base changes.
    SUM(IF(substitution IN ('A->C', 'C->A', 'A->T', 'T->A',
                            'C->G', 'G->C', 'G->T', 'T->G'), 1, 0)) AS transversions,
    COUNT(substitution) AS num_variants_in_window
  FROM (
    # One row per variant: its window index and a 'REF->ALT' label.
    SELECT
      reference_name,
      reference_bases,
      alternate_bases,
      INTEGER(FLOOR(start / _WINDOW_SIZE_)) AS window_index,
      CONCAT(reference_bases, '->', alternate_bases) AS substitution,
      COUNT(alternate_bases) WITHIN RECORD AS alt_count
    FROM
      [_THE_TABLE_]
    # Optionally add a clause here to restrict the query to a particular
    # region of the genome.
    #_WHERE_
    HAVING
      # Skip 1/2 genotypes and non-SNP variants: keep only records with
      # exactly one alternate allele where both alleles are a single base.
      alt_count = 1
      AND reference_bases IN ('A', 'C', 'G', 'T')
      AND alternate_bases IN ('A', 'C', 'G', 'T'))
  GROUP BY
    reference_name,
    window_index)
ORDER BY
  reference_name,
  window_start