-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdisplay_bedtools.Rmd
105 lines (68 loc) · 3.32 KB
/
display_bedtools.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
---
title: "bedtools_display.Rmd"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = '/home/tkastylevsky/results/bedtools_analysis')
```
## R Markdown
ici, on va utiliser la sortie du code repeatmasker_to_bedtools et l'afficher sous forme de barplot histoire de pouvoir quantifier un peu en fonction des familles la qualité de notre annotation.
le code la tout de suite est fait pour le chr16 (il faut que je me fasse des fonctions...)
libraries
```{r}
library(csv)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggsci)
```
importation des fichiers
```{r}
#shape_intersect = function(path){
path = "/home/tkastylevsky/results/results_bedtools/chr1_libchr1_ref_chr1.txt"
rep_intersect = read.csv(path, header=TRUE)
names(rep_intersect) = list('A_query_seq','A_method','A_class','A_start','A_end', 'A_perc_div','A_strand','A_phase','A_ID','B_query_seq','B_method','B_class','B_start','B_end', 'B_perc_div','B_strand','B_phase','B_ID','intersect')
rep_intersect$repeat_class_B = gsub("?",'',rep_intersect$repeat_class_B, fixed = TRUE)
rep_intersect[rep_intersect$repeat_class_A == 'Unspecified',]$repeat_class_A = 'Unknown'
rep_intersect[rep_intersect$repeat_class_B == 'Unspecified',]$repeat_class_B = 'Unknown'
rep_intersect[rep_intersect$repeat_class_B == 'snRNA',]$repeat_class_B = 'snRNA/tRNA'
rep_intersect[rep_intersect$repeat_class_B == 'tRNA',]$repeat_class_B = 'snRNA/tRNA'
rep_intersect[rep_intersect$repeat_class_A == 'snRNA',]$repeat_class_A = 'snRNA/tRNA'
rep_intersect[rep_intersect$repeat_class_A == 'tRNA',]$repeat_class_A = 'snRNA/tRNA'
# return (rep_intersect)
# }
#rep_intersect = shape_intersect(path)
```
```{r}
best_hits_data = rep_intersect %>% group_by(A_ID) %>% top_n(1, intersect_A)
write.csv(best_hits_data,"/home/tkastylevsky/results/bedtools_analysis/chk_galgal6_chr16_repeat_fam_intersect_besthit_data.csv", row.names = FALSE)
best_hits_ref = rep_intersect %>% group_by(B_ID) %>% top_n(1, intersect_B)
```
```{r}
best_hits_data_class = best_hits_data %>% group_by(repeat_class_A) %>% count(repeat_class_B)
ans1 = ggplot(best_hits_data_class, aes(fill=repeat_class_B, y=n, x=repeat_class_A)) +
geom_bar(position="stack", stat="identity") +
scale_y_log10()+
scale_fill_brewer(palette="Paired") +
ggsave("besthits_chr1libwhole_on_class_chr1libchr1.png", height = 5, width =10)
ans2 = ggplot(best_hits_data_class, aes(fill=repeat_class_B, y=n, x=repeat_class_A)) +
geom_bar(position="fill", stat="identity") +
scale_fill_brewer(palette="Paired") +
ggsave("besthits_chr1libwhole_on_class_chr1libchr1_perc.png", height = 5, width =10)
ans2
```
```{r}
best_hits_ref_class = best_hits_ref %>% group_by(repeat_class_B) %>% count(repeat_class_A)
ans3 = ggplot(best_hits_ref_class, aes(fill=repeat_class_A, y=n, x=repeat_class_B)) +
geom_bar(position="stack", stat="identity") +
scale_y_log10() +
scale_fill_brewer(palette="Paired")# +
ggsave("besthits_chr1libchr1_on_class_chr1libwhole.png", height = 5, width =10)
ans4 = ggplot(best_hits_ref_class, aes(fill=repeat_class_A, y=n, x=repeat_class_B)) +
geom_bar(position="fill", stat="identity") +
scale_fill_brewer(palette="Paired")# +
ggsave("besthits_chr1libchr1_on_class_chr1libwhole_perc.png", height = 5, width =10)
```
```{r}
```