-
Notifications
You must be signed in to change notification settings - Fork 1
/
density.c
148 lines (125 loc) · 5.83 KB
/
density.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#include "generic.h"
int density_usage(){
fprintf(stderr, "\nAnalyzing ChIP-seq data, generating density and reports.\n");
fprintf(stderr, "Please notice that if reads mapped to the chromosomes which are not in the size file, those reads will be discarded.\n\n");
fprintf(stderr, "Usage: methylQA density [options] <chromosome size file> <bam/sam alignment file>\n\n");
fprintf(stderr, "Options: -S input is SAM [off]\n");
fprintf(stderr, " -Q unique reads mapping Quality threshold [10]\n");
fprintf(stderr, " -r keep redundant reads [off]\n");
fprintf(stderr, " -T treat 1 paired-end read as 2 single-end reads [off]\n");
fprintf(stderr, " -D keep reads if only one end mapped in a pair [off]\n");
fprintf(stderr, " -C add 'chr' string as prefix of reference sequence [off]\n");
fprintf(stderr, " -E extend reads to represent fragment [150], specify 0 if want no extension\n");
fprintf(stderr, " -I insert length threshold [500]\n");
fprintf(stderr, " -o output prefix [basename of input without extension]\n");
fprintf(stderr, " -h help message\n");
fprintf(stderr, " -? help message\n");
fprintf(stderr, "\n");
return 1;
}
/* main function */
int main_density (int argc, char *argv[]) {
char *output, *outReportfile, *outExtfile, *outbedGraphfile, *outbigWigfile, *outInsertfile, *outGenomeCov;
unsigned long long int *cnt;
int optSam = 0, c, optDup = 1, optaddChr = 0, optDis = 1, optTreat = 0;
unsigned int optQual = 10, optExt = 150, optisize = 500;
char *optoutput = NULL;
time_t start_time, end_time;
start_time = time(NULL);
struct slInt *cpgCount = NULL;
struct slInt *slPair = NULL;
long long fragbase = 0;
while ((c = getopt(argc, argv, "SQ:rTDCo:E:I:h?")) >= 0) {
switch (c) {
case 'S': optSam = 1; break;
case 'Q': optQual = (unsigned int)strtol(optarg, 0, 0); break;
case 'r': optDup = 0; break;
case 'T': optTreat = 1; break;
case 'D': optDis = 0; break;
case 'C': optaddChr = 1; break;
case 'E': optExt = (unsigned int)strtol(optarg, 0, 0); break;
case 'I': optisize = (unsigned int)strtol(optarg, 0, 0); break;
case 'o': optoutput = strdup(optarg); break;
case 'h':
case '?': return density_usage(); break;
default: return 1;
}
}
if (optind + 2 > argc)
return density_usage();
char *chr_size_file = argv[optind];
char *sam_file = argv[optind+1];
//struct hash *genome = newHash(0);
struct hash *hash = hashNameIntFile(chr_size_file);
struct hash *cpgHash = newHash(0);
if(optoutput) {
output = optoutput;
} else {
output = cloneString(get_filename_without_ext(basename(sam_file)));
}
if(asprintf(&outExtfile, "%s.extended.bed", output) < 0)
errAbort("Mem Error.\n");
if(asprintf(&outbedGraphfile, "%s.extended.bedGraph", output) < 0)
errAbort("Mem Error.\n");
if(asprintf(&outbigWigfile, "%s.bigWig", output) < 0)
errAbort("Mem Error.\n");
if (asprintf(&outReportfile, "%s.report", output) < 0)
errAbort("Preparing output wrong");
if (asprintf(&outInsertfile, "%s.insertdistro", output) < 0)
errAbort("Preparing output wrong");
if (asprintf(&outGenomeCov, "%s.genomeCoverage", output) < 0)
errAbort("Preparing output wrong");
//sam file to bed file
fprintf(stderr, "* Parsing the SAM/BAM file\n");
cnt = sam2bedwithCpGstat(sam_file, outExtfile, hash, cpgHash, &cpgCount, &slPair, optSam, optQual, optDup, optaddChr, optDis, optisize, optExt, optTreat);
//sort
//fprintf(stderr, "\n* Sorting\n");
//bedSortFile(outBedfile, outBedfile);
//remove dup
//fprintf(stderr, "* Removing duplication\n");
//uniqueBed = removeBedDup(outBedfile, outFilterfile);
//extend and write extend bed
//fprintf(stderr, "* Extending to %d and writing extended bed\n", arguments.extend);
//int extendWarn = extendBed(hash, arguments.extend, outFilterfile, outExtfile);
//if (extendWarn == 1)
// outExtfile = cloneString(outFilterfile);
//
//if (extendWarn != 1){
//sort extend bed
// fprintf(stderr, "* Sorting extended bed\n");
// bedSortFile(outExtfile, outExtfile);
//}
plotMappingStat(cnt, output);
if(slPair != NULL){
fprintf(stderr, "* Generating fragments size stats\n");
//writeInsertsize(slPair, outInsertfile);
fragbase = plotInsertsize(slPair, output); //quite time consuming -- fixed
}
fprintf(stderr, "* fragments total base: %lli\n", fragbase);
//sort extend bed
fprintf(stderr, "* Sorting extended bed\n");
sortBedfile(outExtfile);
//bedItemOverlap step
fprintf(stderr, "* Generating bedGraph\n");
bedItemOverlapCount(hash, outExtfile, outbedGraphfile);
//generate bigWig
fprintf(stderr, "* Generating bigWig\n");
//bigWigFileCreate(outbedGraphfile, chr_size_file, 256, 1024, 0, 1, outbigWigfile);
bedGraphToBigWig(outbedGraphfile, chr_size_file, outbigWigfile);
fprintf(stderr, "* Calculating genome coverage\n");
struct hash *covhash = calGenomeCovBedGraph(chr_size_file, outbedGraphfile);
writeGenomeCov(covhash, outGenomeCov);
//plotGenomeCov(covhash, output);
//write report file
fprintf(stderr, "* Preparing report file\n");
writeReportDensity(outReportfile, cnt, optQual);
//cleaning
hashFree(&hash);
free(outExtfile);
free(outbedGraphfile);
free(outbigWigfile);
free(outReportfile);
end_time = time(NULL);
fprintf(stderr, "* Done, time used %.0f seconds.\n", difftime(end_time, start_time));
return 0;
}