-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi_gene_name.cpp
More file actions
89 lines (69 loc) · 2.94 KB
/
api_gene_name.cpp
File metadata and controls
89 lines (69 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/* Takes a .txt file (the list of specific genes a researcher wants to study) and a .tsv file (the genes present in the matrix table)
 * and returns all the different possible names of these genes that are also present in the matrix table.
 *
 * The API works as follows: read the gene .tsv file and store all its gene names in a set,
 * then read the gene .txt file and store its genes in a vector,
 * then call the API on this vector to create a set with all the possible names of those genes,
 * then take the intersection of the two sets (so that we only keep the names we are interested in),
 * and finally transform that set into a vector and return this vector.
 */
#include "api_gene_name.h"
#include "readgenetxt.h"
#include "only_gene_name.h"
#include "read_tsv_set.h"
#include "intersection_sets.h"
#include "api.h"
#include <stdio.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include <QJsonDocument>
// Default constructor: there is nothing to set up, so let the compiler generate it.
api_gene_name::api_gene_name() = default;
std::vector<std::string> api_gene_name::api_gene_name_funtion(std::string geneNameFile, std::string geneSubsetFile){
//first we open the set of the genes the researcher is interested and call the
//API on each gene to find all their different names
std::cout<<"entered gene_name_function"<<std::endl;
readgenetxt vec;
std::vector<std::string> res;
res=vec.listgene(geneSubsetFile); //list of all the genes in the txt file
CURL *curl = curl_easy_init();
std::set<std::string> final_set;
for (unsigned int i=0; i<res.size();i++){
std::cout<<"loop number: "<<i<<std::endl;
std::string search=res[i];
//API call
std::string l = searchHGNC(search, curl);
only_gene_name test; //search other name of this specific gene in the string l
std::set<std::string> small_set;
small_set=test.listgene(l, search);
final_set.insert(small_set.begin(), small_set.end()); //add the set of the names of this gene in the set of all the genes
}
curl_easy_cleanup(curl);
//Now let us create the set of all the gene of the matrix file
read_tsv_set test2;
std::set<std::string> string_set_gene_matrix;
string_set_gene_matrix=test2.read_tsv(geneNameFile);
//Now we take the intersection of the 2 sets, to keep only the genes we are interrested in
intersection_sets test3;
std::set<std::string> intersection_set;
intersection_set = test3.set_intersection(final_set, string_set_gene_matrix);
std::cout << std::endl;
std::cout<<"[Progress API gene name finished], Intersection set is: "<<std::endl;
for (std::string x : intersection_set) {
std::cout << x << " ";
}
std::vector<std::string> v = std::vector<std::string>();
for (std::string x : intersection_set) {
std::cout << x << " ";
v.push_back(x);
}
std::cout << std::endl;
std::cout << "Set operations finished" << std::endl;
return v;
}