Skip to content

Commit 89a6d9f

Browse files
committed
tmp
1 parent a0ba430 commit 89a6d9f

5 files changed

Lines changed: 135 additions & 14 deletions

File tree

src/assembly_graph.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ static void asm_edge_cc(struct asm_graph_t *g, gint_t *id_edge, gint_t **ret_siz
775775
free(q);
776776
}
777777

778-
static inline uint64_t get_bandage_count(struct asm_edge_t *e, int ksize)
778+
uint64_t get_bandage_count(struct asm_edge_t *e, int ksize)
779779
{
780780
float cov = __get_edge_cov(e, ksize);
781781
uint32_t i, len = e->seq_len;

src/assembly_graph.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ struct asm_graph_t {
131131

132132
#define MIN_BARCODE_RATIO 0.044
133133
#define MIN_SUB_BARCODE_RATIO 0.022
134-
#define CONTIG_PARTITION_LEN 1500
134+
#define CONTIG_PARTITION_LEN 500
135135
#define MIN_COMPONENT 250
136136

137137
/************************* Build graph ultilities *****************************/
@@ -304,5 +304,6 @@ void asm_clone_graph(struct asm_graph_t *g0, struct asm_graph_t *g1,
304304
char *tmp_name);
305305
void asm_join_edge3_wrapper(struct asm_graph_t *g, gint_t e1, gint_t e2, gint_t e3, int count);
306306
void asm_join_edge_wrapper(struct asm_graph_t *g, gint_t e1, gint_t e2);
307+
inline uint64_t get_bandage_count(struct asm_edge_t *e, int ksize);
307308

308309
#endif /* __ASSEMBLY_GRAPH_H__ */

src/process.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,8 +627,14 @@ void resolve_1_2_process(struct opt_proc_t *opt)
627627
init_logger(opt->log_level, log_file);
628628
init_clock();
629629
struct asm_graph_t *g0 = create_and_load_graph(opt);
630+
struct asm_graph_t *g1 = calloc(1, sizeof(struct asm_graph_t));
630631
log_info("resolve 1_2 process");
631632
resolve_1_2(g0,opt);
633+
// asm_lazy_condense(g0);
634+
asm_condense(g0, g1);
635+
log_info("condense done");
636+
g0 = g1;
637+
test_asm_graph(g0);
632638
save_graph_info(opt->out_dir, g0, "level_2_huu");
633639
asm_graph_destroy(g0);
634640
free(g0);

src/resolve_big.c

Lines changed: 121 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ int join_3_and_count(struct asm_edge_t *left, struct asm_edge_t *right, struct a
7171
copy_seq32_seq8(left->seq, left->seq_len - graph_ksize - left_len, new_seq, 0, left_len);
7272
copy_seq32_seq8(mid->seq, 0, new_seq, left_len, mid->seq_len);
7373
copy_seq32_seq8(right->seq, graph_ksize, new_seq, left_len + mid_len, right_len);
74-
print_u8_seq(new_seq, new_len);
74+
// print_u8_seq(new_seq, new_len);
7575

7676
//todo 1: does not need to copy all middle seq
7777

@@ -172,6 +172,104 @@ int is_case_2_1_2(struct asm_graph_t *g, int i_e)
172172
return 1;
173173
}
174174

175+
#define SWAP(a, b) {a+=b;b=a-b;a=a-b;}
176+
177+
void fwrite_gfa(FILE *f, struct asm_graph_t *g, int i_e)
178+
{
179+
assert(g->edges[i_e].source != -1);
180+
int i_e_rc = g->edges[i_e].rc_id;
181+
if (i_e > i_e_rc) {
182+
SWAP(i_e, i_e_rc);
183+
}
184+
185+
uint64_t fake_count = get_bandage_count(g->edges + i_e, g->ksize);
186+
fprintf(f, "S\t%lld_%lld\t%s\tKC:i:%llu\n", (long long) i_e,
187+
(long long) i_e_rc, g->edges[i_e].seq, (long long unsigned) fake_count);
188+
189+
int source = g->edges[i_e].source;
190+
int source_rc = g->nodes[source].rc_id;
191+
int target = g->edges[i_e].target;
192+
for (int i = 0; i < g->nodes[target].deg; i++) {
193+
gint_t next_e = g->nodes[target].adj[i];
194+
gint_t next_e_rc = g->edges[next_e].rc_id;
195+
char next_ce = '+';
196+
if (next_e > next_e_rc) {
197+
SWAP(next_e, next_e_rc);
198+
next_ce = '-';
199+
}
200+
fprintf(f, "L\t%lld_%lld\t%c\t%lld_%lld\t%c\t%dM\n",
201+
(long long) i_e, (long long) i_e_rc, '+',
202+
(long long) next_e, (long long) next_e_rc,
203+
next_ce, g->ksize);
204+
}
205+
for (int i = 0; i < g->nodes[source_rc].deg; i++) {
206+
gint_t next_e = g->nodes[source_rc].adj[i];
207+
gint_t next_e_rc = g->edges[next_e].rc_id;
208+
char next_ce = '+';
209+
if (next_e > next_e_rc) {
210+
SWAP(next_e, next_e_rc);
211+
next_ce = '-';
212+
}
213+
fprintf(f, "L\t%lld_%lld\t%c\t%lld_%lld\t%c\t%dM\n",
214+
(long long) i_e, (long long) i_e_rc, '-',
215+
(long long) next_e, (long long) next_e_rc,
216+
next_ce, g->ksize);
217+
}
218+
}
219+
220+
void deepcopy_edge(struct asm_edge_t *src, struct asm_edge_t *des)
221+
{
222+
assert("not implemented yet");
223+
des->count = src->count;
224+
des->seq = calloc(src->seq_len, 1);
225+
COPY_ARR(src->seq, des->seq, src->seq_len);
226+
des->source = src->source;
227+
des-> target = des->target;
228+
des->rc_id = des->rc_id;
229+
}
230+
231+
void add_node_to_graph(struct asm_graph_t *sub_graph)
232+
{
233+
sub_graph->nodes = realloc(sub_graph->nodes, (sub_graph->n_v+1) * sizeof(struct asm_node_t));
234+
sub_graph->n_v++;
235+
}
236+
237+
//add edge and it's node if they haven't exist in graph yet
238+
void add_edge_to_sub_graph(struct asm_graph_t *g, int i_e, struct asm_graph_t *sub_graph,
239+
int *map_node)
240+
{
241+
sub_graph->edges = realloc(sub_graph->edges, (sub_graph->n_e + 1) * sizeof(struct asm_edge_t));
242+
deepcopy_edge(&g->edges[i_e], &sub_graph->edges[sub_graph->n_e]);
243+
244+
struct asm_edge_t *e = &sub_graph->edges[sub_graph->n_e];
245+
if (map_node[e->source] == -1) {
246+
map_node[e->source] = sub_graph->n_v;
247+
add_node_to_graph(sub_graph);
248+
}
249+
e->source = map_node[e->source];
250+
if (map_node[e->target] == -1) {
251+
map_node[e->target] = sub_graph->n_v;
252+
add_node_to_graph(sub_graph);
253+
}
254+
e->target = map_node[e->target];
255+
256+
struct asm_node_t *source = &g->nodes[e->source];
257+
source->adj = realloc(source->adj, (source->deg+1) * sizeof(gint_t));
258+
source->adj[source->deg] = sub_graph->n_e;
259+
sub_graph->n_e++;
260+
source->deg++;
261+
}
262+
263+
void add_pair_edge_to_sub_graph(struct asm_graph_t *g, int i_e, struct asm_graph_t *sub_graph,
264+
int *map_node)
265+
{
266+
add_edge_to_sub_graph(g, i_e, sub_graph, map_node);
267+
add_edge_to_sub_graph(g, g->edges[i_e].rc_id, sub_graph, map_node);
268+
int n_e = sub_graph->n_e;
269+
sub_graph->edges[n_e-2].rc_id = n_e-1;
270+
sub_graph->edges[n_e-1].rc_id = n_e-2;
271+
}
272+
175273
int dfs_partition(struct asm_graph_t *g, int *partition, int i_e, int index_par,
176274
struct partition_information *part_infor)
177275
{
@@ -202,6 +300,10 @@ int dfs_partition(struct asm_graph_t *g, int *partition, int i_e, int index_par,
202300
int source_rc = g->nodes[source].rc_id;
203301
int target = g->edges[i_e].target;
204302

303+
// FILE *f = part_infor->output_gfa_file;
304+
//todo huu write gfa
305+
// fwrite_gfa(f, g, i_e);
306+
add_pair_edge_to_sub_graph(g, i_e, part_infor->sub_graph, part_infor->map_node);
205307
for (int i = 0; i < g->nodes[source_rc].deg; i++) {
206308
int i_e_rc = g->nodes[source_rc].adj[i];
207309
res += dfs_partition(g, partition, i_e_rc, index_par, part_infor);
@@ -402,7 +504,7 @@ void dfs_resolve(struct asm_graph_t *g, int i_e, int partition_index, khash_t(pa
402504
}
403505

404506
void partition_graph(struct read_path_t *ori_read, struct asm_graph_t *g, int *partition, int n_threads,
405-
int mmem, int *n_partition, khash_t(pair_kmer_count) *kmer_pair_table)
507+
int mmem, int *n_partition, khash_t(pair_kmer_count) *kmer_pair_table, char *out_dir)
406508
{
407509
// khash_t(bcpos) *dict = kh_init(bcpos);
408510
// construct_read_index(ori_read, dict);
@@ -411,26 +513,35 @@ void partition_graph(struct read_path_t *ori_read, struct asm_graph_t *g, int *p
411513
partition[i] = 0;
412514
int count = 0;
413515
int *resolve_stat = calloc(4, 4);
516+
char *output_gfa_dir = calloc(PATH_MAX, 1);
517+
struct partition_information *part_infor
518+
= calloc(1, sizeof(struct partition_information));
519+
part_infor->map_node = calloc(g->n_v, 4);
520+
for (int i = 0 ; i < g->n_v; i ++)
521+
part_infor->map_node[i] = -1;
522+
414523
for (int i = 0; i < (int) g->n_e; i++)
415524
if (partition[i] == 0)
416525
if (g->edges[i].seq_len < CONTIG_PARTITION_LEN) {
417526
log_warn("Dfs from %d", i);
418527
count++;
419-
// khash_t(big_kmer_count) *kmer_count_table = kh_init(big_kmer_count);
420-
struct partition_information *part_infor
421-
= calloc(1, sizeof(struct partition_information));
528+
part_infor->sub_graph = calloc(1, sizeof(struct asm_graph_t));
529+
part_infor->total_len = 0;
530+
sprintf(output_gfa_dir, "partition_%d", count);
531+
// part_infor->output_gfa_file = fopen(output_gfa_dir, "w");
422532
part_infor->union_barcodes = kh_init(union_barcode);
423533
int n_edges = dfs_partition(g, partition, i, count, part_infor);
424-
//
425-
// log_warn("n edges %d n barcodes %d total length %d", n_edges, total_barcodes, total_length_component);
534+
test_asm_graph(part_infor->sub_graph);
535+
save_graph_info(out_dir, g, output_gfa_dir);
536+
dfs_resolve(g, i, count, kmer_pair_table, partition, resolve_stat);
537+
fclose(part_infor->output_gfa_file);
426538
// if (total_barcodes > 10000) {
427539
// //todo 1 work for big case
428540
// continue;
429541
// }
430542
// if (total_barcodes > 10 && total_length_component / 2 > MIN_COMPONENT && count_resolvable > 2) {
431543
//// filter_read_build_kmer(ori_read, dict, bc, kmer_count_table, 55, 70, n_threads,
432544
//// mmem, count); //todo 0 choose range better
433-
dfs_resolve(g, i, count, kmer_pair_table, partition, resolve_stat);
434545
//todo 0 not break
435546
// break;
436547
// }
@@ -470,6 +581,7 @@ void partition_graph(struct read_path_t *ori_read, struct asm_graph_t *g, int *p
470581
}
471582
log_info("Size of table %d", kmer_pair_table->size);
472583
*n_partition = count;
584+
free(output_gfa_dir);
473585
}
474586

475587
int resolve_212_using_big_kmer(struct asm_graph_t *g, int i_e,
@@ -628,6 +740,6 @@ void resolve_1_2(struct asm_graph_t *g, struct opt_proc_t *opt)
628740
build_pair_kmer_table(opt, table);
629741
log_info("Build big kmer table done");
630742

631-
partition_graph(ori_read, g, partition, opt->n_threads, opt->mmem, &n_partitions, table);
743+
partition_graph(ori_read, g, partition, opt->n_threads, opt->mmem, &n_partitions, table, opt->out_dir);
632744
free(partition);
633745
}

src/resolve_big.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,13 @@
66
#define SKIPPING_RESOLVE_BIG_H
77

88
#include "build_hash_table.h"
9+
#include <stdio.h>
10+
911

1012
KHASH_MAP_INIT_INT64(big_kmer_count, int);
1113
KHASH_SET_INIT_INT64(union_barcode);
1214

1315

14-
void partition_graph(struct read_path_t *ori_read, struct asm_graph_t *g, int *partition, int n_threads,
15-
int mmem, int *n_partition, khash_t(pair_kmer_count) *kmer_pair_table);
16-
1716
struct km_count_bundle_t {
1817
khash_t(big_kmer_count) *kmer_count_table;
1918
int ksize;
@@ -24,7 +23,10 @@ struct km_count_bundle_t {
2423
struct partition_information {
2524
int total_len;
2625
int n_barcodes;
26+
int *map_node;
2727
khash_t(union_barcode) *union_barcodes;
28+
struct asm_graph_t *sub_graph;
29+
FILE *output_gfa_file;
2830
};
2931
void km_count_bundle_destroy(struct km_count_bundle_t *b);
3032
void resolve_1_2(struct asm_graph_t *g, struct opt_proc_t *opt);

0 commit comments

Comments
 (0)