@@ -71,7 +71,7 @@ int join_3_and_count(struct asm_edge_t *left, struct asm_edge_t *right, struct a
7171 copy_seq32_seq8 (left -> seq , left -> seq_len - graph_ksize - left_len , new_seq , 0 , left_len );
7272 copy_seq32_seq8 (mid -> seq , 0 , new_seq , left_len , mid -> seq_len );
7373 copy_seq32_seq8 (right -> seq , graph_ksize , new_seq , left_len + mid_len , right_len );
74- print_u8_seq (new_seq , new_len );
74+ // print_u8_seq(new_seq, new_len);
7575
7676 //todo 1: does not need to copy all middle seq
7777
@@ -172,6 +172,104 @@ int is_case_2_1_2(struct asm_graph_t *g, int i_e)
172172 return 1 ;
173173}
174174
175+ #define SWAP (a , b ) {a+=b;b=a-b;a=a-b;}
176+
177+ void fwrite_gfa (FILE * f , struct asm_graph_t * g , int i_e )
178+ {
179+ assert (g -> edges [i_e ].source != -1 );
180+ int i_e_rc = g -> edges [i_e ].rc_id ;
181+ if (i_e > i_e_rc ) {
182+ SWAP (i_e , i_e_rc );
183+ }
184+
185+ uint64_t fake_count = get_bandage_count (g -> edges + i_e , g -> ksize );
186+ fprintf (f , "S\t%lld_%lld\t%s\tKC:i:%llu\n" , (long long ) i_e ,
187+ (long long ) i_e_rc , g -> edges [i_e ].seq , (long long unsigned ) fake_count );
188+
189+ int source = g -> edges [i_e ].source ;
190+ int source_rc = g -> nodes [source ].rc_id ;
191+ int target = g -> edges [i_e ].target ;
192+ for (int i = 0 ; i < g -> nodes [target ].deg ; i ++ ) {
193+ gint_t next_e = g -> nodes [target ].adj [i ];
194+ gint_t next_e_rc = g -> edges [next_e ].rc_id ;
195+ char next_ce = '+' ;
196+ if (next_e > next_e_rc ) {
197+ SWAP (next_e , next_e_rc );
198+ next_ce = '-' ;
199+ }
200+ fprintf (f , "L\t%lld_%lld\t%c\t%lld_%lld\t%c\t%dM\n" ,
201+ (long long ) i_e , (long long ) i_e_rc , '+' ,
202+ (long long ) next_e , (long long ) next_e_rc ,
203+ next_ce , g -> ksize );
204+ }
205+ for (int i = 0 ; i < g -> nodes [source_rc ].deg ; i ++ ) {
206+ gint_t next_e = g -> nodes [source_rc ].adj [i ];
207+ gint_t next_e_rc = g -> edges [next_e ].rc_id ;
208+ char next_ce = '+' ;
209+ if (next_e > next_e_rc ) {
210+ SWAP (next_e , next_e_rc );
211+ next_ce = '-' ;
212+ }
213+ fprintf (f , "L\t%lld_%lld\t%c\t%lld_%lld\t%c\t%dM\n" ,
214+ (long long ) i_e , (long long ) i_e_rc , '-' ,
215+ (long long ) next_e , (long long ) next_e_rc ,
216+ next_ce , g -> ksize );
217+ }
218+ }
219+
220+ void deepcopy_edge (struct asm_edge_t * src , struct asm_edge_t * des )
221+ {
222+ assert ("not implemented yet" );
223+ des -> count = src -> count ;
224+ des -> seq = calloc (src -> seq_len , 1 );
225+ COPY_ARR (src -> seq , des -> seq , src -> seq_len );
226+ des -> source = src -> source ;
227+ des -> target = des -> target ;
228+ des -> rc_id = des -> rc_id ;
229+ }
230+
231+ void add_node_to_graph (struct asm_graph_t * sub_graph )
232+ {
233+ sub_graph -> nodes = realloc (sub_graph -> nodes , (sub_graph -> n_v + 1 ) * sizeof (struct asm_node_t ));
234+ sub_graph -> n_v ++ ;
235+ }
236+
237+ //add edge and it's node if they haven't exist in graph yet
238+ void add_edge_to_sub_graph (struct asm_graph_t * g , int i_e , struct asm_graph_t * sub_graph ,
239+ int * map_node )
240+ {
241+ sub_graph -> edges = realloc (sub_graph -> edges , (sub_graph -> n_e + 1 ) * sizeof (struct asm_edge_t ));
242+ deepcopy_edge (& g -> edges [i_e ], & sub_graph -> edges [sub_graph -> n_e ]);
243+
244+ struct asm_edge_t * e = & sub_graph -> edges [sub_graph -> n_e ];
245+ if (map_node [e -> source ] == -1 ) {
246+ map_node [e -> source ] = sub_graph -> n_v ;
247+ add_node_to_graph (sub_graph );
248+ }
249+ e -> source = map_node [e -> source ];
250+ if (map_node [e -> target ] == -1 ) {
251+ map_node [e -> target ] = sub_graph -> n_v ;
252+ add_node_to_graph (sub_graph );
253+ }
254+ e -> target = map_node [e -> target ];
255+
256+ struct asm_node_t * source = & g -> nodes [e -> source ];
257+ source -> adj = realloc (source -> adj , (source -> deg + 1 ) * sizeof (gint_t ));
258+ source -> adj [source -> deg ] = sub_graph -> n_e ;
259+ sub_graph -> n_e ++ ;
260+ source -> deg ++ ;
261+ }
262+
263+ void add_pair_edge_to_sub_graph (struct asm_graph_t * g , int i_e , struct asm_graph_t * sub_graph ,
264+ int * map_node )
265+ {
266+ add_edge_to_sub_graph (g , i_e , sub_graph , map_node );
267+ add_edge_to_sub_graph (g , g -> edges [i_e ].rc_id , sub_graph , map_node );
268+ int n_e = sub_graph -> n_e ;
269+ sub_graph -> edges [n_e - 2 ].rc_id = n_e - 1 ;
270+ sub_graph -> edges [n_e - 1 ].rc_id = n_e - 2 ;
271+ }
272+
175273int dfs_partition (struct asm_graph_t * g , int * partition , int i_e , int index_par ,
176274 struct partition_information * part_infor )
177275{
@@ -202,6 +300,10 @@ int dfs_partition(struct asm_graph_t *g, int *partition, int i_e, int index_par,
202300 int source_rc = g -> nodes [source ].rc_id ;
203301 int target = g -> edges [i_e ].target ;
204302
303+ // FILE *f = part_infor->output_gfa_file;
304+ //todo huu write gfa
305+ // fwrite_gfa(f, g, i_e);
306+ add_pair_edge_to_sub_graph (g , i_e , part_infor -> sub_graph , part_infor -> map_node );
205307 for (int i = 0 ; i < g -> nodes [source_rc ].deg ; i ++ ) {
206308 int i_e_rc = g -> nodes [source_rc ].adj [i ];
207309 res += dfs_partition (g , partition , i_e_rc , index_par , part_infor );
@@ -402,7 +504,7 @@ void dfs_resolve(struct asm_graph_t *g, int i_e, int partition_index, khash_t(pa
402504}
403505
404506void partition_graph (struct read_path_t * ori_read , struct asm_graph_t * g , int * partition , int n_threads ,
405- int mmem , int * n_partition , khash_t (pair_kmer_count ) * kmer_pair_table )
507+ int mmem , int * n_partition , khash_t (pair_kmer_count ) * kmer_pair_table , char * out_dir )
406508{
407509// khash_t(bcpos) *dict = kh_init(bcpos);
408510// construct_read_index(ori_read, dict);
@@ -411,26 +513,35 @@ void partition_graph(struct read_path_t *ori_read, struct asm_graph_t *g, int *p
411513 partition [i ] = 0 ;
412514 int count = 0 ;
413515 int * resolve_stat = calloc (4 , 4 );
516+ char * output_gfa_dir = calloc (PATH_MAX , 1 );
517+ struct partition_information * part_infor
518+ = calloc (1 , sizeof (struct partition_information ));
519+ part_infor -> map_node = calloc (g -> n_v , 4 );
520+ for (int i = 0 ; i < g -> n_v ; i ++ )
521+ part_infor -> map_node [i ] = -1 ;
522+
414523 for (int i = 0 ; i < (int ) g -> n_e ; i ++ )
415524 if (partition [i ] == 0 )
416525 if (g -> edges [i ].seq_len < CONTIG_PARTITION_LEN ) {
417526 log_warn ("Dfs from %d" , i );
418527 count ++ ;
419- // khash_t(big_kmer_count) *kmer_count_table = kh_init(big_kmer_count);
420- struct partition_information * part_infor
421- = calloc (1 , sizeof (struct partition_information ));
528+ part_infor -> sub_graph = calloc (1 , sizeof (struct asm_graph_t ));
529+ part_infor -> total_len = 0 ;
530+ sprintf (output_gfa_dir , "partition_%d" , count );
531+ // part_infor->output_gfa_file = fopen(output_gfa_dir, "w");
422532 part_infor -> union_barcodes = kh_init (union_barcode );
423533 int n_edges = dfs_partition (g , partition , i , count , part_infor );
424- //
425- // log_warn("n edges %d n barcodes %d total length %d", n_edges, total_barcodes, total_length_component);
534+ test_asm_graph (part_infor -> sub_graph );
535+ save_graph_info (out_dir , g , output_gfa_dir );
536+ dfs_resolve (g , i , count , kmer_pair_table , partition , resolve_stat );
537+ fclose (part_infor -> output_gfa_file );
426538// if (total_barcodes > 10000) {
427539// //todo 1 work for big case
428540// continue;
429541// }
430542// if (total_barcodes > 10 && total_length_component / 2 > MIN_COMPONENT && count_resolvable > 2) {
431543//// filter_read_build_kmer(ori_read, dict, bc, kmer_count_table, 55, 70, n_threads,
432544//// mmem, count); //todo 0 choose range better
433- dfs_resolve (g , i , count , kmer_pair_table , partition , resolve_stat );
434545 //todo 0 not break
435546// break;
436547// }
@@ -470,6 +581,7 @@ void partition_graph(struct read_path_t *ori_read, struct asm_graph_t *g, int *p
470581 }
471582 log_info ("Size of table %d" , kmer_pair_table -> size );
472583 * n_partition = count ;
584+ free (output_gfa_dir );
473585}
474586
475587int resolve_212_using_big_kmer (struct asm_graph_t * g , int i_e ,
@@ -628,6 +740,6 @@ void resolve_1_2(struct asm_graph_t *g, struct opt_proc_t *opt)
628740 build_pair_kmer_table (opt , table );
629741 log_info ("Build big kmer table done" );
630742
631- partition_graph (ori_read , g , partition , opt -> n_threads , opt -> mmem , & n_partitions , table );
743+ partition_graph (ori_read , g , partition , opt -> n_threads , opt -> mmem , & n_partitions , table , opt -> out_dir );
632744 free (partition );
633745}
0 commit comments