Skip to content

Commit

Permalink
update readme for ont
Browse files Browse the repository at this point in the history
  • Loading branch information
chhylp123 committed Feb 1, 2025
1 parent 2c77b3c commit 4733ef5
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 5 deletions.
54 changes: 50 additions & 4 deletions CommandLines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ static ko_longopt_t long_options[] = {
// { "sc-n", ko_no_argument, 360},
{ "chem-c", ko_required_argument, 361},
{ "chem-f", ko_required_argument, 362},
{ "ul-m", ko_required_argument, 363},
// { "path-round", ko_required_argument, 348},
{ 0, 0, 0 }
};
Expand Down Expand Up @@ -186,6 +187,11 @@ void Print_H(hifiasm_opt_t* asm_opt)

fprintf(stderr, " Ultra-Long-integration:\n");
fprintf(stderr, " --ul FILEs file names of Ultra-Long reads [r1.fq,r2.fq,...]\n");
///pending for integration
/**
fprintf(stderr, " --ul-m INT\n");
fprintf(stderr, " hybrid assembly mode. 0: fast and memory efficient; 1: may produce better assembly with ONT R10 [%d]\n", asm_opt->ul_mod);
**/
fprintf(stderr, " --ul-rate FLOAT\n");
fprintf(stderr, " error rate of Ultra-Long reads [%.3g]\n", asm_opt->ul_error_rate);
fprintf(stderr, " --ul-tip INT\n");
Expand Down Expand Up @@ -357,6 +363,7 @@ void init_opt(hifiasm_opt_t* asm_opt)
asm_opt->is_sc = 0;
asm_opt->chemical_cov = 1;
asm_opt->chemical_flank = 256;
asm_opt->ul_mod = 0;
}

void destory_enzyme(enzyme* f)
Expand Down Expand Up @@ -430,14 +437,43 @@ static int check_file(char* name, const char* opt)

static int check_hic_reads(enzyme* f, const char* opt)
{
int i;
for (i = 0; i < f->n; i++)
{
int32_t i;
for (i = 0; i < f->n; i++) {
if(check_file(f->a[i], opt) == 0) return 0;
}
return 1;
}

/**
 * Check that every file named in `f` can be opened and, when `is_fq` is
 * non-zero, that the first record read from it carries a quality string
 * (i.e. the file looks like FASTQ rather than FASTA).
 *
 * Only the first record of each file is inspected. A negative return from
 * kseq_read() on that first record is not treated as an error here.
 *
 * @param f      list of file names: f->a holds f->n entries
 * @param opt    option name echoed in the error messages
 * @param is_fq  non-zero to additionally require a quality string
 * @return 1 if all files pass the checks; 0 after printing an error to stderr
 */
static int check_fq_files(enzyme* f, const char* opt, int32_t is_fq)
{
    int32_t i;
    for (i = 0; i < f->n; i++) {
        gzFile dfp;
        int missing_qual = 0;

        if(!(f->a[i])) {
            fprintf(stderr, "[ERROR] input file does not exist (%s)\n", opt);
            return 0;
        }

        dfp = gzopen(f->a[i], "r");
        if (dfp == 0) {
            fprintf(stderr, "[ERROR] Cannot find the input file: %s (%s)\n", f->a[i], opt);
            return 0;
        }

        if(is_fq) {
            kseq_t *ks = kseq_init(dfp);
            if ((kseq_read(ks) >= 0) && ((ks->qual.l == 0) || (ks->qual.s == NULL))) {
                missing_qual = 1;
            }
            kseq_destroy(ks);
        }
        /* Release the handle on every path before reporting, so the failure
         * branch below no longer leaks the parser and the open gz stream. */
        gzclose(dfp);

        if(missing_qual) {
            fprintf(stderr, "[ERROR] %s is in fasta format rather than fastq format (%s)\n", f->a[i], opt);
            fprintf(stderr, "[ERROR] set --ul-m 0 for fasta files\n");
            return 0;
        }
    }
    return 1;
}

int check_option(hifiasm_opt_t* asm_opt)
{
if(asm_opt->read_file_names == NULL || asm_opt->num_reads == 0)
Expand Down Expand Up @@ -637,7 +673,7 @@ int check_option(hifiasm_opt_t* asm_opt)
return 0;
}

if(asm_opt->ar != NULL && check_hic_reads(asm_opt->ar, "UL") == 0) return 0;
if(asm_opt->ar != NULL && check_fq_files(asm_opt->ar, "--ul", asm_opt->ul_mod) == 0) return 0;
if(asm_opt->ar != NULL && asm_opt->ar->n == 0)
{
fprintf(stderr, "[ERROR] wrong UL reads (--ul)\n");
Expand Down Expand Up @@ -686,6 +722,11 @@ int check_option(hifiasm_opt_t* asm_opt)
return 0;
}

if(asm_opt->ul_mod != 0 && asm_opt->ul_mod != 1) {
fprintf(stderr, "[ERROR] must be 0 or 1 (--ul-m)\n");
return 0;
}

if(asm_opt->telo_motif) {
uint64_t k, tlen = strlen((asm_opt->telo_motif)); char c;
if(tlen > 32) {
Expand Down Expand Up @@ -944,6 +985,11 @@ int CommandLine_process(int argc, char *argv[], hifiasm_opt_t* asm_opt)
asm_opt->chemical_cov = atol(opt.arg);
} else if (c == 362) {
asm_opt->chemical_flank = atol(opt.arg);
///pending for integration
/**
} else if (c == 363) {
asm_opt->ul_mod = atol(opt.arg);
**/
} else if (c == 'l') { ///0: disable purge_dup; 1: purge containment; 2: purge overlap
asm_opt->purge_level_primary = asm_opt->purge_level_trio = atoi(opt.arg);
}
Expand Down
1 change: 1 addition & 0 deletions CommandLines.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ typedef struct {
int64_t infor_cov, s_hap_cov, trio_cov_het_ovlp;
double ul_error_rate, ul_error_rate_low, ul_error_rate_hpc;
int32_t ul_ec_round;
int32_t ul_mod;
uint8_t is_dbg_het_cnt;
uint8_t is_low_het_ul;
uint8_t is_base_trans;
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ hifiasm -o CHM13.asm -t32 -l0 CHM13-HiFi.fa.gz 2> CHM13.asm.log
# Assemble heterozygous genomes with built-in duplication purging
hifiasm -o HG002.asm -t32 HG002-file1.fq.gz HG002-file2.fq.gz

# Assemble genomes with ONT R10 reads rather than PacBio HiFi reads using the latest release of hifiasm (>0.21.0-r686)
hifiasm -o HG002.asm --ont -t32 HG002-ont.fq.gz

# Hi-C phasing with paired-end short reads in two FASTQ files
hifiasm -o HG002.asm --h1 read1.fq.gz --h2 read2.fq.gz HG002-HiFi.fq.gz

Expand Down
12 changes: 11 additions & 1 deletion htab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -991,8 +991,18 @@ ha_ct_t *ha_count(const hifiasm_opt_t *asm_o, int flag, int HPC, int k, int w, h
opt.adaLen = (keep_adapter? asm_o->adapterLen : 0);
opt.min_rcnt = (low_freq?*low_freq:-1);
opt.uq = (unique_only?1:0);

n_bs = 0;
///pending for integration
/**
if(rs && asm_o->ar && asm_o->ul_mod) {
for (i = 0; i < asm_o->ar->n; ++i){
h = yak_count(&opt, asm_o->ar->a[i], flag|HAF_CREATE_NEW, p0, h, flt_tab, rs, us, &n_seq);
if(h) n_bs += h->bs;
}
}**/
///asm_opt->num_reads is the number of fastq files
for (i = n_bs = 0; i < (us?1:asm_o->num_reads); ++i){
for (i = 0; i < (us?1:asm_o->num_reads); ++i){
h = yak_count(&opt, asm_o->read_file_names[i], flag|HAF_CREATE_NEW, p0, h, flt_tab, rs, us, &n_seq);
if(h) n_bs += h->bs;
}
Expand Down

0 comments on commit 4733ef5

Please sign in to comment.