daler · EFraim · Mar 2, 2020 · Mar 2, 2020 · Mar 2, 2020 · Mar 2, 2020
diff --git a/gffutils/helpers.py b/gffutils/helpers.py
@@ -434,7 +434,7 @@ def to_unicode(obj, encoding='utf-8'):
 
 def canonical_transcripts(db, fasta_filename):
     import pyfaidx
-    fasta = pyfaidx.Fasta(fasta_filename, as_raw=True)
+    fasta = pyfaidx.Fasta(fasta_filename, as_raw=False)
     for gene in db.features_of_type('gene'):
 
         # exons_list will contain (CDS_length, total_length, transcript, [exons]) tuples.
@@ -449,20 +449,20 @@ def canonical_transcripts(db, fasta_filename):
                     cds_len += exon_length
                 total_len += exon_length
 
-            exon_list.append((cds_len, total_len, transcript, exons))
+            exon_list.append((cds_len, total_len, transcript, exons if cds_len == 0 else [e for e in exons if e.featuretype in ['CDS', 'five_prime_UTR', 'three_prime_UTR']] ))
 
         # If we have CDS, then use the longest coding transcript
         if max(i[0] for i in exon_list) > 0:
-            best = sorted(exon_list)[0]
+            best = sorted(exon_list, key=lambda x: x[0], reverse=True)[0]
         # Otherwise, just choose the longest
         else:
-            best = sorted(exon_list, lambda x: x[1])[0]
+            best = sorted(exon_list, key=lambda x: x[1])[0]
 
         print(best)
 
         canonical_exons = best[-1]
         transcript = best[-2]
-        seqs = [i.sequence(fasta) for i in canonical_exons]
+        seqs = [i.sequence(fasta) for i in sorted(canonical_exons, key=lambda x: x.start, reverse=transcript.strand != '+')]
         yield transcript, ''.join(seqs)