Skip to content

Commit

Permalink
Make Genes.write_gff use the sequence ID as the ID of each gene (#18)
Browse files (browse the repository at this point in the history)
  • Loading branch information
althonos committed Oct 22, 2022
1 parent 751c404 commit 7d03c45
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 35 deletions.
3 changes: 3 additions & 0 deletions pyrodigal/_pyrodigal.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ cdef class Gene:
cdef Genes owner
cdef _gene* gene

cpdef str _gene_data(self, object sequence_id)
cpdef str _score_data(self)

cpdef double confidence(self)
cpdef str sequence(self)
cpdef str translate(
Expand Down
61 changes: 31 additions & 30 deletions pyrodigal/_pyrodigal.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2501,32 +2501,6 @@ cdef class Gene:

# --- Properties ---------------------------------------------------------

# Gene attribute string for this gene, in the form
# "ID=<seq>_<n>;partial=<b><e>;start_type=...;rbs_motif=...;rbs_spacer=...;gc_cont=<x.xxx>".
# In this property form the <seq> prefix is always the owner's `_num_seq`.
@property
def _gene_data(self):
# Pointer difference between this gene and the first element of the
# owner's gene array, i.e. the 0-based position of the gene in that array
# (despite the `node_index` name, it does not index into the nodes).
cdef size_t node_index = <size_t> (self.gene - &self.owner.genes[0])
return "ID={}_{};partial={}{};start_type={};rbs_motif={};rbs_spacer={};gc_cont={:.3f}".format(
self.owner._num_seq,
# Reported 1-based.
node_index + 1,
# The two partial flags are written as adjacent 0/1 digits.
int(self.partial_begin),
int(self.partial_end),
self.start_type,
self.rbs_motif,
self.rbs_spacer,
# GC content is read from the node at the gene's start index.
self.owner.nodes.nodes[self.gene.start_ndx].gc_cont
)

# Score attribute string for this gene: the confidence followed by the
# individual score fields, each formatted with two decimals.
# NOTE: the returned string ends with a trailing ";".
@property
def _score_data(self):
return "conf={:.2f};score={:.2f};cscore={:.2f};sscore={:.2f};rscore={:.2f};uscore={:.2f};tscore={:.2f};".format(
# `confidence` is a method call; the remaining fields are attributes.
self.confidence(),
self.score,
self.cscore,
self.sscore,
self.rscore,
self.uscore,
self.tscore,
)

@property
def begin(self):
"""`int`: The coordinate at which the gene begins.
Expand Down Expand Up @@ -2715,6 +2689,33 @@ cdef class Gene:
"""
return self.owner.nodes[self.gene.stop_ndx]


# --- Utils --------------------------------------------------------------

# Build the gene attribute string for this gene, in the form
# "ID=<sequence_id>_<n>;partial=<b><e>;start_type=...;rbs_motif=...;rbs_spacer=...;gc_cont=<x.xxx>".
#
# `sequence_id` is inserted verbatim (via `str.format`) as the prefix of the
# ID field, so the caller decides what identifies the sequence.
cpdef str _gene_data(self, object sequence_id):
# Pointer difference between this gene and the first element of the
# owner's gene array, i.e. the 0-based position of the gene in that array
# (despite the `node_index` name, it does not index into the nodes).
cdef size_t node_index = <size_t> (self.gene - &self.owner.genes[0])
return "ID={}_{};partial={}{};start_type={};rbs_motif={};rbs_spacer={};gc_cont={:.3f}".format(
sequence_id,
# Reported 1-based.
node_index + 1,
# The two partial flags are written as adjacent 0/1 digits.
int(self.partial_begin),
int(self.partial_end),
self.start_type,
self.rbs_motif,
self.rbs_spacer,
# GC content is read from the node at the gene's start index.
self.owner.nodes.nodes[self.gene.start_ndx].gc_cont
)

# Build the score attribute string for this gene: the confidence followed by
# the individual score fields, each formatted with two decimals.
# NOTE: the returned string ends with a trailing ";".
cpdef str _score_data(self):
return "conf={:.2f};score={:.2f};cscore={:.2f};sscore={:.2f};rscore={:.2f};uscore={:.2f};tscore={:.2f};".format(
# `confidence` is a method call; the remaining fields are attributes.
self.confidence(),
self.score,
self.cscore,
self.sscore,
self.rscore,
self.uscore,
self.tscore,
)

# --- Python interface ---------------------------------------------------

cpdef double confidence(self):
Expand Down Expand Up @@ -3262,9 +3263,9 @@ cdef class Genes:
n += file.write("\t")
n += file.write("0")
n += file.write("\t")
n += file.write(gene._gene_data)
n += file.write(gene._gene_data(sequence_id))
n += file.write(";")
n += file.write(gene._score_data)
n += file.write(gene._score_data())
n += file.write("\n")

return n
Expand Down Expand Up @@ -3305,7 +3306,7 @@ cdef class Genes:
n += file.write(" # ")
n += file.write(str(gene.strand))
n += file.write(" # ")
n += file.write(gene._gene_data)
n += file.write(gene._gene_data(self._num_seq))
n += file.write("\n")
for line in textwrap.wrap(gene.sequence(), width=width):
n += file.write(line)
Expand Down Expand Up @@ -3355,7 +3356,7 @@ cdef class Genes:
n += file.write(" # ")
n += file.write(str(gene.strand))
n += file.write(" # ")
n += file.write(gene._gene_data)
n += file.write(gene._gene_data(self._num_seq))
n += file.write("\n")
for line in textwrap.wrap(gene.translate(translation_table), width=width):
n += file.write(line)
Expand Down
8 changes: 4 additions & 4 deletions pyrodigal/tests/test_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,19 @@ def setUpClass(cls):

def test_indexing(self):
length = len(self.genes)
self.assertEqual(self.genes[0]._gene_data, self.genes[-length]._gene_data)
self.assertEqual(self.genes[0]._gene_data(1), self.genes[-length]._gene_data(1))
with self.assertRaises(IndexError):
self.genes[length]
with self.assertRaises(IndexError):
self.genes[-length - 1]

def test_iter(self):
for i, gene in zip(range(len(self.genes)), self.genes):
self.assertEqual(gene._gene_data, self.genes[i]._gene_data)
self.assertEqual(gene._gene_data(1), self.genes[i]._gene_data(1))

def test_reversed(self):
for i, gene in zip(range(1, len(self.genes) + 1), reversed(self.genes)):
self.assertEqual(gene._gene_data, self.genes[-i]._gene_data)
self.assertEqual(gene._gene_data(1), self.genes[-i]._gene_data(1))

def test_bool(self):
self.assertTrue(bool(self.genes))
Expand Down Expand Up @@ -94,7 +94,7 @@ def test_write_scores(self):
def test_pickle(self):
genes = pickle.loads(pickle.dumps(self.genes))
for gene1, gene2 in zip(self.genes, genes):
self.assertEqual(gene1._gene_data, gene2._gene_data)
self.assertEqual(gene1._gene_data(1), gene2._gene_data(1))

def test_write_gff(self):
buffer = io.StringIO()
Expand Down
2 changes: 1 addition & 1 deletion pyrodigal/tests/test_orf_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def assertGeneDataEqual(self, predictions, proteins):
self.assertEqual(len(predictions), len(proteins))
for gene, protein in zip(predictions, proteins):
*_, gene_data = protein.description.split(" # ")
self.assertEqual(gene._gene_data, gene_data.strip())
self.assertEqual(gene._gene_data(1), gene_data.strip())

def assertPredictionsEqual(self, predictions, proteins):
self.assertTranslationsEqual(predictions, proteins)
Expand Down

0 comments on commit 7d03c45

Please sign in to comment.