|
1 | 1 | # -*- coding: utf-8 -*- |
2 | 2 | import logging |
3 | 3 | import re |
| 4 | +from typing import Iterable |
4 | 5 | from cobra.core.dictlist import DictList |
5 | 6 |
|
6 | 7 | logger = logging.getLogger(__name__) |
7 | 8 |
|
8 | 9 | DEFAULT_SPLIT = " " |
9 | 10 |
|
10 | 11 |
|
11 | | -def to_fasta(features, filename, line_size=80, fn_header=None): |
class MSFeature:
    """Protein coding feature: identifier, sequence and attached annotations."""

    def __init__(self, feature_id, sequence, description=None, aliases=None):
        """
        Create a feature record.

        @param feature_id: identifier for the protein coding feature
        @param sequence: protein sequence
        @param description: description of the feature
        @param aliases: optional aliases for the feature; stored unchanged
            (None when omitted)
        """
        self.id = feature_id
        self.seq = sequence
        # Kept verbatim for now; to be replaced with proper parsing later.
        self.description = description
        # Maps an ontology type to the list of values recorded for it.
        self.ontology_terms = {}
        self.aliases = aliases

    def add_ontology_term(self, ontology_term, value):
        """
        Attach a functional term to the feature, ignoring repeated values.

        @param ontology_term: type of the ontology (e.g., RAST, EC)
        @param value: value for the ontology (e.g., pyruvate kinase)
        """
        # Create the per-ontology list on first use, then reuse it.
        recorded = self.ontology_terms.setdefault(ontology_term, [])
        if value in recorded:
            return
        recorded.append(value)
| 38 | + |
| 39 | + |
| 40 | +def to_fasta(features: Iterable[MSFeature], filename, line_size=80, fn_header=None): |
12 | 41 | with open(filename, "w") as fh: |
13 | 42 | for feature in features: |
14 | 43 | if feature.seq: |
15 | | - h = f">{feature.id}\n" |
| 44 | + h = f">{feature.id}{DEFAULT_SPLIT}{feature.description}\n" |
16 | 45 | if fn_header: |
17 | 46 | h = fn_header(feature) |
18 | 47 | fh.write(h) |
19 | 48 | _seq = feature.seq |
20 | 49 | lines = [ |
21 | | - _seq[i : i + line_size] + "\n" |
| 50 | + _seq[i: i + line_size] + "\n" |
22 | 51 | for i in range(0, len(_seq), line_size) |
23 | 52 | ] |
24 | 53 | for line in lines: |
@@ -134,34 +163,6 @@ def extract_features(faa_str, split=DEFAULT_SPLIT, h_func=None): |
134 | 163 | return features |
135 | 164 |
|
136 | 165 |
|
137 | | -class MSFeature: |
138 | | - def __init__(self, feature_id, sequence, description=None, aliases=None): |
139 | | - """ |
140 | | -
|
141 | | - @param feature_id: identifier for the protein coding feature |
142 | | - @param sequence: protein sequence |
143 | | - @param description: description of the feature |
144 | | - """ |
145 | | - |
146 | | - self.id = feature_id |
147 | | - self.seq = sequence |
148 | | - self.description = description # temporary replace with proper parsing |
149 | | - self.ontology_terms = {} |
150 | | - self.aliases = aliases |
151 | | - |
152 | | - def add_ontology_term(self, ontology_term, value): |
153 | | - """ |
154 | | - Add functional term to the feature |
155 | | -
|
156 | | - @param ontology_term: type of the ontology (e.g., RAST, EC) |
157 | | - @param value: value for the ontology (e.g., pyruvate kinase) |
158 | | - """ |
159 | | - if ontology_term not in self.ontology_terms: |
160 | | - self.ontology_terms[ontology_term] = [] |
161 | | - if value not in self.ontology_terms[ontology_term]: |
162 | | - self.ontology_terms[ontology_term].append(value) |
163 | | - |
164 | | - |
165 | 166 | class MSGenome: |
166 | 167 | def __init__(self): |
167 | 168 | self.features = DictList() |
|
0 commit comments