Skip to content

Commit eaf64a3

Browse files
author
Philipp Benner
committed
2019/07/21-17:10:27 (Linux thalamus unknown)
1 parent b8ec54f commit eaf64a3

13 files changed

Lines changed: 563 additions & 498 deletions

kmers.go renamed to kmer.go

Lines changed: 9 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,18 @@ package gonetics
2121
//import "fmt"
2222
import "sort"
2323

24-
/* -------------------------------------------------------------------------- */
24+
/* K-mer equivalence class
25+
* -------------------------------------------------------------------------- */
2526

26-
type Kmer struct {
27+
// KmerClass identifies an equivalence class of k-mers. The type is
// comparable and is used as a map key by KmerSet and KmerCounts.
type KmerClass struct {
	// K-mer ID, made up of the pair (K, I)
	// NOTE(review): K is presumably the k-mer length and I the index of the
	// class among the k-mers of that length -- confirm
	K, I int
	// K-mer string representation
	Name string
}
3334

34-
func (obj Kmer) Equals(b Kmer) bool {
35+
func (obj KmerClass) Equals(b KmerClass) bool {
3536
if obj.K != b.K {
3637
return false
3738
}
@@ -43,7 +44,7 @@ func (obj Kmer) Equals(b Kmer) bool {
4344

4445
/* -------------------------------------------------------------------------- */
4546

46-
type KmerList []Kmer
47+
type KmerList []KmerClass
4748

4849
func (obj KmerList) Clone() KmerList {
4950
r := make(KmerList, len(obj))
@@ -98,7 +99,7 @@ func (obj KmerList) Union(b ...KmerList) KmerList {
9899

99100
/* -------------------------------------------------------------------------- */
100101

101-
type KmerSet map[Kmer]struct{}
102+
type KmerSet map[KmerClass]struct{}
102103

103104
func (obj KmerSet) AsList() KmerList {
104105
r := make(KmerList, len(obj))
@@ -116,7 +117,7 @@ type KmerCounts struct {
116117
// this is a sorted list of k-mers and might contain more entries than
117118
// the counts map
118119
Kmers KmerList
119-
Counts map[Kmer]int
120+
Counts map[KmerClass]int
120121
}
121122

122123
func (obj KmerCounts) Len() int {
@@ -135,83 +136,18 @@ func (obj KmerCounts) At(i int) int {
135136
}
136137
}
137138

138-
func (obj KmerCounts) GetCount(kmer Kmer) int {
139+
// GetCount returns the count stored for the given k-mer class, or zero if
// the class has no entry in the counts map.
func (obj KmerCounts) GetCount(kmer KmerClass) int {
	// reading a missing key from a map yields the zero value, which is
	// exactly the count reported for unobserved k-mers
	return obj.Counts[kmer]
}
145146

146-
func (obj KmerCounts) GetKmer(i int) Kmer {
147+
// GetKmer returns the i-th entry of the k-mer list. It panics if i is not a
// valid index into obj.Kmers.
func (obj KmerCounts) GetKmer(i int) KmerClass {
	kmers := obj.Kmers
	return kmers[i]
}
149150

150151
// Iterate returns an iterator over obj that starts at position zero.
func (obj KmerCounts) Iterate() KmerCountsIterator {
	it := KmerCountsIterator{obj, 0}
	return it
}
153-
154-
/* -------------------------------------------------------------------------- */
155-
156-
type KmerCountsList struct {
157-
Kmers KmerList
158-
Counts []map[Kmer]int
159-
}
160-
161-
func NewKmerCountsList(counts ...KmerCounts) KmerCountsList {
162-
r := KmerCountsList{}
163-
return r.Append(counts...)
164-
}
165-
166-
func (obj KmerCountsList) Append(args ...KmerCounts) KmerCountsList {
167-
if len(args) == 0 {
168-
return obj
169-
}
170-
idLists := make([]KmerList, len(args))
171-
counts := obj.Counts
172-
for i, c := range args {
173-
idLists[i] = c.Kmers
174-
counts = append(counts, c.Counts)
175-
}
176-
ids := obj.Kmers.Union(idLists...)
177-
return KmerCountsList{Kmers: ids, Counts: counts}
178-
}
179-
180-
func (obj KmerCountsList) Len() int {
181-
return len(obj.Counts)
182-
}
183-
184-
func (obj KmerCountsList) At(i int) KmerCounts {
185-
return KmerCounts{Kmers: obj.Kmers, Counts: obj.Counts[i]}
186-
}
187-
188-
func (obj *KmerCountsList) Slice(i, j int) KmerCountsList {
189-
return KmerCountsList{Kmers: obj.Kmers, Counts: obj.Counts[i:j]}
190-
}
191-
192-
/* -------------------------------------------------------------------------- */
193-
194-
type KmerCountsIterator struct {
195-
KmerCounts
196-
i int
197-
}
198-
199-
func (obj KmerCountsIterator) Ok() bool {
200-
return obj.i < obj.Len()
201-
}
202-
203-
func (obj KmerCountsIterator) GetKmer() Kmer {
204-
return obj.Kmers[obj.i]
205-
}
206-
207-
func (obj KmerCountsIterator) GetName() string {
208-
return obj.Kmers[obj.i].Name
209-
}
210-
211-
func (obj KmerCountsIterator) GetCount() int {
212-
return obj.At(obj.i)
213-
}
214-
215-
func (obj *KmerCountsIterator) Next() {
216-
obj.i++
217-
}

kmer_catalogue.go

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
/* Copyright (C) 2019 Philipp Benner
2+
*
3+
* This program is free software: you can redistribute it and/or modify
4+
* it under the terms of the GNU General Public License as published by
5+
* the Free Software Foundation, either version 3 of the License, or
6+
* (at your option) any later version.
7+
*
8+
* This program is distributed in the hope that it will be useful,
9+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
* GNU General Public License for more details.
12+
*
13+
* You should have received a copy of the GNU General Public License
14+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
15+
*/
16+
17+
package gonetics
18+
19+
/* -------------------------------------------------------------------------- */
20+
21+
//import "fmt"
22+
import "sort"
23+
import "strings"
24+
25+
/* -------------------------------------------------------------------------- */
26+
27+
type KmerCatalogue struct {
28+
KmerEquivalenceRelation
29+
names []map[int]string // k-mer names (equivalent k-mers separated by pipe)
30+
idmap []map[string]int // unique k-mer IDs
31+
}
32+
33+
/* -------------------------------------------------------------------------- */
34+
35+
func NewKmerCatalogue(n, m int, comp, rev, rc bool, maxAmbiguous []int, al ComplementableAlphabet) (*KmerCatalogue, error) {
36+
r := KmerCatalogue{}
37+
if f, err := NewKmerEquivalenceRelation(n, m, comp, rev, rc, maxAmbiguous, al); err != nil {
38+
return nil, err
39+
} else {
40+
r.KmerEquivalenceRelation = f
41+
}
42+
idmap := make([]map[string]int, m-n+1)
43+
names := make([]map[int]string, m-n+1)
44+
for k := n; k <= m; k++ {
45+
idmap[k-n] = make(map[string]int)
46+
names[k-n] = make(map[int]string)
47+
}
48+
r.names = names
49+
r.idmap = idmap
50+
return &r, nil
51+
}
52+
53+
/* -------------------------------------------------------------------------- */
54+
55+
func (obj *KmerCatalogue) GetId(kmer string) int {
56+
k := len(kmer)
57+
if k < obj.n || k > obj.m {
58+
panic("k-mer has invalid length")
59+
}
60+
if i, ok := obj.idmap[k-obj.n][kmer]; ok {
61+
return i
62+
} else {
63+
r := obj.EquivalenceClass(kmer)
64+
for _, kmer := range strings.Split(r.Name, "|") {
65+
obj.idmap[k-obj.n][kmer] = r.I
66+
}
67+
obj.names[k-obj.n][r.I] = r.Name
68+
return r.I
69+
}
70+
}
71+
72+
func (obj *KmerCatalogue) GetName(kmer string) string {
73+
k := len(kmer)
74+
if k < obj.n || k > obj.m {
75+
panic("k-mer has invalid length")
76+
}
77+
if i, ok := obj.idmap[k-obj.n][kmer]; ok {
78+
return obj.names[k-obj.n][i]
79+
} else {
80+
r := obj.EquivalenceClass(kmer)
81+
for _, kmer := range strings.Split(r.Name, "|") {
82+
obj.idmap[k-obj.n][kmer] = r.I
83+
}
84+
obj.names[k-obj.n][r.I] = r.Name
85+
return r.Name
86+
}
87+
}
88+
89+
func (obj *KmerCatalogue) IdToName(k, id int) string {
90+
if name, ok := obj.names[k-obj.n][id]; ok {
91+
return name
92+
}
93+
panic("k-mer name not found")
94+
}
95+
96+
func (obj *KmerCatalogue) GetNames(kmer string) []string {
97+
return strings.Split(obj.GetName(kmer), "|")
98+
}
99+
100+
func (obj *KmerCatalogue) ObservedKmers() int {
101+
r := 0
102+
for i := 0; i < len(obj.names); i++ {
103+
r += len(obj.names[i])
104+
}
105+
return r
106+
}
107+
108+
/* -------------------------------------------------------------------------- */
109+
110+
func (obj *KmerCatalogue) scanSubKmers_(kmer []byte, k int) []int {
111+
idMap := make(map[int]struct{})
112+
// loop over sequence
113+
for i := 0; i < len(kmer); i++ {
114+
if i+k-1 >= len(kmer) {
115+
break
116+
}
117+
it := NewKmerInstantiationIterator(obj.al, string(kmer[i:i+k]), true)
118+
for ; it.Ok(); it.Next() {
119+
if id, ok := obj.idmap[k-obj.n][it.Get()]; ok {
120+
idMap[id] = struct{}{}
121+
}
122+
}
123+
}
124+
ids := []int{}
125+
for id, _ := range idMap {
126+
ids = append(ids, id)
127+
}
128+
sort.Ints(ids)
129+
return ids
130+
}
131+
132+
func (obj *KmerCatalogue) scanSubKmers(kmer []byte) []string {
133+
names := []string{}
134+
for k := obj.n; k <= obj.m; k++ {
135+
ids := obj.scanSubKmers_(kmer, k)
136+
for _, id := range ids {
137+
names = append(names , obj.IdToName(k, id))
138+
}
139+
}
140+
return names
141+
}
142+
143+
/* -------------------------------------------------------------------------- */
144+
145+
func (obj *KmerCatalogue) relatedKmers(s []byte, m, k int) []int {
146+
idMap := make(map[int]struct{})
147+
// loop over positions where the k-mer can be fixed
148+
for j := 0; j <= k - len(s); j++ {
149+
for it := NewKmerCylinderIterator(k, obj.ma[k-obj.n] - m, obj.al, j, string(s)); it.Ok(); it.Next() {
150+
if id, ok := obj.idmap[k-obj.n][it.Get()]; ok {
151+
idMap[id] = struct{}{}
152+
}
153+
}
154+
}
155+
ids := []int{}
156+
for id, _ := range idMap {
157+
ids = append(ids, id)
158+
}
159+
sort.Ints(ids)
160+
return ids
161+
}
162+
163+
func (obj *KmerCatalogue) RelatedKmers(kmer string) []string {
164+
s := []byte(obj.GetNames(kmer)[0])
165+
m := obj.countAmbiguous(kmer)
166+
// scan k-mer for sub-k-mers
167+
names := obj.scanSubKmers(s)
168+
// loop over k-mer sizes
169+
for k := len(s)+1; k <= obj.m; k++ {
170+
ids := obj.relatedKmers(s, m, k)
171+
for _, id := range ids {
172+
names = append(names , obj.IdToName(k, id))
173+
}
174+
}
175+
return names
176+
}
177+
178+
func (obj *KmerCatalogue) countAmbiguous(kmer string) int {
179+
m := 0
180+
s := []byte(kmer)
181+
for i := 0; i < len(s); i++ {
182+
if ok, err := obj.al.IsAmbiguous(s[i]); err != nil {
183+
panic("internal error")
184+
} else {
185+
if ok {
186+
m++
187+
}
188+
}
189+
}
190+
return m
191+
}
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@ package gonetics
2222

2323
/* -------------------------------------------------------------------------- */
2424

25-
type KmersCatalogueIterator struct {
25+
// KmerCatalogueIterator walks over two parallel slices of k-mer names and
// IDs.
type KmerCatalogueIterator struct {
	// names and ids are read at the same position i
	names []string
	ids   []int
	// current position of the iterator
	i int
}
3030

3131
/* -------------------------------------------------------------------------- */
3232

33-
func NewKmersCatalogueIterator(kmersSet KmersCatalogue) KmersCatalogueIterator {
33+
func NewKmerCatalogueIterator(kmersSet KmerCatalogue) KmerCatalogueIterator {
3434
names := []string{}
3535
ids := []int{}
3636
for k := 0; k < len(kmersSet.names); k++ {
@@ -44,23 +44,23 @@ func NewKmersCatalogueIterator(kmersSet KmersCatalogue) KmersCatalogueIterator {
4444
names = append(names, n...)
4545
ids = append(ids, i...)
4646
}
47-
return KmersCatalogueIterator{names: names, ids: ids, i: 0}
47+
return KmerCatalogueIterator{names: names, ids: ids, i: 0}
4848
}
4949

5050
/* -------------------------------------------------------------------------- */
5151

52-
func (obj KmersCatalogueIterator) Ok() bool {
52+
// Ok reports whether the iterator still points at a valid entry.
func (obj KmerCatalogueIterator) Ok() bool {
	if obj.i >= len(obj.names) {
		return false
	}
	return true
}
5555

56-
func (obj KmersCatalogueIterator) Get() string {
56+
// Get returns the k-mer name at the current position. It panics if the
// position is not a valid index, i.e. when Ok() is false.
func (obj KmerCatalogueIterator) Get() string {
	name := obj.names[obj.i]
	return name
}
5959

60-
func (obj KmersCatalogueIterator) GetId() int {
60+
// GetId returns the k-mer ID at the current position. It panics if the
// position is not a valid index into the ID list.
func (obj KmerCatalogueIterator) GetId() int {
	id := obj.ids[obj.i]
	return id
}
6363

64-
func (obj *KmersCatalogueIterator) Next() {
64+
// Next advances the iterator by one position.
func (obj *KmerCatalogueIterator) Next() {
	obj.i += 1
}

0 commit comments

Comments
 (0)