forked from ryanbressler/CloudForest
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.go
More file actions
114 lines (100 loc) · 2.8 KB
/
utils.go
File metadata and controls
114 lines (100 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package CloudForest
import (
"fmt"
"io"
"log"
"math/rand"
"sync"
)
// RunningMean is a thread-safe struct for keeping track of a running
// (optionally weighted) mean, as used in importance calculations.
// (TODO: could this be made lock free?)
//
// Mean and Count are exported, but only Add, WeightedAdd and Read hold the
// mutex while touching them; direct field access is not synchronized.
type RunningMean struct {
	mutex sync.Mutex
	Mean  float64 // weighted mean of every value added so far
	Count float64 // total weight added so far
}

// Add adds the specified value to the running mean with a weight of 1 in a
// thread-safe way.
func (rm *RunningMean) Add(val float64) {
	rm.WeightedAdd(val, 1.0)
}

// WeightedAdd adds the specified value to the running mean with the given
// weight in a thread-safe way.
//
// If the accumulated weight after the update is exactly zero (for example a
// zero weight added to an empty RunningMean) the mean is undefined. Instead of
// storing the NaN that 0/0 would produce, which would contaminate every later
// update, the accumulator is reset to its empty state (Mean 0, Count 0).
func (rm *RunningMean) WeightedAdd(val float64, weight float64) {
	rm.mutex.Lock()
	defer rm.mutex.Unlock()

	total := rm.Count + weight
	if total == 0 {
		// Nothing has effectively been accumulated; avoid dividing by zero.
		rm.Mean, rm.Count = 0, 0
		return
	}
	rm.Mean = (rm.Mean*rm.Count + weight*val) / total
	rm.Count = total
}

// Read returns the current mean and the total weight (count) accumulated so
// far, reading both under the mutex so they are consistent with each other.
func (rm *RunningMean) Read() (mean float64, count float64) {
	rm.mutex.Lock()
	defer rm.mutex.Unlock()
	return rm.Mean, rm.Count
}

// NewRunningMeans returns a pointer to a slice holding size newly allocated,
// zero-valued RunningMeans.
func NewRunningMeans(size int) *[]*RunningMean {
	importance := make([]*RunningMean, 0, size)
	for i := 0; i < size; i++ {
		importance = append(importance, new(RunningMean))
	}
	return &importance
}
// SparseCounter uses nested maps to track sparse integer counts in a large
// matrix. Cells that have never been added to are absent from the maps and are
// assumed to hold zero.
type SparseCounter struct {
	Map map[int]map[int]int
}

// Add increases the count in cell i,j by val, lazily creating the outer map
// and the row map for i when they do not exist yet.
func (sc *SparseCounter) Add(i int, j int, val int) {
	if sc.Map == nil {
		sc.Map = map[int]map[int]int{}
	}
	row := sc.Map[i]
	if row == nil {
		// Covers both a missing row and a row explicitly stored as nil.
		row = map[int]int{}
		sc.Map[i] = row
	}
	// A missing cell reads as zero, so this also creates the entry.
	row[j] += val
}

// WriteTsv writes every stored cell to writer as a three column tsv holding
// i, j and the count, one cell per line. Cells are emitted in map iteration
// order. A write error terminates the program via log.Fatal.
func (sc *SparseCounter) WriteTsv(writer io.Writer) {
	for i, row := range sc.Map {
		for j, count := range row {
			_, err := fmt.Fprintf(writer, "%v\t%v\t%v\n", i, j, count)
			if err != nil {
				log.Fatal(err)
			}
		}
	}
}
// SampleFirstN ensures that the first n entries in the supplied deck are
// randomly drawn from all entries without replacement, for use in selecting
// candidate features to split on. It shuffles in place and accepts a pointer
// to the deck so that it can be used repeatedly on the same deck, avoiding
// reallocations.
//
// If n exceeds the length of the deck it is clamped to that length, so the
// whole deck is shuffled instead of indexing past its end. A non-positive n
// leaves the deck untouched.
func SampleFirstN(deck *[]int, n int) {
	cards := *deck
	length := len(cards)
	if n > length {
		n = length
	}
	for i := 0; i < n; i++ {
		// Pick uniformly from the not-yet-fixed tail [i, length) and swap it
		// into position i.
		randi := i + rand.Intn(length-i)
		cards[i], cards[randi] = cards[randi], cards[i]
	}
}
// SampleWithReplacment returns nSamples random draws from [0,totalCases),
// sampled with replacement, for use in selecting cases to grow a tree from.
func SampleWithReplacment(nSamples int, totalCases int) (cases []int) {
	cases = make([]int, 0, nSamples)
	for remaining := nSamples; remaining > 0; remaining-- {
		draw := rand.Intn(totalCases)
		cases = append(cases, draw)
	}
	return cases
}