|
| 1 | +package gorilla_test |
| 2 | + |
| 3 | +import ( |
| 4 | + "encoding/binary" |
| 5 | + "testing" |
| 6 | + "time" |
| 7 | + |
| 8 | + "github.com/stretchr/testify/assert" |
| 9 | +) |
| 10 | + |
| 11 | +// A quick-and-dirty implementation of the algorithm described in the Gorilla paper. |
| 12 | + |
// bits is an append-only bit stream backed by a byte slice, modeled on
// https://github.com/dgryski/go-tsz/blob/master/bstream.go.
// It lets callers write single bits as well as whole bytes.
type bits struct {
	// buf holds the bytes written so far.
	buf []byte
	// i is the index of the last byte in buf.
	i int
	// remain is the number of still-unused low bits in buf[i].
	// NOTE: tracking the remaining bits instead of the used ones keeps
	// appending a bit simple:
	//   byte = byte | 1 << (remain - 1)
	remain uint8
}
| 22 | + |
| 23 | +func newBits() *bits { |
| 24 | + // 0 byte and 0 bits, i is -1 so the grow logic works ... |
| 25 | + return &bits{buf: make([]byte, 0), i: -1, remain: 0} |
| 26 | +} |
| 27 | + |
| 28 | +func (b *bits) writeBit(bit bool) { |
| 29 | + if b.remain == 0 { |
| 30 | + b.buf = append(b.buf, 0) |
| 31 | + b.remain = 8 |
| 32 | + b.i++ |
| 33 | + } |
| 34 | + if bit { |
| 35 | + b.buf[b.i] |= 1 << (b.remain - 1) |
| 36 | + } |
| 37 | + b.remain-- |
| 38 | +} |
| 39 | + |
| 40 | +func (b *bits) writeByte(byt byte) { |
| 41 | + // fast path, previous write are aligned to byte boundary |
| 42 | + if b.remain == 0 { |
| 43 | + b.buf = append(b.buf, byt) |
| 44 | + b.i++ |
| 45 | + return |
| 46 | + } |
| 47 | + |
| 48 | + // e.g. b.remain = 6 |
| 49 | + // [0, 1, 2, 3, 4, 5, 6, 7] |
| 50 | + // [0, 1, 2, 3, 4, 5, 6, 7] [0, 1, 2, 3, 4, 5, 6, 7] |
| 51 | + b.buf[b.i] |= byt >> (8 - b.remain) |
| 52 | + b.buf = append(b.buf, 0) |
| 53 | + b.i++ |
| 54 | + b.buf[b.i] |= byt << b.remain |
| 55 | + // no need to update b.remain, it's the same |
| 56 | +} |
| 57 | + |
| 58 | +func (b *bits) writeBits(u uint64, n uint) { |
| 59 | + u <<= 64 - n |
| 60 | + for n >= 8 { |
| 61 | + byt := byte(u >> 56) |
| 62 | + b.writeByte(byt) |
| 63 | + u <<= 8 |
| 64 | + n -= 8 |
| 65 | + } |
| 66 | + |
| 67 | + for n > 0 { |
| 68 | + b.writeBit((u >> 63) == 1) |
| 69 | + u <<= 1 |
| 70 | + n-- |
| 71 | + } |
| 72 | +} |
| 73 | + |
| 74 | +func TestBits(t *testing.T) { |
| 75 | + t.Run("writeBit", func(t *testing.T) { |
| 76 | + bs := newBits() |
| 77 | + for i := 0; i < 8; i++ { |
| 78 | + bs.writeBit(true) |
| 79 | + } |
| 80 | + assert.Equal(t, bs.remain, uint8(0)) |
| 81 | + assert.Equal(t, bs.buf[0], byte(0b1111_1111)) |
| 82 | + bs.writeByte(8) |
| 83 | + assert.Equal(t, bs.buf[1], byte(8)) |
| 84 | + bs.writeBit(true) |
| 85 | + bs.writeByte(1) |
| 86 | + assert.Equal(t, bs.buf[2], byte(0b1000_0000)) |
| 87 | + assert.Equal(t, bs.buf[3], byte(0b1000_0000)) |
| 88 | + }) |
| 89 | + |
| 90 | + t.Run("writeBits", func(t *testing.T) { |
| 91 | + bs := newBits() |
| 92 | + bs.writeBits(20, 32) |
| 93 | + assert.Equal(t, bs.buf[0], byte(0)) |
| 94 | + assert.Equal(t, bs.buf[1], byte(0)) |
| 95 | + assert.Equal(t, bs.buf[2], byte(0)) |
| 96 | + assert.Equal(t, bs.buf[3], byte(20)) |
| 97 | + assert.Equal(t, len(bs.buf), 4) |
| 98 | + assert.Equal(t, bs.remain, uint8(0)) |
| 99 | + assert.Equal(t, bs.i, 3) |
| 100 | + }) |
| 101 | + |
| 102 | +} |
| 103 | + |
| 104 | +// encoder encodes time stream, i.e. it does not mix value into same stream |
| 105 | +type encoder struct { |
| 106 | + bs bits |
| 107 | + start uint64 |
| 108 | + prevTime uint64 |
| 109 | + delta uint64 |
| 110 | +} |
| 111 | + |
| 112 | +func newEncoder(start uint64) *encoder { |
| 113 | + bs := newBits() |
| 114 | + bs.writeBits(start, 64) |
| 115 | + return &encoder{ |
| 116 | + bs: *bs, |
| 117 | + start: start, |
| 118 | + prevTime: 0, |
| 119 | + } |
| 120 | +} |
| 121 | + |
| 122 | +func (e *encoder) write(tm uint64) { |
| 123 | + // first value since start, write using delta |
| 124 | + if e.prevTime == 0 { |
| 125 | + delta := tm - e.start |
| 126 | + e.prevTime = tm |
| 127 | + e.bs.writeBits(delta, 14) |
| 128 | + e.delta = delta |
| 129 | + return |
| 130 | + } |
| 131 | + |
| 132 | + // TODO: delta is positive if time comes in order, dod can be negative because interval |
| 133 | + // double delta |
| 134 | + delta := tm - e.prevTime |
| 135 | + dod := int64(delta - e.delta) |
| 136 | + e.delta = delta |
| 137 | + switch { |
| 138 | + case dod == 0: |
| 139 | + e.bs.writeBit(false) |
| 140 | + case dod <= 64 && dod >= -63: |
| 141 | + e.bs.writeBits(0b10, 2) |
| 142 | + e.bs.writeBits(uint64(dod), 7) |
| 143 | + case dod <= 256 && dod > -255: |
| 144 | + e.bs.writeBits(0b110, 3) |
| 145 | + e.bs.writeBits(uint64(dod), 9) |
| 146 | + case dod <= 2048 && dod > -2047: |
| 147 | + e.bs.writeBits(0b1110, 4) |
| 148 | + e.bs.writeBits(uint64(dod), 12) |
| 149 | + default: |
| 150 | + e.bs.writeBits(0b1111, 4) |
| 151 | + e.bs.writeBits(uint64(dod), 32) |
| 152 | + } |
| 153 | + e.prevTime = tm |
| 154 | +} |
| 155 | + |
| 156 | +func TestDoubleDelta(t *testing.T) { |
| 157 | + // Figure 2 in paper, start is aligned to 2 hour window |
| 158 | + start := mtime("2015-03-24T02:00:00Z") |
| 159 | + t1 := mtime("2015-03-24T02:01:02Z") |
| 160 | + t2 := mtime("2015-03-24T02:02:02Z") |
| 161 | + t3 := mtime("2015-03-24T02:03:02Z") |
| 162 | + enc := newEncoder(start) |
| 163 | + enc.write(t1) |
| 164 | + enc.write(t2) |
| 165 | + enc.write(t3) |
| 166 | + // first 64 bytes is the header |
| 167 | + var b8 [8]byte |
| 168 | + binary.BigEndian.PutUint64(b8[:], start) |
| 169 | + assert.Equal(t, enc.bs.buf[0], b8[0]) |
| 170 | + assert.Equal(t, enc.bs.buf[7], b8[7]) |
| 171 | + // the next 14 bits is the first time using delta |
| 172 | + // 62 is 111110, first 8 bits is empty, next 6 bits is the value |
| 173 | + assert.Equal(t, byte(0), enc.bs.buf[8]) |
| 174 | + assert.Equal(t, byte(62), enc.bs.buf[9]>>2) |
| 175 | + // the first double delta encoded value, dict is 10, value is -2 |
| 176 | + assert.Equal(t, byte(0b10), enc.bs.buf[9]&0b11) |
| 177 | + // TODO: value is 7 bit ... e, I need a bit reader implementation |
| 178 | + //assert.Equal(t, byte(-2), enc.bs.buf[10] ) |
| 179 | + //assert.Equal(t, byte(t2-t1), enc.bs.buf[9]>>2) |
| 180 | +} |
| 181 | + |
// subu64 returns a-b as a signed value. The subtraction is done on uint64 and
// wraps around when b > a; reinterpreting the wrapped result as int64 yields
// the signed difference.
func subu64(a, b uint64) int64 {
	diff := a - b
	return int64(diff)
}
| 185 | + |
| 186 | +func TestUint64(t *testing.T) { |
| 187 | + // ./gorilla_test.go:164:23: constant -1 overflows uint64 |
| 188 | + //a := int64(uint64(1) - uint64(2)) |
| 189 | + //t.Log(a) |
| 190 | + // TODO: does this unsigned subtraction produce signed integer work in other languages? |
| 191 | + assert.Equal(t, subu64(1, 2), int64(-1)) |
| 192 | + |
| 193 | + // cast is using the same bytes, but |
| 194 | + a := int64(-1) |
| 195 | + b := uint64(a) |
| 196 | + c := int64(a) |
| 197 | + t.Log(a, b, c) // -1 18446744073709551615 -1 |
| 198 | +} |
| 199 | + |
// mtime converts an RFC 3339 timestamp string into seconds since the Unix
// epoch and panics if the string cannot be parsed. A time before the epoch
// has a negative Unix value, which wraps around in the uint64 conversion.
//
// Background (https://github.com/golang/go/issues/9346): the time.RFC3339
// layout is itself not a valid time. A time string cannot contain both a Z
// and a numeric offset, yet the layout has both because the spec allows either
// form of time zone, and the time package must parse both with one layout.
//
// Both of these are valid RFC 3339 times:
//
//	"2015-09-15T14:00:12-00:00"
//	"2015-09-15T14:00:13Z"
func mtime(s string) uint64 {
	var (
		parsed time.Time
		err    error
	)
	if parsed, err = time.Parse(time.RFC3339, s); err != nil {
		panic(err)
	}
	return uint64(parsed.Unix())
}
0 commit comments