[doc] Add survey for prometheus encode

at15 · at15 · commit 64aa547dedcd · 2020-02-17T22:29:38.000-08:00
- [log] Add survey on primitive types, covered endianess
diff --git a/.gitignore b/.gitignore
@@ -10,10 +10,8 @@
 # Output of the go coverage tool, specifically when used with LiteIDE
 *.out
 
-# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
-.glide/
-
 .idea
 .vscode
 
-vendor
+# C code used in playground
+a.out
diff --git a/doc/compression/README.md b/doc/compression/README.md
@@ -4,5 +4,6 @@ Compression for both time series data and the meta data in index
 
 ## TODO
 
+- [ ] primitive types, how are int64, uint64, float64 stored and what happens when there is a cast/conversion
 - [ ] gorilla
 - [ ] https://roaringbitmap.org/ I think used by both influxdb and m3 (and maybe more)
diff --git a/doc/database/README.md b/doc/database/README.md
@@ -25,6 +25,7 @@ API
 Internal
 
 - **model.md** General data model, what is a time series for this TSDB (yeah, this definition varies).
+- **compression.md** Compression related algorithm or code.
 - **query-execution.md** Query execution and optimization, especially for those with query language and distributed ones.
 - **storage-engine.md** Only applies to TSDB w/ their own storage format, i.e. write opaque blob to local fs or object store.
 - **schema.md** Only applies to TSDB w/ underlying database i.e. Cassandra, ElasticSearch
diff --git a/doc/database/prometheus/compression.md b/doc/database/prometheus/compression.md
@@ -0,0 +1,112 @@
+# Prometheus Compression
+
+Compression for data is in [tsdb/chunkenc](https://github.com/prometheus/prometheus/tree/master/tsdb/chunkenc).
+
+## Encode
+
+There is an `Appender` interface and two implementations; the second one is just a thread-safe wrapper.
+The main logic is in [xorAppender.Append](https://github.com/prometheus/prometheus/blob/0703dae7cc4fcb1e051ab5fec89c47530e78c75a/tsdb/chunkenc/xor.go#L149)
+which contains the double-delta logic for timestamps and deals with float64 values in [xorAppender.writeVDelta](https://github.com/prometheus/prometheus/blob/0703dae7cc4fcb1e051ab5fec89c47530e78c75a/tsdb/chunkenc/xor.go#L206)
+
+```go
+// tsdb/chunkenc/xor.go
+
+type xorAppender struct {
+	b *bstream
+
+	t      int64
+	v      float64
+	tDelta uint64
+
+	leading  uint8
+	trailing uint8
+}
+
+func (a *xorAppender) Append(t int64, v float64) {
+	var tDelta uint64
+	num := binary.BigEndian.Uint16(a.b.bytes())
+
+	if num == 0 {
+		buf := make([]byte, binary.MaxVarintLen64)
+		for _, b := range buf[:binary.PutVarint(buf, t)] {
+			a.b.writeByte(b)
+		}
+		a.b.writeBits(math.Float64bits(v), 64)
+
+	} else if num == 1 {
+		tDelta = uint64(t - a.t)
+
+		buf := make([]byte, binary.MaxVarintLen64)
+		for _, b := range buf[:binary.PutUvarint(buf, tDelta)] {
+			a.b.writeByte(b)
+		}
+
+		a.writeVDelta(v)
+
+	} else {
+		tDelta = uint64(t - a.t)
+		dod := int64(tDelta - a.tDelta)
+
+		// Gorilla has a max resolution of seconds, Prometheus milliseconds.
+		// Thus we use higher value range steps with larger bit size.
+		switch {
+		case dod == 0:
+			a.b.writeBit(zero)
+		case bitRange(dod, 14):
+			a.b.writeBits(0x02, 2) // '10'
+			a.b.writeBits(uint64(dod), 14)
+		case bitRange(dod, 17):
+			a.b.writeBits(0x06, 3) // '110'
+			a.b.writeBits(uint64(dod), 17)
+		case bitRange(dod, 20):
+			a.b.writeBits(0x0e, 4) // '1110'
+			a.b.writeBits(uint64(dod), 20)
+		default:
+			a.b.writeBits(0x0f, 4) // '1111'
+			a.b.writeBits(uint64(dod), 64)
+		}
+
+		a.writeVDelta(v)
+	}
+
+	a.t = t
+	a.v = v
+	binary.BigEndian.PutUint16(a.b.bytes(), num+1)
+	a.tDelta = tDelta
+}
+
+func (a *xorAppender) writeVDelta(v float64) {
+	vDelta := math.Float64bits(v) ^ math.Float64bits(a.v)
+
+	if vDelta == 0 {
+		a.b.writeBit(zero)
+		return
+	}
+	a.b.writeBit(one)
+
+	leading := uint8(bits.LeadingZeros64(vDelta))
+	trailing := uint8(bits.TrailingZeros64(vDelta))
+
+	// Clamp number of leading zeros to avoid overflow when encoding.
+	if leading >= 32 {
+		leading = 31
+	}
+
+	if a.leading != 0xff && leading >= a.leading && trailing >= a.trailing {
+		a.b.writeBit(zero)
+		a.b.writeBits(vDelta>>a.trailing, 64-int(a.leading)-int(a.trailing))
+	} else {
+		a.leading, a.trailing = leading, trailing
+
+		a.b.writeBit(one)
+		a.b.writeBits(uint64(leading), 5)
+
+		// Note that if leading == trailing == 0, then sigbits == 64.  But that value doesn't actually fit into the 6 bits we have.
+		// Luckily, we never need to encode 0 significant bits, since that would put us in the other case (vdelta == 0).
+		// So instead we write out a 0 and adjust it back to 64 on unpacking.
+		sigbits := 64 - leading - trailing
+		a.b.writeBits(uint64(sigbits), 6)
+		a.b.writeBits(vDelta>>trailing, int(sigbits))
+	}
+}
+```
diff --git a/doc/log/2020-02/2020-02-03.md b/doc/log/2020-02/2020-02-03.md
@@ -13,4 +13,4 @@ Should be able to have
 
 ## TODO
 
-- [ ] tsz compression, at least a double delta example
+- [x] tsz compression, at least a double delta example
diff --git a/doc/log/2020-02/2020-02-17.md b/doc/log/2020-02/2020-02-17.md
@@ -0,0 +1,42 @@
+# 2020-02-17
+
+Didn't spend time on libtsdb for about two weeks (due to playing WOT ...).
+The double-delta part for timestamps is finished; most of the time was spent on writing a bit stream writer/reader.
+
+go-tsz would change the underlying byte slice so prometheus modified it because they are using mmap.
+Their iterator still needs to be reset before it can be used again, though.
+
+## TODO
+
+- [ ] survey: how are primitive types stored and what happens during a cast
+  - [ ] big endian, little endian
+    - network order is big endian
+    - [ ] CPU, disk? intel, amd?
+  - [ ] int, unsigned int
+    - the unsigned int to int conversion (why it worked in code)
+    - [ ] `2s complement` as mentioned by Haiyu
+    - [ ] shift behavior on signed and unsigned
+  - [ ] float64
+    - [ ] how is it saved
+    - [ ] what happens when casting from float64 to uint64
+- [ ] prometheus
+  - [ ] what would happen to its xorAppender when appending time out of order
+  - [ ] finalize the stream, it seems it is writing number of samples at the end of stream ...
+
+## Endianness
+
+https://en.wikipedia.org/wiki/Endianness
+
+- e.g. 1024 is `2^10` (binary `100_0000_0000`): big endian stores the two low bytes as `0000_0100` `0000_0000`, little endian as `0000_0000` `0000_0100`
+- network byte order is big endian
+
+> A big-endian ordering places the most significant byte first and the least significant byte last,
+> while a little-endian ordering does the opposite
+
+> big-endianness is the dominant ordering in networking protocols
+> little-endianness is the dominant ordering for processor architectures and their associated memory
+> File formats can use either ordering
+
+> programming languages use big-endian digit ordering for numeric literals 
+> as well as big-endian language (“left” and “right”) for bit-shift operations, 
+> regardless of the endianness of the target architecture
diff --git a/playground/gorilla/gorilla_test.go b/playground/gorilla/gorilla_test.go
@@ -299,7 +299,7 @@ func TestDoubleDelta(t *testing.T) {
 	log.Printf("%v", r.buf)
 	assert.Nil(t, err)
 	log.Print(v)
-	assert.Equal(t, -2, uint2int(v, 7))
+	assert.Equal(t, int64(-2), uint2int(v, 7))
 }
 
 // TODO: why this work ...
diff --git a/playground/primitive/primitive.c b/playground/primitive/primitive.c
@@ -0,0 +1,15 @@
+#include<stdio.h>
+
+// Print the size and value of an int, then dump its bytes in memory order
+// (lowest address first) to reveal the host's endianness.
+int main() {
+    int a = 1024;
+    // sizeof yields a size_t; its matching printf conversion is %zu, not %ld
+    printf("%zu %d\n", sizeof(a), a);
+    // cast directly to see endianness
+    unsigned char* bytes = (unsigned char*) &a;
+    // walk exactly sizeof(a) bytes instead of assuming a 4-byte int
+    for (size_t i = 0; i < sizeof(a); i++) {
+        printf("%u ", bytes[i]);
+    }
+    printf("\n");
+    return 0;
+}
+
+// 4 1024
+// 0 4 0 0
diff --git a/playground/primitive/primitive_test.go b/playground/primitive/primitive_test.go
@@ -0,0 +1,47 @@
+package primitive_test
+
+import (
+	"encoding/binary"
+	"reflect"
+	"testing"
+	"unsafe"
+)
+
+// test primitive types
+
+func TestEndianness(t *testing.T) {
+	v := uint64(1024)
+	var buf [8]byte
+	// Big endian: the implementation simply right-shifts v, most significant byte first.
+	binary.BigEndian.PutUint64(buf[:], v)
+	// Logs [0 0 0 0 0 0 4 0], because the implementation is
+	//  b[0] = byte(v >> 56)
+	//	b[1] = byte(v >> 48)
+	// ...
+	//	b[6] = byte(v >> 8)
+	//	b[7] = byte(v)
+	t.Logf("%v", buf)
+
+	// Little endian: the same right shifts, just stored in the opposite byte order.
+	// _ = b[7] // early bounds check to guarantee safety of writes below
+	//	b[0] = byte(v)
+	//	b[1] = byte(v >> 8)
+	//	b[2] = byte(v >> 16)
+	// ...
+	//	b[7] = byte(v >> 56)
+	binary.LittleEndian.PutUint64(buf[:], v)
+	t.Logf("%v", buf)
+
+	// [0 4 0 0 0 0 0 0] was observed here: viewing the raw memory via unsafe shows little endian on this machine.
+	t.Logf("%v", unsafeInt2Bytes(v))
+}
+
+// https://stackoverflow.com/a/17539687
+func unsafeInt2Bytes(v uint64) []byte {
+	hdr := reflect.SliceHeader{
+		Data: uintptr(unsafe.Pointer(&v)),
+		Len:  8,
+		Cap:  8,
+	}
+	return *(*[]byte)(unsafe.Pointer(&hdr))
+}

Original file line number	Diff line number	Diff line change
`@@ -13,4 +13,4 @@ Should be able to have`
`13`	`13`
`14`	`14`	`## TODO`
`15`	`15`
`16`		`-- [ ] tsz compression, at least a double delta example`
	`16`	`+- [x] tsz compression, at least a double delta example`
Original file line number	Diff line number	Diff line change
`@@ -299,7 +299,7 @@ func TestDoubleDelta(t *testing.T) {`
`299`	`299`	`log.Printf("%v", r.buf)`
`300`	`300`	`assert.Nil(t, err)`
`301`	`301`	`log.Print(v)`
`302`		`- assert.Equal(t, -2, uint2int(v, 7))`
	`302`	`+ assert.Equal(t, int64(-2), uint2int(v, 7))`
`303`	`303`	`}`
`304`	`304`
`305`	`305`	`// TODO: why this work ...`