Skip to content

Commit 28291b7

Browse files
authored
Alpha - entropy checking (#5)
Adds entropy checking feature
1 parent 37cb8aa commit 28291b7

File tree

10 files changed

+275
-6
lines changed

10 files changed

+275
-6
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ _testmain.go
2525
0
2626

2727
pre-commit
28-
main
28+
main
29+
build

CHANGELOG.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
1-
### Unreleased
2-
- Add regex line test for `----BEGIN CERTIFICATE----`
1+
# Changelog
32

4-
### 0.1.0 2018-02-07
5-
- Initial release
3+
## Unreleased
4+
5+
## 0.2.0 2018-03-28
6+
7+
- add regex line test for `----BEGIN CERTIFICATE----`
8+
- add entropy check into core (optional)
9+
- add `DC_ENTROPY_EXPERIMENT` environment option to activate entropy checking
10+
- add support for multi-os cross compilation in Makefile
11+
12+
## 0.1.0 2018-02-07
13+
14+
- initial release

Makefile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
platforms = windows/amd64 darwin/amd64
2+
package = cmd/pre-commit/main.go
3+
binary = build/pre-commit
4+
5+
help:
6+
@ echo "See Makefile for options"
7+
8+
.PHONY: compile
# compile cross-builds $(package) once per GOOS/GOARCH pair in $(platforms),
# writing each binary to $(binary)_<os>-<arch> (with .exe appended on windows).
# The recipe sticks to POSIX sh syntax because make runs recipes with /bin/sh,
# which is not bash on every system, and it stops at the first failed build
# instead of carrying on and reporting only the last platform's status.
compile:
	@ for platform in $(platforms); do \
		GOOS=$${platform%%/*}; \
		GOARCH=$${platform##*/}; \
		output_name="$(binary)_$${GOOS}-$${GOARCH}"; \
		if [ "$$GOOS" = "windows" ]; then \
			output_name="$$output_name.exe"; \
		fi; \
		echo "Build for $$platform -> $$output_name"; \
		env GOOS="$$GOOS" GOARCH="$$GOARCH" go build -o "$$output_name" $(package) || exit 1; \
	done
20+

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,18 @@ If you're VERY SURE these files are ok, rerun commit with --no-verify
8989
**NB** Currently if you update the pre-commit script in your templates, you will
9090
need to manually re-copy it into each repo that uses it.
9191
92+
93+
## Experimental Entropy Checking
94+
95+
By default, the `pre-commit` tool won't use entropy checking on patch strings. If you
96+
wish to enable this functionality, please set the `DC_ENTROPY_EXPERIMENT` environment
97+
variable to `1` (any other value leaves entropy checking disabled).
98+
99+
```shell
100+
$ export DC_ENTROPY_EXPERIMENT=1
101+
```
102+
103+
92104
License
93105
=======
94106

cmd/pre-commit/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,20 @@ var (
2222
)
2323

2424
func main() {
25+
2526
flag.Parse()
2627

2728
if *target == "" {
2829
*target = "."
2930
}
3031
fmt.Printf("Running precommit diff check on '%s'\n", *target)
3132

33+
// Import environmental feature flags
34+
if useEntropyFeature := os.Getenv("DC_ENTROPY_EXPERIMENT"); useEntropyFeature == "1" {
35+
fmt.Println("i) Experimental entropy checking enabled")
36+
diffcheck.UseEntropy = true
37+
}
38+
3239
// Get where we are so we can get back
3340
ex, err := os.Executable()
3441
if err != nil {

diffcheck/diffcheck.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strconv"
1212
"strings"
1313

14+
"github.com/ONSdigital/git-diff-check/entropy"
1415
"github.com/ONSdigital/git-diff-check/rule"
1516
)
1617

@@ -47,6 +48,10 @@ type (
4748
var (
4849
// Matches the first offset in the old and new diff
4950
reOffset = regexp.MustCompile("^@@ -(\\d+).* \\+(\\d+).* @@")
51+
52+
// UseEntropy is a feature flag that, if set true, enables experimental
53+
// string entropy testing
54+
UseEntropy = false
5055
)
5156

5257
const (
@@ -162,12 +167,20 @@ func checkLineBytes(line []byte, position int) (bool, []Warning) {
162167

163168
warnings := []Warning{}
164169

170+
// Normal line rulesets
165171
for _, rule := range rule.Sets["line"] {
166172
if rule.Regex.Match(line) {
167173
warnings = append(warnings, Warning{Type: "line", Description: rule.Caption, Line: position})
168174
}
169175
}
170176

177+
// Entropy check
178+
if UseEntropy {
179+
if ok, _ := entropy.Check(line); !ok {
180+
warnings = append(warnings, Warning{Type: "line", Description: "Possible key in high entropy string", Line: position})
181+
}
182+
}
183+
171184
if len(warnings) > 0 {
172185
return false, warnings
173186
}

diffcheck/diffcheck_test.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ type testCase struct {
1717

1818
func TestSnoopPatch(t *testing.T) {
1919

20+
// Enable the feature flag to assume entropy usage in these tests
21+
diffcheck.UseEntropy = true
22+
2023
for _, tc := range testCases {
2124

2225
t.Logf("Given a patch containing %s", tc.Name)
@@ -36,16 +39,22 @@ func TestSnoopPatch(t *testing.T) {
3639
}
3740

3841
for i, expected := range tc.ExpectedReports {
42+
if i >= len(reports) {
43+
break
44+
}
3945
gotReport := reports[i]
4046

4147
shouldEqual("path", gotReport.Path, expected.Path, t)
4248
shouldEqual("old path", gotReport.OldPath, expected.OldPath, t)
4349

4450
if len(expected.Warnings) != len(gotReport.Warnings) {
45-
t.Errorf("Incorrect number of warnings in report, got %d, expected %d", len(expected.Warnings), len(gotReport.Warnings))
51+
t.Errorf("Incorrect number of warnings in report, got %d, expected %d", len(gotReport.Warnings), len(expected.Warnings))
4652
}
4753

4854
for j, expWarning := range expected.Warnings {
55+
if j >= len(gotReport.Warnings) {
56+
break
57+
}
4958
gotWarning := gotReport.Warnings[j]
5059

5160
shouldEqual("type", gotWarning.Type, expWarning.Type, t)
@@ -126,6 +135,11 @@ index 0000000..e69de29
126135
Line: 6,
127136
Description: "Possible AWS Access Key",
128137
},
138+
{
139+
Type: "line",
140+
Line: 7,
141+
Description: "Possible key in high entropy string",
142+
},
129143
},
130144
},
131145
},
@@ -138,6 +152,7 @@ index e69de29..92251f8 100644
138152
139153
# Shhh
140154
aws=AKIA7362373827372737
155+
secret=ZWVTjPQSdhwRgl204Hc51YCsritMIzn8B=/p9UyeX7xu6KkAGqfm3FJ+oObLDNEva
141156
`),
142157
},
143158
}

entropy/entropy.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Package entropy contains functions for checking the entropy of given data
2+
package entropy
3+
4+
import (
5+
"bytes"
6+
"math"
7+
"strings"
8+
)
9+
10+
// Define entropy thresholds over which a string is considered complex enough
11+
// to be a potential key
12+
const (
13+
Base64Threshold = 4.5
14+
HexThreshold = 3.0
15+
)
16+
17+
const (
18+
consider = 20 // When scanning for strings, only consider >= this value length
19+
)
20+
21+
// CalculateShannon returns the Shannon entropy of data in bits per byte.
// The result is 0 for empty input or when every byte is identical, and grows
// as the byte values become more evenly distributed.
//   - http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html
func CalculateShannon(data []byte) float64 {
	if len(data) == 0 {
		return 0.0
	}

	total := float64(len(data))
	entropy := 0.0
	for x := 0; x < 256; x++ {
		// Build the needle from the raw byte value. Converting the integer
		// with string(x) would UTF-8 encode it, turning every value >= 0x80
		// into a two-byte sequence and miscounting those bytes.
		pX := float64(bytes.Count(data, []byte{byte(x)})) / total
		if pX > 0 {
			entropy -= pX * math.Log2(pX)
		}
	}
	return entropy
}
37+
38+
// Check searches through a given block of data to attempt to identify high
39+
// entropy blocks. Returns true and number of matching strings if found
40+
func Check(b []byte) (bool, int) {
41+
found := [][]byte{}
42+
43+
// Offset where we started reading the data - indexes from
44+
// 1 instead of zero otherwise we'll capture a spurious leading
45+
// byte into the slice
46+
// start := 1
47+
start := -1
48+
49+
// Base64 strings
50+
for i, tok := range b {
51+
if !isBase64Byte(tok) || i+1 == len(b) {
52+
if i-start >= consider {
53+
s := b[start+1 : i]
54+
if e := CalculateShannon(s); e > Base64Threshold {
55+
found = append(found, s)
56+
}
57+
}
58+
start = i
59+
}
60+
}
61+
62+
start = -1
63+
64+
// Hex strings
65+
for i, tok := range b {
66+
if !isHexByte(tok) || i+1 == len(b) {
67+
if i-start >= consider {
68+
s := b[start+1 : i]
69+
if e := CalculateShannon(s); e > HexThreshold {
70+
found = append(found, s)
71+
}
72+
}
73+
start = i
74+
}
75+
}
76+
77+
return len(found) == 0, len(found)
78+
}
79+
80+
// isBase64Byte reports whether b is a character of the standard base64
// alphabet, including the '=' padding character.
func isBase64Byte(b byte) bool {
	const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
	// Search for the raw byte directly instead of converting it to a string
	// first; this is called once per scanned byte.
	return strings.IndexByte(alphabet, b) >= 0
}
83+
84+
// isHexByte reports whether b is a hexadecimal digit in either upper or lower
// case.
func isHexByte(b byte) bool {
	const alphabet = "ABCDEFabcdef0123456789"
	// Search for the raw byte directly instead of converting it to a string
	// first; this is called once per scanned byte.
	return strings.IndexByte(alphabet, b) >= 0
}

entropy/entropy_test.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Package entropy_test is for testing the entropy package
2+
//
3+
// !IMPORTANT! - none of the keys or strings listed in this file are real keys. They
4+
// are generated purely to test this package and MUST NOT be used
5+
// anywhere else as actual credentials
6+
package entropy_test
7+
8+
import (
9+
"fmt"
10+
"testing"
11+
12+
"github.com/ONSdigital/git-diff-check/entropy"
13+
)
14+
15+
var (
16+
highBase64 = [][]byte{
17+
[]byte(`ZWVTjPQSdhwRgl204Hc51YCsritMIzn8B=/p9UyeX7xu6KkAGqfm3FJ+oObLDNEva`),
18+
[]byte(`hSXAQy9D1J0hkCQy0tKBCxnpcOQCPeM54RFXZLJE`),
19+
[]byte(`secret=ZWVTjPQSdhwRgl204Hc51YCsritMIzn8B=/p9UyeX7xu6KkAGqfm3FJ+oObLDNEva`),
20+
[]byte(`aws:ZWVTjPQSdhwRgl204Hc51YCsritMIzn8B=/p9UyeX7xu6KkAGqfm3FJ+oObLDNEva`),
21+
}
22+
highHex = [][]byte{
23+
[]byte(`b3A0a1FDfe86dcCE945B72`),
24+
}
25+
)
26+
27+
func ExampleCalculateShannon() {
28+
password := []byte("verysecret")
29+
if entropy.CalculateShannon(password) < entropy.Base64Threshold {
30+
fmt.Println("Password not complex enough!")
31+
}
32+
}
33+
34+
func TestCalculateEntropy(t *testing.T) {
35+
36+
t.Log("Check base64 data")
37+
for _, b := range highBase64 {
38+
if e := entropy.CalculateShannon(b); e < entropy.Base64Threshold {
39+
t.Errorf("Got entropy %f, expected > %f", e, entropy.Base64Threshold)
40+
}
41+
}
42+
43+
t.Log("Check hex data")
44+
for _, b := range highHex {
45+
if e := entropy.CalculateShannon(b); e < entropy.HexThreshold {
46+
t.Errorf("Got entropy %f, expected > %f", e, entropy.HexThreshold)
47+
}
48+
}
49+
}
50+
51+
func TestCheck(t *testing.T) {
52+
53+
exampleBlock := []byte(`+// CheckPatchLine takes a line from a patch hunk and tests it for naughty patterns
54+
+func CheckPatchLine(line []byte) (bool, []Warning) {
55+
+ warnings := []Warning{}
56+
+ aws := []byte("hSXAQy9D1J0hkCQy0tKBCxnpcOQCPeM54RFXZLJE")
57+
+
58+
+ // Log in with secret: ZWVTjPQSdhwRgl204Hc51YCsritMIzn8B=/p9UyeX7xu6KkAGqfm3FJ+oObLDNEva
59+
+
60+
+ for _, rule := range linePatterns {
61+
+ if found := rule.Regex.FindAll(line, -1); len(found) > 0 {
62+
+ for _, f := range found {
63+
+ // TODO Ignore exclusions
64+
+ if string(f) != "b3A0a1FDfe86dcCE945B72" {
65+
+ warnings = append(warnings, Warning{Type: "line", Line: -1})
66+
+ }`)
67+
68+
ok, n := entropy.Check(exampleBlock)
69+
if ok {
70+
t.Error("Expected 'not ok'")
71+
}
72+
if n != 3 {
73+
t.Errorf("Expected warnings, got %d, expected 3", n)
74+
}
75+
76+
for _, b := range highBase64 {
77+
ok, _ := entropy.Check(b)
78+
if ok {
79+
t.Error("Expected failed base64 entropy check")
80+
}
81+
}
82+
83+
for _, b := range highHex {
84+
ok, _ := entropy.Check(b)
85+
if ok {
86+
t.Error("Expected failed hex entropy check")
87+
}
88+
}
89+
90+
}

rule/rule_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package rule_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/ONSdigital/git-diff-check/rule"
7+
)
8+
9+
func TestInitRules(t *testing.T) {
10+
// The rulesets should be populated into rule.Sets as part of the package
11+
// init() method. We won't test for specific rules but the amount loaded
12+
// should be non-zero
13+
if len(rule.Sets) == 0 {
14+
t.Error("Failed to initialise rulesets")
15+
}
16+
}

0 commit comments

Comments
 (0)