Skip to content

Commit c342518

Browse files
committed
feat: add sortable keys for record linkage
1 parent ee9ce52 commit c342518

File tree

7 files changed

+682
-8
lines changed

7 files changed

+682
-8
lines changed

go.mod

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/adamdecaf/merge v0.1.1
1111
github.com/antchfx/htmlquery v1.3.4
1212
github.com/bbalet/stopwords v1.0.0
13+
github.com/dgryski/go-minhash v0.0.0-20190315135803-ad340ca03076
1314
github.com/dongri/phonenumber v0.1.12
1415
github.com/gorilla/mux v1.8.1
1516
github.com/hashicorp/go-retryablehttp v0.7.8
@@ -19,6 +20,7 @@ require (
1920
github.com/moov-io/iso3166 v0.2.1
2021
github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519
2122
github.com/pariz/gountries v0.1.6
23+
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72
2224
github.com/stretchr/testify v1.10.0
2325
github.com/urfave/cli/v3 v3.3.8
2426
github.com/vmihailenco/msgpack/v5 v5.4.1
@@ -28,6 +30,7 @@ require (
2830
go.uber.org/automaxprocs v1.6.0
2931
golang.org/x/sync v0.15.0
3032
golang.org/x/text v0.26.0
33+
gopkg.in/go-dedup/simhash.v1 v1.0.0-20170701025421-ab6ea107ab65
3134
)
3235

3336
require (
@@ -55,6 +58,8 @@ require (
5558
github.com/cespare/xxhash/v2 v2.3.0 // indirect
5659
github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 // indirect
5760
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
61+
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 // indirect
62+
github.com/dgryski/go-spooky v0.0.0-20170606183049-ed3d087f40e2 // indirect
5863
github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect
5964
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
6065
github.com/felixge/httpsnoop v1.0.4 // indirect
@@ -64,6 +69,9 @@ require (
6469
github.com/fyne-io/glfw-js v0.2.0 // indirect
6570
github.com/fyne-io/image v0.1.1 // indirect
6671
github.com/fyne-io/oksvg v0.1.0 // indirect
72+
github.com/go-dedup/megophone v0.0.0-20170830025436-f01be21026f5 // indirect
73+
github.com/go-dedup/simhash v0.0.0-20170904020510-9ecaca7b509c // indirect
74+
github.com/go-dedup/text v0.0.0-20170907015346-8bb1b95e3cb7 // indirect
6775
github.com/go-gl/gl v0.0.0-20231021071112-07e5d0ea2e71 // indirect
6876
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20240506104042-037f3cc74f2a // indirect
6977
github.com/go-jose/go-jose/v4 v4.0.5 // indirect

go.sum

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,12 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
706706
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
707707
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
708708
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
709+
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 h1:ucRHb6/lvW/+mTEIGbvhcYU3S8+uSNkuMjx/qZFfhtM=
710+
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw=
711+
github.com/dgryski/go-minhash v0.0.0-20190315135803-ad340ca03076 h1:EB7M2v8Svo3kvIDy+P1YDE22XskDQP+TEYGzeDwPAN4=
712+
github.com/dgryski/go-minhash v0.0.0-20190315135803-ad340ca03076/go.mod h1:VBi0XHpFy0xiMySf6YpVbRqrupW4RprJ5QTyN+XvGSM=
713+
github.com/dgryski/go-spooky v0.0.0-20170606183049-ed3d087f40e2 h1:lx1ZQgST/imDhmLpYDma1O3Cx9L+4Ie4E8S2RjFPQ30=
714+
github.com/dgryski/go-spooky v0.0.0-20170606183049-ed3d087f40e2/go.mod h1:hgHYKsoIw7S/hlWtP7wD1wZ7SX1jPTtKko5X9jrOgPQ=
709715
github.com/dhui/dktest v0.4.5 h1:uUfYBIVREmj/Rw6MvgmqNAYzTiKOHJak+enB5Di73MM=
710716
github.com/dhui/dktest v0.4.5/go.mod h1:tmcyeHDKagvlDrz7gDKq4UAJOLIfVZYkfD5OnHDwcCo=
711717
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
@@ -766,6 +772,12 @@ github.com/fyne-io/image v0.1.1/go.mod h1:xrfYBh6yspc+KjkgdZU/ifUC9sPA5Iv7WYUBzQ
766772
github.com/fyne-io/oksvg v0.1.0 h1:7EUKk3HV3Y2E+qypp3nWqMXD7mum0hCw2KEGhI1fnBw=
767773
github.com/fyne-io/oksvg v0.1.0/go.mod h1:dJ9oEkPiWhnTFNCmRgEze+YNprJF7YRbpjgpWS4kzoI=
768774
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
775+
github.com/go-dedup/megophone v0.0.0-20170830025436-f01be21026f5 h1:4U+x+EB1P66zwYgTjxWXSOT8vF+651Ksr1lojiCZnT8=
776+
github.com/go-dedup/megophone v0.0.0-20170830025436-f01be21026f5/go.mod h1:poR/Cp00iqtqu9ltFwl6C00sKC0HY13u/Gh05ZBmP54=
777+
github.com/go-dedup/simhash v0.0.0-20170904020510-9ecaca7b509c h1:mucYYQn+sMGNSxidhleonzAdwL203RxhjJGnxQU4NWU=
778+
github.com/go-dedup/simhash v0.0.0-20170904020510-9ecaca7b509c/go.mod h1:gO3u2bjRAgUaLdQd2XK+3oooxrheOAx1BzS7WmPzw1s=
779+
github.com/go-dedup/text v0.0.0-20170907015346-8bb1b95e3cb7 h1:11wFcswN+37U+ByjxdKzsRY5KzNqqq5Uk5ztxnLOc7w=
780+
github.com/go-dedup/text v0.0.0-20170907015346-8bb1b95e3cb7/go.mod h1:wSsK4VOECOSfSYTzkBFw+iGY7wj59e7X96ABtNj9aCQ=
769781
github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
770782
github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
771783
github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
@@ -789,8 +801,6 @@ github.com/go-latex/latex v0.0.0-20210823091927-c0d11ff05a81/go.mod h1:SX0U8uGpx
789801
github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4=
790802
github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
791803
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
792-
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
793-
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
794804
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
795805
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
796806
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
@@ -1110,6 +1120,7 @@ github.com/sagikazarmark/locafero v0.7.0 h1:5MqpDsTGNDhY8sGp0Aowyf0qKsPrhewaLSsF
11101120
github.com/sagikazarmark/locafero v0.7.0/go.mod h1:2za3Cg5rMaTMoG/2Ulr9AwtFaIppKXTRYnozin4aB5k=
11111121
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
11121122
github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
1123+
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
11131124
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
11141125
github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4=
11151126
github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I=
@@ -1183,8 +1194,6 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.5
11831194
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4=
11841195
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
11851196
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
1186-
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
1187-
go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E=
11881197
go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
11891198
go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
11901199
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 h1:dNzwXjZKpMpE2JhmO+9HsPl42NIXFIFSUSSs0fiqra0=
@@ -1193,16 +1202,12 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 h1:JgtbA
11931202
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0/go.mod h1:179AK5aar5R3eS9FucPy6rggvU0g52cvKId8pv4+v0c=
11941203
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.36.0 h1:G8Xec/SgZQricwWBJF/mHZc7A02YHedfFDENwJEdRA0=
11951204
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.36.0/go.mod h1:PD57idA/AiFD5aqoxGxCvT/ILJPeHy3MjqU/NS7KogY=
1196-
go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE=
1197-
go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs=
11981205
go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
11991206
go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
12001207
go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs=
12011208
go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY=
12021209
go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o=
12031210
go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
1204-
go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w=
1205-
go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA=
12061211
go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
12071212
go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
12081213
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
@@ -1893,6 +1898,8 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8
18931898
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
18941899
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
18951900
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
1901+
gopkg.in/go-dedup/simhash.v1 v1.0.0-20170701025421-ab6ea107ab65 h1:TJ8gu/i0KT5Sc0rsphBYjf3yU5BKZcG5DPcP2syQqQ8=
1902+
gopkg.in/go-dedup/simhash.v1 v1.0.0-20170701025421-ab6ea107ab65/go.mod h1:BHrmqRyqVLqZ7iHTOShMqvNcbExK9Cfyyc+tVN547+4=
18961903
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
18971904
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
18981905
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

internal/linksim/helpers_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
package linksim
2+
3+
import (
4+
"github.com/moov-io/watchman/pkg/search"
5+
)
6+
7+
var (
8+
john = (search.Entity[search.Value]{
9+
Name: "John Smith",
10+
Type: search.EntityPerson,
11+
Source: search.SourceUSOFAC,
12+
Person: &search.Person{
13+
Name: "John Smith",
14+
GovernmentIDs: []search.GovernmentID{
15+
{
16+
Type: search.GovernmentIDPassport,
17+
Country: "US",
18+
Identifier: "1234567890",
19+
},
20+
},
21+
},
22+
Contact: search.ContactInfo{
23+
EmailAddresses: []string{"[email protected]"},
24+
},
25+
Addresses: []search.Address{
26+
{
27+
Line1: "541 First St",
28+
City: "Anytown",
29+
State: "CA",
30+
PostalCode: "90210",
31+
Country: "US",
32+
},
33+
},
34+
}).Normalize()
35+
36+
johnathon = (search.Entity[search.Value]{
37+
Name: "Johnathon Smith",
38+
Type: search.EntityPerson,
39+
Source: search.SourceUSOFAC,
40+
Person: &search.Person{
41+
Name: "Johnathon Smith",
42+
GovernmentIDs: []search.GovernmentID{
43+
{
44+
Type: search.GovernmentIDPassport,
45+
Country: "US",
46+
Identifier: "1234567890",
47+
},
48+
},
49+
},
50+
Contact: search.ContactInfo{
51+
EmailAddresses: []string{"[email protected]"},
52+
},
53+
Addresses: []search.Address{
54+
{
55+
Line1: "541 First St",
56+
Line2: "Apt 301",
57+
City: "Anytown",
58+
State: "CA",
59+
PostalCode: "90210",
60+
Country: "US",
61+
},
62+
},
63+
}).Normalize()
64+
)

0 commit comments

Comments
 (0)