Skip to content

Commit 15e258d

Browse files
authored
Merge pull request #39 from zmap/phillip/31-ipinfo-enrichment
IPInfo.io Annotation
2 parents 20a3b03 + b8d7c9b commit 15e258d

File tree

10 files changed

+497
-1
lines changed

10 files changed

+497
-1
lines changed
67.7 KB
Loading

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ with network metadata. Right now this includes:
66

77
* Maxmind GeoIP2
88
* AS/Routing Data (based on an MRT routing table)
9+
* IPInfo.io ASN and Geolocation data
910

1011
For example, you can add Maxmind geolocation data to a list of IPs:
1112

@@ -111,3 +112,27 @@ echo '{"ip": "1.1.1.1"}' | ./zannotate --rdns --geoasn --geoasn-database=/path-
111112
```json
112113
{"ip":"1.1.1.1","zannotate":{"geoasn":{"asn":13335,"org":"CLOUDFLARENET"},"rdns":{"domain_names":["one.one.one.one"]}}}
113114
```
115+
116+
# Acquiring Datasets
117+
118+
> [!NOTE]
119+
> URLs and instructions may change over time. These are up-to-date as of September 2025.
120+
The sections below explain how to obtain datasets from each supported provider.
121+
122+
### IPInfo.io
123+
IPInfo.io provides a free dataset that includes ASN and geolocation data, scoped to the country level. Paid tiers provide
124+
more granular geolocation data.
125+
126+
1. Sign up for a free account at [IPInfo.io](https://ipinfo.io/signup).
127+
2. Navigate to the [Data Download page](https://ipinfo.io/dashboard/downloads)
128+
3. Download the `mmdb` file
129+
![IPInfo Download Page](.github/readme-images/ipinfoio-data-downloads-screenshot.png)
130+
4. Example CLI usage
131+
132+
```shell
133+
echo "1.1.1.1" | ./zannotate --ipinfo --ipinfo-database=./path-to-ipinfo-db.mmdb
134+
```
135+
136+
```json
137+
{"ip":"1.1.1.1","ipinfo":{"country":"Australia","country_code":"AU","continent":"Oceania","continent_code":"OC","asn":"AS13335","as_name":"Cloudflare, Inc.","as_domain":"cloudflare.com"}}
138+
```

data-snapshots/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Data Snapshots
2+
These database files are not intended to be used for anything other than internal testing.
3+
They are not updated regularly and may contain outdated or incomplete data.
3.8 KB
Binary file not shown.

data-snapshots/ipinfo_lite.mmdb

42.2 MB
Binary file not shown.
7.23 KB
Binary file not shown.

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ go 1.24.0
55
require (
66
github.com/json-iterator/go v1.1.12
77
github.com/oschwald/geoip2-golang v1.13.0
8+
github.com/oschwald/maxminddb-golang/v2 v2.0.0-beta.10
89
github.com/osrg/gobgp/v3 v3.37.0
910
github.com/sirupsen/logrus v1.9.3
11+
github.com/zmap/dns v1.1.67
1012
github.com/zmap/go-iptree v0.0.0-20251001212402-0a55a77d6804
1113
github.com/zmap/zdns/v2 v2.0.5
1214
gotest.tools/v3 v3.5.2
@@ -29,7 +31,6 @@ require (
2931
github.com/prometheus/common v0.63.0 // indirect
3032
github.com/prometheus/procfs v0.16.0 // indirect
3133
github.com/weppos/publicsuffix-go v0.40.3-0.20250311103038-7794c8c0723b // indirect
32-
github.com/zmap/dns v1.1.67 // indirect
3334
github.com/zmap/go-dns-root-anchors v0.0.0-20250415191259-6d65fb878756 // indirect
3435
github.com/zmap/zcrypto v0.0.0-20250416162916-8ff8dfaa718d // indirect
3536
github.com/zmap/zflags v1.4.0-beta.1.0.20200204220219-9d95409821b6 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ github.com/oschwald/geoip2-golang v1.13.0 h1:Q44/Ldc703pasJeP5V9+aFSZFmBN7DKHbNs
3737
github.com/oschwald/geoip2-golang v1.13.0/go.mod h1:P9zG+54KPEFOliZ29i7SeYZ/GM6tfEL+rgSn03hYuUo=
3838
github.com/oschwald/maxminddb-golang v1.13.1 h1:G3wwjdN9JmIK2o/ermkHM+98oX5fS+k5MbwsmL4MRQE=
3939
github.com/oschwald/maxminddb-golang v1.13.1/go.mod h1:K4pgV9N/GcK694KSTmVSDTODk4IsCNThNdTmnaBZ/F8=
40+
github.com/oschwald/maxminddb-golang/v2 v2.0.0-beta.10 h1:d9tiCD1ueYjGStkagZmLYMbItMnJPpmn27jBctlyRg8=
41+
github.com/oschwald/maxminddb-golang/v2 v2.0.0-beta.10/go.mod h1:EkyB0XWibbE1/+tXyR+ZehlGg66bRtMzxQSPotYH2EA=
4042
github.com/osrg/gobgp/v3 v3.37.0 h1:+ObuOdvj7G7nxrT0fKFta+EAupdWf/q1WzbXydr8IOY=
4143
github.com/osrg/gobgp/v3 v3.37.0/go.mod h1:kVHVFy1/fyZHJ8P32+ctvPeJogn9qKwa1YCeMRXXrP0=
4244
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=

ipinfo.go

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
/*
2+
* ZAnnotate Copyright 2025 Regents of the University of Michigan
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
5+
* use this file except in compliance with the License. You may obtain a copy
6+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
11+
* implied. See the License for the specific language governing
12+
* permissions and limitations under the License.
13+
*/
14+
15+
package zannotate
16+
17+
import (
18+
"errors"
19+
"flag"
20+
"fmt"
21+
"net"
22+
"net/netip"
23+
"strconv"
24+
25+
"github.com/oschwald/maxminddb-golang/v2"
26+
log "github.com/sirupsen/logrus"
27+
)
28+
29+
// This module provides IPInfo.io annotations for IP addresses using a local MaxMind DB file.
30+
31+
// ------------------------------------------------------------------------------------
32+
// The MaxMind DB formats were pulled from IPInfo.io's API documentation on 07/29/2025.
33+
// IPInfo provides data at various tiers of access: Lite, Core, and Plus.
34+
// Since the MaxMindDB decode is best-effort, we'll just define the Plus format which includes all lower tiers.
35+
// If a user has a Lite or Core DB file, the fields not present in those tiers will not appear in output.
36+
// See https://ipinfo.io/products/plus for more info on field definitions.
37+
// ------------------------------------------------------------------------------------
38+
39+
// IPInfoMMDBOutput is the raw record decoded from an IPInfo MMDB file. It
// lists the Plus-tier fields (a superset of the Core and Lite tiers), each
// bound to its column name through a maxminddb tag. Every value is decoded as a
// string; ToModuleOutput converts the record into IPInfoModuleOutput, which
// carries typed fields for JSON output.
type IPInfoMMDBOutput struct {
	// Geolocation fields.
	City          string `maxminddb:"city"`
	Region        string `maxminddb:"region"`
	RegionCode    string `maxminddb:"region_code"`
	Country       string `maxminddb:"country"`
	CountryCode   string `maxminddb:"country_code"`
	Continent     string `maxminddb:"continent"`
	ContinentCode string `maxminddb:"continent_code"`
	Latitude      string `maxminddb:"latitude"`
	Longitude     string `maxminddb:"longitude"`
	Timezone      string `maxminddb:"timezone"`
	PostalCode    string `maxminddb:"postal_code"`
	DMACode       string `maxminddb:"dma_code"`    // Nielsen Designated Market Area code (if available).
	GeonameID     string `maxminddb:"geoname_id"`  // GeoNames database identifier (if available).
	Radius        string `maxminddb:"radius"`      // Accuracy radius in kilometers (if available).
	GeoChanged    string `maxminddb:"geo_changed"` // Timestamp or flag indicating when the geolocation last changed (if available).

	// Autonomous system fields.
	ASN       string `maxminddb:"asn"`
	ASName    string `maxminddb:"as_name"`
	ASDomain  string `maxminddb:"as_domain"`
	ASType    string `maxminddb:"as_type"`
	ASChanged string `maxminddb:"as_changed"`

	// Mobile carrier fields.
	CarrierName       string `maxminddb:"carrier_name"` // Name of the mobile carrier (if available).
	MobileCountryCode string `maxminddb:"mcc"`
	MobileNetworkCode string `maxminddb:"mnc"`

	// Privacy and network-type fields.
	PrivacyName string `maxminddb:"privacy_name"` // Specific name of the privacy or anonymization service detected (e.g., “NordVPN”).
	IsProxy     string `maxminddb:"is_proxy"`
	IsRelay     string `maxminddb:"is_relay"`     // Boolean flag indicating use of a general relay service.
	IsTOR       string `maxminddb:"is_tor"`       // Whether the IP is a known TOR exit node.
	IsVPN       string `maxminddb:"is_vpn"`       // Flag indicating use of a VPN service.
	IsAnonymous string `maxminddb:"is_anonymous"` // True if the IP is associated with VPN, proxy, Tor, or a relay service.
	IsAnycast   string `maxminddb:"is_anycast"`   // Whether the IP is using anycast routing.
	IsHosting   string `maxminddb:"is_hosting"`   // True if the IP address is an internet service hosting IP address.
	IsMobile    string `maxminddb:"is_mobile"`    // True if the IP address is associated with a mobile network or carrier.
	IsSatellite string `maxminddb:"is_satellite"` // True if the IP address is associated with a satellite connection.
}
76+
77+
// IPInfoModuleOutput is the typed form of an IPInfo record that is serialized
// to JSON. Every field is tagged omitempty, so values that are empty, zero, or
// nil are left out of the output; the boolean flags are pointers so that an
// explicit false can still be emitted.
type IPInfoModuleOutput struct {
	// Geolocation fields.
	City          string  `json:"city,omitempty"`
	Region        string  `json:"region,omitempty"`
	RegionCode    string  `json:"region_code,omitempty"`
	Country       string  `json:"country,omitempty"`
	CountryCode   string  `json:"country_code,omitempty"`
	Continent     string  `json:"continent,omitempty"`
	ContinentCode string  `json:"continent_code,omitempty"`
	Latitude      float64 `json:"latitude,omitempty"`
	Longitude     float64 `json:"longitude,omitempty"`
	Timezone      string  `json:"timezone,omitempty"`
	PostalCode    string  `json:"postal_code,omitempty"`
	DMACode       string  `json:"dma_code,omitempty"`    // Nielsen Designated Market Area code (if available).
	GeonameID     uint64  `json:"geoname_id,omitempty"`  // GeoNames database identifier (if available).
	Radius        uint64  `json:"radius,omitempty"`      // Accuracy radius in kilometers (if available).
	GeoChanged    string  `json:"geo_changed,omitempty"` // Timestamp or flag indicating when the geolocation last changed (if available).

	// Autonomous system fields.
	ASN       string `json:"asn,omitempty"`
	ASName    string `json:"as_name,omitempty"`
	ASDomain  string `json:"as_domain,omitempty"`
	ASType    string `json:"as_type,omitempty"`
	ASChanged string `json:"as_changed,omitempty"`

	// Mobile carrier fields.
	CarrierName       string `json:"carrier_name,omitempty"` // Name of the mobile carrier (if available).
	MobileCountryCode string `json:"mobile_country_code,omitempty"`
	MobileNetworkCode string `json:"mobile_network_code,omitempty"`

	// Privacy and network-type fields.
	PrivacyName string `json:"privacy_name,omitempty"` // Specific name of the privacy or anonymization service detected (e.g., “NordVPN”).
	IsProxy     *bool  `json:"is_proxy,omitempty"`
	IsRelay     *bool  `json:"is_relay,omitempty"`     // Boolean flag indicating use of a general relay service.
	IsTOR       *bool  `json:"is_tor,omitempty"`       // Whether the IP is a known TOR exit node.
	IsVPN       *bool  `json:"is_vpn,omitempty"`       // Flag indicating use of a VPN service.
	IsAnonymous *bool  `json:"is_anonymous,omitempty"` // True if the IP is associated with VPN, proxy, Tor, or a relay service.
	IsAnycast   *bool  `json:"is_anycast,omitempty"`   // Whether the IP is using anycast routing.
	IsHosting   *bool  `json:"is_hosting,omitempty"`   // True if the IP address is an internet service hosting IP address.
	IsMobile    *bool  `json:"is_mobile,omitempty"`    // True if the IP address is associated with a mobile network or carrier.
	IsSatellite *bool  `json:"is_satellite,omitempty"` // True if the IP address is associated with a satellite connection.
}
113+
114+
func (in *IPInfoMMDBOutput) ToModuleOutput() *IPInfoModuleOutput {
115+
out := &IPInfoModuleOutput{
116+
City: in.City,
117+
Region: in.Region,
118+
RegionCode: in.RegionCode,
119+
Country: in.Country,
120+
CountryCode: in.CountryCode,
121+
Continent: in.Continent,
122+
ContinentCode: in.ContinentCode,
123+
Timezone: in.Timezone,
124+
PostalCode: in.PostalCode,
125+
GeoChanged: in.GeoChanged,
126+
ASN: in.ASN,
127+
ASName: in.ASName,
128+
ASDomain: in.ASDomain,
129+
ASType: in.ASType,
130+
ASChanged: in.ASChanged,
131+
CarrierName: in.CarrierName,
132+
MobileCountryCode: in.MobileCountryCode,
133+
MobileNetworkCode: in.MobileNetworkCode,
134+
PrivacyName: in.PrivacyName,
135+
}
136+
// Convert string fields to appropriate types
137+
var err error
138+
if out.Latitude, err = strconv.ParseFloat(in.Latitude, 64); err != nil {
139+
out.Latitude = 0
140+
}
141+
if out.Longitude, err = strconv.ParseFloat(in.Longitude, 64); err != nil {
142+
out.Longitude = 0
143+
}
144+
if out.GeonameID, err = strconv.ParseUint(in.GeonameID, 10, 64); err != nil {
145+
out.GeonameID = 0
146+
}
147+
if out.Radius, err = strconv.ParseUint(in.Radius, 10, 64); err != nil {
148+
out.Radius = 0
149+
}
150+
var temp bool
151+
if temp, err = strconv.ParseBool(in.IsProxy); err == nil {
152+
t := temp // avoid taking address of a short-lived variable
153+
out.IsProxy = &t
154+
}
155+
if temp, err = strconv.ParseBool(in.IsRelay); err == nil {
156+
t := temp // avoid taking address of a short-lived variable
157+
out.IsRelay = &t
158+
}
159+
if temp, err = strconv.ParseBool(in.IsTOR); err == nil {
160+
t := temp // avoid taking address of a short-lived variable
161+
out.IsTOR = &t
162+
}
163+
if temp, err = strconv.ParseBool(in.IsVPN); err == nil {
164+
t := temp // avoid taking address of a short-lived variable
165+
out.IsVPN = &t
166+
}
167+
if temp, err = strconv.ParseBool(in.IsAnonymous); err == nil {
168+
t := temp // avoid taking address of a short-lived variable
169+
out.IsAnonymous = &t
170+
}
171+
if temp, err = strconv.ParseBool(in.IsAnycast); err == nil {
172+
t := temp // avoid taking address of a short-lived variable
173+
out.IsAnycast = &t
174+
}
175+
if temp, err = strconv.ParseBool(in.IsHosting); err == nil {
176+
t := temp // avoid taking address of a short-lived variable
177+
out.IsHosting = &t
178+
}
179+
if temp, err = strconv.ParseBool(in.IsMobile); err == nil {
180+
t := temp // avoid taking address of a short-lived variable
181+
out.IsMobile = &t
182+
}
183+
if temp, err = strconv.ParseBool(in.IsSatellite); err == nil {
184+
t := temp // avoid taking address of a short-lived variable
185+
out.IsSatellite = &t
186+
}
187+
return out
188+
}
189+
190+
type IPInfoAnnotatorFactory struct {
191+
BasePluginConf
192+
DatabaseFilePath string
193+
db *maxminddb.Reader // MMDB Database Reader is thread-safe
194+
}
195+
196+
type IPInfoAnnotator struct {
197+
Factory *IPInfoAnnotatorFactory
198+
Id int
199+
}
200+
201+
// IPInfo Annotator Factory (Global)
202+
203+
func (a *IPInfoAnnotatorFactory) MakeAnnotator(i int) Annotator {
204+
var v IPInfoAnnotator
205+
v.Factory = a
206+
v.Id = i
207+
return &v
208+
}
209+
210+
func (a *IPInfoAnnotatorFactory) Initialize(conf *GlobalConf) (err error) {
211+
if len(a.DatabaseFilePath) == 0 {
212+
return errors.New("ipinfo database file path is required")
213+
}
214+
if a.db, err = maxminddb.Open(a.DatabaseFilePath); err != nil {
215+
return fmt.Errorf("error opening IPInfo database reader: %w", err)
216+
}
217+
// verify the MaxMind DB is not corrupted
218+
if err = a.db.Verify(); err != nil {
219+
return fmt.Errorf("error occured while trying to validate the MaxMind DB file: %w", err)
220+
}
221+
return nil
222+
}
223+
224+
func (a *IPInfoAnnotatorFactory) GetWorkers() int {
225+
return a.Threads
226+
}
227+
228+
func (a *IPInfoAnnotatorFactory) Close() error {
229+
if err := a.db.Close(); err != nil {
230+
return fmt.Errorf("error closing IPInfo database reader: %w", err)
231+
}
232+
return nil
233+
}
234+
235+
func (a *IPInfoAnnotatorFactory) IsEnabled() bool {
236+
return a.Enabled
237+
}
238+
239+
func (a *IPInfoAnnotatorFactory) AddFlags(flags *flag.FlagSet) {
240+
flags.BoolVar(&a.Enabled, "ipinfo", false, "annotate with IPInfo.io data using a local MaxMind DB file")
241+
flags.StringVar(&a.DatabaseFilePath, "ipinfo-database", "", "path to MaxMind DB data file for IPInfo.io annotation")
242+
// On a quick benchmark of 1M IPs using a local DB file on a M2 Macbook Air, 1 thread vs. 10 threads were about the same speed, annotating about 212k IPs/second.
243+
flags.IntVar(&a.Threads, "ipinfo-threads", 1, "how many ipinfo annotator threads")
244+
}
245+
246+
// IPInfo Annotator (Per-Worker)
247+
248+
func (a *IPInfoAnnotator) Initialize() error {
249+
return nil
250+
}
251+
252+
func (a *IPInfoAnnotator) GetFieldName() string {
253+
return "ipinfo"
254+
}
255+
256+
func (a *IPInfoAnnotator) Annotate(inputIP net.IP) interface{} {
257+
ip, err := netip.ParseAddr(inputIP.String())
258+
if err != nil {
259+
return nil // not a valid IP address, nothing to be done
260+
}
261+
// Decode the IP address using the MaxMind DB reader
262+
var out *IPInfoMMDBOutput
263+
if err = a.Factory.db.Lookup(ip).Decode(&out); err != nil {
264+
log.Debugf("error decoding IP %s in IPInfo database: %v", ip.String(), err)
265+
}
266+
if out == nil {
267+
return nil // no data found for this IP
268+
}
269+
return out.ToModuleOutput() // convert from the full-string struct to a typed struct
270+
}
271+
272+
func (a *IPInfoAnnotator) Close() error {
273+
return nil
274+
}
275+
276+
func init() {
277+
s := new(IPInfoAnnotatorFactory)
278+
RegisterAnnotator(s)
279+
}

0 commit comments

Comments
 (0)