|
| 1 | +/* |
| 2 | + * ZAnnotate Copyright 2025 Regents of the University of Michigan |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| 5 | + * use this file except in compliance with the License. You may obtain a copy |
| 6 | + * of the License at http://www.apache.org/licenses/LICENSE-2.0 |
| 7 | + * |
| 8 | + * Unless required by applicable law or agreed to in writing, software |
| 9 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 10 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| 11 | + * implied. See the License for the specific language governing |
| 12 | + * permissions and limitations under the License. |
| 13 | + */ |
| 14 | + |
| 15 | +package zannotate |
| 16 | + |
| 17 | +import ( |
| 18 | + "errors" |
| 19 | + "flag" |
| 20 | + "fmt" |
| 21 | + "net" |
| 22 | + "net/netip" |
| 23 | + "strconv" |
| 24 | + |
| 25 | + "github.com/oschwald/maxminddb-golang/v2" |
| 26 | + log "github.com/sirupsen/logrus" |
| 27 | +) |
| 28 | + |
| 29 | +// This module provides IPInfo.io annotations for IP addresses using a local MaxMind DB file. |
| 30 | + |
| 31 | +// ------------------------------------------------------------------------------------ |
| 32 | +// The MaxMind DB formats were pulled from IPInfo.io's API documentation on 07/29/2025. |
| 33 | +// IPInfo provides data at various tiers of access: Lite, Core, and Plus. |
| 34 | +// Since the MaxMindDB decode is best-effort, we'll just define the Plus format which includes all lower tiers. |
| 35 | +// If a user has a Lite or Core DB file, the fields not present in those tiers will not appear in output. |
| 36 | +// See https://ipinfo.io/products/plus for more info on field definitions. |
| 37 | +// ------------------------------------------------------------------------------------ |
| 38 | + |
// IPInfoMMDBOutput mirrors one record of an IPInfo MaxMind DB file. It declares the Plus-tier layout, which also
// covers the Core and Lite tiers; fields that a database does not provide stay empty. Every field is decoded as a
// string and later converted to a typed IPInfoModuleOutput by ToModuleOutput.
type IPInfoMMDBOutput struct {
	// Geolocation
	City          string `maxminddb:"city"`
	Region        string `maxminddb:"region"`
	RegionCode    string `maxminddb:"region_code"`
	Country       string `maxminddb:"country"`
	CountryCode   string `maxminddb:"country_code"`
	Continent     string `maxminddb:"continent"`
	ContinentCode string `maxminddb:"continent_code"`
	Latitude      string `maxminddb:"latitude"`
	Longitude     string `maxminddb:"longitude"`
	Timezone      string `maxminddb:"timezone"`
	PostalCode    string `maxminddb:"postal_code"`
	DMACode       string `maxminddb:"dma_code"`    // Nielsen Designated Market Area code (if available).
	GeonameID     string `maxminddb:"geoname_id"`  // GeoNames database identifier (if available).
	Radius        string `maxminddb:"radius"`      // Accuracy radius in kilometers (if available).
	GeoChanged    string `maxminddb:"geo_changed"` // Timestamp or flag indicating when the geolocation last changed (if available).

	// Autonomous system
	ASN       string `maxminddb:"asn"`
	ASName    string `maxminddb:"as_name"`
	ASDomain  string `maxminddb:"as_domain"`
	ASType    string `maxminddb:"as_type"`
	ASChanged string `maxminddb:"as_changed"`

	// Mobile carrier
	CarrierName       string `maxminddb:"carrier_name"` // Name of the mobile carrier (if available).
	MobileCountryCode string `maxminddb:"mcc"`
	MobileNetworkCode string `maxminddb:"mnc"`

	// Privacy / network-type flags (boolean values stored as strings)
	PrivacyName string `maxminddb:"privacy_name"` // Specific name of the privacy or anonymization service detected (e.g., "NordVPN").
	IsProxy     string `maxminddb:"is_proxy"`
	IsRelay     string `maxminddb:"is_relay"`     // Boolean flag indicating use of a general relay service
	IsTOR       string `maxminddb:"is_tor"`       // Whether the IP is a known TOR exit node.
	IsVPN       string `maxminddb:"is_vpn"`       // Flag indicating use of a VPN Service
	IsAnonymous string `maxminddb:"is_anonymous"` // True if the IP is associated with VPN, proxy, Tor, or a relay service.
	IsAnycast   string `maxminddb:"is_anycast"`   // Whether the IP is using anycast routing.
	IsHosting   string `maxminddb:"is_hosting"`   // True if the IP address is an internet service hosting IP address
	IsMobile    string `maxminddb:"is_mobile"`    // True if the IP address is associated with a mobile network or carrier.
	IsSatellite string `maxminddb:"is_satellite"` // True if the IP address is associated with a satellite connection
}

// IPInfoModuleOutput is the final output struct with appropriate types for JSON output. Every field carries
// omitempty, so empty strings, zero numbers and nil flags are left out of the encoded record.
type IPInfoModuleOutput struct {
	// Geolocation
	City          string  `json:"city,omitempty"`
	Region        string  `json:"region,omitempty"`
	RegionCode    string  `json:"region_code,omitempty"`
	Country       string  `json:"country,omitempty"`
	CountryCode   string  `json:"country_code,omitempty"`
	Continent     string  `json:"continent,omitempty"`
	ContinentCode string  `json:"continent_code,omitempty"`
	Latitude      float64 `json:"latitude,omitempty"`
	Longitude     float64 `json:"longitude,omitempty"`
	Timezone      string  `json:"timezone,omitempty"`
	PostalCode    string  `json:"postal_code,omitempty"`
	DMACode       string  `json:"dma_code,omitempty"`    // Nielsen Designated Market Area code (if available).
	GeonameID     uint64  `json:"geoname_id,omitempty"`  // GeoNames database identifier (if available).
	Radius        uint64  `json:"radius,omitempty"`      // Accuracy radius in kilometers (if available).
	GeoChanged    string  `json:"geo_changed,omitempty"` // Timestamp or flag indicating when the geolocation last changed (if available).

	// Autonomous system
	ASN       string `json:"asn,omitempty"`
	ASName    string `json:"as_name,omitempty"`
	ASDomain  string `json:"as_domain,omitempty"`
	ASType    string `json:"as_type,omitempty"`
	ASChanged string `json:"as_changed,omitempty"`

	// Mobile carrier
	CarrierName       string `json:"carrier_name,omitempty"` // Name of the mobile carrier (if available).
	MobileCountryCode string `json:"mobile_country_code,omitempty"`
	MobileNetworkCode string `json:"mobile_network_code,omitempty"`

	// Privacy / network-type flags; a nil pointer means the database supplied no parseable value.
	PrivacyName string `json:"privacy_name,omitempty"` // Specific name of the privacy or anonymization service detected (e.g., "NordVPN").
	IsProxy     *bool  `json:"is_proxy,omitempty"`
	IsRelay     *bool  `json:"is_relay,omitempty"`     // Boolean flag indicating use of a general relay service
	IsTOR       *bool  `json:"is_tor,omitempty"`       // Whether the IP is a known TOR exit node.
	IsVPN       *bool  `json:"is_vpn,omitempty"`       // Flag indicating use of a VPN Service
	IsAnonymous *bool  `json:"is_anonymous,omitempty"` // True if the IP is associated with VPN, proxy, Tor, or a relay service.
	IsAnycast   *bool  `json:"is_anycast,omitempty"`   // Whether the IP is using anycast routing.
	IsHosting   *bool  `json:"is_hosting,omitempty"`   // True if the IP address is an internet service hosting IP address
	IsMobile    *bool  `json:"is_mobile,omitempty"`    // True if the IP address is associated with a mobile network or carrier.
	IsSatellite *bool  `json:"is_satellite,omitempty"` // True if the IP address is associated with a satellite connection
}

// ToModuleOutput converts the all-string MMDB record into the typed IPInfoModuleOutput.
//
// String fields (including DMACode) are copied verbatim. Latitude, Longitude, GeonameID and Radius are parsed as
// numbers; a value that is empty, malformed or out of range becomes 0. The Is* flags are parsed with
// strconv.ParseBool; a value that does not parse yields a nil pointer, so "false" and "unknown" stay distinguishable.
func (in *IPInfoMMDBOutput) ToModuleOutput() *IPInfoModuleOutput {
	return &IPInfoModuleOutput{
		City:              in.City,
		Region:            in.Region,
		RegionCode:        in.RegionCode,
		Country:           in.Country,
		CountryCode:       in.CountryCode,
		Continent:         in.Continent,
		ContinentCode:     in.ContinentCode,
		Latitude:          ipInfoFloatOrZero(in.Latitude),
		Longitude:         ipInfoFloatOrZero(in.Longitude),
		Timezone:          in.Timezone,
		PostalCode:        in.PostalCode,
		DMACode:           in.DMACode,
		GeonameID:         ipInfoUintOrZero(in.GeonameID),
		Radius:            ipInfoUintOrZero(in.Radius),
		GeoChanged:        in.GeoChanged,
		ASN:               in.ASN,
		ASName:            in.ASName,
		ASDomain:          in.ASDomain,
		ASType:            in.ASType,
		ASChanged:         in.ASChanged,
		CarrierName:       in.CarrierName,
		MobileCountryCode: in.MobileCountryCode,
		MobileNetworkCode: in.MobileNetworkCode,
		PrivacyName:       in.PrivacyName,
		IsProxy:           ipInfoOptionalBool(in.IsProxy),
		IsRelay:           ipInfoOptionalBool(in.IsRelay),
		IsTOR:             ipInfoOptionalBool(in.IsTOR),
		IsVPN:             ipInfoOptionalBool(in.IsVPN),
		IsAnonymous:       ipInfoOptionalBool(in.IsAnonymous),
		IsAnycast:         ipInfoOptionalBool(in.IsAnycast),
		IsHosting:         ipInfoOptionalBool(in.IsHosting),
		IsMobile:          ipInfoOptionalBool(in.IsMobile),
		IsSatellite:       ipInfoOptionalBool(in.IsSatellite),
	}
}

// ipInfoFloatOrZero parses s as a 64-bit float and returns 0 when parsing reports any error.
func ipInfoFloatOrZero(s string) float64 {
	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		// On a range error ParseFloat returns ±Inf alongside the error; discard it.
		return 0
	}
	return v
}

// ipInfoUintOrZero parses s as a base-10 unsigned 64-bit integer and returns 0 when parsing reports any error.
func ipInfoUintOrZero(s string) uint64 {
	v, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		// On a range error ParseUint returns the maximum value alongside the error; discard it.
		return 0
	}
	return v
}

// ipInfoOptionalBool parses s with strconv.ParseBool and returns a pointer to the result, or nil when s does not
// parse (for example when the field is absent and therefore empty).
func ipInfoOptionalBool(s string) *bool {
	v, err := strconv.ParseBool(s)
	if err != nil {
		return nil
	}
	return &v
}
| 189 | + |
| 190 | +type IPInfoAnnotatorFactory struct { |
| 191 | + BasePluginConf |
| 192 | + DatabaseFilePath string |
| 193 | + db *maxminddb.Reader // MMDB Database Reader is thread-safe |
| 194 | +} |
| 195 | + |
| 196 | +type IPInfoAnnotator struct { |
| 197 | + Factory *IPInfoAnnotatorFactory |
| 198 | + Id int |
| 199 | +} |
| 200 | + |
| 201 | +// IPInfo Annotator Factory (Global) |
| 202 | + |
| 203 | +func (a *IPInfoAnnotatorFactory) MakeAnnotator(i int) Annotator { |
| 204 | + var v IPInfoAnnotator |
| 205 | + v.Factory = a |
| 206 | + v.Id = i |
| 207 | + return &v |
| 208 | +} |
| 209 | + |
| 210 | +func (a *IPInfoAnnotatorFactory) Initialize(conf *GlobalConf) (err error) { |
| 211 | + if len(a.DatabaseFilePath) == 0 { |
| 212 | + return errors.New("ipinfo database file path is required") |
| 213 | + } |
| 214 | + if a.db, err = maxminddb.Open(a.DatabaseFilePath); err != nil { |
| 215 | + return fmt.Errorf("error opening IPInfo database reader: %w", err) |
| 216 | + } |
| 217 | + // verify the MaxMind DB is not corrupted |
| 218 | + if err = a.db.Verify(); err != nil { |
| 219 | + return fmt.Errorf("error occured while trying to validate the MaxMind DB file: %w", err) |
| 220 | + } |
| 221 | + return nil |
| 222 | +} |
| 223 | + |
| 224 | +func (a *IPInfoAnnotatorFactory) GetWorkers() int { |
| 225 | + return a.Threads |
| 226 | +} |
| 227 | + |
| 228 | +func (a *IPInfoAnnotatorFactory) Close() error { |
| 229 | + if err := a.db.Close(); err != nil { |
| 230 | + return fmt.Errorf("error closing IPInfo database reader: %w", err) |
| 231 | + } |
| 232 | + return nil |
| 233 | +} |
| 234 | + |
| 235 | +func (a *IPInfoAnnotatorFactory) IsEnabled() bool { |
| 236 | + return a.Enabled |
| 237 | +} |
| 238 | + |
| 239 | +func (a *IPInfoAnnotatorFactory) AddFlags(flags *flag.FlagSet) { |
| 240 | + flags.BoolVar(&a.Enabled, "ipinfo", false, "annotate with IPInfo.io data using a local MaxMind DB file") |
| 241 | + flags.StringVar(&a.DatabaseFilePath, "ipinfo-database", "", "path to MaxMind DB data file for IPInfo.io annotation") |
| 242 | + // On a quick benchmark of 1M IPs using a local DB file on a M2 Macbook Air, 1 thread vs. 10 threads were about the same speed, annotating about 212k IPs/second. |
| 243 | + flags.IntVar(&a.Threads, "ipinfo-threads", 1, "how many ipinfo annotator threads") |
| 244 | +} |
| 245 | + |
| 246 | +// IPInfo Annotator (Per-Worker) |
| 247 | + |
| 248 | +func (a *IPInfoAnnotator) Initialize() error { |
| 249 | + return nil |
| 250 | +} |
| 251 | + |
| 252 | +func (a *IPInfoAnnotator) GetFieldName() string { |
| 253 | + return "ipinfo" |
| 254 | +} |
| 255 | + |
| 256 | +func (a *IPInfoAnnotator) Annotate(inputIP net.IP) interface{} { |
| 257 | + ip, err := netip.ParseAddr(inputIP.String()) |
| 258 | + if err != nil { |
| 259 | + return nil // not a valid IP address, nothing to be done |
| 260 | + } |
| 261 | + // Decode the IP address using the MaxMind DB reader |
| 262 | + var out *IPInfoMMDBOutput |
| 263 | + if err = a.Factory.db.Lookup(ip).Decode(&out); err != nil { |
| 264 | + log.Debugf("error decoding IP %s in IPInfo database: %v", ip.String(), err) |
| 265 | + } |
| 266 | + if out == nil { |
| 267 | + return nil // no data found for this IP |
| 268 | + } |
| 269 | + return out.ToModuleOutput() // convert from the full-string struct to a typed struct |
| 270 | +} |
| 271 | + |
| 272 | +func (a *IPInfoAnnotator) Close() error { |
| 273 | + return nil |
| 274 | +} |
| 275 | + |
| 276 | +func init() { |
| 277 | + s := new(IPInfoAnnotatorFactory) |
| 278 | + RegisterAnnotator(s) |
| 279 | +} |
0 commit comments