diff --git a/hivemind/dht/dht.py b/hivemind/dht/dht.py index 957c8d3df..24a5d7fa6 100644 --- a/hivemind/dht/dht.py +++ b/hivemind/dht/dht.py @@ -7,12 +7,11 @@ from functools import partial from typing import Awaitable, Callable, Iterable, List, Optional, Sequence, TypeVar, Union -from multiaddr import Multiaddr - from hivemind.dht.node import DEFAULT_NUM_WORKERS, DHTNode from hivemind.dht.routing import DHTKey, DHTValue, Subkey from hivemind.dht.validation import CompositeValidator, RecordValidatorBase from hivemind.p2p import P2P, PeerID +from hivemind.p2p.multiaddr import Multiaddr from hivemind.utils import MPFuture, get_logger, switch_to_uvloop from hivemind.utils.timed_storage import DHTExpiration, ValueWithExpiration diff --git a/hivemind/dht/node.py b/hivemind/dht/node.py index ee56da11a..0f8ec0cc4 100644 --- a/hivemind/dht/node.py +++ b/hivemind/dht/node.py @@ -23,7 +23,6 @@ Union, ) -from multiaddr import Multiaddr from sortedcontainers import SortedSet from hivemind.dht.crypto import DHTRecord, RecordValidatorBase @@ -32,6 +31,7 @@ from hivemind.dht.storage import DictionaryDHTValue from hivemind.dht.traverse import traverse_dht from hivemind.p2p import P2P, PeerID +from hivemind.p2p.multiaddr import Multiaddr from hivemind.utils import MSGPackSerializer, SerializerBase, get_logger from hivemind.utils.auth import AuthorizerBase from hivemind.utils.timed_storage import DHTExpiration, TimedStorage, ValueWithExpiration diff --git a/hivemind/p2p/multiaddr/__init__.py b/hivemind/p2p/multiaddr/__init__.py new file mode 100755 index 000000000..d3eafe3bc --- /dev/null +++ b/hivemind/p2p/multiaddr/__init__.py @@ -0,0 +1,5 @@ +from .multiaddr import Multiaddr # NOQA + +__author__ = "Steven Buss" +__email__ = "steven.buss@gmail.com" +__version__ = "0.0.9" diff --git a/hivemind/p2p/multiaddr/codecs/__init__.py b/hivemind/p2p/multiaddr/codecs/__init__.py new file mode 100644 index 000000000..ce76d6ea2 --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/__init__.py @@ -0,0 +1,21 @@ +import importlib + +# These are special sizes +LENGTH_PREFIXED_VAR_SIZE = -1 + + +class NoneCodec: + SIZE = 0 + IS_PATH = False + + +CODEC_CACHE = {} + + +def codec_by_name(name): + if name is None: # Special “do nothing – expect nothing” pseudo-codec + return NoneCodec + codec = CODEC_CACHE.get(name) + if not codec: + codec = CODEC_CACHE[name] = importlib.import_module(".{0}".format(name), __name__) + return codec diff --git a/hivemind/p2p/multiaddr/codecs/cid.py b/hivemind/p2p/multiaddr/codecs/cid.py new file mode 100644 index 000000000..c9ad682fa --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/cid.py @@ -0,0 +1,128 @@ +import base58 +import cid + +from . import LENGTH_PREFIXED_VAR_SIZE + +SIZE = LENGTH_PREFIXED_VAR_SIZE +IS_PATH = False + + +# Spec: https://github.com/libp2p/specs/blob/master/peer-ids/peer-ids.md#string-representation +CIDv0_PREFIX_TO_LENGTH = { + # base58btc prefixes for valid lengths 1 – 42 with the identity “hash” function + "12": [5, 12, 19, 23, 30, 41, 52, 56], + "13": [9, 16, 34, 45], + "14": [27, 38, 49, 60], + "15": [3, 6, 20], + "16": [3, 6, 13, 20, 31, 42, 53], + "17": [3, 13, 42], + "18": [3], + "19": [3, 24, 57], + "1A": [24, 35, 46], + "1B": [35], + "1D": [17], + "1E": [10, 17], + "1F": [10], + "1G": [10, 28, 50], + "1H": [28, 39], + "1P": [21], + "1Q": [21], + "1R": [21, 54], + "1S": [54], + "1T": [7, 32, 43], + "1U": [7, 32, 43], + "1V": [7], + "1W": [7, 14], + "1X": [7, 14], + "1Y": [7, 14], + "1Z": [7, 14], + "1f": [4], + "1g": [4, 58], + "1h": [4, 25, 58], + "1i": [4, 25], + "1j": [4, 25], + "1k": [4, 25, 47], + "1m": [4, 36, 47], + "1n": [4, 36], + "1o": [4, 36], + "1p": [4], + "1q": [4], + "1r": [4], + "1s": [4], + "1t": [4], + "1u": [4], + "1v": [4], + "1w": [4], + "1x": [4], + "1y": [4], + "1z": [4, 18], + # base58btc prefix for length 42 with the sha256 hash function + "Qm": [46], +} + +PROTO_NAME_TO_CIDv1_CODEC = { + # The “p2p” multiaddr protocol requires all keys to use the “libp2p-key” multicodec + "p2p": "libp2p-key", +} + + +def to_bytes(proto, string): + expected_codec = PROTO_NAME_TO_CIDv1_CODEC.get(proto.name) + + if len(string) in CIDv0_PREFIX_TO_LENGTH.get(string[0:2], ()): # CIDv0 + # Upgrade the wire (binary) representation of any received CIDv0 string + # to CIDv1 if we can determine which multicodec value to use + if expected_codec: + return cid.make_cid(1, expected_codec, base58.b58decode(string)).buffer + + return base58.b58decode(string) + else: # CIDv1+ + parsed = cid.from_string(string) + + # Ensure CID has correct codec for protocol + if expected_codec and parsed.codec != expected_codec: + raise ValueError("“{0}” multiaddr CIDs must use the “{1}” multicodec".format(proto.name, expected_codec)) + + return parsed.buffer + + +def _is_binary_cidv0_multihash(buf): + if buf.startswith(b"\x12\x20") and len(buf) == 34: # SHA2-256 + return True + + if (buf[0] == 0x00 and buf[1] in range(43)) and len(buf) == (buf[1] + 2): # Identity hash + return True + + return False + + +def to_string(proto, buf): + expected_codec = PROTO_NAME_TO_CIDv1_CODEC.get(proto.name) + + if _is_binary_cidv0_multihash(buf): # CIDv0 + if not expected_codec: + # Simply encode as base58btc as there is nothing better to do + return base58.b58encode(buf).decode("ascii") + + # “Implementations SHOULD display peer IDs using the first (raw + # base58btc encoded multihash) format until the second format is + # widely supported.” + # + # In the future the following line should instead convert the multihash + # to CIDv1 and with the `expected_codec` and wrap it in base32: + # return cid.make_cid(1, expected_codec, buf).encode("base32").decode("ascii") + return base58.b58encode(buf).decode("ascii") + else: # CIDv1+ + parsed = cid.from_bytes(buf) + + # Ensure CID has correct codec for protocol + if expected_codec and parsed.codec != expected_codec: + raise ValueError("“{0}” multiaddr CIDs must use the “{1}” multicodec".format(proto.name, expected_codec)) + + # “Implementations SHOULD display peer IDs using the first (raw + # base58btc encoded multihash) format until the second format is + # widely supported.” + if expected_codec and _is_binary_cidv0_multihash(parsed.multihash): + return base58.b58encode(parsed.multihash).decode("ascii") + + return parsed.encode("base32").decode("ascii") diff --git a/hivemind/p2p/multiaddr/codecs/domain.py b/hivemind/p2p/multiaddr/codecs/domain.py new file mode 100644 index 000000000..129ad6888 --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/domain.py @@ -0,0 +1,17 @@ +import idna + +from . import LENGTH_PREFIXED_VAR_SIZE + +SIZE = LENGTH_PREFIXED_VAR_SIZE +IS_PATH = False + + +def to_bytes(proto, string): + return idna.uts46_remap(string).encode("utf-8") + + +def to_string(proto, buf): + string = buf.decode("utf-8") + for label in string.split("."): + idna.check_label(label) + return string diff --git a/hivemind/p2p/multiaddr/codecs/fspath.py b/hivemind/p2p/multiaddr/codecs/fspath.py new file mode 100644 index 000000000..a93f5f6dc --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/fspath.py @@ -0,0 +1,14 @@ +import os + +from . import LENGTH_PREFIXED_VAR_SIZE + +SIZE = LENGTH_PREFIXED_VAR_SIZE +IS_PATH = True + + +def to_bytes(proto, string): + return os.fsencode(string) + + +def to_string(proto, buf): + return os.fsdecode(buf) diff --git a/hivemind/p2p/multiaddr/codecs/ip4.py b/hivemind/p2p/multiaddr/codecs/ip4.py new file mode 100644 index 000000000..bcef47979 --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/ip4.py @@ -0,0 +1,12 @@ +import netaddr + +SIZE = 32 +IS_PATH = False + + +def to_bytes(proto, string): + return netaddr.IPAddress(string, version=4).packed + + +def to_string(proto, buf): + return str(netaddr.IPAddress(int.from_bytes(buf, byteorder="big"), version=4)) diff --git a/hivemind/p2p/multiaddr/codecs/ip6.py b/hivemind/p2p/multiaddr/codecs/ip6.py new file mode 100644 index 000000000..5b8edf2c0 --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/ip6.py @@ -0,0 +1,12 @@ +import netaddr + +SIZE = 128 +IS_PATH = False + + +def to_bytes(proto, string): + return netaddr.IPAddress(string, version=6).packed + + +def to_string(proto, buf): + return str(netaddr.IPAddress(int.from_bytes(buf, byteorder="big"), version=6)) diff --git a/hivemind/p2p/multiaddr/codecs/onion.py b/hivemind/p2p/multiaddr/codecs/onion.py new file mode 100644 index 000000000..77df5c049 --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/onion.py @@ -0,0 +1,36 @@ +import base64 +import struct + +SIZE = 96 +IS_PATH = False + + +def to_bytes(proto, string): + addr = string.split(":") + if len(addr) != 2: + raise ValueError("Does not contain a port number") + + # onion address without the ".onion" substring + if len(addr[0]) != 16: + raise ValueError("Invalid onion host address length (must be 16 characters)") + try: + onion_host_bytes = base64.b32decode(addr[0].upper()) + except Exception as exc: + raise ValueError("Cannot decode {0!r} as base32: {1}".format(addr[0], exc)) from exc + + # onion port number + try: + port = int(addr[1], 10) + except ValueError as exc: + raise ValueError("Port number is not a base 10 integer") from exc + if port not in range(1, 65536): + raise ValueError("Port number is not in range(1, 65536)") + + return b"".join((onion_host_bytes, struct.pack(">H", port))) + + +def to_string(proto, buf): + addr_bytes, port_bytes = (buf[:-2], buf[-2:]) + addr = base64.b32encode(addr_bytes).decode("ascii").lower() + port = str(struct.unpack(">H", port_bytes)[0]) + return ":".join([addr, port]) diff --git a/hivemind/p2p/multiaddr/codecs/onion3.py b/hivemind/p2p/multiaddr/codecs/onion3.py new file mode 100644 index 000000000..c489ead8f --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/onion3.py @@ -0,0 +1,36 @@ +import base64 +import struct + +SIZE = 296 +IS_PATH = False + + +def to_bytes(proto, string): + addr = string.split(":") + if len(addr) != 2: + raise ValueError("Does not contain a port number") + + # onion3 address without the ".onion" substring + if len(addr[0]) != 56: + raise ValueError("Invalid onion3 host address length (must be 56 characters)") + try: + onion3_host_bytes = base64.b32decode(addr[0].upper()) + except Exception as exc: + raise ValueError("Cannot decode {0!r} as base32: {1}".format(addr[0], exc)) from exc + + # onion3 port number + try: + port = int(addr[1], 10) + except ValueError as exc: + raise ValueError("Port number is not a base 10 integer") from exc + if port not in range(1, 65536): + raise ValueError("Port number is not in range(1, 65536)") + + return b"".join((onion3_host_bytes, struct.pack(">H", port))) + + +def to_string(proto, buf): + addr_bytes, port_bytes = (buf[:-2], buf[-2:]) + addr = base64.b32encode(addr_bytes).decode("ascii").lower() + port = str(struct.unpack(">H", port_bytes)[0]) + return ":".join([addr, port]) diff --git a/hivemind/p2p/multiaddr/codecs/uint16be.py b/hivemind/p2p/multiaddr/codecs/uint16be.py new file mode 100644 index 000000000..a4b3edde5 --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/uint16be.py @@ -0,0 +1,19 @@ +import struct + +SIZE = 16 +IS_PATH = False + + +def to_bytes(proto, string): + try: + return struct.pack(">H", int(string, 10)) + except ValueError as exc: + raise ValueError("Not a base 10 integer") from exc + except struct.error as exc: + raise ValueError("Integer not in range(65536)") from exc + + +def to_string(proto, buf): + if len(buf) != 2: + raise ValueError("Invalid integer length (must be 2 bytes / 16 bits)") + return str(struct.unpack(">H", buf)[0]) diff --git a/hivemind/p2p/multiaddr/codecs/utf8.py b/hivemind/p2p/multiaddr/codecs/utf8.py new file mode 100644 index 000000000..6b84077c3 --- /dev/null +++ b/hivemind/p2p/multiaddr/codecs/utf8.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import + +from . import LENGTH_PREFIXED_VAR_SIZE + +SIZE = LENGTH_PREFIXED_VAR_SIZE +IS_PATH = False + + +def to_bytes(proto, string): + if len(string) == 0: + raise ValueError("{0} value must not be empty".format(proto.name)) + return string.encode("utf-8") + + +def to_string(proto, buf): + if len(buf) == 0: + raise ValueError("invalid length (should be > 0)") + return buf.decode("utf-8") diff --git a/hivemind/p2p/multiaddr/exceptions.py b/hivemind/p2p/multiaddr/exceptions.py new file mode 100644 index 000000000..d65c3a402 --- /dev/null +++ b/hivemind/p2p/multiaddr/exceptions.py @@ -0,0 +1,91 @@ +class Error(Exception): + pass + + +class LookupError(LookupError, Error): + pass + + +class ProtocolLookupError(LookupError): + """ + MultiAddr did not contain a protocol with the requested code + """ + + def __init__(self, proto, string): + self.proto = proto + self.string = string + + super().__init__("MultiAddr {0!r} does not contain protocol {1}".format(string, proto)) + + +class ParseError(ValueError, Error): + pass + + +class StringParseError(ParseError): + """ + MultiAddr string representation could not be parsed + """ + + def __init__(self, message, string, protocol=None, original=None): + self.message = message + self.string = string + self.protocol = protocol + self.original = original + + if protocol: + message = "Invalid MultiAddr {0!r} protocol {1}: {2}".format(string, protocol, message) + else: + message = "Invalid MultiAddr {0!r}: {1}".format(string, message) + + super().__init__(message) + + +class BinaryParseError(ParseError): + """ + MultiAddr binary representation could not be parsed + """ + + def __init__(self, message, binary, protocol, original=None): + self.message = message + self.binary = binary + self.protocol = protocol + self.original = original + + message = "Invalid binary MultiAddr protocol {0}: {1}".format(protocol, message) + + super().__init__(message) + + +class ProtocolRegistryError(Error): + pass + + +ProtocolManagerError = ProtocolRegistryError + + +class ProtocolRegistryLocked(Error): + """Protocol registry was locked and doesn't allow any further additions""" + + def __init__(self): + super().__init__("Protocol registry is locked and does not accept any new values") + + +class ProtocolExistsError(ProtocolRegistryError): + """Protocol with the given name or code already exists""" + + def __init__(self, proto, kind="name"): + self.proto = proto + self.kind = kind + + super().__init__("Protocol with {0} {1!r} already exists".format(kind, getattr(proto, kind))) + + +class ProtocolNotFoundError(ProtocolRegistryError): + """No protocol with the given name or code found""" + + def __init__(self, value, kind="name"): + self.value = value + self.kind = kind + + super().__init__("No protocol with {0} {1!r} found".format(kind, value)) diff --git a/hivemind/p2p/multiaddr/multiaddr.py b/hivemind/p2p/multiaddr/multiaddr.py new file mode 100644 index 000000000..0f9a0f491 --- /dev/null +++ b/hivemind/p2p/multiaddr/multiaddr.py @@ -0,0 +1,234 @@ +import collections.abc + +import varint + +from . import exceptions, protocols +from .transforms import bytes_iter, bytes_to_string, string_to_bytes + +__all__ = ("Multiaddr",) + + +class MultiAddrKeys(collections.abc.KeysView, collections.abc.Sequence): + def __contains__(self, proto): + proto = self._mapping.registry.find(proto) + return collections.abc.Sequence.__contains__(self, proto) + + def __getitem__(self, idx): + if idx < 0: + idx = len(self) + idx + for idx2, proto in enumerate(self): + if idx2 == idx: + return proto + raise IndexError("Protocol list index out of range") + + __hash__ = collections.abc.KeysView._hash + + def __iter__(self): + for _, proto, _, _ in bytes_iter(self._mapping.to_bytes()): + yield proto + + +class MultiAddrItems(collections.abc.ItemsView, collections.abc.Sequence): + def __contains__(self, item): + proto, value = item + proto = self._mapping.registry.find(proto) + return collections.abc.Sequence.__contains__(self, (proto, value)) + + def __getitem__(self, idx): + if idx < 0: + idx = len(self) + idx + for idx2, item in enumerate(self): + if idx2 == idx: + return item + raise IndexError("Protocol item list index out of range") + + def __iter__(self): + for _, proto, codec, part in bytes_iter(self._mapping.to_bytes()): + if codec.SIZE != 0: + try: + # If we have an address, return it + yield proto, codec.to_string(proto, part) + except Exception as exc: + raise exceptions.BinaryParseError( + str(exc), + self._mapping.to_bytes(), + proto.name, + exc, + ) from exc + else: + # We were given something like '/utp', which doesn't have + # an address, so return None + yield proto, None + + +class MultiAddrValues(collections.abc.ValuesView, collections.abc.Sequence): + __contains__ = collections.abc.Sequence.__contains__ + + def __getitem__(self, idx): + if idx < 0: + idx = len(self) + idx + for idx2, proto in enumerate(self): + if idx2 == idx: + return proto + raise IndexError("Protocol value list index out of range") + + def __iter__(self): + for _, value in MultiAddrItems(self._mapping): + yield value + + +class Multiaddr(collections.abc.Mapping): + """Multiaddr is a representation of multiple nested internet addresses. + + Multiaddr is a cross-protocol, cross-platform format for representing + internet addresses. It emphasizes explicitness and self-description. + + Learn more here: https://multiformats.io/multiaddr/ + + Multiaddrs have both a binary and string representation. + + >>> from multiaddr import Multiaddr + >>> addr = Multiaddr("/ip4/1.2.3.4/tcp/80") + + Multiaddr objects are immutable, so `encapsulate` and `decapsulate` + return new objects rather than modify internal state. + """ + + __slots__ = ("_bytes", "registry") + + def __init__(self, addr, *, registry=protocols.REGISTRY): + """Instantiate a new Multiaddr. + + Args: + addr : A string-encoded or a byte-encoded Multiaddr + + """ + self.registry = registry + if isinstance(addr, str): + self._bytes = string_to_bytes(addr) + elif isinstance(addr, bytes): + self._bytes = addr + elif isinstance(addr, Multiaddr): + self._bytes = addr.to_bytes() + else: + raise TypeError("MultiAddr must be bytes, str or another MultiAddr instance") + + @classmethod + def join(cls, *addrs): + """Concatenate the values of the given MultiAddr strings or objects, + encapsulating each successive MultiAddr value with the previous ones.""" + return cls(b"".join(map(lambda a: cls(a).to_bytes(), addrs))) + + def __eq__(self, other): + """Checks if two Multiaddr objects are exactly equal.""" + return self._bytes == other._bytes + + def __str__(self): + """Return the string representation of this Multiaddr. + + May raise a :class:`~multiaddr.exceptions.BinaryParseError` if the + stored MultiAddr binary representation is invalid.""" + return bytes_to_string(self._bytes) + + def __contains__(self, proto): + return proto in MultiAddrKeys(self) + + def __iter__(self): + return iter(MultiAddrKeys(self)) + + def __len__(self): + return sum(1 for _ in bytes_iter(self.to_bytes())) + + def __repr__(self): + return "" % str(self) + + def __hash__(self): + return self._bytes.__hash__() + + def to_bytes(self): + """Returns the byte array representation of this Multiaddr.""" + return self._bytes + + __bytes__ = to_bytes + + def protocols(self): + """Returns a list of Protocols this Multiaddr includes.""" + return MultiAddrKeys(self) + + def split(self, maxsplit=-1): + """Returns the list of individual path components this MultiAddr is made + up of.""" + final_split_offset = -1 + results = [] + for idx, (offset, proto, codec, part_value) in enumerate(bytes_iter(self._bytes)): + # Split at most `maxplit` times + if idx == maxsplit: + final_split_offset = offset + break + + # Re-assemble binary MultiAddr representation + part_size = varint.encode(len(part_value)) if codec.SIZE < 0 else b"" + part = b"".join((proto.vcode, part_size, part_value)) + + # Add MultiAddr with the given value + results.append(self.__class__(part)) + # Add final item with remainder of MultiAddr if there is anything left + if final_split_offset >= 0: + results.append(self.__class__(self._bytes[final_split_offset:])) + + return results + + keys = protocols + + def items(self): + return MultiAddrItems(self) + + def values(self): + return MultiAddrValues(self) + + def encapsulate(self, other): + """Wrap this Multiaddr around another. + + For example: + /ip4/1.2.3.4 encapsulate /tcp/80 = /ip4/1.2.3.4/tcp/80 + """ + return self.join(self, other) + + def decapsulate(self, other): + """Remove a Multiaddr wrapping. + + For example: + /ip4/1.2.3.4/tcp/80 decapsulate /ip4/1.2.3.4 = /tcp/80 + """ + s1 = self.to_bytes() + s2 = Multiaddr(other).to_bytes() + try: + idx = s1.rindex(s2) + except ValueError: + # if multiaddr not contained, returns a copy + return Multiaddr(self) + return Multiaddr(s1[:idx]) + + def value_for_protocol(self, proto): + """Return the value (if any) following the specified protocol + + Returns + ------- + Union[object, NoneType] + The parsed protocol value for the given protocol code or ``None`` + if the given protocol does not require any value + + Raises + ------ + ~multiaddr.exceptions.BinaryParseError + The stored MultiAddr binary representation is invalid + ~multiaddr.exceptions.ProtocolLookupError + MultiAddr does not contain any instance of this protocol + """ + proto = self.registry.find(proto) + for proto2, value in self.items(): + if proto2 is proto or proto2 == proto: + return value + raise exceptions.ProtocolLookupError(proto, str(self)) + + __getitem__ = value_for_protocol diff --git a/hivemind/p2p/multiaddr/protocols.py b/hivemind/p2p/multiaddr/protocols.py new file mode 100644 index 000000000..57321860d --- /dev/null +++ b/hivemind/p2p/multiaddr/protocols.py @@ -0,0 +1,305 @@ +import varint + +from . import exceptions +from .codecs import codec_by_name + +__all__ = ("Protocol", "PROTOCOLS", "REGISTRY") + + +# source of protocols https://github.com/multiformats/multicodec/blob/master/table.csv#L382 +# replicating table here to: +# 1. avoid parsing the csv +# 2. ensuring errors in the csv don't screw up code. +# 3. changing a number has to happen in two places. +P_IP4 = 0x04 +P_IP6 = 0x29 +P_IP6ZONE = 0x2A +P_TCP = 0x06 +P_UDP = 0x0111 +P_DCCP = 0x21 +P_SCTP = 0x84 +P_UDT = 0x012D +P_UTP = 0x012E +P_P2P = 0x01A5 +P_HTTP = 0x01E0 +P_HTTPS = 0x01BB +P_TLS = 0x01C0 +P_QUIC = 0x01CC +P_QUIC1 = 0x01CD +P_WS = 0x01DD +P_WSS = 0x01DE +P_ONION = 0x01BC +P_ONION3 = 0x01BD +P_P2P_CIRCUIT = 0x0122 +P_DNS = 0x35 +P_DNS4 = 0x36 +P_DNS6 = 0x37 +P_DNSADDR = 0x38 +P_P2P_WEBSOCKET_STAR = 0x01DF +P_P2P_WEBRTC_STAR = 0x0113 +P_P2P_WEBRTC_DIRECT = 0x0114 +P_UNIX = 0x0190 + + +class Protocol: + __slots__ = [ + "code", # int + "name", # string + "codec", # string + ] + + def __init__(self, code, name, codec): + if not isinstance(code, int): + raise TypeError("code must be an integer") + if not isinstance(name, str): + raise TypeError("name must be a string") + if not isinstance(codec, str) and codec is not None: + raise TypeError("codec must be a string or None") + + self.code = code + self.name = name + self.codec = codec + + @property + def size(self): + return codec_by_name(self.codec).SIZE + + @property + def path(self): + return codec_by_name(self.codec).IS_PATH + + @property + def vcode(self): + return varint.encode(self.code) + + def __eq__(self, other): + if not isinstance(other, Protocol): + return NotImplemented + + return all( + (self.code == other.code, self.name == other.name, self.codec == other.codec, self.path == other.path) + ) + + def __hash__(self): + return self.code + + def __repr__(self): + return "Protocol(code={code!r}, name={name!r}, codec={codec!r})".format( + code=self.code, + name=self.name, + codec=self.codec, + ) + + +# List of multiaddr protocols supported by this module by default +PROTOCOLS = [ + Protocol(P_IP4, "ip4", "ip4"), + Protocol(P_TCP, "tcp", "uint16be"), + Protocol(P_UDP, "udp", "uint16be"), + Protocol(P_DCCP, "dccp", "uint16be"), + Protocol(P_IP6, "ip6", "ip6"), + Protocol(P_IP6ZONE, "ip6zone", "utf8"), + Protocol(P_DNS, "dns", "domain"), + Protocol(P_DNS4, "dns4", "domain"), + Protocol(P_DNS6, "dns6", "domain"), + Protocol(P_DNSADDR, "dnsaddr", "domain"), + Protocol(P_SCTP, "sctp", "uint16be"), + Protocol(P_UDT, "udt", None), + Protocol(P_UTP, "utp", None), + Protocol(P_P2P, "p2p", "cid"), + Protocol(P_ONION, "onion", "onion"), + Protocol(P_ONION3, "onion3", "onion3"), + Protocol(P_QUIC, "quic", None), + Protocol(P_QUIC1, "quic-v1", None), + Protocol(P_HTTP, "http", None), + Protocol(P_HTTPS, "https", None), + Protocol(P_TLS, "tls", None), + Protocol(P_WS, "ws", None), + Protocol(P_WSS, "wss", None), + Protocol(P_P2P_WEBSOCKET_STAR, "p2p-websocket-star", None), + Protocol(P_P2P_WEBRTC_STAR, "p2p-webrtc-star", None), + Protocol(P_P2P_WEBRTC_DIRECT, "p2p-webrtc-direct", None), + Protocol(P_P2P_CIRCUIT, "p2p-circuit", None), + Protocol(P_UNIX, "unix", "fspath"), +] + + +class ProtocolRegistry: + """A collection of individual Multiaddr protocols indexed for fast lookup""" + + __slots__ = ("_codes_to_protocols", "_locked", "_names_to_protocols") + + def __init__(self, protocols=()): + self._locked = False + self._codes_to_protocols = {proto.code: proto for proto in protocols} + self._names_to_protocols = {proto.name: proto for proto in protocols} + + def add(self, proto): + """Add the given protocol description to this registry + + Raises + ------ + ~multiaddr.exceptions.ProtocolRegistryLocked + Protocol registry is locked and does not accept any new entries. + + You can use `.copy(unlock=True)` to copy an existing locked registry + and unlock it. + ~multiaddr.exceptions.ProtocolExistsError + A protocol with the given name or code already exists. + """ + if self._locked: + raise exceptions.ProtocolRegistryLocked() + + if proto.name in self._names_to_protocols: + raise exceptions.ProtocolExistsError(proto, "name") + + if proto.code in self._codes_to_protocols: + raise exceptions.ProtocolExistsError(proto, "code") + + self._names_to_protocols[proto.name] = proto + self._codes_to_protocols[proto.code] = proto + return proto + + def add_alias_name(self, proto, alias_name): + """Add an alternate name for an existing protocol description to the registry + + Raises + ------ + ~multiaddr.exceptions.ProtocolRegistryLocked + Protocol registry is locked and does not accept any new entries. + + You can use `.copy(unlock=True)` to copy an existing locked registry + and unlock it. + ~multiaddr.exceptions.ProtocolExistsError + A protocol with the given name already exists. + ~multiaddr.exceptions.ProtocolNotFoundError + No protocol matching *proto* could be found. + """ + if self._locked: + raise exceptions.ProtocolRegistryLocked() + + proto = self.find(proto) + assert ( + self._names_to_protocols.get(proto.name) is proto + ), "Protocol to alias must have already been added to the registry" + + if alias_name in self._names_to_protocols: + raise exceptions.ProtocolExistsError(self._names_to_protocols[alias_name], "name") + + self._names_to_protocols[alias_name] = proto + + def add_alias_code(self, proto, alias_code): + """Add an alternate code for an existing protocol description to the registry + + Raises + ------ + ~multiaddr.exceptions.ProtocolRegistryLocked + Protocol registry is locked and does not accept any new entries. + + You can use `.copy(unlock=True)` to copy an existing locked registry + and unlock it. + ~multiaddr.exceptions.ProtocolExistsError + A protocol with the given code already exists. + ~multiaddr.exceptions.ProtocolNotFoundError + No protocol matching *proto* could be found. + """ + if self._locked: + raise exceptions.ProtocolRegistryLocked() + + proto = self.find(proto) + assert ( + self._codes_to_protocols.get(proto.code) is proto + ), "Protocol to alias must have already been added to the registry" + + if alias_code in self._codes_to_protocols: + raise exceptions.ProtocolExistsError(self._codes_to_protocols[alias_code], "name") + + self._codes_to_protocols[alias_code] = proto + + def lock(self): + """Lock this registry instance to deny any further changes""" + self._locked = True + + @property + def locked(self): + return self._locked + + def copy(self, *, unlock=False): + """Create a copy of this protocol registry + + Arguments + --------- + unlock + Create the copied registry unlocked even if the current one is locked? + """ + registry = ProtocolRegistry() + registry._locked = self._locked and not unlock + registry._codes_to_protocols = self._codes_to_protocols.copy() + registry._names_to_protocols = self._names_to_protocols.copy() + return registry + + __copy__ = copy + + def find_by_name(self, name): + """Look up a protocol by its human-readable name + + Raises + ------ + ~multiaddr.exceptions.ProtocolNotFoundError + """ + if name not in self._names_to_protocols: + raise exceptions.ProtocolNotFoundError(name, "name") + return self._names_to_protocols[name] + + def find_by_code(self, code): + """Look up a protocol by its binary representation code + + Raises + ------ + ~multiaddr.exceptions.ProtocolNotFoundError + """ + if code not in self._codes_to_protocols: + raise exceptions.ProtocolNotFoundError(code, "code") + return self._codes_to_protocols[code] + + def find(self, proto): + """Look up a protocol by its name or code, return existing protocol objects unchanged + + Raises + ------ + ~multiaddr.exceptions.ProtocolNotFoundError + """ + if isinstance(proto, Protocol): + return proto + elif isinstance(proto, str): + return self.find_by_name(proto) + elif isinstance(proto, int): + return self.find_by_code(proto) + else: + raise TypeError("Protocol object, name or code expected, got {0!r}".format(proto)) + + +REGISTRY = ProtocolRegistry(PROTOCOLS) +REGISTRY.add_alias_name("p2p", "ipfs") +REGISTRY.lock() + + +def protocol_with_name(name): + return REGISTRY.find_by_name(name) + + +def protocol_with_code(code): + return REGISTRY.find_by_code(code) + + +def protocol_with_any(proto): + return REGISTRY.find(proto) + + +def protocols_with_string(string): + """Return a list of protocols matching given string.""" + ret = [] + for name in string.split("/"): + if len(name) > 0: + ret.append(protocol_with_name(name)) + return ret diff --git a/hivemind/p2p/multiaddr/transforms.py b/hivemind/p2p/multiaddr/transforms.py new file mode 100644 index 000000000..dd95bdf6a --- /dev/null +++ b/hivemind/p2p/multiaddr/transforms.py @@ -0,0 +1,93 @@ +import io + +import varint + +from . import exceptions +from .codecs import LENGTH_PREFIXED_VAR_SIZE, codec_by_name +from .protocols import protocol_with_code, protocol_with_name + + +def string_to_bytes(string): + bs = [] + for proto, codec, value in string_iter(string): + bs.append(varint.encode(proto.code)) + if value is not None: + try: + buf = codec.to_bytes(proto, value) + except Exception as exc: + raise exceptions.StringParseError(str(exc), string, proto.name, exc) from exc + if codec.SIZE == LENGTH_PREFIXED_VAR_SIZE: + bs.append(varint.encode(len(buf))) + bs.append(buf) + return b"".join(bs) + + +def bytes_to_string(buf): + st = [""] # start with empty string so we get a leading slash on join() + for _, proto, codec, part in bytes_iter(buf): + st.append(proto.name) + if codec.SIZE != 0: + try: + value = codec.to_string(proto, part) + except Exception as exc: + raise exceptions.BinaryParseError(str(exc), buf, proto.name, exc) from exc + if codec.IS_PATH and value[0] == "/": + st.append(value[1:]) + else: + st.append(value) + return "/".join(st) + + +def size_for_addr(codec, buf_io): + if codec.SIZE >= 0: + return codec.SIZE // 8 + else: + return varint.decode_stream(buf_io) + + +def string_iter(string): + if not string.startswith("/"): + raise exceptions.StringParseError("Must begin with /", string) + # consume trailing slashes + string = string.rstrip("/") + sp = string.split("/") + + # skip the first element, since it starts with / + sp.pop(0) + while sp: + element = sp.pop(0) + try: + proto = protocol_with_name(element) + codec = codec_by_name(proto.codec) + except (ImportError, exceptions.ProtocolNotFoundError) as exc: + raise exceptions.StringParseError("Unknown Protocol", string, element) from exc + value = None + if codec.SIZE != 0: + if len(sp) < 1: + raise exceptions.StringParseError("Protocol requires address", string, proto.name) + if codec.IS_PATH: + value = "/" + "/".join(sp) + sp.clear() + else: + value = sp.pop(0) + yield proto, codec, value + + +def bytes_iter(buf): + buf_io = io.BytesIO(buf) + while buf_io.tell() < len(buf): + offset = buf_io.tell() + code = varint.decode_stream(buf_io) + proto = None + try: + proto = protocol_with_code(code) + codec = codec_by_name(proto.codec) + except (ImportError, exceptions.ProtocolNotFoundError) as exc: + raise exceptions.BinaryParseError( + "Unknown Protocol", + buf, + proto.name if proto else code, + ) from exc + + size = size_for_addr(codec, buf_io) + yield offset, proto, codec, buf_io.read(size) diff --git a/hivemind/p2p/p2p_daemon.py b/hivemind/p2p/p2p_daemon.py index 4acea4d72..66a87ae09 100644 --- a/hivemind/p2p/p2p_daemon.py +++ b/hivemind/p2p/p2p_daemon.py @@ -12,10 +12,10 @@ from typing import Any, AsyncIterator, Awaitable, Callable, List, Optional, Sequence, Tuple, Type, TypeVar, Union from google.protobuf.message import Message -from multiaddr import Multiaddr import hivemind.hivemind_cli as cli import hivemind.p2p.p2p_daemon_bindings.p2pclient as p2pclient +from hivemind.p2p.multiaddr import Multiaddr from hivemind.p2p.p2p_daemon_bindings.control import DEFAULT_MAX_MSG_SIZE, P2PDaemonError, P2PHandlerError from hivemind.p2p.p2p_daemon_bindings.datastructures import PeerID, PeerInfo, StreamInfo from hivemind.p2p.p2p_daemon_bindings.utils import ControlFailure diff --git a/hivemind/p2p/p2p_daemon_bindings/control.py b/hivemind/p2p/p2p_daemon_bindings/control.py index a8de5d74e..fa5239d72 100644 --- a/hivemind/p2p/p2p_daemon_bindings/control.py +++ b/hivemind/p2p/p2p_daemon_bindings/control.py @@ -9,8 +9,7 @@ from typing import AsyncIterator, Awaitable, Callable, Dict, Iterable, Optional, Sequence, Tuple from uuid import UUID, uuid4 -from multiaddr import Multiaddr, protocols - +from hivemind.p2p.multiaddr import Multiaddr, protocols from hivemind.p2p.p2p_daemon_bindings.datastructures import PeerID, PeerInfo, StreamInfo from hivemind.p2p.p2p_daemon_bindings.utils import ( DispatchFailure, diff --git a/hivemind/p2p/p2p_daemon_bindings/datastructures.py b/hivemind/p2p/p2p_daemon_bindings/datastructures.py index 03b2c7b2b..b0be1bb38 100644 --- a/hivemind/p2p/p2p_daemon_bindings/datastructures.py +++ b/hivemind/p2p/p2p_daemon_bindings/datastructures.py @@ -12,6 +12,7 @@ from cryptography.hazmat.primitives import serialization from multiaddr import Multiaddr +from hivemind.p2p.multiaddr import Multiaddr, protocols from hivemind.proto import crypto_pb2, p2pd_pb2 diff --git a/hivemind/p2p/p2p_daemon_bindings/p2pclient.py b/hivemind/p2p/p2p_daemon_bindings/p2pclient.py index 7fee00477..677101be9 100644 --- a/hivemind/p2p/p2p_daemon_bindings/p2pclient.py +++ b/hivemind/p2p/p2p_daemon_bindings/p2pclient.py @@ -8,8 +8,7 @@ from contextlib import asynccontextmanager from typing import AsyncIterator, Iterable, Sequence, Tuple -from multiaddr import Multiaddr - +from hivemind.p2p.multiaddr import Multiaddr from hivemind.p2p.p2p_daemon_bindings.control import ( DEFAULT_MAX_MSG_SIZE, ControlClient, diff --git a/hivemind/utils/networking.py b/hivemind/utils/networking.py index dc9a60440..a348404d9 100644 --- a/hivemind/utils/networking.py +++ b/hivemind/utils/networking.py @@ -1,8 +1,7 @@ from ipaddress import ip_address from typing import List, Sequence -from multiaddr import Multiaddr - +from hivemind.p2p.multiaddr import Multiaddr from hivemind.utils.logging import TextStyle, get_logger LOCALHOST = "127.0.0.1" diff --git a/requirements.txt b/requirements.txt index f32fc94c8..951eb31da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ msgpack>=0.5.6 sortedcontainers uvloop>=0.14.0 grpcio-tools>=1.33.2 -protobuf>=3.12.2,<5.28.0 +protobuf>=3.12.2,<5.29.0 configargparse>=1.2.3 py-multihash>=0.2.3 multiaddr @ git+https://github.com/multiformats/py-multiaddr.git@e01dbd38f2c0464c0f78b556691d655265018cce diff --git a/tests/test_dht.py b/tests/test_dht.py index 63dee8983..c980adc53 100644 --- a/tests/test_dht.py +++ b/tests/test_dht.py @@ -4,9 +4,9 @@ import time import pytest -from multiaddr import Multiaddr import hivemind +from hivemind.p2p.multiaddr import Multiaddr from test_utils.dht_swarms import launch_dht_instances from test_utils.networking import get_free_port diff --git a/tests/test_dht_protocol.py b/tests/test_dht_protocol.py index 908f8f3e5..d8567013e 100644 --- a/tests/test_dht_protocol.py +++ b/tests/test_dht_protocol.py @@ -5,13 +5,13 @@ from typing import List, Sequence, Tuple import pytest -from multiaddr import Multiaddr import hivemind from hivemind import P2P, PeerID, get_dht_time, get_logger from hivemind.dht import DHTID from hivemind.dht.protocol import DHTProtocol from hivemind.dht.storage import DictionaryDHTValue +from hivemind.p2p.multiaddr import Multiaddr logger = get_logger(__name__) diff --git a/tests/test_p2p_daemon.py b/tests/test_p2p_daemon.py index 55ff36af5..eaa0b469d 100644 --- a/tests/test_p2p_daemon.py +++ b/tests/test_p2p_daemon.py @@ -9,9 +9,9 @@ import numpy as np import pytest -from multiaddr import Multiaddr from hivemind.p2p import P2P, P2PDaemonError, P2PHandlerError +from hivemind.p2p.multiaddr import Multiaddr from hivemind.proto import dht_pb2, test_pb2 from hivemind.utils.serializer import MSGPackSerializer diff --git a/tests/test_p2p_daemon_bindings.py b/tests/test_p2p_daemon_bindings.py index d9160e173..e19e97722 100644 --- a/tests/test_p2p_daemon_bindings.py +++ b/tests/test_p2p_daemon_bindings.py @@ -4,8 +4,8 @@ import pytest from google.protobuf.message import EncodeError -from multiaddr import Multiaddr, protocols +from hivemind.p2p.multiaddr import Multiaddr, protocols from hivemind.p2p.p2p_daemon_bindings.control import ControlClient, DaemonConnector, parse_conn_protocol from hivemind.p2p.p2p_daemon_bindings.datastructures import PeerID, PeerInfo, StreamInfo from hivemind.p2p.p2p_daemon_bindings.utils import ( diff --git a/tests/test_utils/dht_swarms.py b/tests/test_utils/dht_swarms.py index 732c5b294..2538de4d6 100644 --- a/tests/test_utils/dht_swarms.py +++ b/tests/test_utils/dht_swarms.py @@ -5,11 +5,10 @@ import threading from typing import Dict, List, Tuple -from multiaddr import Multiaddr - from hivemind.dht import DHT from hivemind.dht.node import DHTID, DHTNode from hivemind.p2p import PeerID +from hivemind.p2p.multiaddr import Multiaddr def run_node(initial_peers: List[Multiaddr], info_queue: mp.Queue, **kwargs): diff --git a/tests/test_utils/p2p_daemon.py b/tests/test_utils/p2p_daemon.py index bc889bd30..84d16edb4 100644 --- a/tests/test_utils/p2p_daemon.py +++ b/tests/test_utils/p2p_daemon.py @@ -7,9 +7,9 @@ from contextlib import asynccontextmanager, suppress from typing import NamedTuple -from multiaddr import Multiaddr, protocols from pkg_resources import resource_filename +from hivemind.p2p.multiaddr import Multiaddr, protocols from hivemind.p2p.p2p_daemon_bindings.p2pclient import Client from test_utils.networking import get_free_port