Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions multiaddr/codecs/http_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import re
from typing import Any
from urllib.parse import quote, unquote

from ..codecs import CodecBase
from ..exceptions import BinaryParseError, StringParseError

# Module-level defaults that the Codec class below re-exports as class attributes.
# NOTE(review): IS_PATH appears to mark codecs whose string value is itself a
# "/"-prefixed path; http-path values are percent-encoded instead - confirm
# against CodecBase.
IS_PATH = False
# A negative size marks a variable-length value that is stored with a length prefix.
SIZE = -1  # LengthPrefixedVarSize


class Codec(CodecBase):
    """Codec for the value of the ``http-path`` protocol.

    The string form is a percent-encoded path; the binary form is the raw,
    unescaped bytes of that path. Empty values are rejected in both
    directions.
    """

    SIZE = SIZE
    IS_PATH = IS_PATH

    # A "%" that is not followed by exactly two hex digits is a malformed
    # escape. This covers "%zz", "%f", and a bare or trailing "%".
    _INVALID_ESCAPE = re.compile(r"%(?![0-9A-Fa-f]{2})")

    def to_bytes(self, proto: Any, string: str) -> bytes:
        """
        Convert a percent-encoded HTTP path string to its raw bytes.

        Every ``%XX`` escape is replaced by the byte it names. The bytes are
        kept exactly as written, so escapes that do not form valid UTF-8
        (for example ``%FF``) are preserved instead of being replaced by
        U+FFFD. Unescaped non-ASCII characters are encoded as UTF-8.

        :param proto: protocol descriptor (unused by this codec)
        :param string: percent-encoded path value
        :return: unescaped path bytes (never empty)
        :raises StringParseError: on a malformed percent-escape, a string
            that cannot be encoded as UTF-8, or an empty path
        """
        # Imported locally so this block does not depend on the module's
        # top-level import list.
        from urllib.parse import unquote_to_bytes

        if self._INVALID_ESCAPE.search(string):
            raise StringParseError("Invalid percent-escape in path", string)

        try:
            unescaped = unquote_to_bytes(string)
        except UnicodeEncodeError as err:
            # Only reachable when the text holds lone surrogates, which
            # cannot be encoded as UTF-8.
            raise StringParseError("Invalid HTTP path string", string) from err

        if not unescaped:
            raise StringParseError("empty http path is not allowed", string)

        return unescaped

    def to_string(self, proto: Any, buf: bytes) -> str:
        """
        Convert raw HTTP path bytes to a percent-encoded string.

        Every byte outside the unreserved set (including ``/``) is escaped.
        The input is quoted byte-wise, so buffers that are not valid UTF-8
        are still representable and round-trip through ``to_bytes``.

        :param proto: protocol descriptor (unused by this codec)
        :param buf: raw path bytes
        :return: percent-encoded path value
        :raises BinaryParseError: if ``buf`` is empty
        """
        if len(buf) == 0:
            raise BinaryParseError("Empty http path is not allowed", buf, "http-path")

        # quote() only accepts bytes/bytearray for binary input; normalise
        # other buffer types (e.g. memoryview) first.
        return quote(bytes(buf), safe="")

    def validate(self, b: bytes) -> None:
        """
        Validate an HTTP path buffer.

        The only constraint is that the buffer is non-empty.

        :param b: raw path bytes
        :raises ValueError: if ``b`` is empty
        """
        if len(b) == 0:
            raise ValueError("Empty http path is not allowed")
14 changes: 7 additions & 7 deletions multiaddr/multiaddr.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,12 +354,12 @@ def _from_string(self, addr: str) -> None:
continue

# Special handling for unix paths
if part == "unix":
if part in ("unix",):
try:
# Get the next part as the path value
unix_path_value = next(parts)
if not unix_path_value:
raise exceptions.StringParseError("empty unix path", addr)
protocol_path_value = next(parts)
if not protocol_path_value:
raise exceptions.StringParseError("empty protocol path", addr)

# Join any remaining parts as part of the path
remaining_parts = []
Expand All @@ -373,16 +373,16 @@ def _from_string(self, addr: str) -> None:
break

if remaining_parts:
unix_path_value = unix_path_value + "/" + "/".join(remaining_parts)
protocol_path_value = protocol_path_value + "/" + "/".join(remaining_parts)

proto = protocol_with_name("unix")
proto = protocol_with_name(part)
codec = codec_by_name(proto.codec)
if not codec:
raise exceptions.StringParseError(f"unknown codec: {proto.codec}", addr)

try:
self._bytes += varint.encode(proto.code)
buf = codec.to_bytes(proto, unix_path_value)
buf = codec.to_bytes(proto, protocol_path_value)
# Add length prefix for variable-sized or zero-sized codecs
if codec.SIZE <= 0:
self._bytes += varint.encode(len(buf))
Expand Down
1 change: 1 addition & 0 deletions multiaddr/protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def __repr__(self) -> str:
Protocol(P_QUIC1, "quic-v1", None),
Protocol(P_HTTP, "http", None),
Protocol(P_HTTPS, "https", None),
Protocol(P_HTTP_PATH, "http-path", "http_path"),
Protocol(P_TLS, "tls", None),
Protocol(P_WS, "ws", None),
Protocol(P_WSS, "wss", None),
Expand Down
5 changes: 3 additions & 2 deletions multiaddr/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ def string_to_bytes(string: str) -> bytes:
logger.debug(f"[DEBUG string_to_bytes] Encoded protocol code: {encoded_code}")
bs.append(encoded_code)

# Special case: protocols with codec=None are flag protocols
# Special case: protocols with codec=None or SIZE=0 are flag protocols
# (no value, no length prefix, no buffer)
if codec is None:
if codec is None or getattr(codec, "SIZE", None) == 0:
logger.debug(
f"[DEBUG string_to_bytes] Protocol {proto.name} has no data, "
"skipping value encoding"
Expand Down Expand Up @@ -93,6 +93,7 @@ def bytes_to_string(buf: bytes) -> str:
value = codec.to_string(proto, bs.read(size))
logger.debug(f"[DEBUG] bytes_to_string: proto={proto.name}, value='{value}'")
if codec.IS_PATH and value.startswith("/"):
# For path protocols, the codec already handles URL encoding
strings.append("/" + proto.name + value) # type: ignore[arg-type]
else:
strings.append("/" + proto.name + "/" + value) # type: ignore[arg-type]
Expand Down
1 change: 1 addition & 0 deletions newsfragments/94.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added support for the http-path protocol, following the go-multiaddr reference implementation.
156 changes: 156 additions & 0 deletions tests/test_multiaddr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from multiaddr.multiaddr import Multiaddr
from multiaddr.protocols import (
P_DNS,
P_HTTP_PATH,
P_IP4,
P_IP6,
P_P2P,
Expand Down Expand Up @@ -825,3 +826,158 @@ def test_memory_protocol_properties():
assert proto.code == 777
assert proto.name == "memory"
assert proto.codec == "memory"


def test_http_path_multiaddr_roundtrip():
    """An http-path multiaddr string must survive parse -> str unchanged."""
    prefix = "/http-path/"
    samples = (
        "/http-path/foo",
        # forward slashes inside the path are percent-encoded
        "/http-path/foo%2Fbar",
        "/http-path/api%2Fv1%2Fusers",
    )

    for text in samples:
        parsed = Multiaddr(text)
        assert str(parsed) == text
        # The protocol value is the string with the protocol prefix removed
        extracted = parsed.value_for_protocol(P_HTTP_PATH)
        assert extracted == text.replace(prefix, "")


def test_http_path_url_encoding():
    """Already percent-encoded paths are rendered back exactly as given."""
    # Each input is already URL-encoded, so the expected output equals the input.
    encoded_paths = (
        "/foo%20bar",
        "/path%2Fwith%2Fspecial%21%40%23",
        "/%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF",
        "/tmp%2Fbar",
    )
    expectations = [(path, path) for path in encoded_paths]

    for given, wanted in expectations:
        parsed = Multiaddr(f"/http-path{given}")
        # str() must show the URL-encoded path
        assert str(parsed) == f"/http-path{wanted}"


def test_http_path_in_complex_multiaddr():
    """http-path works as the trailing component of a longer multiaddr."""
    # full address -> expected http-path value
    expected_by_addr = {
        "/ip4/127.0.0.1/tcp/443/tls/http/http-path/api%2Fv1": "api%2Fv1",
        "/ip4/127.0.0.1/tcp/80/http/http-path/static%2Fcss": "static%2Fcss",
        "/dns/example.com/tcp/443/tls/http/http-path/docs": "docs",
    }

    for text, wanted in expected_by_addr.items():
        parsed = Multiaddr(text)
        assert str(parsed) == text
        assert parsed.value_for_protocol(P_HTTP_PATH) == wanted


def test_http_path_error_cases():
    """Invalid http-path values are rejected with StringParseError."""
    rejected = (
        "/http-path/",  # empty path
        "/http-path",  # missing path value
        "/http-path/invalid%zz",  # invalid URL encoding
    )

    for text in rejected:
        with pytest.raises(StringParseError):
            Multiaddr(text)


def test_http_path_value_extraction():
    """value_for_protocol returns the encoded http-path component."""
    # full address -> expected http-path value
    expected_by_addr = {
        "/http-path/foo": "foo",
        "/http-path/foo%2Fbar": "foo%2Fbar",
        "/http-path/api%2Fv1%2Fusers": "api%2Fv1%2Fusers",
        "/ip4/127.0.0.1/tcp/80/http/http-path/docs": "docs",
    }

    for text, wanted in expected_by_addr.items():
        assert Multiaddr(text).value_for_protocol(P_HTTP_PATH) == wanted


def test_http_path_edge_cases():
    """Percent-encoded spaces, slashes, unicode and symbols are preserved."""
    encoded_values = (
        "path%20with%20spaces",
        "path%2Fwith%2Fmultiple%2Fslashes",
        "path%2Fwith%2Funicode%2F%E6%B5%8B%E8%AF%95",
        "path%2Fwith%2Fsymbols%21%40%23%24%25%5E%26%2A%28%29",
    )

    for encoded in encoded_values:
        parsed = Multiaddr(f"/http-path/{encoded}")
        # The extracted value must equal the encoded input
        assert parsed.value_for_protocol(P_HTTP_PATH) == encoded


def test_http_path_only_reads_http_path_part():
    """The http-path value stops at its own component.

    For /http-path/tmp%2Fbar/p2p-circuit the extracted value must be
    tmp%2Fbar only; the trailing /p2p-circuit is a separate protocol.
    """
    text = "/http-path/tmp%2Fbar/p2p-circuit"
    parsed = Multiaddr(text)

    # Only the http-path component is returned
    assert parsed.value_for_protocol(P_HTTP_PATH) == "tmp%2Fbar"

    # The rendered string still contains both components
    assert str(parsed) == text


def test_http_path_malformed_percent_escape():
    """An incomplete percent-escape such as a trailing %f is rejected.

    Mirrors the Go test case /http-path/thisIsMissingAfullByte%f.
    """
    with pytest.raises(StringParseError, match="Invalid percent-escape"):
        Multiaddr("/http-path/thisIsMissingAfullByte%f")


def test_http_path_raw_value_access():
    """Show how to recover the raw (unescaped) path from an http-path value.

    value_for_protocol yields the URL-encoded form; unquoting it gives the
    raw value, comparable to Go's SplitLast + RawValue.
    """
    from urllib.parse import quote, unquote

    parsed = Multiaddr("/http-path/tmp%2Fbar")

    # URL-encoded value, as returned by value_for_protocol
    encoded = parsed.value_for_protocol(P_HTTP_PATH)
    assert encoded == "tmp%2Fbar"

    # Raw value, obtained by unescaping the encoded one
    raw = unquote(encoded)
    assert raw == "tmp/bar"

    # Re-encoding the raw value gives back the encoded form
    assert quote(raw, safe="") == encoded
51 changes: 50 additions & 1 deletion tests/test_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import varint

from multiaddr import Multiaddr, exceptions, protocols
from multiaddr.codecs import memory
from multiaddr.codecs import http_path, memory
from multiaddr.exceptions import BinaryParseError


Expand Down Expand Up @@ -269,3 +269,52 @@ def test_memory_integration_invalid_values():
# Too large (overflow > uint64)
with pytest.raises(ValueError):
Multiaddr(f"/memory/{2**64}")


def test_http_path_bytes_string_roundtrip():
    """Codec string -> bytes -> string is the identity on encoded paths."""
    from urllib.parse import quote

    codec = http_path.Codec()
    raw_paths = ("/foo", "/foo/bar", "/a b", "/こんにちは", "/path/with/special!@#")

    # The multiaddr layer hands the codec URL-encoded input, so encode each
    # path the same way the codec does before feeding it in.
    for encoded in (quote(raw, safe="") for raw in raw_paths):
        as_bytes = codec.to_bytes(None, encoded)
        assert isinstance(as_bytes, bytes)
        # Decoding must reproduce the same URL-encoded string
        assert codec.to_string(None, as_bytes) == encoded


def test_http_path_empty_string_raises():
    """Encoding an empty path string raises ValueError."""
    codec = http_path.Codec()
    empty = ""
    with pytest.raises(ValueError):
        codec.to_bytes(None, empty)


def test_http_path_empty_bytes_raises():
    """Decoding an empty buffer raises BinaryParseError."""
    codec = http_path.Codec()
    empty = b""
    with pytest.raises(BinaryParseError):
        codec.to_string(None, empty)


def test_http_path_special_characters():
    """Spaces and non-ASCII characters round-trip through the codec."""
    from urllib.parse import quote

    # Encode the same way the codec does (nothing is left unescaped)
    encoded = quote("/foo bar/あいうえお", safe="")

    codec = http_path.Codec()
    as_bytes = codec.to_bytes(None, encoded)
    decoded = codec.to_string(None, as_bytes)

    assert decoded == encoded


def test_http_path_validate_function():
    """validate() accepts a non-empty buffer and rejects an empty one."""
    codec = http_path.Codec()

    # A non-empty path is accepted: the call returns without raising
    codec.validate(b"/valid/path")

    # An empty path is rejected
    with pytest.raises(ValueError):
        codec.validate(b"")
Loading