diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index 9f6ab20335..f63774f551 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -43,8 +43,9 @@ "https://bcr.bazel.build/modules/bazel_features/1.20.0/MODULE.bazel": "8b85300b9c8594752e0721a37210e34879d23adc219ed9dc8f4104a4a1750920", "https://bcr.bazel.build/modules/bazel_features/1.21.0/MODULE.bazel": "675642261665d8eea09989aa3b8afb5c37627f1be178382c320d1b46afba5e3b", "https://bcr.bazel.build/modules/bazel_features/1.23.0/MODULE.bazel": "fd1ac84bc4e97a5a0816b7fd7d4d4f6d837b0047cf4cbd81652d616af3a6591a", - "https://bcr.bazel.build/modules/bazel_features/1.23.0/source.json": "c72c61b722d7c3f884994fe647afeb2ed1ae66c437f8f370753551f7b4d8be7f", "https://bcr.bazel.build/modules/bazel_features/1.3.0/MODULE.bazel": "cdcafe83ec318cda34e02948e81d790aab8df7a929cec6f6969f13a489ccecd9", + "https://bcr.bazel.build/modules/bazel_features/1.30.0/MODULE.bazel": "a14b62d05969a293b80257e72e597c2da7f717e1e69fa8b339703ed6731bec87", + "https://bcr.bazel.build/modules/bazel_features/1.30.0/source.json": "b07e17f067fe4f69f90b03b36ef1e08fe0d1f3cac254c1241a1818773e3423bc", "https://bcr.bazel.build/modules/bazel_features/1.4.1/MODULE.bazel": "e45b6bb2350aff3e442ae1111c555e27eac1d915e77775f6fdc4b351b758b5d7", "https://bcr.bazel.build/modules/bazel_features/1.9.0/MODULE.bazel": "885151d58d90d8d9c811eb75e3288c11f850e1d6b481a8c9f766adee4712358b", "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a", @@ -155,7 +156,8 @@ "https://bcr.bazel.build/modules/rules_java/7.3.2/MODULE.bazel": "50dece891cfdf1741ea230d001aa9c14398062f2b7c066470accace78e412bc2", "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe", "https://bcr.bazel.build/modules/rules_java/8.11.0/MODULE.bazel": "c3d280bc5ff1038dcb3bacb95d3f6b83da8dd27bba57820ec89ea4085da767ad", - "https://bcr.bazel.build/modules/rules_java/8.11.0/source.json": "302b52a39259a85aa06ca3addb9787864ca3e03b432a5f964ea68244397e7544", + "https://bcr.bazel.build/modules/rules_java/8.12.0/MODULE.bazel": "8e6590b961f2defdfc2811c089c75716cb2f06c8a4edeb9a8d85eaa64ee2a761", + "https://bcr.bazel.build/modules/rules_java/8.12.0/source.json": "cbd5d55d9d38d4008a7d00bee5b5a5a4b6031fcd4a56515c9accbcd42c7be2ba", "https://bcr.bazel.build/modules/rules_java/8.3.2/MODULE.bazel": "7336d5511ad5af0b8615fdc7477535a2e4e723a357b6713af439fe8cf0195017", "https://bcr.bazel.build/modules/rules_java/8.5.1/MODULE.bazel": "d8a9e38cc5228881f7055a6079f6f7821a073df3744d441978e7a43e20226939", "https://bcr.bazel.build/modules/rules_java/8.6.1/MODULE.bazel": "f4808e2ab5b0197f094cabce9f4b006a27766beb6a9975931da07099560ca9c2", @@ -232,7 +234,6 @@ "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27", - "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/MODULE.bazel": "af322bc08976524477c79d1e45e241b6efbeb918c497e8840b8ab116802dda79", "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/MODULE.bazel": "eec517b5bbe5492629466e11dae908d043364302283de25581e3eb944326c4ca", "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/source.json": "22bc55c47af97246cfc093d0acf683a7869377de362b5d1c552c2c2e16b7a806", "https://bcr.bazel.build/modules/zlib/1.3.1/MODULE.bazel": "751c9940dcfe869f5f7274e1295422a34623555916eb98c174c1e945594bf198" @@ -314,7 +315,7 @@ }, "//:non_module_deps.bzl%non_module_deps": { "general": { - "bzlTransitiveDigest": "W7OHR2aK0tHHQ+ZwitEbMw64w5t2o11I9uGx8Q/EMBs=", + "bzlTransitiveDigest": "zG4n7gygzlALs/kuLE9tTdQSt37Va84wPS8mg+N1bTo=", "usagesDigest": "fLhA19vQJL+2bg82b9f+PZFRpAkxBzsz3xYcgEJpMzs=", "recordedFileInputs": {}, "recordedDirentsInputs": {}, @@ -515,7 +516,7 @@ }, "@@buildifier_prebuilt+//:defs.bzl%buildifier_prebuilt_deps_extension": { "general": { - "bzlTransitiveDigest": "iAZPs3u6yr/D7l5qY95YrItbStIZiUNP1b4Grcb0eOE=", + "bzlTransitiveDigest": "cYOZcRGaK/PnWH5WjotOrzDBpSnC7rMgUTyA6BC9Y3A=", "usagesDigest": "+DCF+pKaoblObvcYmMhzDybVwZu9JpS+TMSEnDEXxbo=", "recordedFileInputs": {}, "recordedDirentsInputs": {}, @@ -654,7 +655,7 @@ }, "@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": { "general": { - "bzlTransitiveDigest": "sFhcgPbDQehmbD1EOXzX4H1q/CD5df8zwG4kp4jbvr8=", + "bzlTransitiveDigest": "hUTp2w+RUVdL7ma5esCXZJAFnX7vLbVfLd7FwnQI6bU=", "usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=", "recordedFileInputs": {}, "recordedDirentsInputs": {}, @@ -839,7 +840,7 @@ }, "@@rules_oci+//oci:extensions.bzl%oci": { "general": { - "bzlTransitiveDigest": "7wkDiYmAq1S80kieWuq7OWbNmslhLkUaDeb+rwnQ9AM=", + "bzlTransitiveDigest": "7TDVwic75vzbT64nXbnVMUM9cn+Y1ptg7O7rbs5+HVU=", "usagesDigest": "E+3fNvcF/VP9AhwlXNJp2H0jYYeY/srgMf49bfcncQs=", "recordedFileInputs": {}, "recordedDirentsInputs": {}, @@ -1303,7 +1304,7 @@ }, "@@rules_python+//python/extensions:pip.bzl%pip": { "general": { - "bzlTransitiveDigest": "ynEctVXvD5j3EVd/ITbHx2D6CkTijK64au5zmriT7cg=", + "bzlTransitiveDigest": "A8HAuATXvF1hBtClkCU8tsIAiy1A11+fyf8pxabFW3o=", "usagesDigest": "xr+U7navw2+SHogogmHRboKLbXAnkZfsctPQuyjmArw=", "recordedFileInputs": { "@@//doc/requirements.txt": "76028466c185c5f8bd40c59bcb03d97d9586456ba2aabee82bbebc2c459c7143", @@ -4692,7 +4693,7 @@ }, "@@rules_rust+//rust:extensions.bzl%rust": { "general": { - "bzlTransitiveDigest": "YsaTPw20Z8ZMM3WlG6OGXJBzyoyBZcY7FTiU04eEdYw=", + "bzlTransitiveDigest": "7+PaQ+OMKFzt89XpJFQe/0Clrp0iVTQ/jIUGpi8TpCk=", "usagesDigest": "ozx08ZbgRXTJw0zCaO/xtMUzgGLvwaQkZGnUo6tlyHM=", "recordedFileInputs": {}, "recordedDirentsInputs": {}, diff --git a/gateway/dataplane/BUILD.bazel b/gateway/dataplane/BUILD.bazel index c65bd346c1..5ab0e9c9d3 100644 --- a/gateway/dataplane/BUILD.bazel +++ b/gateway/dataplane/BUILD.bazel @@ -67,6 +67,7 @@ go_test( "//pkg/private/xtest:go_default_library", "//pkg/snet:go_default_library", "//pkg/snet/mock_snet:go_default_library", + "//pkg/snet/multihomed:go_default_library", "//pkg/snet/path:go_default_library", "//private/ringbuf:go_default_library", "@com_github_golang_mock//gomock:go_default_library", diff --git a/gateway/dataplane/session_test.go b/gateway/dataplane/session_test.go index bb1f40a06c..780ae77586 100644 --- a/gateway/dataplane/session_test.go +++ b/gateway/dataplane/session_test.go @@ -30,6 +30,7 @@ import ( "github.com/scionproto/scion/pkg/private/mocks/net/mock_net" "github.com/scionproto/scion/pkg/snet" "github.com/scionproto/scion/pkg/snet/mock_snet" + "github.com/scionproto/scion/pkg/snet/multihomed" snetpath "github.com/scionproto/scion/pkg/snet/path" ) @@ -82,6 +83,7 @@ func TestTwoPaths(t *testing.T) { } func TestNoLeak(t *testing.T) { + multihomed.StopContinuousCheckInterfaces(t) defer goleak.VerifyNone(t) ctrl := gomock.NewController(t) diff --git a/pkg/snet/BUILD.bazel b/pkg/snet/BUILD.bazel index a6a366f45a..f77a947030 100644 --- a/pkg/snet/BUILD.bazel +++ b/pkg/snet/BUILD.bazel @@ -37,6 +37,7 @@ go_library( "//pkg/slayers/path/epic:go_default_library", "//pkg/slayers/path/onehop:go_default_library", "//pkg/slayers/path/scion:go_default_library", + "//pkg/snet/multihomed:go_default_library", "//private/topology:go_default_library", "//private/topology/underlay:go_default_library", "@com_github_gopacket_gopacket//:go_default_library", diff --git a/pkg/snet/conn.go b/pkg/snet/conn.go index 1db4f1988e..57c3cdc9f4 100644 --- a/pkg/snet/conn.go +++ b/pkg/snet/conn.go @@ -22,7 +22,6 @@ import ( "github.com/scionproto/scion/pkg/private/common" "github.com/scionproto/scion/pkg/private/ctrl/path_mgmt" - "github.com/scionproto/scion/pkg/private/serrors" "github.com/scionproto/scion/pkg/slayers" ) @@ -58,7 +57,12 @@ type Conn struct { // NewCookedConn returns a "cooked" Conn. The Conn object can be used to // send/receive SCION traffic with the usual methods. // It takes as arguments a non-nil PacketConn and a non-nil Topology parameter. -// Nil or unspecified addresses for the PacketConn object are not supported. +// The local address of the PacketConn can be nil or unspecified, leaving the socket not bound +// to any particular interface; however it has its limitations, namely a created Conn not +// being able to properly react to a routing change in the OS unless the routing change is +// accompanied by a change in the local network interfaces, in the form of changing their +// local IP addresses, or adding/removing one or more local network interfaces. +// // This is an advanced API, that allows fine-tuning of the Conn underlay functionality. // The general methods for obtaining a Conn object are still SCIONNetwork.Listen and // SCIONNetwork.Dial. @@ -72,9 +76,7 @@ func NewCookedConn( IA: topo.LocalIA, Host: pconn.LocalAddr().(*net.UDPAddr), } - if local.Host == nil || local.Host.IP.IsUnspecified() { - return nil, serrors.New("nil or unspecified address is not supported.") - } + return &Conn{ conn: pconn, local: local, diff --git a/pkg/snet/multihomed/BUILD.bazel b/pkg/snet/multihomed/BUILD.bazel new file mode 100644 index 0000000000..a19cdde893 --- /dev/null +++ b/pkg/snet/multihomed/BUILD.bazel @@ -0,0 +1,34 @@ +load("@rules_go//go:def.bzl", "go_library") +load("//tools:go.bzl", "go_test") + +go_library( + name = "go_default_library", + srcs = [ + "interfaces.go", + "outbound_addr.go", + ], + importpath = "github.com/scionproto/scion/pkg/snet/multihomed", + visibility = ["//visibility:public"], + deps = [ + "//pkg/log:go_default_library", + "//pkg/private/serrors:go_default_library", + ], +) + +go_test( + name = "go_default_test", + srcs = [ + "export_test.go", + "integrated_test.go", + "multihomed_test.go", + ], + embed = [":go_default_library"], + deps = [ + "//pkg/addr:go_default_library", + "//pkg/daemon:go_default_library", + "//pkg/private/xtest:go_default_library", + "//pkg/snet:go_default_library", + "@com_github_stretchr_testify//require:go_default_library", + "@com_github_stretchr_testify//suite:go_default_library", + ], +) diff --git a/pkg/snet/multihomed/export_test.go b/pkg/snet/multihomed/export_test.go new file mode 100644 index 0000000000..d1f4290cff --- /dev/null +++ b/pkg/snet/multihomed/export_test.go @@ -0,0 +1,49 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed + +import ( + "net/netip" + "sync" + "testing" + + "github.com/stretchr/testify/require" +) + +func MustGetEgressIpAddresses(t *testing.T) []netip.Addr { + addrs, err := egressIpAddresses() + require.NoError(t, err) + return addrs +} + +func GetInternalMutex() *sync.RWMutex { + return &muRemoteToEgress +} + +func StopTicker() { + stopContinuousCheckInterfaces() +} + +func GetRemoteToEgressMap() map[netip.Addr]netip.Addr { + return remoteToEgress +} + +func ReplaceRemoteToEgressMap(newMap map[netip.Addr]netip.Addr) { + remoteToEgress = newMap +} + +func GetEgressesLastState() *[]netip.Addr { + return &localAddresses +} diff --git a/pkg/snet/multihomed/integrated_test.go b/pkg/snet/multihomed/integrated_test.go new file mode 100644 index 0000000000..00670f873b --- /dev/null +++ b/pkg/snet/multihomed/integrated_test.go @@ -0,0 +1,332 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed_test + +import ( + "context" + "fmt" + "net" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/scionproto/scion/pkg/addr" + "github.com/scionproto/scion/pkg/daemon" + "github.com/scionproto/scion/pkg/private/xtest" + "github.com/scionproto/scion/pkg/snet" +) + +// Using test suite declared at multihomed_test.go + +// XXX(juagargi) This is not a valid unit test, should be moved to some kind of +// integration test run within a docker container. For now: +// +// Generate the tiny topology with IPv4 only: +// ./scion.sh stop ; rm -r gen/ +// ./scion.sh topology -c topology/tiny4.topo +// ./scion.sh start +// Then run the tests: +// go test ./pkg/snet/multihomed -count=1 -v -run TestUDP +// go test ./pkg/snet/multihomed -count=1 -v -run TestBasic +// go test ./pkg/snet/multihomed -count=1 -v -run TestMultihomed + +const serverPortInitial = 12345 + +var ( + serverIA = "1-ff00:0:111" + serverIPAddress = "127.0.0.1" + clientAddress = "127.0.0.1:0" + serverDaemon = "127.0.0.19:30255" // 111 + clientDaemon = "127.0.0.27:30255" // 112 +) + +func (s *MultihomedTestSuite) TestUDP() { + t := s.T() + t.Parallel() + + // Bind server to any interface. + serverAddress := fmt.Sprintf("0.0.0.0:%d", s.getServerPort()) + serverAddr := xtest.MustParseUDPAddr(t, serverAddress) + + runUDPServerAt(t, serverAddr) + runUDPClientWith(t, serverAddr, nil) +} + +// TestNoRegressionCheck checks that using bound sockets (like before "multihomed" changes) +// works as expected. +func (s *MultihomedTestSuite) TestNoRegressionCheck() { + t := s.T() + t.Parallel() + + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelF() + + serverAddress := fmt.Sprintf("%s:%d", serverIPAddress, s.getServerPort()) + serverAddr := xtest.MustParseUDPAddr(t, serverAddress) + clientAddr := xtest.MustParseUDPAddr(t, clientAddress) + + runServerAt(ctx, t, serverAddr) + runClientWith(ctx, t, serverAddr, clientAddr) +} + +// TestMultihomedServer temporary documentation. +// This is a test intended to debug the multihomed scion socket, remove it and make it +// an integration test. +// A multihomed socket is bound to several interfaces, or conversely to an address that spans +// multiple interfaces. +// Multihomed sockets are necessary to get connectivity via several interfaces, +// 1. At the RX side, listening to both cellular and ethernet WAN interfaces: necessary for handling +// failover one another. +// 2. At the TX side, the ability to use several paths that start on different local interfaces +// relies on a multihomed socket +func (s *MultihomedTestSuite) TestMultihomedServer() { + t := s.T() + t.Parallel() + + ctx, cancelF := context.WithTimeout(context.Background(), 3*time.Second) + defer cancelF() + + serverAddress := fmt.Sprintf("%s:%d", serverIPAddress, s.getServerPort()) + serverAddr := xtest.MustParseUDPAddr(t, serverAddress) + + runMultihomedServer(ctx, t, serverAddr.Port) + runClientWith(ctx, t, serverAddr, nil) +} + +func runUDPServerAt( + t *testing.T, + serverAddr *net.UDPAddr, +) { + conn, err := net.ListenUDP("udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) + + // Run in a different thread so that the creation of the server finishes without blocking. + go func() { + handleUDPPing(t, conn) + }() + + t.Log("runUDPServerAt: done") +} + +func runServerAt( + ctx context.Context, + t *testing.T, + serverAddr *net.UDPAddr, +) { + sd, err := daemon.NewService(serverDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + conn, err := sn.Listen(ctx, "udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) + + // Run in a different thread so that the creation of the server finishes without blocking. + go func() { + handlePing(t, conn) + }() + t.Log("runServerAt: done") +} + +func runMultihomedServer( + ctx context.Context, + t *testing.T, + port int, +) { + sd, err := daemon.NewService(serverDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + serverAddr := xtest.MustParseUDPAddr(t, fmt.Sprintf("0.0.0.0:%d", port)) + conn, err := sn.Listen(ctx, "udp", serverAddr) + require.NoError(t, err) + require.NotNil(t, conn) + + go func() { + handlePing(t, conn) + }() + t.Log("runServerAt: done") +} + +func runUDPClientWith( + t *testing.T, + serverAddr *net.UDPAddr, + clientAddr *net.UDPAddr, +) { + conn, err := net.DialUDP("udp", clientAddr, serverAddr) + require.NoError(t, err) + + _, err = conn.Write([]byte("ping")) + require.NoError(t, err) + t.Logf(" ---> runUDPClientWith: ping sent") + + // Read answer + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + t.Logf(" <--- runUDPClientWith: pong received. Err? %v, remote: %s read %d bytes", + err != nil, remoteAddr, n) + require.NoError(t, err) + + buff = buff[:n] + require.Equal(t, "pong", string(buff)) + + err = conn.Close() + require.NoError(t, err) +} + +func runClientWith( + ctx context.Context, + t *testing.T, + serverAddr *net.UDPAddr, + clientAddr *net.UDPAddr, +) { + sd, err := daemon.NewService(clientDaemon).Connect(ctx) + require.NoError(t, err) + defer sd.Close() + + info, err := sd.ASInfo(ctx, 0) + require.NoError(t, err) + t.Logf("client local IA: %s", info.IA.String()) + + interfaces, err := sd.Interfaces(ctx) + require.NoError(t, err) + for k, v := range interfaces { + t.Logf("iface %3d: %s", k, v.String()) + } + + topo, err := daemon.LoadTopology(ctx, sd) + require.NoError(t, err) + + sn := &snet.SCIONNetwork{ + SCMPHandler: snet.DefaultSCMPHandler{ + RevocationHandler: daemon.RevHandler{Connector: sd}, + }, + Topology: topo, + } + + completeServerAddrStr := fmt.Sprintf("%s,[%s]:%d", + serverIA, + serverAddr.IP.String(), + serverAddr.Port) + completeServerAddr, err := snet.ParseUDPAddr(completeServerAddrStr) + require.NoError(t, err) + + paths := getRemote(ctx, t, sd, completeServerAddr.IA, info.IA) + require.Greater(t, len(paths), 0) + completeServerAddr.Path = paths[0].Dataplane() + completeServerAddr.NextHop = paths[0].UnderlayNextHop() + + conn, err := sn.Dial(ctx, "udp", clientAddr, completeServerAddr) + require.NoError(t, err) + _, err = conn.Write([]byte("ping")) + require.NoError(t, err) + t.Logf(" ---> runUDPClientWith: ping sent") + + // Read answer. + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + t.Logf(" <--- runClientWith: pong received. Err? %v, remote: %s read %d bytes", + err != nil, remoteAddr, n) + require.NoError(t, err) + require.Equal(t, completeServerAddr.String(), remoteAddr.String()) + buff = buff[:n] + require.Equal(t, "pong", string(buff)) + + err = conn.Close() + require.NoError(t, err) +} + +func handlePing(t *testing.T, conn *snet.Conn) { + t.Logf("handlePing conn = %p", conn) + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFrom(buff) + t.Logf(" ---> handlePing. Err? %v, remote: %s, read %d bytes", + err != nil, remoteAddr, n) + require.NoError(t, err) + buff = buff[:n] + t.Logf("read from %s", remoteAddr) + // Check ping. + require.Equal(t, "ping", string(buff)) + + // Pong. + _, err = conn.WriteTo([]byte("pong"), remoteAddr) + require.NoError(t, err) + + err = conn.Close() + require.NoError(t, err) +} + +func handleUDPPing(t *testing.T, conn *net.UDPConn) { + t.Logf("handleUDPPing conn = %p", conn) + buff := make([]byte, 2048) + n, remoteAddr, err := conn.ReadFromUDP(buff) + t.Logf(" <--- handleUDPPing. Err? %v, remote: %s, read %d bytes", + err != nil, remoteAddr, n) + require.NoError(t, err) + + buff = buff[:n] + t.Logf("read from %s", remoteAddr) + // Check ping. + require.Equal(t, "ping", string(buff)) + + // Pong. + _, err = conn.WriteTo([]byte("pong"), remoteAddr) + require.NoError(t, err) + + err = conn.Close() + require.NoError(t, err) +} + +func getRemote( + ctx context.Context, + t *testing.T, + sd daemon.Connector, + remote, local addr.IA, +) []snet.Path { + paths, err := sd.Paths(ctx, remote, local, daemon.PathReqFlags{}) + require.NoError(t, err) + return paths +} + +var lastPortUsed atomic.Int32 + +func (s *MultihomedTestSuite) getServerPort() int { + // Initialize to 12344 if it's uninitialized. + lastPortUsed.CompareAndSwap(0, serverPortInitial-1) + return int(lastPortUsed.Add(1)) +} diff --git a/pkg/snet/multihomed/interfaces.go b/pkg/snet/multihomed/interfaces.go new file mode 100644 index 0000000000..3344c4aba8 --- /dev/null +++ b/pkg/snet/multihomed/interfaces.go @@ -0,0 +1,159 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed + +// The reasons to keep track of the current local addresses are that these two things can happen: +// 1. The interface is brought down and can't be used anymore. This requires the new +// packet to be sent using a different interface, if any is available. +// 2. The interface changed address, which needs us to update the tables and record the +// new address in use. +// The second event deals with the address only, without touching the routing table, while the +// first one modifies the routing table. For our purposes, both events modify the local address +// of the interface, and for both events the solution is to query the kernel again. +// This is why on the event of any address change, we completely clear the table, forcing +// the caller to perform a syscall to find the appropriate route. + +// XXX(juagargi): The right way to keep this routing information updated is to use netlink. +// We however just keep a cache of the last used remote addresses mapped to our interfaces' +// local addresses. Additionally, if the current interfaces' local addresses change, we +// completely clear the cache. + +import ( + "fmt" + "net" + "net/netip" + "os" + "slices" + "sort" + "sync" + "testing" + "time" + + "github.com/scionproto/scion/pkg/log" + "github.com/scionproto/scion/pkg/private/serrors" +) + +const ( + CheckInterfacesPeriod = time.Second + MaxAllowedCacheSize = 65536 // Maximum number of entries present in `remoteToEgress`. +) + +var ( + remoteToEgress map[netip.Addr]netip.Addr = make(map[netip.Addr]netip.Addr) + muRemoteToEgress sync.RWMutex = sync.RWMutex{} + localAddresses = make([]netip.Addr, 0) + stopTicker = make(chan struct{}) +) + +func init() { + go func() { + defer log.HandlePanic() + continuousCheckInterfaces() + }() +} + +// StopContinuousCheckInterfaces is used in tests where they need to stop the running +// goroutine that checks the state of the local interfaces. +func StopContinuousCheckInterfaces(*testing.T) { + if testing.Testing() { + stopContinuousCheckInterfaces() + } +} + +func continuousCheckInterfaces() { + clearCacheIfLocalChanges() + ticker := time.NewTicker(CheckInterfacesPeriod) +loop: + for { + select { + case <-ticker.C: + clearCacheIfLocalChanges() + case <-stopTicker: + ticker.Stop() + break loop + } + } +} + +func clearCacheIfLocalChanges() { + addrs := getInterfacesLocalAddresses() + if addrs == nil { + // Internal error, bail. + return + } + + // Compare with previous result. + if slices.Equal(addrs, localAddresses) { + // They are the same, bail. + return + } + + // Not equal, invalidate every entry. + invalidateAll() + // And store current state. + localAddresses = addrs +} + +func getInterfacesLocalAddresses() []netip.Addr { + // We only look at the local addresses. If they are not identical to the last call, + // remove all entries from the map, forcing the callers to obtain a new routed egress. + addrs, err := egressIpAddresses() + if err != nil { + // What do we do in this case? + // We should at least log the error and erase all entries in the table. + fmt.Fprintf(os.Stderr, "cannot list the network interfaces and their addresses: %s", err) + invalidateAll() + return nil + } + // Sort the result. + sort.Slice(addrs, func(i, j int) bool { + return addrs[i].Compare(addrs[j]) < 0 + }) + return addrs +} + +func invalidateAll() { + muRemoteToEgress.Lock() + remoteToEgress = make(map[netip.Addr]netip.Addr) + muRemoteToEgress.Unlock() +} + +func egressIpAddresses() ([]netip.Addr, error) { + interfaces, err := net.Interfaces() + if err != nil { + return nil, serrors.Wrap("listing interfaces", err) + } + ipAddrs := make([]netip.Addr, 0, len(interfaces)) + + for _, iface := range interfaces { + addrs, err := iface.Addrs() + if err != nil { + return nil, serrors.Wrap("getting interface addresses", err, "interface", iface.Name) + } + for _, addr := range addrs { + ipAddr, ok := addr.(*net.IPNet) + if ok { + a, _ := netip.AddrFromSlice(ipAddr.IP) + ipAddrs = append(ipAddrs, a) + } + } + } + + return ipAddrs, nil +} + +func stopContinuousCheckInterfaces() { + stopTicker <- struct{}{} +} diff --git a/pkg/snet/multihomed/multihomed_test.go b/pkg/snet/multihomed/multihomed_test.go new file mode 100644 index 0000000000..3519a72c7d --- /dev/null +++ b/pkg/snet/multihomed/multihomed_test.go @@ -0,0 +1,194 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed_test + +import ( + "crypto/rand" + "net/netip" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/scionproto/scion/pkg/private/xtest" + "github.com/scionproto/scion/pkg/snet/multihomed" +) + +func TestMultihomed(t *testing.T) { + suite.Run(t, NewMultihomedTestSuite()) +} + +// MultihomedTestSuite ensures that each test in this package is run correctly, even +// in the presence of test functions that alter the internal behaviour of mutexes or +// critical data structures. This is done by protecting the execution of each test function +// with a RWMutex, allowing "regular" tests to obtain a read lock, and "special" tests +// to get a write lock, forcing them run in isolation. +// It allows to call t.Parallel() in any and all test functions. +type MultihomedTestSuite struct { + suite.Suite + muInternalsIsolated sync.RWMutex +} + +func NewMultihomedTestSuite() *MultihomedTestSuite { + return &MultihomedTestSuite{} +} + +func (s *MultihomedTestSuite) SetupTest() { + s.muInternalsIsolated.RLock() +} + +func (s *MultihomedTestSuite) TearDownTest() { + s.muInternalsIsolated.RUnlock() +} + +func (s *MultihomedTestSuite) TestListInterfaces() { + t := s.T() + t.Parallel() + addrs := multihomed.MustGetEgressIpAddresses(t) + require.NotEmpty(t, addrs) +} + +func (s *MultihomedTestSuite) TestInternalEgressCache() { + t := s.T() + // We require this function to run in isolation: lock every other test. + s.muInternalsIsolated.RUnlock() + s.muInternalsIsolated.Lock() + defer func() { + // Because we hold the write lock, unlock it. + s.muInternalsIsolated.Unlock() + // Because the test suite will expect a read lock, get it. + s.muInternalsIsolated.RLock() + }() + + // Synchronize with the internal ticker routine to ensure it finished the update. + checkTicker := time.NewTicker(10 * time.Millisecond) + for i, _ := 0, <-checkTicker.C; i < 10; i, _ = i+1, <-checkTicker.C { + // Check that the egress table is not empty. + if len(*multihomed.GetEgressesLastState()) > 0 { + break + } + } + checkTicker.Stop() + require.NotEmpty(t, *multihomed.GetEgressesLastState()) + + // Stop internal refresh method. + multihomed.StopTicker() + + // Clear map. + multihomed.ReplaceRemoteToEgressMap(make(map[netip.Addr]netip.Addr)) + require.Empty(t, multihomed.GetRemoteToEgressMap()) + + // Create a pretend remote endpoint. + const mockRemoteAddress = "127.1.2.3" + const mockEgressAddress = "127.1.2.100" + mockRemote := xtest.MustParseUDPAddr(t, mockRemoteAddress+":22") + + // Add mock remote entry to map. + multihomed.ReplaceRemoteToEgressMap(map[netip.Addr]netip.Addr{ + netip.MustParseAddr(mockRemoteAddress): netip.MustParseAddr(mockEgressAddress), + }) + + // Actual test, get the egress address for the remote. + expected := xtest.MustParseIP(t, mockEgressAddress).To4() + got, err := multihomed.OutboundIP(mockRemote) + require.NoError(t, err) + require.Equal(t, expected, got) +} + +// BenchmarkSyncMapWrites (and the other 3 analogous benchmarks) are used to check the performance +// of a sync.Map (any->any) and a regular map (IP->IP) with a RWMutex. +func BenchmarkSyncMapWrites(b *testing.B) { + // Create a set of `size` IP addresses. + addrs := generateIpAddrs(b.N) + + m := sync.Map{} + b.ResetTimer() + storeInSyncMap(&m, addrs) +} + +func BenchmarkSyncMapReads(b *testing.B) { + addrs := generateIpAddrs(b.N) + m := sync.Map{} + storeInSyncMap(&m, addrs) + + // Refrain optimizer from removing code by adding the values to a discard buffer. + discardBuff := make([]netip.Addr, b.N) + b.ResetTimer() + for i, addr := range addrs { + a, ok := m.Load(addr) + addr = a.(netip.Addr) + _ = ok + discardBuff[i] = addr + } + b.StopTimer() + require.NotEmpty(b, discardBuff) + require.Len(b, discardBuff, b.N) +} + +func BenchmarkMuMapWrites(b *testing.B) { + addrs := generateIpAddrs(b.N) + + m := make(map[netip.Addr]netip.Addr) + mu := sync.RWMutex{} + b.ResetTimer() + storeInMuMap(m, &mu, addrs) +} + +func BenchmarkMuMapReads(b *testing.B) { + addrs := generateIpAddrs(b.N) + m := make(map[netip.Addr]netip.Addr) + mu := sync.RWMutex{} + storeInMuMap(m, &mu, addrs) + + // Refrain optimizer from removing code by adding the values to a discard buffer. + discardBuff := make([]netip.Addr, b.N) + b.ResetTimer() + for i, addr := range addrs { + mu.RLock() + addr, ok := m[addr] + mu.RUnlock() + _ = ok + discardBuff[i] = addr + } + b.StopTimer() + require.NotEmpty(b, discardBuff) + require.Len(b, discardBuff, b.N) +} + +func generateIpAddrs(size int) []netip.Addr { + addrs := make([]netip.Addr, size) + raw := [4]byte{} + for i := range size { + rand.Read(raw[:]) + addrs[i] = netip.AddrFrom4(raw) + } + return addrs +} + +func storeInSyncMap(m *sync.Map, addrs []netip.Addr) { + for _, addr := range addrs { + m.Store(addr, addr) + } +} + +func storeInMuMap(m map[netip.Addr]netip.Addr, mu *sync.RWMutex, addrs []netip.Addr) { + for _, addr := range addrs { + mu.Lock() + m[addr] = addr + mu.Unlock() + } +} diff --git a/pkg/snet/multihomed/outbound_addr.go b/pkg/snet/multihomed/outbound_addr.go new file mode 100644 index 0000000000..88bb6c10eb --- /dev/null +++ b/pkg/snet/multihomed/outbound_addr.go @@ -0,0 +1,75 @@ +// Copyright 2025 ETH Zurich +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package multihomed + +import ( + "net" + "net/netip" + + "github.com/scionproto/scion/pkg/private/serrors" +) + +// OutboundIP returns the IP address used by this host to dial to the specified remote host. +// The port value in the remote udp address is irrelevant. +// It relies on a previously populated table that maps remote addresses to egress addresses. +// If the remote is not present, it is added. +// Note that NAT address discovery support in scionproto via STUN will also dial periodically +// connections to the STUN server, which should be enough to find the local address used +// to route to the next hop. +func OutboundIP(nextHop *net.UDPAddr) (net.IP, error) { + remote, ok := netip.AddrFromSlice(nextHop.IP) + if !ok { + return nil, serrors.New("invalid IP address", "address", nextHop.IP) + } + + // Check if the table contains an entry. + muRemoteToEgress.RLock() + egress, ok := remoteToEgress[remote] + muRemoteToEgress.RUnlock() + if ok { + return net.IP(egress.AsSlice()), nil + } + + // Not found, find it and add it. The dialing involves a syscall, but no network traffic. + eg, err := dialRemote(nextHop) + if err != nil { + return nil, err + } + egress, _ = netip.AddrFromSlice(eg) + + muRemoteToEgress.Lock() + // Check if our cache is not too big already. + if len(remoteToEgress) < MaxAllowedCacheSize { + remoteToEgress[remote] = egress + } + muRemoteToEgress.Unlock() + + return eg, nil +} + +// dialRemote creates a socket used to send UDP packets to the remote endpoint. +// Note that while a syscall is performed (two including Close), there will be no network traffic. +// Anyhow, this is somewhat expensive, so try to reduce its usage. +func dialRemote(raddr *net.UDPAddr) (net.IP, error) { + conn, err := net.DialUDP("udp", nil, raddr) + if err != nil { + return nil, err + } + defer conn.Close() + + // The conn object is always a net.UDPConn, with LocalAddr statically returning + // always a *net.UDPAddr. + return conn.LocalAddr().(*net.UDPAddr).IP, nil +} diff --git a/pkg/snet/reader.go b/pkg/snet/reader.go index 3769918220..e6efb9085a 100644 --- a/pkg/snet/reader.go +++ b/pkg/snet/reader.go @@ -16,7 +16,6 @@ package snet import ( "net" - "net/netip" "sync" "time" @@ -88,21 +87,6 @@ func (c *scionConnReader) read(b []byte) (int, *UDPAddr, error) { return 0, nil, serrors.New("unexpected payload", "type", common.TypeOf(pkt.Payload)) } - // XXX(JordiSubira): We explicitly forbid nil or unspecified address in the current constructor - // for Conn. - // If this were ever to change, we would always fall into the following if statement, then - // we would like to replace this logic (e.g., using IP_PKTINFO, with its caveats). - pktAddrPort := netip.AddrPortFrom(pkt.Destination.Host.IP(), udp.DstPort) - if c.local.IA != pkt.Destination.IA || - c.local.Host.AddrPort() != pktAddrPort { - return 0, nil, serrors.New("packet is destined to a different host", - "local_isd_as", c.local.IA, - "local_host", c.local.Host, - "pkt_destination_isd_as", pkt.Destination.IA, - "pkt_destination_host", pktAddrPort, - ) - } - // Extract remote address. // Copy the address data to prevent races. See // https://github.com/scionproto/scion/issues/1659. diff --git a/pkg/snet/snet.go b/pkg/snet/snet.go index b661641c5e..2a298f8d91 100644 --- a/pkg/snet/snet.go +++ b/pkg/snet/snet.go @@ -92,17 +92,13 @@ type SCIONNetwork struct { } // OpenRaw returns a PacketConn which listens on the specified address. -// Nil or unspecified addresses are not supported. // If the address port is 0 a valid and free SCION/UDP port is automatically chosen. // Otherwise, the specified port must be a valid SCION/UDP port. func (n *SCIONNetwork) OpenRaw(ctx context.Context, addr *net.UDPAddr) (PacketConn, error) { var pconn *net.UDPConn var err error - if addr == nil || addr.IP.IsUnspecified() { - return nil, serrors.New("nil or unspecified address is not supported") - } start, end := n.Topology.PortRange.Start, n.Topology.PortRange.End - if addr.Port == 0 { + if addr == nil || addr.Port == 0 { pconn, err = listenUDPRange(addr, start, end) } else { if addr.Port < int(start) || addr.Port > int(end) { @@ -182,7 +178,9 @@ func (n *SCIONNetwork) Listen( return NewCookedConn(packetConn, n.Topology, WithReplyPather(n.ReplyPather)) } -func listenUDPRange(addr *net.UDPAddr, start, end uint16) (*net.UDPConn, error) { +// listenUDPRange creates a new net.UDPConn using a suitable port between the specified range. +// If laddr is not nil, the returned connection is bound to its IP address. +func listenUDPRange(laddr *net.UDPAddr, start, end uint16) (*net.UDPConn, error) { // XXX(JordiSubira): For now, we iterate on the complete SCION/UDP // range, in decreasing order, taking the first unused port. // @@ -203,11 +201,18 @@ func listenUDPRange(addr *net.UDPAddr, start, end uint16) (*net.UDPConn, error) if start < 1024 { restrictedStart = 1024 } + + // Local address used later in a loop to check if the different ports are available. + tryLocalAddr := &net.UDPAddr{} + if laddr != nil { + tryLocalAddr.IP = laddr.IP + tryLocalAddr.Zone = laddr.Zone + } + for port := end; port >= restrictedStart; port-- { - pconn, err := net.ListenUDP(addr.Network(), &net.UDPAddr{ - IP: addr.IP, - Port: int(port), - }) + tryLocalAddr.Port = int(port) + pconn, err := net.ListenUDP("udp", tryLocalAddr) + if err == nil { return pconn, nil } diff --git a/pkg/snet/writer.go b/pkg/snet/writer.go index f6661b1d51..437a42e0f5 100644 --- a/pkg/snet/writer.go +++ b/pkg/snet/writer.go @@ -24,6 +24,7 @@ import ( "github.com/scionproto/scion/pkg/addr" "github.com/scionproto/scion/pkg/private/serrors" + "github.com/scionproto/scion/pkg/snet/multihomed" "github.com/scionproto/scion/private/topology" ) @@ -82,6 +83,16 @@ func (c *scionConnWriter) WriteTo(b []byte, raddr net.Addr) (int, error) { if !ok { return 0, serrors.New("invalid listen host IP", "ip", c.local.Host.IP) } + if listenHostIP.IsUnspecified() { + // Sending data to an unbound socket, we need to find the appropriate local address + // to write it as host address in the SCION packet. The interface to use in the local + // host will be that one routed to reach the next hop. + localIP, err := multihomed.OutboundIP(nextHop) + if err != nil { + return 0, serrors.Wrap("interface IP not bound and cannot find one", err) + } + listenHostIP, _ = netip.AddrFromSlice(localIP) + } pkt := &Packet{ Bytes: Bytes(c.buffer),