From 1acb0edc70b3ce0501b6a240ce670fe0b6f7dd8a Mon Sep 17 00:00:00 2001 From: "David J. Wilder" Date: Wed, 3 May 2017 11:01:11 -0700 Subject: [PATCH 1/2] Enable Linux bridge ProxyArpWifi This change enables the Linux bridge's ProxyArpWiFi feature, eliminating the need to flood ARP packets. When an endpoint is created the bridge driver already has the data needed to complete arp and fdb table entries. Rather than let the kernel discover this information on its own we populate the arp and fdb tables when the endpoint is configured. All other broadcast traffic will pass normally, allowing the administrator to manage it using ebtables. Linux bridge ProxyArpWifi is enabled with: --opt com.docker.network.bridge.proxyarp=1 Dependencies: linux kernel v4.1-rc1 or later (commit 842a9ae08a25671db3d4f689eed68b4d64be15) Updated based on review comments from aboch. Signed-off-by: David Wilder --- drivers/bridge/bridge.go | 49 +++++++++++++++++++++++++++++ drivers/bridge/bridge_store.go | 5 +++ drivers/bridge/bridge_test.go | 56 ++++++++++++++++++++++++++++++++++ drivers/bridge/labels.go | 3 ++ 4 files changed, 113 insertions(+) diff --git a/drivers/bridge/bridge.go b/drivers/bridge/bridge.go index e681b8f7c4..9a85acbed5 100644 --- a/drivers/bridge/bridge.go +++ b/drivers/bridge/bridge.go @@ -60,6 +60,7 @@ type networkConfiguration struct { EnableIPv6 bool EnableIPMasquerade bool EnableICC bool + EnableProxyArp bool Mtu int DefaultBindingIP net.IP DefaultBridge bool @@ -240,6 +241,10 @@ func (c *networkConfiguration) fromLabels(labels map[string]string) error { if c.DefaultBindingIP = net.ParseIP(value); c.DefaultBindingIP == nil { return parseErr(label, value, "nil ip") } + case EnableProxyArp: + if c.EnableProxyArp, err = strconv.ParseBool(value); err != nil { + return parseErr(label, value, err.Error()) + } case netlabel.ContainerIfacePrefix: c.ContainerIfacePrefix = value } @@ -449,6 +454,7 @@ func parseNetworkGenericOptions(data interface{}) (*networkConfiguration, error) config = 
&networkConfiguration{ EnableICC: true, EnableIPMasquerade: true, + EnableProxyArp: false, } err = config.fromLabels(opt) case options.Generic: @@ -1043,6 +1049,49 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo, } } + // The bridge proxy arp feature requires three things to happen: + // 1) Set the proxyarpwifi flag on the container side bridge port to disable ARP packet flooding. + // 2) Generate an arp entry in the host's ARP table mapping the containers IP address to its MAC. + // 3) Generate a static FDB entry for the containers MAC address. + + if config.EnableProxyArp { + + BridgeIF, err := d.nlh.LinkByName(config.BridgeName) + if err != nil { + return fmt.Errorf("could not find interface with destination name %s: %v", config.BridgeName, err) + } + + err = d.nlh.LinkSetBrProxyArpWiFi(host, true) + if err != nil { + return fmt.Errorf("unable to set BridgeProxyArp mode on %s: %v", hostIfName, err) + } + + nlnh := &netlink.Neigh{ + IP: endpoint.addr.IP, + HardwareAddr: endpoint.macAddress, + } + + // Generate the permanent arp entry. + nlnh.State = netlink.NUD_PERMANENT + nlnh.LinkIndex = BridgeIF.Attrs().Index + + if err := d.nlh.NeighSet(nlnh); err != nil { + return fmt.Errorf("Failed to add neighbor entry: %v", err) + } + logrus.Debugf("An arp entry has been created: Interface=%s Ip=%s MAC=%s", config.BridgeName, nlnh.IP, nlnh.HardwareAddr) + + // Generate the static fdb entry. 
+ nlnh.State = netlink.NUD_NOARP + nlnh.Flags = netlink.NTF_MASTER + nlnh.Family = syscall.AF_BRIDGE + nlnh.LinkIndex = host.Attrs().Index + + if err := d.nlh.NeighSet(nlnh); err != nil { + return fmt.Errorf("Failed to add fdb entry: %v", err) + } + logrus.Debugf("An fdb entry has been created: Interface=%s MAC=%s", hostIfName, nlnh.HardwareAddr) + } + // Up the host interface after finishing all netlink configuration if err = d.nlh.LinkSetUp(host); err != nil { return fmt.Errorf("could not set link up for host interface %s: %v", hostIfName, err) diff --git a/drivers/bridge/bridge_store.go b/drivers/bridge/bridge_store.go index c7c83d8369..841860cbfa 100644 --- a/drivers/bridge/bridge_store.go +++ b/drivers/bridge/bridge_store.go @@ -137,6 +137,7 @@ func (ncfg *networkConfiguration) MarshalJSON() ([]byte, error) { nMap["EnableIPv6"] = ncfg.EnableIPv6 nMap["EnableIPMasquerade"] = ncfg.EnableIPMasquerade nMap["EnableICC"] = ncfg.EnableICC + nMap["EnableProxyArp"] = ncfg.EnableProxyArp nMap["Mtu"] = ncfg.Mtu nMap["Internal"] = ncfg.Internal nMap["DefaultBridge"] = ncfg.DefaultBridge @@ -201,6 +202,10 @@ func (ncfg *networkConfiguration) UnmarshalJSON(b []byte) error { ncfg.BridgeIfaceCreator = ifaceCreator(v.(float64)) } + if v, ok := nMap["EnableProxyArp"]; ok { + ncfg.EnableProxyArp = v.(bool) + } + return nil } diff --git a/drivers/bridge/bridge_test.go b/drivers/bridge/bridge_test.go index 5acb8dbebc..2d1a8fad33 100644 --- a/drivers/bridge/bridge_test.go +++ b/drivers/bridge/bridge_test.go @@ -276,6 +276,7 @@ func TestCreateFullOptionsLabels(t *testing.T) { DefaultBridge: "true", EnableICC: "true", EnableIPMasquerade: "true", + EnableProxyArp: "true", DefaultBindingIP: bndIPs, } @@ -319,6 +320,10 @@ func TestCreateFullOptionsLabels(t *testing.T) { t.Fatal("incongruent EnableIPMasquerade in bridge network") } + if !nw.config.EnableProxyArp { + t.Fatal("incongruent EnableProxyArp in bridge network") + } + bndIP := net.ParseIP(bndIPs) if 
!bndIP.Equal(nw.config.DefaultBindingIP) { t.Fatalf("Unexpected: %v", nw.config.DefaultBindingIP) @@ -1071,3 +1076,54 @@ func TestCreateWithExistingBridge(t *testing.T) { t.Fatal("Deleting bridge network that using existing bridge interface unexpectedly deleted the bridge interface") } } + +func TestProxyArp(t *testing.T) { + if !testutils.IsRunningInContainer() { + defer testutils.SetupTestOSContext(t)() + } + d := newDriver() + + err := d.configure(nil) + if err != nil { + t.Fatalf("Failed to setup driver config: %v", err) + } + + netconfig := &networkConfiguration{BridgeName: DefaultBridgeName, EnableProxyArp: true, DefaultBridge: true} + genericOption := make(map[string]interface{}) + genericOption[netlabel.GenericData] = netconfig + + ipdList := getIPv4Data(t, "") + + err = d.CreateNetwork("ProxyArpTest", genericOption, nil, ipdList, nil) + if err != nil { + t.Fatalf("Bridge creation failed: %v", err) + } + + te := newTestEndpoint(ipdList[0].Pool, 10) + err = d.CreateEndpoint("ProxyArpTest", "ep", te.Interface(), nil) + if err != nil { + t.Fatalf("Failed to create endpoint: %v", err) + } + + BridgeIF, err := netlink.LinkByName(DefaultBridgeName) + if err != nil { + t.Fatalf("Failed to lookup bridge interface: %v", err) + } + + dump, err := netlink.NeighList(BridgeIF.Attrs().Index, 0) + if err != nil { + t.Errorf("Failed to NeighList: %v", err) + } + + FoundNeigh := 0 + for _, v := range dump { + if v.State&netlink.NUD_PERMANENT != 0 && + bytes.Equal(te.iface.mac, v.HardwareAddr) && + te.iface.addr.IP.Equal(v.IP) { + FoundNeigh++ + } + } + if FoundNeigh != 1 { + t.Errorf("Expected a single match in the neighbor table got %d matches", FoundNeigh) + } +} diff --git a/drivers/bridge/labels.go b/drivers/bridge/labels.go index 7447bd3f93..49c10f05d8 100644 --- a/drivers/bridge/labels.go +++ b/drivers/bridge/labels.go @@ -15,4 +15,7 @@ const ( // DefaultBridge label DefaultBridge = "com.docker.network.bridge.default_bridge" + + // EnableProxyArp label + 
EnableProxyArp = "com.docker.network.bridge.proxyarp" ) From 808e5cb02841c6c4a6ea2f46f937ac2cc0f399e0 Mon Sep 17 00:00:00 2001 From: "David J. Wilder" Date: Wed, 3 May 2017 11:44:01 -0700 Subject: [PATCH 2/2] Vendoring the netlink changes. Signed-off-by: David Wilder --- vendor.conf | 16 +++++----- vendor/github.com/vishvananda/netlink/addr.go | 12 ++++---- .../vishvananda/netlink/addr_linux.go | 16 +++++++++- .../vishvananda/netlink/filter_linux.go | 4 --- .../vishvananda/netlink/link_linux.go | 22 ++++++++++++-- .../vishvananda/netlink/nl/addr_linux.go | 29 +++++++++++++++++++ .../vishvananda/netlink/nl/link_linux.go | 5 +++- .../vishvananda/netlink/protinfo.go | 20 +++++++++---- .../vishvananda/netlink/protinfo_linux.go | 4 +++ 9 files changed, 99 insertions(+), 29 deletions(-) diff --git a/vendor.conf b/vendor.conf index 45f2784192..d21fe8c13f 100644 --- a/vendor.conf +++ b/vendor.conf @@ -10,32 +10,30 @@ github.com/codegangsta/cli a65b733b303f0055f8d324d805f393cd3e7a7904 github.com/coreos/etcd 925d1d74cec8c3b169c52fd4b2dc234a35934fce github.com/coreos/go-systemd b4a58d95188dd092ae20072bac14cece0e67c388 github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d - github.com/docker/docker 9c96768eae4b3a65147b47a55c850c103ab8972d github.com/docker/go-connections 34b5052da6b11e27f5f2e357b38b571ddddd3928 github.com/docker/go-events 2e7d352816128aa84f4d29b2a21d400133701a0d github.com/docker/go-units 8e2d4523730c73120e10d4652f36ad6010998f4e github.com/docker/libkv 1d8431073ae03cdaedb198a89722f3aab6d418ef - github.com/godbus/dbus 5f6efc7ef2759c81b7ba876593971bfce311eab3 github.com/gogo/protobuf 8d70fb3182befc465c4a1eac8ad4d38ff49778e2 -github.com/golang/protobuf/proto f7137ae6b19afbfd61a94b746fda3b3fe0491874 +github.com/golang/protobuf f7137ae6b19afbfd61a94b746fda3b3fe0491874 github.com/gorilla/context 215affda49addc4c8ef7e2534915df2c8c35c6cd github.com/gorilla/mux 8096f47503459bcc74d1f4c487b7e6e42e5746b5 -github.com/hashicorp/consul/api 
954aec66231b79c161a4122b023fbcad13047f79 -github.com/hashicorp/go-msgpack/codec 71c2886f5a673a35f909803f38ece5810165097b +github.com/hashicorp/consul 954aec66231b79c161a4122b023fbcad13047f79 +github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b github.com/hashicorp/go-multierror 2167c8ec40776024589f483a6b836489e47e1049 github.com/hashicorp/memberlist 88ac4de0d1a0ca6def284b571342db3b777a4c37 github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870 github.com/mattn/go-shellwords 525bedee691b5a8df547cb5cf9f86b7fb1883e24 github.com/miekg/dns d27455715200c7d3e321a1e5cadb27c9ee0b0f02 -github.com/opencontainers/runc/libcontainer ba1568de399395774ad84c2ace65937814c542ed -github.com/samuel/go-zookeeper/zk d0e0d8e11f318e000a8cc434616d69e329edc374 +github.com/opencontainers/runc ba1568de399395774ad84c2ace65937814c542ed +github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374 github.com/seccomp/libseccomp-golang 1b506fc7c24eec5a3693cdcbed40d9c226cfc6a1 github.com/stretchr/testify dab07ac62d4905d3e48d17dc549c684ac3b7c15a -github.com/syndtr/gocapability/capability 2c00daeb6c3b45114c80ac44119e7b8801fdd852 +github.com/syndtr/gocapability 2c00daeb6c3b45114c80ac44119e7b8801fdd852 github.com/ugorji/go f1f1a805ed361a0e078bb537e4ea78cd37dcf065 -github.com/vishvananda/netlink 1e86b2bee5b6a7d377e4c02bb7f98209d6a7297c +github.com/vishvananda/netlink b71e0bb214aebd980216cb2516e8bd7bca9e9672 github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25 golang.org/x/net c427ad74c6d7a814201695e9ffde0c5d400a7674 golang.org/x/sys 8f0908ab3b2457e2e15403d3697c9ef5cb4b57a9 diff --git a/vendor/github.com/vishvananda/netlink/addr.go b/vendor/github.com/vishvananda/netlink/addr.go index fe3e3d3668..f08c956969 100644 --- a/vendor/github.com/vishvananda/netlink/addr.go +++ b/vendor/github.com/vishvananda/netlink/addr.go @@ -10,11 +10,13 @@ import ( // include a mask, so it stores the address as a net.IPNet. 
type Addr struct { *net.IPNet - Label string - Flags int - Scope int - Peer *net.IPNet - Broadcast net.IP + Label string + Flags int + Scope int + Peer *net.IPNet + Broadcast net.IP + PreferedLft int + ValidLft int } // String returns $ip/$netmask $label diff --git a/vendor/github.com/vishvananda/netlink/addr_linux.go b/vendor/github.com/vishvananda/netlink/addr_linux.go index 220f0f22f1..43daa4473a 100644 --- a/vendor/github.com/vishvananda/netlink/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -199,6 +199,10 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { addr.Label = string(attr.Value[:len(attr.Value)-1]) case IFA_FLAGS: addr.Flags = int(native.Uint32(attr.Value[0:4])) + case nl.IFA_CACHEINFO: + ci := nl.DeserializeIfaCacheInfo(attr.Value) + addr.PreferedLft = int(ci.IfaPrefered) + addr.ValidLft = int(ci.IfaValid) } } @@ -216,6 +220,10 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { type AddrUpdate struct { LinkAddress net.IPNet LinkIndex int + Flags int + Scope int + PreferedLft int + ValidLft int NewAddr bool // true=added false=deleted } @@ -263,7 +271,13 @@ func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-cha continue } - ch <- AddrUpdate{LinkAddress: *addr.IPNet, LinkIndex: ifindex, NewAddr: msgType == syscall.RTM_NEWADDR} + ch <- AddrUpdate{LinkAddress: *addr.IPNet, + LinkIndex: ifindex, + NewAddr: msgType == syscall.RTM_NEWADDR, + Flags: addr.Flags, + Scope: addr.Scope, + PreferedLft: addr.PreferedLft, + ValidLft: addr.ValidLft} } } }() diff --git a/vendor/github.com/vishvananda/netlink/filter_linux.go b/vendor/github.com/vishvananda/netlink/filter_linux.go index eb1802c444..a0e000ca1f 100644 --- a/vendor/github.com/vishvananda/netlink/filter_linux.go +++ b/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -458,10 +458,6 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) key.Val = native.Uint32(htonl(key.Val)) } } 
- // only parse if we have a very basic redirect - if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 { - return detailed, nil - } case nl.TCA_U32_ACT: tables, err := nl.ParseRouteAttr(datum.Value) if err != nil { diff --git a/vendor/github.com/vishvananda/netlink/link_linux.go b/vendor/github.com/vishvananda/netlink/link_linux.go index 0c04d3adde..fb2013a136 100644 --- a/vendor/github.com/vishvananda/netlink/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/link_linux.go @@ -103,7 +103,7 @@ func (h *Handle) SetPromiscOn(link Link) error { msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) msg.Change = syscall.IFF_PROMISC - msg.Flags = syscall.IFF_UP + msg.Flags = syscall.IFF_PROMISC msg.Index = int32(base.Index) req.AddData(msg) @@ -122,7 +122,7 @@ func (h *Handle) SetPromiscOff(link Link) error { msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) msg.Change = syscall.IFF_PROMISC - msg.Flags = 0 & ^syscall.IFF_UP + msg.Flags = 0 & ^syscall.IFF_PROMISC msg.Index = int32(base.Index) req.AddData(msg) @@ -1288,6 +1288,22 @@ func (h *Handle) LinkSetFlood(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD) } +func LinkSetBrProxyArp(link Link, mode bool) error { + return pkgHandle.LinkSetBrProxyArp(link, mode) +} + +func (h *Handle) LinkSetBrProxyArp(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROXYARP) +} + +func LinkSetBrProxyArpWiFi(link Link, mode bool) error { + return pkgHandle.LinkSetBrProxyArpWiFi(link, mode) +} + +func (h *Handle) LinkSetBrProxyArpWiFi(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROXYARP_WIFI) +} + func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { base := link.Attrs() h.ensureIndex(base) @@ -1370,7 +1386,7 @@ func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { } func parseBondData(link Link, data []syscall.NetlinkRouteAttr) { - bond := NewLinkBond(NewLinkAttrs()) + bond := link.(*Bond) for i 
:= range data { switch data[i].Attr.Type { case nl.IFLA_BOND_MODE: diff --git a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go index 17088fa0c0..fe362e9fa7 100644 --- a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go @@ -45,3 +45,32 @@ func (msg *IfAddrmsg) Serialize() []byte { func (msg *IfAddrmsg) Len() int { return syscall.SizeofIfAddrmsg } + +// struct ifa_cacheinfo { +// __u32 ifa_prefered; +// __u32 ifa_valid; +// __u32 cstamp; /* created timestamp, hundredths of seconds */ +// __u32 tstamp; /* updated timestamp, hundredths of seconds */ +// }; + +const IFA_CACHEINFO = 6 +const SizeofIfaCacheInfo = 0x10 + +type IfaCacheInfo struct { + IfaPrefered uint32 + IfaValid uint32 + Cstamp uint32 + Tstamp uint32 +} + +func (msg *IfaCacheInfo) Len() int { + return SizeofIfaCacheInfo +} + +func DeserializeIfaCacheInfo(b []byte) *IfaCacheInfo { + return (*IfaCacheInfo)(unsafe.Pointer(&b[0:SizeofIfaCacheInfo][0])) +} + +func (msg *IfaCacheInfo) Serialize() []byte { + return (*(*[SizeofIfaCacheInfo]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/vendor/github.com/vishvananda/netlink/nl/link_linux.go index 6d9af56998..dd0385295c 100644 --- a/vendor/github.com/vishvananda/netlink/nl/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -102,7 +102,10 @@ const ( IFLA_BRPORT_FAST_LEAVE IFLA_BRPORT_LEARNING IFLA_BRPORT_UNICAST_FLOOD - IFLA_BRPORT_MAX = IFLA_BRPORT_UNICAST_FLOOD + IFLA_BRPORT_PROXYARP + IFLA_BRPORT_LEARNING_SYNC + IFLA_BRPORT_PROXYARP_WIFI + IFLA_BRPORT_MAX = IFLA_BRPORT_PROXYARP_WIFI ) const ( diff --git a/vendor/github.com/vishvananda/netlink/protinfo.go b/vendor/github.com/vishvananda/netlink/protinfo.go index ead3f2f15e..0087c4438b 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo.go +++ b/vendor/github.com/vishvananda/netlink/protinfo.go 
@@ -6,12 +6,14 @@ import ( // Protinfo represents bridge flags from netlink. type Protinfo struct { - Hairpin bool - Guard bool - FastLeave bool - RootBlock bool - Learning bool - Flood bool + Hairpin bool + Guard bool + FastLeave bool + RootBlock bool + Learning bool + Flood bool + ProxyArp bool + ProxyArpWiFi bool } // String returns a list of enabled flags @@ -35,6 +37,12 @@ func (prot *Protinfo) String() string { if prot.Flood { boolStrings = append(boolStrings, "Flood") } + if prot.ProxyArp { + boolStrings = append(boolStrings, "ProxyArp") + } + if prot.ProxyArpWiFi { + boolStrings = append(boolStrings, "ProxyArpWiFi") + } return strings.Join(boolStrings, " ") } diff --git a/vendor/github.com/vishvananda/netlink/protinfo_linux.go b/vendor/github.com/vishvananda/netlink/protinfo_linux.go index ea72695343..10dd0d5335 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo_linux.go +++ b/vendor/github.com/vishvananda/netlink/protinfo_linux.go @@ -64,6 +64,10 @@ func parseProtinfo(infos []syscall.NetlinkRouteAttr) *Protinfo { pi.Learning = byteToBool(info.Value[0]) case nl.IFLA_BRPORT_UNICAST_FLOOD: pi.Flood = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_PROXYARP: + pi.ProxyArp = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_PROXYARP_WIFI: + pi.ProxyArpWiFi = byteToBool(info.Value[0]) } } return &pi