diff --git a/felix/dataplane/linux/dscp_mgr.go b/felix/dataplane/linux/dscp_mgr.go index 5feacac25b3..88b2dd7a766 100644 --- a/felix/dataplane/linux/dscp_mgr.go +++ b/felix/dataplane/linux/dscp_mgr.go @@ -15,11 +15,13 @@ package intdataplane import ( + "fmt" "sort" "strings" "github.com/sirupsen/logrus" + "github.com/projectcalico/calico/felix/nftables" "github.com/projectcalico/calico/felix/proto" "github.com/projectcalico/calico/felix/rules" "github.com/projectcalico/calico/felix/types" @@ -29,6 +31,7 @@ type dscpManager struct { ipVersion uint8 ruleRenderer rules.RuleRenderer mangleTable Table + mangleMaps nftables.MapsDataplane // QoS policies. wepPolicies map[types.WorkloadEndpointID]rules.DSCPRule @@ -40,11 +43,13 @@ type dscpManager struct { func newDSCPManager( mangleTable Table, + mangleMaps nftables.MapsDataplane, ruleRenderer rules.RuleRenderer, ipVersion uint8, ) *dscpManager { return &dscpManager{ mangleTable: mangleTable, + mangleMaps: mangleMaps, ruleRenderer: ruleRenderer, ipVersion: ipVersion, wepPolicies: map[types.WorkloadEndpointID]rules.DSCPRule{}, @@ -151,6 +156,14 @@ func (m *dscpManager) CompleteDeferredWork() error { return dscpRules[i].SrcAddrs < dscpRules[j].SrcAddrs }) + logrus.Infof("marva0 %v", dscpRules) + if m.mangleMaps != nil { + mappings := nftablesMappings(dscpRules) + logrus.Infof("marva %v", mappings) + mapMeta := nftables.MapMetadata{Name: rules.NftablesQoSPolicyMap, Type: nftables.MapTypeSourceNetMatch} + m.mangleMaps.AddOrReplaceMap(mapMeta, mappings) + } + chain := m.ruleRenderer.EgressDSCPChain(dscpRules) m.mangleTable.UpdateChain(chain) m.dirty = false @@ -158,3 +171,13 @@ func (m *dscpManager) CompleteDeferredWork() error { return nil } + +func nftablesMappings(rules []rules.DSCPRule) map[string][]string { + mappings := map[string][]string{} + logrus.Infof("marva1 %v", rules) + for _, r := range rules { + mappings[r.SrcAddrs] = []string{fmt.Sprintf("%d", r.Value)} + } + logrus.Infof("marva2 %v", mappings) + return mappings +} diff --git a/felix/dataplane/linux/dscp_mgr_test.go b/felix/dataplane/linux/dscp_mgr_test.go index de286651f69..e2b91c7bbf9 100644 --- a/felix/dataplane/linux/dscp_mgr_test.go +++ b/felix/dataplane/linux/dscp_mgr_test.go @@ -45,7 +45,8 @@ func dscpManagerTests(ipVersion uint8) func() { MarkDrop: 0x10, MarkEndpoint: 0x11110000, }) - manager = newDSCPManager(mangleTable, ruleRenderer, ipVersion) + // TODO (mazdak): add more tests for new nftables optimization + manager = newDSCPManager(mangleTable, nil, ruleRenderer, ipVersion) }) It("should program DSCP chain with no rule", func() { diff --git a/felix/dataplane/linux/int_dataplane.go b/felix/dataplane/linux/int_dataplane.go index a09f7d2a072..dbb77748486 100644 --- a/felix/dataplane/linux/int_dataplane.go +++ b/felix/dataplane/linux/int_dataplane.go @@ -1123,7 +1123,11 @@ func NewIntDataplaneDriver(config Config) *InternalDataplane { dp.RegisterManager(newHostsIPSetManager(ipSetsV4, 4, config)) if !config.BPFEnabled { - dp.RegisterManager(newDSCPManager(mangleTableV4, ruleRenderer, 4)) + var mangleMaps nftables.MapsDataplane + if config.RulesConfig.NFTables { + mangleMaps = mangleTableV4.(nftables.MapsDataplane) + } + dp.RegisterManager(newDSCPManager(mangleTableV4, mangleMaps, ruleRenderer, 4)) } if config.RulesConfig.IPIPEnabled { @@ -1324,7 +1328,11 @@ func NewIntDataplaneDriver(config Config) *InternalDataplane { dp.RegisterManager(newHostsIPSetManager(ipSetsV6, 6, config)) if !config.BPFEnabled { - dp.RegisterManager(newDSCPManager(mangleTableV6, ruleRenderer, 6)) + var mangleMapsV6 nftables.MapsDataplane + if config.RulesConfig.NFTables { + mangleMapsV6 = mangleTableV6.(nftables.MapsDataplane) + } + dp.RegisterManager(newDSCPManager(mangleTableV6, mangleMapsV6, ruleRenderer, 6)) } // Add a manager for IPv6 wireguard configuration. This is added irrespective of whether wireguard is actually enabled diff --git a/felix/fv/dscp_test.go b/felix/fv/dscp_test.go index 07a4bbbadb4..a1a5d3719b0 100644 --- a/felix/fv/dscp_test.go +++ b/felix/fv/dscp_test.go @@ -176,7 +176,7 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ dscp tests", []apiconfig.Da } }) - It("applying DSCP annotation should result is adding correct rules", func() { + It("pepper1 applying DSCP annotation should result is adding correct rules", func() { dscp0 := numorstring.DSCPFromInt(0) // 0x0 dscp20 := numorstring.DSCPFromInt(20) // 0x14 dscp32 := numorstring.DSCPFromInt(32) // 0x20 @@ -243,6 +243,7 @@ var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ dscp tests", []apiconfig.Da } ep2_2.UpdateInInfra(infra) + //time.Sleep(time.Minute * 20) cc.ResetExpectations() cc.ExpectSome(extClient, hostw) cc.ExpectSome(extClient, ep1_1) diff --git a/felix/generictables/match_builder.go b/felix/generictables/match_builder.go index 45382cb3102..15e236442e5 100644 --- a/felix/generictables/match_builder.go +++ b/felix/generictables/match_builder.go @@ -75,6 +75,7 @@ type MatchCriteria interface { // Only supported in nftables. InInterfaceVMAP(mapname string) MatchCriteria OutInterfaceVMAP(mapname string) MatchCriteria + SourceNetVMAP(mapname string) MatchCriteria } type AddrType string diff --git a/felix/iptables/match_builder.go b/felix/iptables/match_builder.go index a57011f2a7f..5b628fe6098 100644 --- a/felix/iptables/match_builder.go +++ b/felix/iptables/match_builder.go @@ -335,6 +335,11 @@ func (m matchCriteria) OutInterfaceVMAP(mapname string) generictables.MatchCrite return m } +func (m matchCriteria) SourceNetVMAP(_ string) generictables.MatchCriteria { + log.Panic("SourceNetVMAP not supported in iptables") + return m +} + func PortsToMultiport(ports []uint16) string { portFragments := make([]string, len(ports)) for i, port := range ports { diff --git a/felix/nftables/actions.go b/felix/nftables/actions.go index cd135f116ae..7190a6679a2 100644 --- a/felix/nftables/actions.go +++ b/felix/nftables/actions.go @@ -517,7 +517,8 @@ type DSCPAction struct { } func (a DSCPAction) ToFragment(features *environment.Features) string { - return fmt.Sprintf(" dscp set %d", a.Value) + return "ip dscp" + //return fmt.Sprintf(" dscp set %d", a.Value) } func (a DSCPAction) String() string { diff --git a/felix/nftables/maps.go b/felix/nftables/maps.go index f7743a5ff52..a92b7f5bb83 100644 --- a/felix/nftables/maps.go +++ b/felix/nftables/maps.go @@ -38,7 +38,10 @@ var gaugeVecNumMaps = prometheus.NewGaugeVec(prometheus.GaugeOpts{ type MapType string -const MapTypeInterfaceMatch MapType = "interfaceMatch" +const ( + MapTypeInterfaceMatch MapType = "interfaceMatch" + MapTypeSourceNetMatch MapType = "sourceNetMatch" +) type MapsDataplane interface { AddOrReplaceMap(meta MapMetadata, members map[string][]string) @@ -374,7 +377,7 @@ func (s *Maps) LoadDataplaneState() error { for _, e := range mapData.elems { logCxt.WithField("element", e).Debug("Processing element") switch metadata.Type { - case MapTypeInterfaceMatch: + case MapTypeInterfaceMatch, MapTypeSourceNetMatch: strElems[e.Key[0]] = e.Value default: unknownElems.Add(UnknownMapMember(e.Key, e.Value)) @@ -442,7 +445,7 @@ func (s *Maps) NFTablesMap(name string) *knftables.Map { var flags []knftables.SetFlag switch metadata.Type { - case MapTypeInterfaceMatch: + case MapTypeInterfaceMatch, MapTypeSourceNetMatch: default: logrus.WithField("type", metadata.Type).Panic("Unexpected map type") } @@ -623,12 +626,32 @@ func CanonicaliseMapMember(mtype MapType, key string, value []string) MapMember } // An action and a chain. return interfaceToChain{key, splits[0], splits[1]} + case MapTypeSourceNetMatch: + return sourceNetToAction{source: key, action: value[0]} default: logrus.Errorf("Unknown map type: %v", mtype) } return nil } +// sourceNetToAction is a MapMember that represents a mapping from a source net to an terminal action. +type sourceNetToAction struct { + source string + action string +} + +func (m sourceNetToAction) Key() []string { + return []string{m.source} +} + +func (m sourceNetToAction) Value() []string { + return []string{m.action} +} + +func (m sourceNetToAction) String() string { + return fmt.Sprintf("%s -> %s", m.source, m.action) +} + // interfaceToAction is a MapMember that represents a mapping from an interface to an terminal action. type interfaceToAction struct { iface string @@ -670,6 +693,12 @@ func mapType(t MapType, ipVersion int) string { switch t { case MapTypeInterfaceMatch: return "ifname : verdict" + case MapTypeSourceNetMatch: + if ipVersion == 4 { + return "ipv4_addr : dscp" + } else { + return "ipv6_addr : dscp" + } default: logrus.WithField("type", string(t)).Panic("Unknown MapType") } diff --git a/felix/nftables/match_builder.go b/felix/nftables/match_builder.go index e206c5d6adf..dc19de30c84 100644 --- a/felix/nftables/match_builder.go +++ b/felix/nftables/match_builder.go @@ -544,6 +544,11 @@ func (m nftMatch) OutInterfaceVMAP(name string) generictables.MatchCriteria { return m } +func (m nftMatch) SourceNetVMAP(name string) generictables.MatchCriteria { + m.clauses = append(m.clauses, fmt.Sprintf(" saddr @-%s", LegalizeSetName(name))) + return m +} + // PortsToMultiport converts a list of ports to a multiport set suitable for inline use in nftables rules. func PortsToMultiport(ports []uint16) string { portFragments := make([]string, len(ports)) diff --git a/felix/nftables/table_layer.go b/felix/nftables/table_layer.go index b440d0a5b7f..2ba553c3082 100644 --- a/felix/nftables/table_layer.go +++ b/felix/nftables/table_layer.go @@ -86,12 +86,20 @@ func (t *tableLayer) namespaceMapMember(m []string) []string { return m } + action := strings.Split(strings.TrimSpace(m[0]), " ")[0] + // TODO: We only use "goto" in map members right now. If we add other actions, we'll need to namespace them. // This is a very brittle implementation that assumes that the map member is a single string that starts with "goto ". - if !strings.HasPrefix(m[0], "goto ") { - logrus.Panicf("Unexpected map member: %s", m) + switch action { + case "goto": + return []string{fmt.Sprintf("goto %s", t.namespaceName(m[0][5:]))} + case "dscp": + return m + //default: + // logrus.Panicf("Unexpected map member: %s", m) } - return []string{fmt.Sprintf("goto %s", t.namespaceName(m[0][5:]))} + + return m } func (t *tableLayer) Name() string { diff --git a/felix/rules/dscp.go b/felix/rules/dscp.go index cd743b7d73b..9aec4632b87 100644 --- a/felix/rules/dscp.go +++ b/felix/rules/dscp.go @@ -24,8 +24,31 @@ type DSCPRule struct { } func (r *DefaultRuleRenderer) EgressDSCPChain(rules []DSCPRule) *generictables.Chain { + if r.NFTables { + return r.nftablesQoSPolicyRules(rules) + } + return r.defaultQoSPolicyRules(rules) +} + +func (r *DefaultRuleRenderer) nftablesQoSPolicyRules(rules []DSCPRule) *generictables.Chain { + var renderedRules []generictables.Rule + // DSCP Ruls are sorted and validated by DSCP manager. + + renderedRules = append(renderedRules, generictables.Rule{ + Match: r.NewMatch().SourceNetVMAP(NftablesQoSPolicyMap), + //Match: r.NewMatch(), + //Action: r.DSCP("vmap"), + }) + + return &generictables.Chain{ + Name: ChainEgressDSCP, + Rules: renderedRules, + } +} + +func (r *DefaultRuleRenderer) defaultQoSPolicyRules(rules []DSCPRule) *generictables.Chain { var renderedRules []generictables.Rule - // Rules are sorted and validated by DSCP manager. + // DSCP Ruls are sorted and validated by DSCP manager. for _, rule := range rules { renderedRules = append(renderedRules, generictables.Rule{ Match: r.NewMatch().SourceNet(rule.SrcAddrs), diff --git a/felix/rules/rule_defs.go b/felix/rules/rule_defs.go index 38990354252..2b6f586f75f 100644 --- a/felix/rules/rule_defs.go +++ b/felix/rules/rule_defs.go @@ -61,7 +61,8 @@ const ( ChainManglePrerouting = ChainNamePrefix + "PREROUTING" ChainManglePostrouting = ChainNamePrefix + "POSTROUTING" - ChainEgressDSCP = ChainNamePrefix + "egress-dscp" + ChainEgressDSCP = ChainNamePrefix + "egress-dscp" + NftablesQoSPolicyMap = ChainEgressDSCP + "-map" IPSetIDAllPools = "all-ipam-pools" IPSetIDNATOutgoingMasqPools = "masq-ipam-pools"