diff --git a/COPYING-5.14.0-570.49.1.el9_6 b/COPYING-5.14.0-570.52.1.el9_6 similarity index 100% rename from COPYING-5.14.0-570.49.1.el9_6 rename to COPYING-5.14.0-570.52.1.el9_6 diff --git a/Makefile.rhelver b/Makefile.rhelver index 1578ada719273..fdd8a18af9c15 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 6 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 570.49.1 +RHEL_RELEASE = 570.52.1 # # ZSTREAM diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 4f0a94346d009..d344e49133114 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1980,6 +1980,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. diff --git a/ciq/ciq_backports/kernel-5.14.0-570.51.1.el9_6/rebuild.details.txt b/ciq/ciq_backports/kernel-5.14.0-570.51.1.el9_6/rebuild.details.txt new file mode 100644 index 0000000000000..8fe9d2ccda5b7 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.51.1.el9_6/rebuild.details.txt @@ -0,0 +1,18 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v5.14~1..kernel-mainline: 324124 +Number of commits in rpm: 6 +Number of commits matched with upstream: 4 (66.67%) +Number of commits in upstream but not in rpm: 324120 +Number of commits NOT found in upstream: 2 (33.33%) + +Rebuilding Kernel on Branch rocky9_6_rebuild_kernel-5.14.0-570.51.1.el9_6 for kernel-5.14.0-570.51.1.el9_6 +Clean Cherry Picks: 4 (100.00%) +Empty Cherry Picks: 0 (0.00%) +_______________________________ + +__EMPTY COMMITS__________________________ + +__CHANGES NOT IN UPSTREAM________________ +Porting to Rocky Linux 9, debranding and Rocky branding' +Ensure aarch64 kernel is not compressed' diff --git a/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/a8445cfe.failed b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/a8445cfe.failed new file mode 100644 index 0000000000000..2fd626f2412c4 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/a8445cfe.failed @@ -0,0 +1,127 @@ +net: mana: Change the function signature of mana_get_primary_netdev_rcu + +jira LE-4385 +Rebuild_History Non-Buildable kernel-5.14.0-570.52.1.el9_6 +commit-author Long Li +commit a8445cfec101c42e9d64cdb2dac13973b22c205c +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/a8445cfe.failed + +Change mana_get_primary_netdev_rcu() to mana_get_primary_netdev(), and +return the ndev with refcount held. The caller is responsible for dropping +the refcount. + +Also drop the check for IFF_SLAVE as it is not necessary if the upper +device is present. 
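+
+For reference only (not part of the upstream commit message): a minimal sketch of
+the caller pattern implied by the new signature, condensed from the mana_ib_probe
+hunk further below in this note.
+
+	/* get the primary ndev with a reference held on dev_tracker */
+	ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+	if (!ndev)
+		return -ENODEV;
+
+	ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+	/* caller is responsible for dropping the reference when done */
+	netdev_put(ndev, &dev->dev_tracker);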
+ + Signed-off-by: Long Li +Link: https://patch.msgid.link/1741821332-9392-1-git-send-email-longli@linuxonhyperv.com + Signed-off-by: Leon Romanovsky +(cherry picked from commit a8445cfec101c42e9d64cdb2dac13973b22c205c) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/infiniband/hw/mana/mana_ib.h +# drivers/net/ethernet/microsoft/mana/mana_en.c +diff --cc drivers/infiniband/hw/mana/mana_ib.h +index c3c9dc1c8d8b,81a7e7474462..000000000000 +--- a/drivers/infiniband/hw/mana/mana_ib.h ++++ b/drivers/infiniband/hw/mana/mana_ib.h +@@@ -65,6 -77,8 +65,11 @@@ struct mana_ib_dev + struct gdma_queue **eqs; + struct xarray qp_table_wq; + struct mana_ib_adapter_caps adapter_caps; +++<<<<<<< HEAD +++======= ++ struct dma_pool *av_pool; ++ netdevice_tracker dev_tracker; +++>>>>>>> a8445cfec101 (net: mana: Change the function signature of mana_get_primary_netdev_rcu) + }; + + struct mana_ib_wq { +diff --cc drivers/net/ethernet/microsoft/mana/mana_en.c +index 2f82b39f3002,4e870b11f946..000000000000 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@@ -3170,24 -3129,29 +3170,34 @@@ out + gd->driver_data = NULL; + gd->gdma_context = NULL; + kfree(ac); + + dev_dbg(dev, "%s succeeded\n", __func__); + } + +- struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index) ++ struct net_device *mana_get_primary_netdev(struct mana_context *ac, ++ u32 port_index, ++ netdevice_tracker *tracker) + { + struct net_device *ndev; + + if (port_index >= ac->num_ports) + return NULL; + +- /* When mana is used in netvsc, the upper netdevice should be returned. */ +- if (ac->ports[port_index]->flags & IFF_SLAVE) +- ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); +- else ++ rcu_read_lock(); ++ ++ /* If mana is used in netvsc, the upper netdevice should be returned. 
*/ ++ ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); ++ ++ /* If there is no upper device, use the parent Ethernet device */ ++ if (!ndev) + ndev = ac->ports[port_index]; + ++ netdev_hold(ndev, tracker, GFP_ATOMIC); ++ rcu_read_unlock(); ++ + return ndev; + } +++<<<<<<< HEAD + +EXPORT_SYMBOL_NS(mana_get_primary_netdev_rcu, NET_MANA); +++======= ++ EXPORT_SYMBOL_NS(mana_get_primary_netdev, "NET_MANA"); +++>>>>>>> a8445cfec101 (net: mana: Change the function signature of mana_get_primary_netdev_rcu) +diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c +index 50437272b7b9..16442d53ec8e 100644 +--- a/drivers/infiniband/hw/mana/device.c ++++ b/drivers/infiniband/hw/mana/device.c +@@ -85,10 +85,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, + dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues; + dev->ib_dev.dev.parent = mdev->gdma_context->dev; + +- rcu_read_lock(); /* required to get primary netdev */ +- ndev = mana_get_primary_netdev_rcu(mc, 0); ++ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); + if (!ndev) { +- rcu_read_unlock(); + ret = -ENODEV; + ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1"); + goto free_ib_device; +@@ -96,7 +94,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, + ether_addr_copy(mac_addr, ndev->dev_addr); + addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr); + ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1); +- rcu_read_unlock(); ++ /* mana_get_primary_netdev() returns ndev with refcount held */ ++ netdev_put(ndev, &dev->dev_tracker); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret); + goto free_ib_device; +* Unmerged path drivers/infiniband/hw/mana/mana_ib.h +* Unmerged path drivers/net/ethernet/microsoft/mana/mana_en.c +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 301785a934b2..5283276c2def 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -825,5 +825,7 @@ int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + u32 doorbell_pg_id); + void mana_uncfg_vport(struct mana_port_context *apc); + +-struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index); ++struct net_device *mana_get_primary_netdev(struct mana_context *ac, ++ u32 port_index, ++ netdevice_tracker *tracker); + #endif /* _MANA_H */ diff --git a/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/bee35b71.failed b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/bee35b71.failed new file mode 100644 index 0000000000000..3a8177da9127c --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/bee35b71.failed @@ -0,0 +1,109 @@ +RDMA/mana_ib: Handle net event for pointing to the current netdev + +jira LE-4385 +Rebuild_History Non-Buildable kernel-5.14.0-570.52.1.el9_6 +commit-author Long Li +commit bee35b7161aaaed9831e2f14876c374b9c566952 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/bee35b71.failed + +When running under Hyper-V, the master device to the RDMA device is always +bonded to this RDMA device. This is not user-configurable. + +The master device can be unbind/bind from the kernel. During those events, +the RDMA device should set to the current netdev to reflect the change of +master device from those events. 
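+
+In outline (a sketch for this backport note, condensed from the device.c hunk
+below; rtnl is held while the notifier runs, so there is no race with the RDMA
+core updating GIDs):
+
+	case NETDEV_CHANGEUPPER:
+		ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+		ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+		/* reference taken by mana_get_primary_netdev() is dropped here */
+		netdev_put(ndev, &dev->dev_tracker);
+		return NOTIFY_OK;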
+ + Signed-off-by: Long Li +Link: https://patch.msgid.link/1741821332-9392-2-git-send-email-longli@linuxonhyperv.com + Signed-off-by: Leon Romanovsky +(cherry picked from commit bee35b7161aaaed9831e2f14876c374b9c566952) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/infiniband/hw/mana/device.c +# drivers/infiniband/hw/mana/mana_ib.h +diff --cc drivers/infiniband/hw/mana/device.c +index 50437272b7b9,b31089320aa5..000000000000 +--- a/drivers/infiniband/hw/mana/device.c ++++ b/drivers/infiniband/hw/mana/device.c +@@@ -52,6 -60,43 +52,46 @@@ static const struct ib_device_ops mana_ + ib_ind_table), + }; + +++<<<<<<< HEAD +++======= ++ static const struct ib_device_ops mana_ib_stats_ops = { ++ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats, ++ .get_hw_stats = mana_ib_get_hw_stats, ++ }; ++ ++ static int mana_ib_netdev_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++ { ++ struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb); ++ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); ++ struct gdma_context *gc = dev->gdma_dev->gdma_context; ++ struct mana_context *mc = gc->mana.driver_data; ++ struct net_device *ndev; ++ ++ /* Only process events from our parent device */ ++ if (event_dev != mc->ports[0]) ++ return NOTIFY_DONE; ++ ++ switch (event) { ++ case NETDEV_CHANGEUPPER: ++ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); ++ /* ++ * RDMA core will setup GID based on updated netdev. ++ * It's not possible to race with the core as rtnl lock is being ++ * held. ++ */ ++ ib_device_set_netdev(&dev->ib_dev, ndev, 1); ++ ++ /* mana_get_primary_netdev() returns ndev with refcount held */ ++ netdev_put(ndev, &dev->dev_tracker); ++ ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++ } ++ +++>>>>>>> bee35b7161aa (RDMA/mana_ib: Handle net event for pointing to the current netdev) + static int mana_ib_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) + { +@@@ -114,9 -166,11 +162,9 @@@ + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", + ret); +- goto deregister_device; ++ goto deregister_net_notifier; + } + + - ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops); + - + ret = mana_ib_create_eqs(dev); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); +diff --cc drivers/infiniband/hw/mana/mana_ib.h +index c3c9dc1c8d8b,6903946677e5..000000000000 +--- a/drivers/infiniband/hw/mana/mana_ib.h ++++ b/drivers/infiniband/hw/mana/mana_ib.h +@@@ -65,6 -77,9 +65,12 @@@ struct mana_ib_dev + struct gdma_queue **eqs; + struct xarray qp_table_wq; + struct mana_ib_adapter_caps adapter_caps; +++<<<<<<< HEAD +++======= ++ struct dma_pool *av_pool; ++ netdevice_tracker dev_tracker; ++ struct notifier_block nb; +++>>>>>>> bee35b7161aa (RDMA/mana_ib: Handle net event for pointing to the current netdev) + }; + + struct mana_ib_wq { +* Unmerged path drivers/infiniband/hw/mana/device.c +* Unmerged path drivers/infiniband/hw/mana/mana_ib.h diff --git a/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/ca8ac489.failed b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/ca8ac489.failed new file mode 100644 index 0000000000000..5109125cf6f5f --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/ca8ac489.failed @@ -0,0 +1,159 @@ +net: mana: Handle unsupported HWC commands + +jira LE-4385 +Rebuild_History Non-Buildable kernel-5.14.0-570.52.1.el9_6 +commit-author Erni Sri Satya Vennela +commit ca8ac489ca33c986ff02ee14c3e1c10b86355428 +Empty-Commit: Cherry-Pick 
Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/ca8ac489.failed + +If any of the HWC commands are not recognized by the +underlying hardware, the hardware returns the response +header status of -1. Log the information using +netdev_info_once to avoid multiple error logs in dmesg. + + Signed-off-by: Erni Sri Satya Vennela + Reviewed-by: Haiyang Zhang + Reviewed-by: Shradha Gupta + Reviewed-by: Saurabh Singh Sengar + Reviewed-by: Dipayaan Roy +Link: https://patch.msgid.link/1750144656-2021-5-git-send-email-ernis@linux.microsoft.com + Signed-off-by: Paolo Abeni + +(cherry picked from commit ca8ac489ca33c986ff02ee14c3e1c10b86355428) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/microsoft/mana/mana_en.c +diff --cc drivers/net/ethernet/microsoft/mana/mana_en.c +index cf8ccab43478,5aee7bda1504..000000000000 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@@ -1160,6 -1238,95 +1163,98 @@@ out + return err; + } + +++<<<<<<< HEAD +++======= ++ int mana_query_link_cfg(struct mana_port_context *apc) ++ { ++ struct net_device *ndev = apc->ndev; ++ struct mana_query_link_config_resp resp = {}; ++ struct mana_query_link_config_req req = {}; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG, ++ sizeof(req), sizeof(resp)); ++ ++ req.vport = apc->port_handle; ++ req.hdr.resp.msg_version = GDMA_MESSAGE_V2; ++ ++ err = mana_send_request(apc->ac, &req, sizeof(req), &resp, ++ sizeof(resp)); ++ ++ if (err) { ++ if (err == -EOPNOTSUPP) { ++ netdev_info_once(ndev, "MANA_QUERY_LINK_CONFIG not supported\n"); ++ return err; ++ } ++ netdev_err(ndev, "Failed to query link config: %d\n", err); ++ return err; ++ } ++ ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG, ++ sizeof(resp)); ++ ++ if (err || resp.hdr.status) { ++ netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err, ++ resp.hdr.status); ++ if (!err) ++ err = -EOPNOTSUPP; ++ return err; ++ } ++ ++ if (resp.qos_unconfigured) { ++ err = -EINVAL; ++ return err; ++ } ++ apc->speed = resp.link_speed_mbps; ++ apc->max_speed = resp.qos_speed_mbps; ++ return 0; ++ } ++ ++ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, ++ int enable_clamping) ++ { ++ struct mana_set_bw_clamp_resp resp = {}; ++ struct mana_set_bw_clamp_req req = {}; ++ struct net_device *ndev = apc->ndev; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, MANA_SET_BW_CLAMP, ++ sizeof(req), sizeof(resp)); ++ req.vport = apc->port_handle; ++ req.link_speed_mbps = speed; ++ req.enable_clamping = enable_clamping; ++ ++ err = mana_send_request(apc->ac, &req, sizeof(req), &resp, ++ sizeof(resp)); ++ ++ if (err) { ++ if (err == -EOPNOTSUPP) { ++ netdev_info_once(ndev, "MANA_SET_BW_CLAMP not supported\n"); ++ return err; ++ } ++ netdev_err(ndev, "Failed to set bandwidth clamp for speed %u, err = %d", ++ speed, err); ++ return err; ++ } ++ ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_SET_BW_CLAMP, ++ sizeof(resp)); ++ ++ if (err || resp.hdr.status) { ++ netdev_err(ndev, "Failed to set bandwidth clamp: %d, 0x%x\n", err, ++ resp.hdr.status); ++ if (!err) ++ err = -EOPNOTSUPP; ++ return err; ++ } ++ ++ if (resp.qos_unconfigured) ++ netdev_info(ndev, "QoS is unconfigured\n"); ++ ++ return 0; ++ } ++ +++>>>>>>> ca8ac489ca33 (net: mana: Handle unsupported HWC commands) + int mana_create_wq_obj(struct mana_port_context *apc, + mana_handle_t vport, + u32 wq_type, struct 
mana_obj_spec *wq_spec, +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +index df8b52c991d8..7455e1a36a86 100644 +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -871,6 +871,10 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + } + + if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) { ++ if (ctx->status_code == GDMA_STATUS_CMD_UNSUPPORTED) { ++ err = -EOPNOTSUPP; ++ goto out; ++ } + if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT) + dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", + ctx->status_code); +* Unmerged path drivers/net/ethernet/microsoft/mana/mana_en.c +diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h +index a1661ec549f4..18f3761416dd 100644 +--- a/include/net/mana/gdma.h ++++ b/include/net/mana/gdma.h +@@ -10,6 +10,7 @@ + #include "shm_channel.h" + + #define GDMA_STATUS_MORE_ENTRIES 0x00000105 ++#define GDMA_STATUS_CMD_UNSUPPORTED 0xffffffff + + /* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. diff --git a/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/fbe346ce.failed b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/fbe346ce.failed new file mode 100644 index 0000000000000..c30672a9dd80b --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/fbe346ce.failed @@ -0,0 +1,348 @@ +net: mana: Handle Reset Request from MANA NIC + +jira LE-4385 +Rebuild_History Non-Buildable kernel-5.14.0-570.52.1.el9_6 +commit-author Haiyang Zhang +commit fbe346ce9d626680a4dd0f079e17c7b5dd32ffad +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/fbe346ce.failed + +Upon receiving the Reset Request, pause the connection and clean up +queues, wait for the specified period, then resume the NIC. +In the cleanup phase, the HWC is no longer responding, so set hwc_timeout +to zero to skip waiting on the response. + + Signed-off-by: Haiyang Zhang +Link: https://patch.msgid.link/1751055983-29760-1-git-send-email-haiyangz@linux.microsoft.com + Signed-off-by: Jakub Kicinski +(cherry picked from commit fbe346ce9d626680a4dd0f079e17c7b5dd32ffad) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/microsoft/mana/mana_en.c +# include/net/mana/gdma.h +diff --cc drivers/net/ethernet/microsoft/mana/mana_en.c +index 42b72d4f42a8,a7973651ae51..000000000000 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@@ -776,7 -860,11 +785,15 @@@ static int mana_send_request(struct man + err = mana_gd_send_request(gc, in_len, in_buf, out_len, + out_buf); + if (err || resp->status) { +++<<<<<<< HEAD + + if (req->req.msg_type != MANA_QUERY_PHY_STAT) +++======= ++ if (err == -EOPNOTSUPP) ++ return err; ++ ++ if (req->req.msg_type != MANA_QUERY_PHY_STAT && ++ mana_need_log(gc, err)) +++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC) + dev_err(dev, "Failed to send mana message: %d, 0x%x\n", + err, resp->status); + return err ? 
err : -EPROTO; +diff --cc include/net/mana/gdma.h +index a1661ec549f4,57df78cfbf82..000000000000 +--- a/include/net/mana/gdma.h ++++ b/include/net/mana/gdma.h +@@@ -60,6 -61,8 +60,11 @@@ enum gdma_eqe_type + GDMA_EQE_HWC_INIT_DONE = 131, + GDMA_EQE_HWC_FPGA_RECONFIG = 132, + GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, +++<<<<<<< HEAD +++======= ++ GDMA_EQE_HWC_SOC_SERVICE = 134, ++ GDMA_EQE_HWC_RESET_REQUEST = 135, +++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC) + GDMA_EQE_RNIC_QP_FATAL = 176, + }; + +@@@ -560,6 -582,12 +565,15 @@@ enum + /* Driver can handle holes (zeros) in the device list */ + #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11) + +++<<<<<<< HEAD +++======= ++ /* Driver supports dynamic MSI-X vector allocation */ ++ #define GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT BIT(13) ++ ++ /* Driver can self reset on EQE notification */ ++ #define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14) ++ +++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC) + /* Driver can self reset on FPGA Reconfig EQE notification */ + #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) + +@@@ -569,6 -597,8 +583,11 @@@ + GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ + GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ +++<<<<<<< HEAD +++======= ++ GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \ ++ GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \ +++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC) + GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE) + + #define GDMA_DRV_CAP_FLAGS2 0 +@@@ -893,4 -924,11 +912,14 @@@ int mana_gd_destroy_dma_region(struct g + void mana_register_debugfs(void); + void mana_unregister_debugfs(void); + +++<<<<<<< HEAD +++======= ++ int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event); ++ ++ int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state); ++ int mana_gd_resume(struct pci_dev *pdev); ++ ++ bool mana_need_log(struct gdma_context *gc, int err); ++ +++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC) + #endif /* _GDMA_H */ +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index 7fc19e4d13ec..79b25843a27a 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -8,6 +8,7 @@ + #include + + #include ++#include + + #include + struct dentry *mana_debugfs_root; +@@ -67,6 +68,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev) + mana_gd_init_vf_regs(pdev); + } + ++/* Suppress logging when we set timeout to zero */ ++bool mana_need_log(struct gdma_context *gc, int err) ++{ ++ struct hw_channel_context *hwc; ++ ++ if (err != -ETIMEDOUT) ++ return true; ++ ++ if (!gc) ++ return true; ++ ++ hwc = gc->hwc.driver_data; ++ if (hwc && hwc->hwc_timeout == 0) ++ return false; ++ ++ return true; ++} ++ + static int mana_gd_query_max_resources(struct pci_dev *pdev) + { + struct gdma_context *gc = pci_get_drvdata(pdev); +@@ -270,8 +289,9 @@ static int mana_gd_disable_queue(struct gdma_queue *queue) + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { +- dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, +- resp.hdr.status); ++ if (mana_need_log(gc, err)) ++ dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, ++ resp.hdr.status); + return err ? 
err : -EPROTO; + } + +@@ -356,25 +376,12 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) + + #define MANA_SERVICE_PERIOD 10 + +-struct mana_serv_work { +- struct work_struct serv_work; +- struct pci_dev *pdev; +-}; +- +-static void mana_serv_func(struct work_struct *w) ++static void mana_serv_fpga(struct pci_dev *pdev) + { +- struct mana_serv_work *mns_wk; + struct pci_bus *bus, *parent; +- struct pci_dev *pdev; +- +- mns_wk = container_of(w, struct mana_serv_work, serv_work); +- pdev = mns_wk->pdev; + + pci_lock_rescan_remove(); + +- if (!pdev) +- goto out; +- + bus = pdev->bus; + if (!bus) { + dev_err(&pdev->dev, "MANA service: no bus\n"); +@@ -395,7 +402,74 @@ static void mana_serv_func(struct work_struct *w) + + out: + pci_unlock_rescan_remove(); ++} ++ ++static void mana_serv_reset(struct pci_dev *pdev) ++{ ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ struct hw_channel_context *hwc; ++ ++ if (!gc) { ++ dev_err(&pdev->dev, "MANA service: no GC\n"); ++ return; ++ } ++ ++ hwc = gc->hwc.driver_data; ++ if (!hwc) { ++ dev_err(&pdev->dev, "MANA service: no HWC\n"); ++ goto out; ++ } ++ ++ /* HWC is not responding in this case, so don't wait */ ++ hwc->hwc_timeout = 0; ++ ++ dev_info(&pdev->dev, "MANA reset cycle start\n"); + ++ mana_gd_suspend(pdev, PMSG_SUSPEND); ++ ++ msleep(MANA_SERVICE_PERIOD * 1000); ++ ++ mana_gd_resume(pdev); ++ ++ dev_info(&pdev->dev, "MANA reset cycle completed\n"); ++ ++out: ++ gc->in_service = false; ++} ++ ++struct mana_serv_work { ++ struct work_struct serv_work; ++ struct pci_dev *pdev; ++ enum gdma_eqe_type type; ++}; ++ ++static void mana_serv_func(struct work_struct *w) ++{ ++ struct mana_serv_work *mns_wk; ++ struct pci_dev *pdev; ++ ++ mns_wk = container_of(w, struct mana_serv_work, serv_work); ++ pdev = mns_wk->pdev; ++ ++ if (!pdev) ++ goto out; ++ ++ switch (mns_wk->type) { ++ case GDMA_EQE_HWC_FPGA_RECONFIG: ++ mana_serv_fpga(pdev); ++ break; ++ ++ case GDMA_EQE_HWC_RESET_REQUEST: ++ mana_serv_reset(pdev); ++ break; ++ ++ default: ++ dev_err(&pdev->dev, "MANA service: unknown type %d\n", ++ mns_wk->type); ++ break; ++ } ++ ++out: + pci_dev_put(pdev); + kfree(mns_wk); + module_put(THIS_MODULE); +@@ -451,6 +525,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) + break; + + case GDMA_EQE_HWC_FPGA_RECONFIG: ++ case GDMA_EQE_HWC_RESET_REQUEST: + dev_info(gc->dev, "Recv MANA service type:%d\n", type); + + if (gc->in_service) { +@@ -472,6 +547,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) + dev_info(gc->dev, "Start MANA service type:%d\n", type); + gc->in_service = true; + mns_wk->pdev = to_pci_dev(gc->dev); ++ mns_wk->type = type; + pci_dev_get(mns_wk->pdev); + INIT_WORK(&mns_wk->serv_work, mana_serv_func); + schedule_work(&mns_wk->serv_work); +@@ -618,7 +694,8 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq) + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { +- dev_err(dev, "test_eq failed: %d\n", err); ++ if (mana_need_log(gc, err)) ++ dev_err(dev, "test_eq failed: %d\n", err); + goto out; + } + +@@ -653,7 +730,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, + + if (flush_evenets) { + err = mana_gd_test_eq(gc, queue); +- if (err) ++ if (err && mana_need_log(gc, err)) + dev_warn(gc->dev, "Failed to flush EQ: %d\n", err); + } + +@@ -799,8 +876,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle) + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || 
resp.hdr.status) { +- dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", +- err, resp.hdr.status); ++ if (mana_need_log(gc, err)) ++ dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", ++ err, resp.hdr.status); + return -EPROTO; + } + +@@ -1099,8 +1177,9 @@ int mana_gd_deregister_device(struct gdma_dev *gd) + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { +- dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", +- err, resp.hdr.status); ++ if (mana_need_log(gc, err)) ++ dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", ++ err, resp.hdr.status); + if (!err) + err = -EPROTO; + } +@@ -1700,7 +1779,7 @@ static void mana_gd_remove(struct pci_dev *pdev) + } + + /* The 'state' parameter is not used. */ +-static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) ++int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) + { + struct gdma_context *gc = pci_get_drvdata(pdev); + +@@ -1715,7 +1794,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) + * fail -- if this happens, it's safer to just report an error than try to undo + * what has been done. + */ +-static int mana_gd_resume(struct pci_dev *pdev) ++int mana_gd_resume(struct pci_dev *pdev) + { + struct gdma_context *gc = pci_get_drvdata(pdev); + int err; +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +index df8b52c991d8..afab02b0f168 100644 +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -860,7 +860,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + + if (!wait_for_completion_timeout(&ctx->comp_event, + (msecs_to_jiffies(hwc->hwc_timeout)))) { +- dev_err(hwc->dev, "HWC: Request timed out!\n"); ++ if (hwc->hwc_timeout != 0) ++ dev_err(hwc->dev, "HWC: Request timed out!\n"); ++ + err = -ETIMEDOUT; + goto out; + } +* Unmerged path drivers/net/ethernet/microsoft/mana/mana_en.c +* Unmerged path include/net/mana/gdma.h diff --git a/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/rebuild.details.txt b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/rebuild.details.txt new file mode 100644 index 0000000000000..a8d74e3d2e428 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.52.1.el9_6/rebuild.details.txt @@ -0,0 +1,23 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v5.14~1..kernel-mainline: 324124 +Number of commits in rpm: 40 +Number of commits matched with upstream: 37 (92.50%) +Number of commits in upstream but not in rpm: 324087 +Number of commits NOT found in upstream: 3 (7.50%) + +Rebuilding Kernel on Branch rocky9_6_rebuild_kernel-5.14.0-570.52.1.el9_6 for kernel-5.14.0-570.52.1.el9_6 +Clean Cherry Picks: 33 (89.19%) +Empty Cherry Picks: 4 (10.81%) +_______________________________ + +__EMPTY COMMITS__________________________ +a8445cfec101c42e9d64cdb2dac13973b22c205c net: mana: Change the function signature of mana_get_primary_netdev_rcu +bee35b7161aaaed9831e2f14876c374b9c566952 RDMA/mana_ib: Handle net event for pointing to the current netdev +ca8ac489ca33c986ff02ee14c3e1c10b86355428 net: mana: Handle unsupported HWC commands +fbe346ce9d626680a4dd0f079e17c7b5dd32ffad net: mana: Handle Reset Request from MANA NIC + +__CHANGES NOT IN UPSTREAM________________ +Porting to Rocky Linux 9, debranding and Rocky branding' +Ensure aarch64 kernel is not compressed' +ibmvnic: Increase 
max subcrq indirect entries with fallback diff --git a/crypto/seqiv.c b/crypto/seqiv.c index 86bb33644dd0b..ae7256cfc77f1 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -23,7 +23,7 @@ static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err) struct aead_request *subreq = aead_request_ctx(req); struct crypto_aead *geniv; - if (err == -EINPROGRESS) + if (err == -EINPROGRESS || err == -EBUSY) return; if (err) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 106112a50a9a2..6fee890cd94b4 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -45,6 +45,38 @@ static int hid_ignore_special_drivers = 0; module_param_named(ignore_special_drivers, hid_ignore_special_drivers, int, 0600); MODULE_PARM_DESC(ignore_special_drivers, "Ignore any special drivers and handle all devices by generic driver"); +/* + * Convert a signed n-bit integer to signed 32-bit integer. + */ + +static s32 snto32(__u32 value, unsigned int n) +{ + if (!value || !n) + return 0; + + if (n > 32) + n = 32; + + return sign_extend32(value, n - 1); +} + +/* + * Convert a signed 32-bit integer to a signed n-bit integer. + */ + +static u32 s32ton(__s32 value, unsigned int n) +{ + s32 a; + + if (!value || !n) + return 0; + + a = value >> (n - 1); + if (a && a != -1) + return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1; + return value & ((1 << n) - 1); +} + /* * Register a new report for a device. */ @@ -425,7 +457,7 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) * both this and the standard encoding. */ raw_value = item_sdata(item); if (!(raw_value & 0xfffffff0)) - parser->global.unit_exponent = hid_snto32(raw_value, 4); + parser->global.unit_exponent = snto32(raw_value, 4); else parser->global.unit_exponent = raw_value; return 0; @@ -1315,46 +1347,6 @@ int hid_open_report(struct hid_device *device) } EXPORT_SYMBOL_GPL(hid_open_report); -/* - * Convert a signed n-bit integer to signed 32-bit integer. Common - * cases are done through the compiler, the screwed things has to be - * done by hand. - */ - -static s32 snto32(__u32 value, unsigned n) -{ - if (!value || !n) - return 0; - - if (n > 32) - n = 32; - - switch (n) { - case 8: return ((__s8)value); - case 16: return ((__s16)value); - case 32: return ((__s32)value); - } - return value & (1 << (n - 1)) ? value | (~0U << n) : value; -} - -s32 hid_snto32(__u32 value, unsigned n) -{ - return snto32(value, n); -} -EXPORT_SYMBOL_GPL(hid_snto32); - -/* - * Convert a signed 32-bit integer to a signed n-bit integer. - */ - -static u32 s32ton(__s32 value, unsigned n) -{ - s32 a = value >> (n - 1); - if (a && a != -1) - return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1; - return value & ((1 << n) - 1); -} - /* * Extract/implement a data field from/to a little endian report (bit array). 
* diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 400d70e6dbe2d..30ad42aac8044 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3296,13 +3296,13 @@ static int m560_raw_event(struct hid_device *hdev, u8 *data, int size) 120); } - v = hid_snto32(hid_field_extract(hdev, data+3, 0, 12), 12); + v = sign_extend32(hid_field_extract(hdev, data + 3, 0, 12), 11); input_report_rel(hidpp->input, REL_X, v); - v = hid_snto32(hid_field_extract(hdev, data+3, 12, 12), 12); + v = sign_extend32(hid_field_extract(hdev, data + 3, 12, 12), 11); input_report_rel(hidpp->input, REL_Y, v); - v = hid_snto32(data[6], 8); + v = sign_extend32(data[6], 7); if (v != 0) hidpp_scroll_counter_handle_scroll(hidpp->input, &hidpp->vertical_wheel_counter, v); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 7b5c4522b426a..955f061a55e9a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1599,6 +1599,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, int count; int rq_flushed = 0, sq_flushed; unsigned long flag; + struct ib_event ev; pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); @@ -1607,6 +1608,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, if (schp != rchp) spin_lock(&schp->lock); spin_lock(&qhp->lock); + if (qhp->srq && qhp->attr.state == C4IW_QP_STATE_ERROR && + qhp->ibqp.event_handler) { + ev.device = qhp->ibqp.device; + ev.element.qp = &qhp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qhp->ibqp.event_handler(&ev, qhp->ibqp.qp_context); + } if (qhp->wq.flushed) { spin_unlock(&qhp->lock); diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index 50437272b7b96..181717fd4fa05 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -52,6 +52,38 @@ static const struct ib_device_ops mana_ib_dev_ops = { ib_ind_table), }; +static int mana_ib_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb); + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct gdma_context *gc = dev->gdma_dev->gdma_context; + struct mana_context *mc = gc->mana.driver_data; + struct net_device *ndev; + + /* Only process events from our parent device */ + if (event_dev != mc->ports[0]) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGEUPPER: + ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); + /* + * RDMA core will setup GID based on updated netdev. + * It's not possible to race with the core as rtnl lock is being + * held. 
+ */ + ib_device_set_netdev(&dev->ib_dev, ndev, 1); + + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + static int mana_ib_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { @@ -85,10 +117,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues; dev->ib_dev.dev.parent = mdev->gdma_context->dev; - rcu_read_lock(); /* required to get primary netdev */ - ndev = mana_get_primary_netdev_rcu(mc, 0); + ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); if (!ndev) { - rcu_read_unlock(); ret = -ENODEV; ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1"); goto free_ib_device; @@ -96,7 +126,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, ether_addr_copy(mac_addr, ndev->dev_addr); addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr); ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1); - rcu_read_unlock(); + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); if (ret) { ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret); goto free_ib_device; @@ -110,17 +141,25 @@ static int mana_ib_probe(struct auxiliary_device *adev, } dev->gdma_dev = &mdev->gdma_context->mana_ib; + dev->nb.notifier_call = mana_ib_netdev_event; + ret = register_netdevice_notifier(&dev->nb); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d", + ret); + goto deregister_device; + } + ret = mana_ib_gd_query_adapter_caps(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret); - goto deregister_device; + goto deregister_net_notifier; } ret = mana_ib_create_eqs(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); - goto deregister_device; + goto deregister_net_notifier; } ret = mana_ib_gd_create_rnic_adapter(dev); @@ -149,6 +188,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, mana_ib_gd_destroy_rnic_adapter(dev); destroy_eqs: mana_ib_destroy_eqs(dev); +deregister_net_notifier: + unregister_netdevice_notifier(&dev->nb); deregister_device: mana_gd_deregister_device(dev->gdma_dev); free_ib_device: @@ -164,6 +205,7 @@ static void mana_ib_remove(struct auxiliary_device *adev) xa_destroy(&dev->qp_table_wq); mana_ib_gd_destroy_rnic_adapter(dev); mana_ib_destroy_eqs(dev); + unregister_netdevice_notifier(&dev->nb); mana_gd_deregister_device(dev->gdma_dev); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index d13abc954d2aa..739f60f3cefc4 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -174,7 +174,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; req.num_resources = 1; - req.alignment = 1; + req.alignment = PAGE_SIZE / MANA_PAGE_SIZE; /* Have GDMA start searching from 0 */ req.allocated_resources = 0; @@ -358,7 +358,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem unsigned int tail = 0; u64 *page_addr_list; void *request_buf; - int err; + int err = 0; gc = mdev_to_gc(dev); hwc = gc->hwc.driver_data; @@ -383,7 +383,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem create_req->length = umem->length; create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz); - create_req->gdma_page_type = 
order_base_2(page_sz) - PAGE_SHIFT; + create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT; create_req->page_count = num_pages_total; ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n", @@ -511,13 +511,13 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) PAGE_SHIFT; prot = pgprot_writecombine(vma->vm_page_prot); - ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot, + ret = rdma_user_mmap_io(ibcontext, vma, pfn, PAGE_SIZE, prot, NULL); if (ret) ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret); else - ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n", - pfn, gc->db_page_size, ret); + ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %lu, ret %d\n", + pfn, PAGE_SIZE, ret); return ret; } @@ -634,7 +634,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V3; + req.hdr.resp.msg_version = GDMA_MESSAGE_V4; req.hdr.dev_id = dev->gdma_dev->dev_id; err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), @@ -663,6 +663,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) caps->max_inline_data_size = resp.max_inline_data_size; caps->max_send_sge_count = resp.max_send_sge_count; caps->max_recv_sge_count = resp.max_recv_sge_count; + caps->feature_flags = resp.feature_flags; return 0; } @@ -762,6 +763,9 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev) req.hdr.dev_id = gc->mana_ib.dev_id; req.notify_eq_id = mdev->fatal_err_eq->id; + if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT) + req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err) { ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err); diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index b53a5b4de908d..d10175eaa5976 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -48,6 +48,7 @@ struct mana_ib_adapter_caps { u32 max_send_sge_count; u32 max_recv_sge_count; u32 max_inline_data_size; + u64 feature_flags; }; struct mana_ib_queue { @@ -64,6 +65,8 @@ struct mana_ib_dev { struct gdma_queue **eqs; struct xarray qp_table_wq; struct mana_ib_adapter_caps adapter_caps; + netdevice_tracker dev_tracker; + struct notifier_block nb; }; struct mana_ib_wq { @@ -156,6 +159,10 @@ struct mana_ib_query_adapter_caps_req { struct gdma_req_hdr hdr; }; /*HW Data */ +enum mana_ib_adapter_features { + MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4), +}; + struct mana_ib_query_adapter_caps_resp { struct gdma_resp_hdr hdr; u32 max_sq_id; @@ -176,8 +183,13 @@ struct mana_ib_query_adapter_caps_resp { u32 max_send_sge_count; u32 max_recv_sge_count; u32 max_inline_data_size; + u64 feature_flags; }; /* HW Data */ +enum mana_ib_adapter_features_request { + MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1), +}; /*HW Data */ + struct mana_rnic_create_adapter_req { struct gdma_req_hdr hdr; u32 notify_eq_id; diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 73d67c853b6f3..48fef989318b4 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -561,7 +561,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, req.ah_attr.dest_port = ROCE_V2_UDP_DPORT; req.ah_attr.src_port = 
rdma_get_udp_sport(attr->ah_attr.grh.flow_label, ibqp->qp_num, attr->dest_qp_num); - req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class; + req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2; req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 8acffe8b14957..1479d16cd0fb4 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -547,6 +547,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) adapter->rx_pool[i].active = 0; } +static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter) +{ + if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) { + netdev_info(adapter->netdev, + "set max ind descs from %u to safe limit %u\n", + adapter->cur_max_ind_descs, + IBMVNIC_SAFE_IND_DESC); + adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC; + } +} + static void replenish_rx_pool(struct ibmvnic_adapter *adapter, struct ibmvnic_rx_pool *pool) { @@ -633,7 +644,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); /* if send_subcrq_indirect queue is full, flush to VIOS */ - if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + if (ind_bufp->index == adapter->cur_max_ind_descs || i == count - 1) { lpar_rc = send_subcrq_indirect(adapter, handle, @@ -652,6 +663,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, failure: if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); + + /* Detect platform limit H_PARAMETER */ + if (lpar_rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ for (i = ind_bufp->index - 1; i >= 0; --i) { struct ibmvnic_rx_buff *rx_buff; @@ -2174,16 +2193,28 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, rc = send_subcrq_direct(adapter, handle, (u64 *)ind_bufp->indir_arr); - if (rc) + if (rc) { + dev_err_ratelimited(&adapter->vdev->dev, + "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n", + rc, entries, &dma_addr, handle); + /* Detect platform limit H_PARAMETER */ + if (rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); - else + } else { ind_bufp->index = 0; + } return rc; } static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); + u32 cur_max_ind_descs = adapter->cur_max_ind_descs; int queue_num = skb_get_queue_mapping(skb); u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; @@ -2377,7 +2408,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.n_crq_elem = num_entries; tx_buff->num_entries = num_entries; /* flush buffer if current entry can not fit */ - if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { + if (num_entries + ind_bufp->index > cur_max_ind_descs) { lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_flush_err; @@ -2390,7 +2421,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ind_bufp->index += num_entries; if (__netdev_tx_sent_queue(txq, skb->len, netdev_xmit_more() 
&& - ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { + ind_bufp->index < cur_max_ind_descs)) { lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_err; @@ -3842,7 +3873,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, } dma_free_coherent(dev, - IBMVNIC_IND_ARR_SZ, + IBMVNIC_IND_MAX_ARR_SZ, scrq->ind_buf.indir_arr, scrq->ind_buf.indir_dma); @@ -3899,7 +3930,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->ind_buf.indir_arr = dma_alloc_coherent(dev, - IBMVNIC_IND_ARR_SZ, + IBMVNIC_IND_MAX_ARR_SZ, &scrq->ind_buf.indir_dma, GFP_KERNEL); @@ -6204,6 +6235,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) rc = reset_sub_crq_queues(adapter); } } else { + if (adapter->reset_reason == VNIC_RESET_MOBILITY) { + /* After an LPM, reset the max number of indirect + * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT + * hcall to the default max (e.g POWER8 -> POWER10) + * + * If the new destination platform does not support + * the higher limit max (e.g. POWER10-> POWER8 LPM) + * H_PARAMETER will trigger automatic fallback to the + * safe minimum limit. + */ + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; + } + rc = init_sub_crqs(adapter); } @@ -6355,6 +6399,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->wait_for_reset = false; adapter->last_reset_time = jiffies; + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; rc = register_netdev(netdev); if (rc) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index a145c0d11f641..3f7902b3dbcba 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -29,8 +29,9 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 -#define IBMVNIC_MAX_IND_DESCS 16 -#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) +#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_SAFE_IND_DESC 16 +#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 #define IBMVNIC_TSO_BUFS 64 @@ -885,6 +886,7 @@ struct ibmvnic_adapter { dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; u32 priv_flags; + u32 cur_max_ind_descs; /* Vital Product Data (VPD) */ struct ibmvnic_vpd *vpd; diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 0df906f2cdf30..79b25843a27a0 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -8,6 +8,7 @@ #include #include +#include #include struct dentry *mana_debugfs_root; @@ -32,6 +33,9 @@ static void mana_gd_init_pf_regs(struct pci_dev *pdev) gc->db_page_base = gc->bar0_va + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + gc->phys_db_page_base = gc->bar0_pa + + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); sriov_base_va = gc->bar0_va + sriov_base_off; @@ -64,6 +68,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev) mana_gd_init_vf_regs(pdev); } +/* Suppress logging when we set timeout to zero */ +bool mana_need_log(struct gdma_context *gc, int err) +{ + struct hw_channel_context *hwc; + + if (err != -ETIMEDOUT) + return true; + + if (!gc) + return true; + + hwc = gc->hwc.driver_data; + if (hwc && hwc->hwc_timeout == 0) + return false; + + return true; +} + static int mana_gd_query_max_resources(struct pci_dev *pdev) { struct gdma_context *gc = 
pci_get_drvdata(pdev); @@ -135,9 +157,10 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) struct gdma_list_devices_resp resp = {}; struct gdma_general_req req = {}; struct gdma_dev_id dev; - u32 i, max_num_devs; + int found_dev = 0; u16 dev_type; int err; + u32 i; mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), sizeof(resp)); @@ -149,12 +172,17 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) return err ? err : -EPROTO; } - max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); - - for (i = 0; i < max_num_devs; i++) { + for (i = 0; i < GDMA_DEV_LIST_SIZE && + found_dev < resp.num_of_devs; i++) { dev = resp.devs[i]; dev_type = dev.type; + /* Skip empty devices */ + if (dev.as_uint32 == 0) + continue; + + found_dev++; + /* HWC is already detected in mana_hwc_create_channel(). */ if (dev_type == GDMA_DEVICE_HWC) continue; @@ -261,8 +289,9 @@ static int mana_gd_disable_queue(struct gdma_queue *queue) err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { - dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, - resp.hdr.status); + if (mana_need_log(gc, err)) + dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, + resp.hdr.status); return err ? err : -EPROTO; } @@ -345,11 +374,113 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) head, arm_bit); } +#define MANA_SERVICE_PERIOD 10 + +static void mana_serv_fpga(struct pci_dev *pdev) +{ + struct pci_bus *bus, *parent; + + pci_lock_rescan_remove(); + + bus = pdev->bus; + if (!bus) { + dev_err(&pdev->dev, "MANA service: no bus\n"); + goto out; + } + + parent = bus->parent; + if (!parent) { + dev_err(&pdev->dev, "MANA service: no parent bus\n"); + goto out; + } + + pci_stop_and_remove_bus_device(bus->self); + + msleep(MANA_SERVICE_PERIOD * 1000); + + pci_rescan_bus(parent); + +out: + pci_unlock_rescan_remove(); +} + +static void mana_serv_reset(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct hw_channel_context *hwc; + + if (!gc) { + dev_err(&pdev->dev, "MANA service: no GC\n"); + return; + } + + hwc = gc->hwc.driver_data; + if (!hwc) { + dev_err(&pdev->dev, "MANA service: no HWC\n"); + goto out; + } + + /* HWC is not responding in this case, so don't wait */ + hwc->hwc_timeout = 0; + + dev_info(&pdev->dev, "MANA reset cycle start\n"); + + mana_gd_suspend(pdev, PMSG_SUSPEND); + + msleep(MANA_SERVICE_PERIOD * 1000); + + mana_gd_resume(pdev); + + dev_info(&pdev->dev, "MANA reset cycle completed\n"); + +out: + gc->in_service = false; +} + +struct mana_serv_work { + struct work_struct serv_work; + struct pci_dev *pdev; + enum gdma_eqe_type type; +}; + +static void mana_serv_func(struct work_struct *w) +{ + struct mana_serv_work *mns_wk; + struct pci_dev *pdev; + + mns_wk = container_of(w, struct mana_serv_work, serv_work); + pdev = mns_wk->pdev; + + if (!pdev) + goto out; + + switch (mns_wk->type) { + case GDMA_EQE_HWC_FPGA_RECONFIG: + mana_serv_fpga(pdev); + break; + + case GDMA_EQE_HWC_RESET_REQUEST: + mana_serv_reset(pdev); + break; + + default: + dev_err(&pdev->dev, "MANA service: unknown type %d\n", + mns_wk->type); + break; + } + +out: + pci_dev_put(pdev); + kfree(mns_wk); + module_put(THIS_MODULE); +} + static void mana_gd_process_eqe(struct gdma_queue *eq) { u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); struct gdma_context *gc = eq->gdma_dev->gdma_context; struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; + struct mana_serv_work *mns_wk; union gdma_eqe_info eqe_info; enum gdma_eqe_type 
type; struct gdma_event event; @@ -393,6 +524,35 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) eq->eq.callback(eq->eq.context, eq, &event); break; + case GDMA_EQE_HWC_FPGA_RECONFIG: + case GDMA_EQE_HWC_RESET_REQUEST: + dev_info(gc->dev, "Recv MANA service type:%d\n", type); + + if (gc->in_service) { + dev_info(gc->dev, "Already in service\n"); + break; + } + + if (!try_module_get(THIS_MODULE)) { + dev_info(gc->dev, "Module is unloading\n"); + break; + } + + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC); + if (!mns_wk) { + module_put(THIS_MODULE); + break; + } + + dev_info(gc->dev, "Start MANA service type:%d\n", type); + gc->in_service = true; + mns_wk->pdev = to_pci_dev(gc->dev); + mns_wk->type = type; + pci_dev_get(mns_wk->pdev); + INIT_WORK(&mns_wk->serv_work, mana_serv_func); + schedule_work(&mns_wk->serv_work); + break; + default: break; } @@ -534,7 +694,8 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq) err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err) { - dev_err(dev, "test_eq failed: %d\n", err); + if (mana_need_log(gc, err)) + dev_err(dev, "test_eq failed: %d\n", err); goto out; } @@ -569,7 +730,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, if (flush_evenets) { err = mana_gd_test_eq(gc, queue); - if (err) + if (err && mana_need_log(gc, err)) dev_warn(gc->dev, "Failed to flush EQ: %d\n", err); } @@ -667,8 +828,11 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } queue->head = 0; queue->tail = 0; @@ -689,6 +853,8 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -710,8 +876,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle) err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { - dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", - err, resp.hdr.status); + if (mana_need_log(gc, err)) + dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", + err, resp.hdr.status); return -EPROTO; } @@ -771,7 +938,13 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, } gmi->dma_region_handle = resp.dma_region_handle; + dev_dbg(gc->dev, "Created DMA region handle 0x%llx\n", + gmi->dma_region_handle); out: + if (err) + dev_dbg(gc->dev, + "Failed to create DMA region of length: %u, page_type: %d, status: 0x%x, err: %d\n", + length, req->gdma_page_type, resp.hdr.status, err); kfree(req); return err; } @@ -794,8 +967,11 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -816,6 +992,8 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -842,8 +1020,11 @@ int 
mana_gd_create_mana_wq_cq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -863,6 +1044,8 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -994,8 +1177,9 @@ int mana_gd_deregister_device(struct gdma_dev *gd) err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { - dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", - err, resp.hdr.status); + if (mana_need_log(gc, err)) + dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", + err, resp.hdr.status); if (!err) err = -EPROTO; } @@ -1158,8 +1342,11 @@ int mana_gd_post_and_ring(struct gdma_queue *queue, int err; err = mana_gd_post_work_request(queue, wqe_req, wqe_info); - if (err) + if (err) { + dev_err(gc->dev, "Failed to post work req from queue type %d of size %u (err=%d)\n", + queue->type, queue->queue_size, err); return err; + } mana_gd_wq_ring_doorbell(gc, queue); @@ -1436,8 +1623,10 @@ static int mana_gd_setup(struct pci_dev *pdev) mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); err = mana_gd_setup_irqs(pdev); - if (err) + if (err) { + dev_err(gc->dev, "Failed to setup IRQs: %d\n", err); return err; + } err = mana_hwc_create_channel(gc); if (err) @@ -1455,12 +1644,14 @@ static int mana_gd_setup(struct pci_dev *pdev) if (err) goto destroy_hwc; + dev_dbg(&pdev->dev, "mana gdma setup successful\n"); return 0; destroy_hwc: mana_hwc_destroy_channel(gc); remove_irq: mana_gd_remove_irqs(pdev); + dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err); return err; } @@ -1471,6 +1662,7 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_hwc_destroy_channel(gc); mana_gd_remove_irqs(pdev); + dev_dbg(&pdev->dev, "mana gdma cleanup successful\n"); } static bool mana_is_pf(unsigned short dev_id) @@ -1489,8 +1681,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE); err = pci_enable_device(pdev); - if (err) + if (err) { + dev_err(&pdev->dev, "Failed to enable pci device (err=%d)\n", err); return -ENXIO; + } pci_set_master(pdev); @@ -1499,9 +1693,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto disable_dev; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (err) + if (err) { + dev_err(&pdev->dev, "DMA set mask failed: %d\n", err); goto release_region; - + } dma_set_max_seg_size(&pdev->dev, UINT_MAX); err = -ENOMEM; @@ -1579,10 +1774,12 @@ static void mana_gd_remove(struct pci_dev *pdev) pci_release_regions(pdev); pci_disable_device(pdev); + + dev_dbg(&pdev->dev, "mana gdma remove successful\n"); } /* The 'state' parameter is not used. */ -static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) +int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) { struct gdma_context *gc = pci_get_drvdata(pdev); @@ -1597,7 +1794,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) * fail -- if this happens, it's safer to just report an error than try to undo * what has been done. 
*/ -static int mana_gd_resume(struct pci_dev *pdev) +int mana_gd_resume(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); int err; diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 48b899834d0aa..b68e665234045 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021, Microsoft Corporation. */ #include +#include #include static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) @@ -439,7 +440,8 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); if (err) { - dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); + dev_err(hwc->dev, "Failed to allocate DMA buffer size: %u, err %d\n", + buf_size, err); goto out; } @@ -528,6 +530,9 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, out: if (err) mana_hwc_destroy_wq(hwc, hwc_wq); + + dev_err(hwc->dev, "Failed to create HWC queue size= %u type= %d err= %d\n", + queue_size, q_type, err); return err; } @@ -855,7 +860,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, if (!wait_for_completion_timeout(&ctx->comp_event, (msecs_to_jiffies(hwc->hwc_timeout)))) { - dev_err(hwc->dev, "HWC: Request timed out!\n"); + if (hwc->hwc_timeout != 0) + dev_err(hwc->dev, "HWC: Request timed out!\n"); + err = -ETIMEDOUT; goto out; } @@ -866,8 +873,13 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, } if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) { - dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", - ctx->status_code); + if (ctx->status_code == GDMA_STATUS_CMD_UNSUPPORTED) { + err = -EOPNOTSUPP; + goto out; + } + if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT) + dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", + ctx->status_code); err = -EPROTO; goto out; } diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c index 23b1521c0df96..d30721d4516fc 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c @@ -91,7 +91,7 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, goto out; xdp_init_buff(xdp, PAGE_SIZE, &rxq->xdp_rxq); - xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, false); + xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, true); act = bpf_prog_run_xdp(prog, xdp); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ae796e64d24ca..c59e1b9beacbf 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -45,16 +45,27 @@ static const struct file_operations mana_dbg_q_fops = { .read = mana_dbg_q_read, }; +static bool mana_en_need_log(struct mana_port_context *apc, int err) +{ + if (apc && apc->ac && apc->ac->gdma_dev && + apc->ac->gdma_dev->gdma_context) + return mana_need_log(apc->ac->gdma_dev->gdma_context, err); + else + return true; +} + /* Microsoft Azure Network Adapter (MANA) functions */ static int mana_open(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); int err; - err = mana_alloc_queues(ndev); - if (err) + + if (err) { + netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err); return err; + } apc->port_is_up = true; @@ -63,7 +74,7 @@ 
static int mana_open(struct net_device *ndev) netif_carrier_on(ndev); netif_tx_wake_all_queues(ndev); - + netdev_dbg(ndev, "%s successful\n", __func__); return 0; } @@ -175,6 +186,9 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, return 0; frag_err: + if (net_ratelimit()) + netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n", + skb->len); for (i = sg_i - 1; i >= hsg; i--) dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], DMA_TO_DEVICE); @@ -244,10 +258,10 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct netdev_queue *net_txq; struct mana_stats_tx *tx_stats; struct gdma_queue *gdma_sq; + int err, len, num_gso_seg; unsigned int csum_type; struct mana_txq *txq; struct mana_cq *cq; - int err, len; if (unlikely(!apc->port_is_up)) goto tx_drop; @@ -255,6 +269,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (skb_cow_head(skb, MANA_HEADROOM)) goto tx_drop_count; + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto tx_drop_count; + txq = &apc->tx_qp[txq_idx].txq; gdma_sq = txq->gdma_sq; cq = &apc->tx_qp[txq_idx].tx_cq; @@ -397,6 +414,7 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_queue_tail(&txq->pending_skbs, skb); len = skb->len; + num_gso_seg = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; net_txq = netdev_get_tx_queue(ndev, txq_idx); err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, @@ -421,10 +439,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* skb may be freed after mana_gd_post_work_request. Do not use it. */ skb = NULL; + /* Populated the packet and bytes counters based on post GSO packet + * calculations + */ tx_stats = &txq->stats; u64_stats_update_begin(&tx_stats->syncp); - tx_stats->packets++; - tx_stats->bytes += len; + tx_stats->packets += num_gso_seg; + tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs); u64_stats_update_end(&tx_stats->syncp); tx_busy: @@ -614,7 +635,11 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } @@ -647,30 +672,16 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu mpc->rxbpre_total = 0; for (i = 0; i < num_rxb; i++) { - if (mpc->rxbpre_alloc_size > PAGE_SIZE) { - va = netdev_alloc_frag(mpc->rxbpre_alloc_size); - if (!va) - goto error; - - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < - get_order(mpc->rxbpre_alloc_size)) { - put_page(page); - goto error; - } - } else { - page = dev_alloc_page(); - if (!page) - goto error; + page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size)); + if (!page) + goto error; - va = page_to_virt(page); - } + va = page_to_virt(page); da = dma_map_single(dev, va + mpc->rxbpre_headroom, mpc->rxbpre_datasize, DMA_FROM_DEVICE); if (dma_mapping_error(dev, da)) { - put_page(virt_to_head_page(va)); + put_page(page); goto error; } @@ -682,6 +693,7 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu return 0; error: + netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues); mana_pre_dealloc_rxbufs(mpc); return -ENOMEM; } @@ -773,8 +785,13 @@ static int 
mana_send_request(struct mana_context *ac, void *in_buf, err = mana_gd_send_request(gc, in_len, in_buf, out_len, out_buf); if (err || resp->status) { - dev_err(dev, "Failed to send mana message: %d, 0x%x\n", - err, resp->status); + if (err == -EOPNOTSUPP) + return err; + + if (req->req.msg_type != MANA_QUERY_PHY_STAT && + mana_need_log(gc, err)) + dev_err(dev, "Failed to send mana message: %d, 0x%x\n", + err, resp->status); return err ? err : -EPROTO; } @@ -849,8 +866,10 @@ static void mana_pf_deregister_hw_vport(struct mana_port_context *apc) err = mana_send_request(apc->ac, &req, sizeof(req), &resp, sizeof(resp)); if (err) { - netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", - err); + if (mana_en_need_log(apc, err)) + netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", + err); + return; } @@ -905,8 +924,10 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc) err = mana_send_request(apc->ac, &req, sizeof(req), &resp, sizeof(resp)); if (err) { - netdev_err(apc->ndev, "Failed to unregister filter: %d\n", - err); + if (mana_en_need_log(apc, err)) + netdev_err(apc->ndev, "Failed to unregister filter: %d\n", + err); + return; } @@ -920,7 +941,7 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc) static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, u32 proto_minor_ver, u32 proto_micro_ver, - u16 *max_num_vports) + u16 *max_num_vports, u8 *bm_hostmode) { struct gdma_context *gc = ac->gdma_dev->gdma_context; struct mana_query_device_cfg_resp resp = {}; @@ -931,7 +952,7 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V2; + req.hdr.resp.msg_version = GDMA_MESSAGE_V3; req.proto_major_ver = proto_major_ver; req.proto_minor_ver = proto_minor_ver; @@ -955,11 +976,16 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, *max_num_vports = resp.max_num_vports; - if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2) + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) gc->adapter_mtu = resp.adapter_mtu; else gc->adapter_mtu = ETH_FRAME_LEN; + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3) + *bm_hostmode = resp.bm_hostmode; + else + *bm_hostmode = 0; + debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu); return 0; @@ -1131,7 +1157,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, err = mana_send_request(apc->ac, req, req_buf_size, &resp, sizeof(resp)); if (err) { - netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + if (mana_en_need_log(apc, err)) + netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + goto out; } @@ -1226,7 +1254,9 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, err = mana_send_request(apc->ac, &req, sizeof(req), &resp, sizeof(resp)); if (err) { - netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); + if (mana_en_need_log(apc, err)) + netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); + return; } @@ -1299,8 +1329,10 @@ static int mana_create_eq(struct mana_context *ac) for (i = 0; i < gc->max_num_queues; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); - if (err) + if (err) { + dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err); goto out; + } mana_create_eq_debugfs(ac, i); } @@ -1542,8 +1574,12 @@ static struct sk_buff 
*mana_build_skb(struct mana_rxq *rxq, void *buf_va, return NULL; if (xdp->data_hard_start) { + u32 metasize = xdp->data - xdp->data_meta; + skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); return skb; } @@ -1655,7 +1691,7 @@ static void mana_rx_skb(void *buf_va, bool from_pool, } static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, - dma_addr_t *da, bool *from_pool, bool is_napi) + dma_addr_t *da, bool *from_pool) { struct page *page; void *va; @@ -1666,21 +1702,6 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, if (rxq->xdp_save_va) { va = rxq->xdp_save_va; rxq->xdp_save_va = NULL; - } else if (rxq->alloc_size > PAGE_SIZE) { - if (is_napi) - va = napi_alloc_frag(rxq->alloc_size); - else - va = netdev_alloc_frag(rxq->alloc_size); - - if (!va) - return NULL; - - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < get_order(rxq->alloc_size)) { - put_page(page); - return NULL; - } } else { page = page_pool_dev_alloc_pages(rxq->page_pool); if (!page) @@ -1713,7 +1734,7 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, dma_addr_t da; void *va; - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return; @@ -1829,7 +1850,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1840,16 +1860,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; - - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + cq->work_done_since_doorbell += w; + + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. 
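Reading the arithmetic in that comment concretely: queue_size is in bytes, so dividing by the completion-entry size gives the number of CQEs in one full wraparound of the CQ, and the unarmed ring just below fires once the completions handled since the last doorbell exceed four wraparounds' worth. A small helper expressing that threshold (a sketch; the 64-byte COMP_ENTRY_SIZE used in the example is an assumption about this hardware):

static u32 example_unarmed_ring_threshold(const struct mana_cq *cq)
{
        /* queue_size is in bytes, so this is the CQE count per wraparound */
        u32 cqes_per_wrap = cq->gdma_cq->queue_size / COMP_ENTRY_SIZE;

        /* ring an unarmed doorbell once more than 4 wraparounds have been seen */
        return cqes_per_wrap * 4;
}

With a 64 KiB CQ and 64-byte CQEs that is 1024 CQEs per wraparound, so the doorbell is rung with the arm bit clear after more than 4096 completions, well inside the 8-wraparound hardware requirement.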
+ */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } @@ -2070,6 +2097,8 @@ static int mana_create_txq(struct mana_port_context *apc, return 0; out: + netdev_err(net, "Failed to create %d TX queues, %d\n", + apc->num_queues, err); mana_destroy_txq(apc); return err; } @@ -2147,7 +2176,7 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, if (mpc->rxbufs_pre) va = mana_get_rxbuf_pre(rxq, &da); else - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return -ENOMEM; @@ -2233,6 +2262,7 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) pprm.nid = gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; + pprm.order = get_order(rxq->alloc_size); rxq->page_pool = page_pool_create(&pprm); @@ -2406,6 +2436,7 @@ static int mana_add_rx_queues(struct mana_port_context *apc, rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); if (!rxq) { err = -ENOMEM; + netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err); goto out; } @@ -2439,7 +2470,7 @@ static void mana_destroy_vport(struct mana_port_context *apc) mana_destroy_txq(apc); mana_uncfg_vport(apc); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_hw_vport(apc); } @@ -2451,7 +2482,7 @@ static int mana_create_vport(struct mana_port_context *apc, apc->default_rxobj = INVALID_MANA_HANDLE; - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_hw_vport(apc); if (err) return err; @@ -2604,6 +2635,88 @@ void mana_query_gf_stats(struct mana_port_context *apc) apc->eth_stats.hc_tx_err_gdma = resp.tx_err_gdma; } +void mana_query_phy_stats(struct mana_port_context *apc) +{ + struct mana_query_phy_stat_resp resp = {}; + struct mana_query_phy_stat_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_PHY_STAT, + sizeof(req), sizeof(resp)); + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) + return; + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_PHY_STAT, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(ndev, + "Failed to query PHY stats: %d, resp:0x%x\n", + err, resp.hdr.status); + return; + } + + /* Aggregate drop counters */ + apc->phy_stats.rx_pkt_drop_phy = resp.rx_pkt_drop_phy; + apc->phy_stats.tx_pkt_drop_phy = resp.tx_pkt_drop_phy; + + /* Per TC traffic Counters */ + apc->phy_stats.rx_pkt_tc0_phy = resp.rx_pkt_tc0_phy; + apc->phy_stats.tx_pkt_tc0_phy = resp.tx_pkt_tc0_phy; + apc->phy_stats.rx_pkt_tc1_phy = resp.rx_pkt_tc1_phy; + apc->phy_stats.tx_pkt_tc1_phy = resp.tx_pkt_tc1_phy; + apc->phy_stats.rx_pkt_tc2_phy = resp.rx_pkt_tc2_phy; + apc->phy_stats.tx_pkt_tc2_phy = resp.tx_pkt_tc2_phy; + apc->phy_stats.rx_pkt_tc3_phy = resp.rx_pkt_tc3_phy; + apc->phy_stats.tx_pkt_tc3_phy = resp.tx_pkt_tc3_phy; + apc->phy_stats.rx_pkt_tc4_phy = resp.rx_pkt_tc4_phy; + apc->phy_stats.tx_pkt_tc4_phy = resp.tx_pkt_tc4_phy; + apc->phy_stats.rx_pkt_tc5_phy = resp.rx_pkt_tc5_phy; + apc->phy_stats.tx_pkt_tc5_phy = resp.tx_pkt_tc5_phy; + apc->phy_stats.rx_pkt_tc6_phy = resp.rx_pkt_tc6_phy; + apc->phy_stats.tx_pkt_tc6_phy = resp.tx_pkt_tc6_phy; + apc->phy_stats.rx_pkt_tc7_phy = resp.rx_pkt_tc7_phy; + apc->phy_stats.tx_pkt_tc7_phy = resp.tx_pkt_tc7_phy; + + /* Per TC byte Counters */ + apc->phy_stats.rx_byte_tc0_phy = resp.rx_byte_tc0_phy; + 
apc->phy_stats.tx_byte_tc0_phy = resp.tx_byte_tc0_phy; + apc->phy_stats.rx_byte_tc1_phy = resp.rx_byte_tc1_phy; + apc->phy_stats.tx_byte_tc1_phy = resp.tx_byte_tc1_phy; + apc->phy_stats.rx_byte_tc2_phy = resp.rx_byte_tc2_phy; + apc->phy_stats.tx_byte_tc2_phy = resp.tx_byte_tc2_phy; + apc->phy_stats.rx_byte_tc3_phy = resp.rx_byte_tc3_phy; + apc->phy_stats.tx_byte_tc3_phy = resp.tx_byte_tc3_phy; + apc->phy_stats.rx_byte_tc4_phy = resp.rx_byte_tc4_phy; + apc->phy_stats.tx_byte_tc4_phy = resp.tx_byte_tc4_phy; + apc->phy_stats.rx_byte_tc5_phy = resp.rx_byte_tc5_phy; + apc->phy_stats.tx_byte_tc5_phy = resp.tx_byte_tc5_phy; + apc->phy_stats.rx_byte_tc6_phy = resp.rx_byte_tc6_phy; + apc->phy_stats.tx_byte_tc6_phy = resp.tx_byte_tc6_phy; + apc->phy_stats.rx_byte_tc7_phy = resp.rx_byte_tc7_phy; + apc->phy_stats.tx_byte_tc7_phy = resp.tx_byte_tc7_phy; + + /* Per TC pause Counters */ + apc->phy_stats.rx_pause_tc0_phy = resp.rx_pause_tc0_phy; + apc->phy_stats.tx_pause_tc0_phy = resp.tx_pause_tc0_phy; + apc->phy_stats.rx_pause_tc1_phy = resp.rx_pause_tc1_phy; + apc->phy_stats.tx_pause_tc1_phy = resp.tx_pause_tc1_phy; + apc->phy_stats.rx_pause_tc2_phy = resp.rx_pause_tc2_phy; + apc->phy_stats.tx_pause_tc2_phy = resp.tx_pause_tc2_phy; + apc->phy_stats.rx_pause_tc3_phy = resp.rx_pause_tc3_phy; + apc->phy_stats.tx_pause_tc3_phy = resp.tx_pause_tc3_phy; + apc->phy_stats.rx_pause_tc4_phy = resp.rx_pause_tc4_phy; + apc->phy_stats.tx_pause_tc4_phy = resp.tx_pause_tc4_phy; + apc->phy_stats.rx_pause_tc5_phy = resp.rx_pause_tc5_phy; + apc->phy_stats.tx_pause_tc5_phy = resp.tx_pause_tc5_phy; + apc->phy_stats.rx_pause_tc6_phy = resp.rx_pause_tc6_phy; + apc->phy_stats.tx_pause_tc6_phy = resp.tx_pause_tc6_phy; + apc->phy_stats.rx_pause_tc7_phy = resp.rx_pause_tc7_phy; + apc->phy_stats.tx_pause_tc7_phy = resp.tx_pause_tc7_phy; +} + static int mana_init_port(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); @@ -2652,12 +2765,18 @@ int mana_alloc_queues(struct net_device *ndev) int err; err = mana_create_vport(apc, ndev); - if (err) + if (err) { + netdev_err(ndev, "Failed to create vPort %u : %d\n", apc->port_idx, err); return err; + } err = netif_set_real_num_tx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } err = mana_add_rx_queues(apc, ndev); if (err) @@ -2666,16 +2785,22 @@ int mana_alloc_queues(struct net_device *ndev) apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; err = netif_set_real_num_rx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } mana_rss_table_init(apc); err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); - if (err) + if (err) { + netdev_err(ndev, "Failed to configure RSS table: %d\n", err); goto destroy_vport; + } - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_filter(apc); if (err) goto destroy_vport; @@ -2737,7 +2862,7 @@ static int mana_dealloc_queues(struct net_device *ndev) mana_chn_setxdp(apc, NULL); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_filter(apc); /* No packet can be transmitted now since apc->port_is_up is false. 
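The is_pf && !apc->ac->bm_hostmode guard just above, like its counterparts in mana_alloc_queues(), mana_create_vport() and mana_destroy_vport(), hinges on a single byte negotiated during MANA_QUERY_DEV_CONFIG earlier in this file: the request is now sent as GDMA_MESSAGE_V3, and firmware that still answers with a v1/v2 response simply does not carry the field. A condensed sketch of that negotiation using the names this patch introduces (treating a missing field as zero follows the code above; reading a non-zero value as bare-metal host mode follows the field's comment in mana.h):

static u8 example_bm_hostmode(const struct mana_query_device_cfg_resp *resp)
{
        /* only a v3 or newer response carries bm_hostmode */
        if (resp->hdr.response.msg_version >= GDMA_MESSAGE_V3)
                return resp->bm_hostmode;

        /* pre-v3 firmware: no bare-metal host mode, keep the VF behaviour */
        return 0;
}

mana_probe() stores the result in ac->bm_hostmode, and every PF-only hw vPort and filter registration path is skipped while it is set.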
@@ -2786,11 +2911,10 @@ static int mana_dealloc_queues(struct net_device *ndev) apc->rss_state = TRI_STATE_FALSE; err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); - if (err) { + if (err && mana_en_need_log(apc, err)) netdev_err(ndev, "Failed to disable vPort: %d\n", err); - return err; - } + /* Even in err case, still need to cleanup the vPort */ mana_destroy_vport(apc); return 0; @@ -2814,8 +2938,10 @@ int mana_detach(struct net_device *ndev, bool from_close) if (apc->port_st_save) { err = mana_dealloc_queues(ndev); - if (err) + if (err) { + netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err); return err; + } } if (!from_close) { @@ -2864,6 +2990,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->dev_port = port_idx; SET_NETDEV_DEV(ndev, gc->dev); + netif_set_tso_max_size(ndev, GSO_MAX_SIZE); + netif_carrier_off(ndev); netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); @@ -2959,6 +3087,8 @@ static int add_adev(struct gdma_dev *gd) goto add_fail; gd->adev = adev; + dev_dbg(gd->gdma_context->dev, + "Auxiliary device added successfully\n"); return 0; add_fail: @@ -2978,6 +3108,7 @@ int mana_probe(struct gdma_dev *gd, bool resuming) struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; struct device *dev = gc->dev; + u8 bm_hostmode = 0; u16 num_ports = 0; int err; int i; @@ -3000,14 +3131,18 @@ int mana_probe(struct gdma_dev *gd, bool resuming) } err = mana_create_eq(ac); - if (err) + if (err) { + dev_err(dev, "Failed to create EQs: %d\n", err); goto out; + } err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, - MANA_MICRO_VERSION, &num_ports); + MANA_MICRO_VERSION, &num_ports, &bm_hostmode); if (err) goto out; + ac->bm_hostmode = bm_hostmode; + if (!resuming) { ac->num_ports = num_ports; } else { @@ -3057,8 +3192,14 @@ int mana_probe(struct gdma_dev *gd, bool resuming) err = add_adev(gd); out: - if (err) + if (err) { mana_remove(gd, false); + } else { + dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n", + gd, gd->dev_id.as_uint32, ac->num_ports, + gd->dev_id.type, gd->dev_id.instance); + dev_dbg(dev, "%s succeeded\n", __func__); + } return err; } @@ -3120,23 +3261,30 @@ void mana_remove(struct gdma_dev *gd, bool suspending) gd->driver_data = NULL; gd->gdma_context = NULL; kfree(ac); + dev_dbg(dev, "%s succeeded\n", __func__); } -struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index) +struct net_device *mana_get_primary_netdev(struct mana_context *ac, + u32 port_index, + netdevice_tracker *tracker) { struct net_device *ndev; - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "Taking primary netdev without holding the RCU read lock"); if (port_index >= ac->num_ports) return NULL; - /* When mana is used in netvsc, the upper netdevice should be returned. */ - if (ac->ports[port_index]->flags & IFF_SLAVE) - ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); - else + rcu_read_lock(); + + /* If mana is used in netvsc, the upper netdevice should be returned. 
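As the remainder of the function just below shows, mana_get_primary_netdev() now takes the RCU read lock internally and returns the chosen netdev with a reference recorded against the caller-supplied netdevice_tracker, so callers no longer wrap the call in rcu_read_lock() but must release the reference themselves. A hypothetical caller (for example on the mana_ib side, which is outside this hunk; the error value is illustrative) would look roughly like this:

static int example_use_primary_netdev(struct mana_context *ac, u32 port_index)
{
        netdevice_tracker tracker;
        struct net_device *ndev;

        /* the consuming module must MODULE_IMPORT_NS(NET_MANA) for this export */
        ndev = mana_get_primary_netdev(ac, port_index, &tracker);
        if (!ndev)
                return -ENODEV;

        /* ndev stays valid here without holding the RCU read lock */

        netdev_put(ndev, &tracker);
        return 0;
}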
*/ + ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); + + /* If there is no upper device, use the parent Ethernet device */ + if (!ndev) ndev = ac->ports[port_index]; + netdev_hold(ndev, tracker, GFP_ATOMIC); + rcu_read_unlock(); + return ndev; } -EXPORT_SYMBOL_NS(mana_get_primary_netdev_rcu, NET_MANA); +EXPORT_SYMBOL_NS(mana_get_primary_netdev, NET_MANA); diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index c419626073f5f..4fb3a04994a2d 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -7,10 +7,12 @@ #include -static const struct { +struct mana_stats_desc { char name[ETH_GSTRING_LEN]; u16 offset; -} mana_eth_stats[] = { +}; + +static const struct mana_stats_desc mana_eth_stats[] = { {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, {"hc_rx_discards_no_wqe", offsetof(struct mana_ethtool_stats, @@ -75,6 +77,59 @@ static const struct { rx_cqe_unknown_type)}, }; +static const struct mana_stats_desc mana_phy_stats[] = { + { "hc_rx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_drop_phy) }, + { "hc_tx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_drop_phy) }, + { "hc_tc0_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc0_phy) }, + { "hc_tc0_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc0_phy) }, + { "hc_tc0_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc0_phy) }, + { "hc_tc0_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc0_phy) }, + { "hc_tc1_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc1_phy) }, + { "hc_tc1_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc1_phy) }, + { "hc_tc1_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc1_phy) }, + { "hc_tc1_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc1_phy) }, + { "hc_tc2_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc2_phy) }, + { "hc_tc2_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc2_phy) }, + { "hc_tc2_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc2_phy) }, + { "hc_tc2_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc2_phy) }, + { "hc_tc3_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc3_phy) }, + { "hc_tc3_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc3_phy) }, + { "hc_tc3_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc3_phy) }, + { "hc_tc3_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc3_phy) }, + { "hc_tc4_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc4_phy) }, + { "hc_tc4_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc4_phy) }, + { "hc_tc4_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc4_phy) }, + { "hc_tc4_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc4_phy) }, + { "hc_tc5_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc5_phy) }, + { "hc_tc5_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc5_phy) }, + { "hc_tc5_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc5_phy) }, + { "hc_tc5_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc5_phy) }, + { "hc_tc6_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc6_phy) }, + { "hc_tc6_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc6_phy) }, + { 
"hc_tc6_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc6_phy) }, + { "hc_tc6_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc6_phy) }, + { "hc_tc7_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc7_phy) }, + { "hc_tc7_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc7_phy) }, + { "hc_tc7_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc7_phy) }, + { "hc_tc7_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc7_phy) }, + { "hc_tc0_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc0_phy) }, + { "hc_tc0_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc0_phy) }, + { "hc_tc1_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc1_phy) }, + { "hc_tc1_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc1_phy) }, + { "hc_tc2_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc2_phy) }, + { "hc_tc2_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc2_phy) }, + { "hc_tc3_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc3_phy) }, + { "hc_tc3_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc3_phy) }, + { "hc_tc4_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc4_phy) }, + { "hc_tc4_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc4_phy) }, + { "hc_tc5_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc5_phy) }, + { "hc_tc5_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc5_phy) }, + { "hc_tc6_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc6_phy) }, + { "hc_tc6_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc6_phy) }, + { "hc_tc7_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc7_phy) }, + { "hc_tc7_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc7_phy) }, +}; + static int mana_get_sset_count(struct net_device *ndev, int stringset) { struct mana_port_context *apc = netdev_priv(ndev); @@ -83,8 +138,8 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset) if (stringset != ETH_SS_STATS) return -EINVAL; - return ARRAY_SIZE(mana_eth_stats) + num_queues * - (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); + return ARRAY_SIZE(mana_eth_stats) + ARRAY_SIZE(mana_phy_stats) + + num_queues * (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); } static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) @@ -99,6 +154,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) ethtool_puts(&data, mana_eth_stats[i].name); + for (i = 0; i < ARRAY_SIZE(mana_phy_stats); i++) + ethtool_puts(&data, mana_phy_stats[i].name); + for (i = 0; i < num_queues; i++) { ethtool_sprintf(&data, "rx_%d_packets", i); ethtool_sprintf(&data, "rx_%d_bytes", i); @@ -128,6 +186,7 @@ static void mana_get_ethtool_stats(struct net_device *ndev, struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; void *eth_stats = &apc->eth_stats; + void *phy_stats = &apc->phy_stats; struct mana_stats_rx *rx_stats; struct mana_stats_tx *tx_stats; unsigned int start; @@ -151,9 +210,18 @@ static void mana_get_ethtool_stats(struct net_device *ndev, /* we call mana function to update stats from GDMA */ mana_query_gf_stats(apc); + /* We call this mana function to get the phy stats from GDMA and includes + * aggregate tx/rx drop counters, Per-TC(Traffic Channel) tx/rx and pause + * 
counters. + */ + mana_query_phy_stats(apc); + for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++) data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); + for (q = 0; q < ARRAY_SIZE(mana_phy_stats); q++) + data[i++] = *(u64 *)(phy_stats + mana_phy_stats[q].offset); + for (q = 0; q < num_queues; q++) { rx_stats = &apc->rxqs[q]->stats; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 2e868d352228a..fd23530e65b11 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1049,6 +1049,7 @@ struct net_device_context { struct net_device __rcu *vf_netdev; struct netvsc_vf_pcpu_stats __percpu *vf_stats; struct delayed_work vf_takeover; + struct delayed_work vfns_work; /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; @@ -1063,6 +1064,8 @@ struct net_device_context { struct netvsc_device_info *saved_netvsc_dev_info; }; +void netvsc_vfns_work(struct work_struct *w); + /* Azure hosts don't support non-TCP port numbers in hashing for fragmented * packets. We can use ethtool to change UDP hash level when necessary. */ @@ -1165,6 +1168,8 @@ struct netvsc_device { u32 max_chn; u32 num_chn; + u32 netvsc_gso_max_size; + atomic_t open_chn; struct work_struct subchan_work; wait_queue_head_t subchan_open; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 2b6ec979a62f2..9afb08dbc350a 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -712,8 +712,13 @@ void netvsc_device_remove(struct hv_device *device) for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ /* only disable enabled NAPI channel */ - if (i < ndev->real_num_rx_queues) + if (i < ndev->real_num_rx_queues) { + netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_TX, + NULL); + netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_RX, + NULL); napi_disable(&net_device->chan_table[i].napi); + } netif_napi_del(&net_device->chan_table[i].napi); } @@ -1787,6 +1792,10 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netdev_dbg(ndev, "hv_netvsc channel opened successfully\n"); napi_enable(&net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, + &net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, + &net_device->chan_table[0].napi); /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device, net_device, device_info); @@ -1805,6 +1814,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, close: RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL); napi_disable(&net_device->chan_table[0].napi); /* Now, we can close the channel safely */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 23180f7b67b6a..86c3bdad0f1b5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2350,8 +2350,11 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) if (!ndev) return NOTIFY_DONE; - /* set slave flag before open to prevent IPv6 addrconf */ + /* Set slave flag and no addrconf flag before open + * to prevent IPv6 addrconf. + */ vf_netdev->flags |= IFF_SLAVE; + vf_netdev->priv_flags |= IFF_NO_ADDRCONF; return NOTIFY_DONE; } @@ -2461,6 +2464,21 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) } else { netdev_info(ndev, "Data path switched %s VF: %s\n", vf_is_up ? 
"to" : "from", vf_netdev->name); + + /* In Azure, when accelerated networking in enabled, other NICs + * like MANA, MLX, are configured as a bonded nic with + * Netvsc(failover) NIC. For bonded NICs, the min of the max + * pkt aggregate size of the members is propagated in the stack. + * In order to allow these NICs (MANA/MLX) to use up to + * GSO_MAX_SIZE gso packet size, we need to allow Netvsc NIC to + * also support this in the guest. + * This value is only increased for netvsc NIC when datapath is + * switched over to the VF + */ + if (vf_is_up) + netif_set_tso_max_size(ndev, vf_netdev->tso_max_size); + else + netif_set_tso_max_size(ndev, netvsc_dev->netvsc_gso_max_size); } return NOTIFY_OK; @@ -2547,6 +2565,7 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); + INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work); net_device_ctx->vf_stats = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); @@ -2689,6 +2708,8 @@ static void netvsc_remove(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); + nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev) { cancel_work_sync(&nvdev->subchan_work); @@ -2730,6 +2751,7 @@ static int netvsc_suspend(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev == NULL) { @@ -2823,6 +2845,27 @@ static void netvsc_event_set_vf_ns(struct net_device *ndev) } } +void netvsc_vfns_work(struct work_struct *w) +{ + struct net_device_context *ndev_ctx = + container_of(w, struct net_device_context, vfns_work.work); + struct net_device *ndev; + + if (!rtnl_trylock()) { + schedule_delayed_work(&ndev_ctx->vfns_work, 1); + return; + } + + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (!ndev) + goto out; + + netvsc_event_set_vf_ns(ndev); + +out: + rtnl_unlock(); +} + /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. 
The synthetic interface is presented first @@ -2833,10 +2876,12 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct net_device_context *ndev_ctx; int ret = 0; if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) { - netvsc_event_set_vf_ns(event_dev); + ndev_ctx = netdev_priv(event_dev); + schedule_delayed_work(&ndev_ctx->vfns_work, 0); return NOTIFY_DONE; } diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index ecc2128ca9b72..82747dfacd70f 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1269,10 +1269,15 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) ret = vmbus_open(new_sc, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, nvchan); - if (ret == 0) + if (ret == 0) { napi_enable(&nvchan->napi); - else + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX, + &nvchan->napi); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX, + &nvchan->napi); + } else { netdev_notice(ndev, "sub channel open failed: %d\n", ret); + } if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn) wake_up(&nvscdev->subchan_open); @@ -1351,9 +1356,10 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, struct net_device_context *net_device_ctx = netdev_priv(net); struct ndis_offload hwcaps; struct ndis_offload_params offloads; - unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE; int ret; + nvdev->netvsc_gso_max_size = GSO_LEGACY_MAX_SIZE; + /* Find HW offload capabilities */ ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps); if (ret != 0) @@ -1385,8 +1391,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; net->hw_features |= NETIF_F_TSO; - if (hwcaps.lsov2.ip4_maxsz < gso_max_size) - gso_max_size = hwcaps.lsov2.ip4_maxsz; + if (hwcaps.lsov2.ip4_maxsz < nvdev->netvsc_gso_max_size) + nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip4_maxsz; } if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { @@ -1406,8 +1412,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; net->hw_features |= NETIF_F_TSO6; - if (hwcaps.lsov2.ip6_maxsz < gso_max_size) - gso_max_size = hwcaps.lsov2.ip6_maxsz; + if (hwcaps.lsov2.ip6_maxsz < nvdev->netvsc_gso_max_size) + nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip6_maxsz; } if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) { @@ -1433,7 +1439,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, */ net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features; - netif_set_tso_max_size(net, gso_max_size); + netif_set_tso_max_size(net, nvdev->netvsc_gso_max_size); ret = rndis_filter_set_offload_params(net, nvdev, &offloads); diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index 217eb57663f05..7e35fae5b9caa 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -79,6 +79,7 @@ int ath12k_dp_peer_setup(struct ath12k *ar, int vdev_id, const u8 *addr) ret = ath12k_dp_rx_peer_frag_setup(ar, addr, vdev_id); if (ret) { ath12k_warn(ab, "failed to setup rx defrag context\n"); + tid--; goto peer_clean; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index eb5884a7f0649..f354667e22186 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -217,6 +217,7 @@ struct 
eventpoll { /* used to optimize loop detection check */ u64 gen; struct hlist_head refs; + u8 loop_check_depth; /* * usage count, used together with epitem->dying to @@ -1966,23 +1967,24 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } /** - * ep_loop_check_proc - verify that adding an epoll file inside another - * epoll structure does not violate the constraints, in - * terms of closed loops, or too deep chains (which can - * result in excessive stack usage). + * ep_loop_check_proc - verify that adding an epoll file @ep inside another + * epoll file does not create closed loops, and + * determine the depth of the subtree starting at @ep * * @ep: the &struct eventpoll to be currently checked. * @depth: Current depth of the path being checked. * - * Return: %zero if adding the epoll @file inside current epoll - * structure @ep does not violate the constraints, or %-1 otherwise. + * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep. */ static int ep_loop_check_proc(struct eventpoll *ep, int depth) { - int error = 0; + int result = 0; struct rb_node *rbp; struct epitem *epi; + if (ep->gen == loop_check_gen) + return ep->loop_check_depth; + mutex_lock_nested(&ep->mtx, depth + 1); ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { @@ -1990,13 +1992,11 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth) if (unlikely(is_file_epoll(epi->ffd.file))) { struct eventpoll *ep_tovisit; ep_tovisit = epi->ffd.file->private_data; - if (ep_tovisit->gen == loop_check_gen) - continue; if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) - error = -1; + result = INT_MAX; else - error = ep_loop_check_proc(ep_tovisit, depth + 1); - if (error != 0) + result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1); + if (result > EP_MAX_NESTS) break; } else { /* @@ -2010,9 +2010,27 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth) list_file(epi->ffd.file); } } + ep->loop_check_depth = result; mutex_unlock(&ep->mtx); - return error; + return result; +} + +/** + * ep_get_upwards_depth_proc - determine depth of @ep when traversed upwards + */ +static int ep_get_upwards_depth_proc(struct eventpoll *ep, int depth) +{ + int result = 0; + struct epitem *epi; + + if (ep->gen == loop_check_gen) + return ep->loop_check_depth; + hlist_for_each_entry_rcu(epi, &ep->refs, fllink) + result = max(result, ep_get_upwards_depth_proc(epi->ep, depth + 1) + 1); + ep->gen = loop_check_gen; + ep->loop_check_depth = result; + return result; } /** @@ -2028,8 +2046,22 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth) */ static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to) { + int depth, upwards_depth; + inserting_into = ep; - return ep_loop_check_proc(to, 0); + /* + * Check how deep down we can get from @to, and whether it is possible + * to loop up to @ep. + */ + depth = ep_loop_check_proc(to, 0); + if (depth > EP_MAX_NESTS) + return -1; + /* Check how far up we can go from @ep. */ + rcu_read_lock(); + upwards_depth = ep_get_upwards_depth_proc(ep, 0); + rcu_read_unlock(); + + return (depth+1+upwards_depth > EP_MAX_NESTS) ? 
-1 : 0; } static void clear_tfile_check_list(void) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 99755b607b856..26f52ca17c95c 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -329,7 +329,7 @@ static struct ntlmssp2_name *find_next_av(struct cifs_ses *ses, len = AV_LEN(av); if (AV_TYPE(av) == NTLMSSP_AV_EOL) return NULL; - if (!len || (u8 *)av + sizeof(*av) + len > end) + if ((u8 *)av + sizeof(*av) + len > end) return NULL; return av; } @@ -349,7 +349,7 @@ static int find_av_name(struct cifs_ses *ses, u16 type, char **name, u16 maxlen) av_for_each_entry(ses, av) { len = AV_LEN(av); - if (AV_TYPE(av) != type) + if (AV_TYPE(av) != type || !len) continue; if (!IS_ALIGNED(len, sizeof(__le16))) { cifs_dbg(VFS | ONCE, "%s: bad length(%u) for type %u\n", @@ -518,17 +518,67 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_ return rc; } +/* + * Set up NTLMv2 response blob with SPN (cifs/) appended to the + * existing list of AV pairs. + */ +static int set_auth_key_response(struct cifs_ses *ses) +{ + size_t baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); + size_t len, spnlen, tilen = 0, num_avs = 2 /* SPN + EOL */; + struct TCP_Server_Info *server = ses->server; + char *spn __free(kfree) = NULL; + struct ntlmssp2_name *av; + char *rsp = NULL; + int rc; + + spnlen = strlen(server->hostname); + len = sizeof("cifs/") + spnlen; + spn = kmalloc(len, GFP_KERNEL); + if (!spn) { + rc = -ENOMEM; + goto out; + } + + spnlen = scnprintf(spn, len, "cifs/%.*s", + (int)spnlen, server->hostname); + + av_for_each_entry(ses, av) + tilen += sizeof(*av) + AV_LEN(av); + + len = baselen + tilen + spnlen * sizeof(__le16) + num_avs * sizeof(*av); + rsp = kmalloc(len, GFP_KERNEL); + if (!rsp) { + rc = -ENOMEM; + goto out; + } + + memcpy(rsp + baselen, ses->auth_key.response, tilen); + av = (void *)(rsp + baselen + tilen); + av->type = cpu_to_le16(NTLMSSP_AV_TARGET_NAME); + av->length = cpu_to_le16(spnlen * sizeof(__le16)); + cifs_strtoUTF16((__le16 *)av->data, spn, spnlen, ses->local_nls); + av = (void *)((__u8 *)av + sizeof(*av) + AV_LEN(av)); + av->type = cpu_to_le16(NTLMSSP_AV_EOL); + av->length = 0; + + rc = 0; + ses->auth_key.len = len; +out: + ses->auth_key.response = rsp; + return rc; +} + int setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) { struct shash_desc *hmacmd5 = NULL; - int rc; - int baselen; - unsigned int tilen; + unsigned char *tiblob = NULL; /* target info blob */ struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; - unsigned char *tiblob = NULL; /* target info blob */ __le64 rsp_timestamp; + __u64 cc; + int rc; if (nls_cp == NULL) { cifs_dbg(VFS, "%s called with nls_cp==NULL\n", __func__); @@ -574,32 +624,25 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) * (as Windows 7 does) */ rsp_timestamp = find_timestamp(ses); + get_random_bytes(&cc, sizeof(cc)); - baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); - tilen = ses->auth_key.len; - tiblob = ses->auth_key.response; + cifs_server_lock(ses->server); - ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); - if (!ses->auth_key.response) { - rc = -ENOMEM; + tiblob = ses->auth_key.response; + rc = set_auth_key_response(ses); + if (rc) { ses->auth_key.len = 0; - goto setup_ntlmv2_rsp_ret; + goto unlock; } - ses->auth_key.len += baselen; ntlmv2 = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); ntlmv2->blob_signature = cpu_to_le32(0x00000101); ntlmv2->reserved = 0; 
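For orientation, the buffer that this function goes on to populate is assembled by set_auth_key_response() above with the SPN carried as one extra AV pair; its layout, using the sizes computed in that helper, is:

/*
 * ses->auth_key.response after set_auth_key_response():
 *
 *   [ CIFS_SESS_KEY_SIZE bytes ]  session key area, filled in later in this
 *                                 function
 *   [ struct ntlmv2_resp       ]  blob signature 0x00000101, timestamp,
 *                                 client challenge, reserved fields
 *   [ tilen bytes              ]  AV pairs copied from the server-supplied
 *                                 target info blob
 *   [ struct ntlmssp2_name     ]  NTLMSSP_AV_TARGET_NAME,
 *                                 length = spnlen * sizeof(__le16)
 *   [ spnlen * sizeof(__le16)  ]  "cifs/<hostname>" converted to UTF-16LE
 *   [ struct ntlmssp2_name     ]  NTLMSSP_AV_EOL, length 0
 *
 * ses->auth_key.len = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp) +
 *                     tilen + spnlen * sizeof(__le16) +
 *                     2 * sizeof(struct ntlmssp2_name)
 */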
ntlmv2->time = rsp_timestamp; - - get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); + ntlmv2->client_chal = cc; ntlmv2->reserved2 = 0; - memcpy(ses->auth_key.response + baselen, tiblob, tilen); - - cifs_server_lock(ses->server); - rc = cifs_alloc_hash("hmac(md5)", &hmacmd5); if (rc) { cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc); diff --git a/include/linux/hid.h b/include/linux/hid.h index 1533c9dcd3a67..6dd0eb6cda686 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -972,7 +972,6 @@ const struct hid_device_id *hid_match_device(struct hid_device *hdev, struct hid_driver *hdrv); bool hid_compare_device_paths(struct hid_device *hdev_a, struct hid_device *hdev_b, char separator); -s32 hid_snto32(__u32 value, unsigned n); __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, unsigned offset, unsigned n); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 90f56656b5724..5fb7e8770849b 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -10,6 +10,7 @@ #include "shm_channel.h" #define GDMA_STATUS_MORE_ENTRIES 0x00000105 +#define GDMA_STATUS_CMD_UNSUPPORTED 0xffffffff /* Structures labeled with "HW DATA" are exchanged with the hardware. All of * them are naturally aligned and hence don't need __packed. @@ -58,8 +59,9 @@ enum gdma_eqe_type { GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, GDMA_EQE_HWC_INIT_DATA = 130, GDMA_EQE_HWC_INIT_DONE = 131, - GDMA_EQE_HWC_SOC_RECONFIG = 132, + GDMA_EQE_HWC_FPGA_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, + GDMA_EQE_HWC_RESET_REQUEST = 135, GDMA_EQE_RNIC_QP_FATAL = 176, }; @@ -152,6 +154,7 @@ struct gdma_general_req { #define GDMA_MESSAGE_V1 1 #define GDMA_MESSAGE_V2 2 #define GDMA_MESSAGE_V3 3 +#define GDMA_MESSAGE_V4 4 struct gdma_general_resp { struct gdma_resp_hdr hdr; @@ -387,6 +390,8 @@ struct gdma_context { u32 test_event_eq_id; bool is_pf; + bool in_service; + phys_addr_t bar0_pa; void __iomem *bar0_va; void __iomem *shm_base; @@ -408,8 +413,6 @@ struct gdma_context { struct gdma_dev mana_ib; }; -#define MAX_NUM_GDMA_DEVICES 4 - static inline bool mana_gd_is_mana(struct gdma_dev *gd) { return gd->dev_id.type == GDMA_DEVICE_MANA; @@ -556,11 +559,23 @@ enum { #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) #define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) +/* Driver can handle holes (zeros) in the device list */ +#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11) + +/* Driver can self reset on EQE notification */ +#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14) + +/* Driver can self reset on FPGA Reconfig EQE notification */ +#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ - GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT) + GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ + GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \ + GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE) #define GDMA_DRV_CAP_FLAGS2 0 @@ -621,11 +636,12 @@ struct gdma_query_max_resources_resp { }; /* HW DATA */ /* GDMA_LIST_DEVICES */ +#define GDMA_DEV_LIST_SIZE 64 struct gdma_list_devices_resp { struct gdma_resp_hdr hdr; u32 num_of_devs; u32 reserved; - struct gdma_dev_id devs[64]; + struct gdma_dev_id devs[GDMA_DEV_LIST_SIZE]; }; /* HW DATA */ /* GDMA_REGISTER_DEVICE */ @@ -883,4 +899,9 @@ int mana_gd_destroy_dma_region(struct gdma_context 
*gc, u64 dma_region_handle); void mana_register_debugfs(void); void mana_unregister_debugfs(void); +int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state); +int mana_gd_resume(struct pci_dev *pdev); + +bool mana_need_log(struct gdma_context *gc, int err); + #endif /* _GDMA_H */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index ceac201caa3a8..5a2dccc0117a8 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -287,6 +287,7 @@ struct mana_cq { /* NAPI data */ struct napi_struct napi; int work_done; + int work_done_since_doorbell; int budget; }; @@ -401,10 +402,70 @@ struct mana_ethtool_stats { u64 rx_cqe_unknown_type; }; +struct mana_ethtool_phy_stats { + /* Drop Counters */ + u64 rx_pkt_drop_phy; + u64 tx_pkt_drop_phy; + + /* Per TC traffic Counters */ + u64 rx_pkt_tc0_phy; + u64 tx_pkt_tc0_phy; + u64 rx_pkt_tc1_phy; + u64 tx_pkt_tc1_phy; + u64 rx_pkt_tc2_phy; + u64 tx_pkt_tc2_phy; + u64 rx_pkt_tc3_phy; + u64 tx_pkt_tc3_phy; + u64 rx_pkt_tc4_phy; + u64 tx_pkt_tc4_phy; + u64 rx_pkt_tc5_phy; + u64 tx_pkt_tc5_phy; + u64 rx_pkt_tc6_phy; + u64 tx_pkt_tc6_phy; + u64 rx_pkt_tc7_phy; + u64 tx_pkt_tc7_phy; + + u64 rx_byte_tc0_phy; + u64 tx_byte_tc0_phy; + u64 rx_byte_tc1_phy; + u64 tx_byte_tc1_phy; + u64 rx_byte_tc2_phy; + u64 tx_byte_tc2_phy; + u64 rx_byte_tc3_phy; + u64 tx_byte_tc3_phy; + u64 rx_byte_tc4_phy; + u64 tx_byte_tc4_phy; + u64 rx_byte_tc5_phy; + u64 tx_byte_tc5_phy; + u64 rx_byte_tc6_phy; + u64 tx_byte_tc6_phy; + u64 rx_byte_tc7_phy; + u64 tx_byte_tc7_phy; + + /* Per TC pause Counters */ + u64 rx_pause_tc0_phy; + u64 tx_pause_tc0_phy; + u64 rx_pause_tc1_phy; + u64 tx_pause_tc1_phy; + u64 rx_pause_tc2_phy; + u64 tx_pause_tc2_phy; + u64 rx_pause_tc3_phy; + u64 tx_pause_tc3_phy; + u64 rx_pause_tc4_phy; + u64 tx_pause_tc4_phy; + u64 rx_pause_tc5_phy; + u64 tx_pause_tc5_phy; + u64 rx_pause_tc6_phy; + u64 tx_pause_tc6_phy; + u64 rx_pause_tc7_phy; + u64 tx_pause_tc7_phy; +}; + struct mana_context { struct gdma_dev *gdma_dev; u16 num_ports; + u8 bm_hostmode; struct mana_eq *eqs; struct dentry *mana_eqs_debugfs; @@ -470,6 +531,8 @@ struct mana_port_context { struct mana_ethtool_stats eth_stats; + struct mana_ethtool_phy_stats phy_stats; + /* Debugfs */ struct dentry *mana_port_debugfs; }; @@ -494,6 +557,7 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); +void mana_query_phy_stats(struct mana_port_context *apc); int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); @@ -520,6 +584,7 @@ enum mana_command_code { MANA_FENCE_RQ = 0x20006, MANA_CONFIG_VPORT_RX = 0x20007, MANA_QUERY_VPORT_CONFIG = 0x20008, + MANA_QUERY_PHY_STAT = 0x2000c, /* Privileged commands for the PF mode */ MANA_REGISTER_FILTER = 0x28000, @@ -554,7 +619,8 @@ struct mana_query_device_cfg_resp { u64 pf_cap_flags4; u16 max_num_vports; - u16 reserved; + u8 bm_hostmode; /* response v3: Bare Metal Host Mode */ + u8 reserved; u32 max_num_eqs; /* response v2: */ @@ -681,6 +747,74 @@ struct mana_query_gf_stat_resp { u64 tx_err_gdma; }; /* HW DATA */ +/* Query phy stats */ +struct mana_query_phy_stat_req { + struct gdma_req_hdr hdr; + u64 req_stats; +}; /* HW DATA */ + +struct mana_query_phy_stat_resp { + struct gdma_resp_hdr hdr; + u64 reported_stats; + + /* Aggregate Drop Counters */ + u64 
rx_pkt_drop_phy; + u64 tx_pkt_drop_phy; + + /* Per TC(Traffic class) traffic Counters */ + u64 rx_pkt_tc0_phy; + u64 tx_pkt_tc0_phy; + u64 rx_pkt_tc1_phy; + u64 tx_pkt_tc1_phy; + u64 rx_pkt_tc2_phy; + u64 tx_pkt_tc2_phy; + u64 rx_pkt_tc3_phy; + u64 tx_pkt_tc3_phy; + u64 rx_pkt_tc4_phy; + u64 tx_pkt_tc4_phy; + u64 rx_pkt_tc5_phy; + u64 tx_pkt_tc5_phy; + u64 rx_pkt_tc6_phy; + u64 tx_pkt_tc6_phy; + u64 rx_pkt_tc7_phy; + u64 tx_pkt_tc7_phy; + + u64 rx_byte_tc0_phy; + u64 tx_byte_tc0_phy; + u64 rx_byte_tc1_phy; + u64 tx_byte_tc1_phy; + u64 rx_byte_tc2_phy; + u64 tx_byte_tc2_phy; + u64 rx_byte_tc3_phy; + u64 tx_byte_tc3_phy; + u64 rx_byte_tc4_phy; + u64 tx_byte_tc4_phy; + u64 rx_byte_tc5_phy; + u64 tx_byte_tc5_phy; + u64 rx_byte_tc6_phy; + u64 tx_byte_tc6_phy; + u64 rx_byte_tc7_phy; + u64 tx_byte_tc7_phy; + + /* Per TC(Traffic Class) pause Counters */ + u64 rx_pause_tc0_phy; + u64 tx_pause_tc0_phy; + u64 rx_pause_tc1_phy; + u64 tx_pause_tc1_phy; + u64 rx_pause_tc2_phy; + u64 tx_pause_tc2_phy; + u64 rx_pause_tc3_phy; + u64 tx_pause_tc3_phy; + u64 rx_pause_tc4_phy; + u64 tx_pause_tc4_phy; + u64 rx_pause_tc5_phy; + u64 tx_pause_tc5_phy; + u64 rx_pause_tc6_phy; + u64 tx_pause_tc6_phy; + u64 rx_pause_tc7_phy; + u64 tx_pause_tc7_phy; +}; /* HW DATA */ + /* Configure vPort Rx Steering */ struct mana_cfg_rx_steer_req_v2 { struct gdma_req_hdr hdr; @@ -824,5 +958,7 @@ int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, u32 doorbell_pg_id); void mana_uncfg_vport(struct mana_port_context *apc); -struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index); +struct net_device *mana_get_primary_netdev(struct mana_context *ac, + u32 port_index, + netdevice_tracker *tracker); #endif /* _MANA_H */ diff --git a/redhat/kernel.changelog-9.6 b/redhat/kernel.changelog-9.6 index 76f1806b2c6a4..7d7c07b8e323a 100644 --- a/redhat/kernel.changelog-9.6 +++ b/redhat/kernel.changelog-9.6 @@ -1,3 +1,54 @@ +* Sat Oct 04 2025 CKI KWF Bot [5.14.0-570.52.1.el9_6] +- crypto: seqiv - Handle EBUSY correctly (CKI Backport Bot) [RHEL-117232] {CVE-2023-53373} +- ibmvnic: Increase max subcrq indirect entries with fallback (Mamatha Inamdar) [RHEL-116186] +- hv_netvsc: Fix panic during namespace deletion with VF (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: Fix DSCP value in modify QP (Maxim Levitsky) [RHEL-115069] +- net: mana: Handle Reset Request from MANA NIC (Maxim Levitsky) [RHEL-115069] +- net: mana: Set tx_packets to post gso processing packet count (Maxim Levitsky) [RHEL-115069] +- net: mana: Handle unsupported HWC commands (Maxim Levitsky) [RHEL-115069] +- net: mana: Add handler for hardware servicing events (Maxim Levitsky) [RHEL-115069] +- net: mana: Expose additional hardware counters for drop and TC via ethtool. 
(Maxim Levitsky) [RHEL-115069]
+- hv_netvsc: Set VF priv_flags to IFF_NO_ADDRCONF before open to prevent IPv6 addrconf (Maxim Levitsky) [RHEL-115069]
+- net: mana: Record doorbell physical address in PF mode (Maxim Levitsky) [RHEL-115069]
+- net: mana: Add support for Multi Vports on Bare metal (Maxim Levitsky) [RHEL-115069]
+- net: mana: Switch to page pool for jumbo frames (Maxim Levitsky) [RHEL-115069]
+- net: mana: Add metadata support for xdp mode (Maxim Levitsky) [RHEL-115069]
+- RDMA/mana_ib: Handle net event for pointing to the current netdev (Maxim Levitsky) [RHEL-115069]
+- net: mana: Change the function signature of mana_get_primary_netdev_rcu (Maxim Levitsky) [RHEL-115069]
+- RDMA/mana_ib: Ensure variable err is initialized (Maxim Levitsky) [RHEL-115069]
+- net: mana: Add debug logs in MANA network driver (Maxim Levitsky) [RHEL-115069]
+- hv_netvsc: Use VF's tso_max_size value when data path is VF (Maxim Levitsky) [RHEL-115069]
+- net: mana: Allow tso_max_size to go up-to GSO_MAX_SIZE (Maxim Levitsky) [RHEL-115069]
+- RDMA/mana_ib: request error CQEs when supported (Maxim Levitsky) [RHEL-115069]
+- RDMA/mana_ib: Query feature_flags bitmask from FW (Maxim Levitsky) [RHEL-115069]
+- net: mana: Support holes in device list reply msg (Maxim Levitsky) [RHEL-115069]
+- RDMA/mana_ib: Allocate PAGE aligned doorbell index (Maxim Levitsky) [RHEL-115069]
+- hv_netvsc: Link queues to NAPIs (Maxim Levitsky) [RHEL-115069]
+- RDMA/mana_ib: use the correct page size for mapping user-mode doorbell page (Maxim Levitsky) [RHEL-115069]
+- RDMA/mana_ib: use the correct page table index based on hardware page size (Maxim Levitsky) [RHEL-115069]
+- net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings (Maxim Levitsky) [RHEL-115069]
+- net: mana: Fix RX buf alloc_size alignment and atomic op panic (Maxim Levitsky) [RHEL-115069]
+- ALSA: usb-audio: Validate UAC3 power domain descriptors, too (CKI Backport Bot) [RHEL-114688] {CVE-2025-38729}
+- ALSA: usb-audio: Fix size validation in convert_chmap_v3() (CKI Backport Bot) [RHEL-114688]
+- ALSA: usb-audio: Validate UAC3 cluster segment descriptors (CKI Backport Bot) [RHEL-114688] {CVE-2025-39757}
+- HID: core: Harden s32ton() against conversion to 0 bits (CKI Backport Bot) [RHEL-111036] {CVE-2025-38556}
+- HID: stop exporting hid_snto32() (CKI Backport Bot) [RHEL-111036] {CVE-2025-38556}
+- HID: simplify snto32() (CKI Backport Bot) [RHEL-111036] {CVE-2025-38556}
+- eventpoll: Fix semi-unbounded recursion (CKI Backport Bot) [RHEL-111052] {CVE-2025-38614}
+- smb: client: fix session setup against servers that require SPN (Paulo Alcantara) [RHEL-107109]
+- smb: client: allow parsing zero-length AV pairs (Paulo Alcantara) [RHEL-107109]
+Resolves: RHEL-107109, RHEL-111036, RHEL-111052, RHEL-114688, RHEL-115069, RHEL-116186, RHEL-117232
+
+* Sat Sep 27 2025 CKI KWF Bot [5.14.0-570.51.1.el9_6]
+- wifi: ath12k: Decrement TID on RX peer frag setup error handling (CKI Backport Bot) [RHEL-114705] {CVE-2025-39761}
+- RDMA/cxgb4: Notify rdma stack for IB_EVENT_QP_LAST_WQE_REACHED event (CKI Backport Bot) [RHEL-100798]
+Resolves: RHEL-100798, RHEL-114705
+
+* Thu Sep 25 2025 CKI KWF Bot [5.14.0-570.50.1.el9_6]
+- security/keys: fix slab-out-of-bounds in key_task_permission (CKI Backport Bot) [RHEL-68092] {CVE-2024-50301}
+- KVM: x86/hyper-v: Skip non-canonical addresses during PV TLB flush (Jon Maloy) [RHEL-104730] {CVE-2025-38351}
+Resolves: RHEL-104730, RHEL-68092
+
 * Tue Sep 23 2025 CKI KWF Bot [5.14.0-570.49.1.el9_6]
 - io_uring/futex: ensure io_futex_wait() cleans up properly on failure (CKI Backport Bot) [RHEL-114335] {CVE-2025-39698}
 - selftests: tls: add tests for zero-length records (Sabrina Dubroca) [RHEL-114326] {CVE-2025-39682}
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 5e6a907607530..1febc2a8abcf6 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -772,8 +772,11 @@ static bool search_nested_keyrings(struct key *keyring,
 	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
 		ptr = READ_ONCE(node->slots[slot]);
 
-		if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
-			goto descend_to_node;
+		if (assoc_array_ptr_is_meta(ptr)) {
+			if (node->back_pointer ||
+			    assoc_array_ptr_is_shortcut(ptr))
+				goto descend_to_node;
+		}
 
 		if (!keyring_ptr_is_keyring(ptr))
 			continue;
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index c1ea8844a46fc..410f33c077865 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -341,20 +341,28 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor
 
 	len = le16_to_cpu(cluster->wLength);
 	c = 0;
-	p += sizeof(struct uac3_cluster_header_descriptor);
+	p += sizeof(*cluster);
+	len -= sizeof(*cluster);
 
-	while (((p - (void *)cluster) < len) && (c < channels)) {
+	while (len > 0 && (c < channels)) {
 		struct uac3_cluster_segment_descriptor *cs_desc = p;
 		u16 cs_len;
 		u8 cs_type;
 
+		if (len < sizeof(*cs_desc))
+			break;
 		cs_len = le16_to_cpu(cs_desc->wLength);
+		if (len < cs_len)
+			break;
 		cs_type = cs_desc->bSegmentType;
 
 		if (cs_type == UAC3_CHANNEL_INFORMATION) {
 			struct uac3_cluster_information_segment_descriptor *is = p;
 			unsigned char map;
 
+			if (cs_len < sizeof(*is))
+				break;
+
 			/*
 			 * TODO: this conversion is not complete, update it
 			 * after adding UAC3 values to asound.h
@@ -456,6 +464,7 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor
 			chmap->map[c++] = map;
 		}
 		p += cs_len;
+		len -= cs_len;
 	}
 
 	if (channels < c)
@@ -880,7 +889,7 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip,
 	u64 badd_formats = 0;
 	unsigned int num_channels;
 	struct audioformat *fp;
-	u16 cluster_id, wLength;
+	u16 cluster_id, wLength, cluster_wLength;
 	int clock = 0;
 	int err;
 
@@ -1008,6 +1017,16 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip,
 		return ERR_PTR(-EIO);
 	}
 
+	cluster_wLength = le16_to_cpu(cluster->wLength);
+	if (cluster_wLength < sizeof(*cluster) ||
+	    cluster_wLength > wLength) {
+		dev_err(&dev->dev,
+			"%u:%d : invalid Cluster Descriptor size\n",
+			iface_no, altno);
+		kfree(cluster);
+		return ERR_PTR(-EIO);
+	}
+
 	num_channels = cluster->bNrChannels;
 	chmap = convert_chmap_v3(cluster);
 	kfree(cluster);
diff --git a/sound/usb/validate.c b/sound/usb/validate.c
index 6fe206f6e9110..4f4e8e87a14cd 100644
--- a/sound/usb/validate.c
+++ b/sound/usb/validate.c
@@ -221,6 +221,17 @@ static bool validate_uac3_feature_unit(const void *p,
 	return d->bLength >= sizeof(*d) + 4 + 2;
 }
 
+static bool validate_uac3_power_domain_unit(const void *p,
+					    const struct usb_desc_validator *v)
+{
+	const struct uac3_power_domain_descriptor *d = p;
+
+	if (d->bLength < sizeof(*d))
+		return false;
+	/* baEntities[] + wPDomainDescrStr */
+	return d->bLength >= sizeof(*d) + d->bNrEntities + 2;
+}
+
 static bool validate_midi_out_jack(const void *p,
 				   const struct usb_desc_validator *v)
 {
@@ -285,6 +296,7 @@ static const struct usb_desc_validator audio_validators[] = {
 	      struct uac3_clock_multiplier_descriptor),
 	/* UAC_VERSION_3, UAC3_SAMPLE_RATE_CONVERTER: not implemented yet */
 	/* UAC_VERSION_3, UAC3_CONNECTORS: not implemented yet */
+	FUNC(UAC_VERSION_3, UAC3_POWER_DOMAIN, validate_uac3_power_domain_unit),
 	{ } /* terminator */
 };
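
Note on the sound/usb hunks above: they share one pattern, namely never trusting a device-supplied wLength and bounding the descriptor walk by the bytes actually available before each sub-descriptor is dereferenced or consumed. The following is a minimal, hedged userspace sketch of that pattern, not the kernel code: parse_segments() and the 3-byte header layout (le16 wLength followed by u8 bSegmentType) are simplified stand-ins chosen only for illustration.

/* bounded_walk.c - hedged sketch of a bounded descriptor walk; the
 * function name and header layout are hypothetical, not the kernel's. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Walk variable-length segments packed into buf[0..len).  Each segment
 * starts with a 3-byte header: le16 wLength (total segment size,
 * including the header) followed by u8 bSegmentType. */
static int parse_segments(const uint8_t *buf, size_t len)
{
	size_t off = 0;

	while (len - off >= 3) {
		uint16_t wlen = (uint16_t)(buf[off] | (buf[off + 1] << 8));
		uint8_t type = buf[off + 2];

		/* Reject lengths that lie: shorter than the header itself,
		 * or claiming more bytes than remain in the buffer. */
		if (wlen < 3 || wlen > len - off)
			return -1;

		printf("segment type %u, %u bytes at offset %zu\n",
		       (unsigned)type, (unsigned)wlen, off);

		off += wlen;	/* consume exactly what was validated */
	}
	return 0;
}

int main(void)
{
	/* two well-formed segments: a bare 3-byte header, then 4 bytes */
	const uint8_t good[] = { 3, 0, 1,  4, 0, 2, 0xff };
	/* first segment claims 8 bytes but only 6 are present */
	const uint8_t bad[]  = { 8, 0, 1,  3, 0, 2 };

	printf("good -> %d\n", parse_segments(good, sizeof(good)));
	printf("bad  -> %d\n", parse_segments(bad, sizeof(bad)));
	return 0;
}

The two rejection checks correspond to the added "if (len < sizeof(*cs_desc))" and "if (len < cs_len)" breaks in the convert_chmap_v3() hunk.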
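
The validate.c hunk handles the related case of a single descriptor whose length depends on a count field it carries itself: bLength must cover the fixed header, bNrEntities one-byte entries, and a trailing 2-byte string reference. Below is a hedged, hypothetical userspace equivalent; struct pd_desc and pd_desc_ok() are illustrative names and the layout is a simplified stand-in, not the kernel's uac3_power_domain_descriptor.

/* pd_check.c - hedged sketch of a self-describing length check; the
 * struct layout and names here are illustrative, not the kernel's. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct pd_desc {		/* fixed part of the hypothetical descriptor */
	uint8_t bLength;	/* total size the device claims */
	uint8_t bNrEntities;	/* number of 1-byte entries that follow */
	/* followed by: uint8_t baEntities[bNrEntities]; then a 2-byte string index */
};

static bool pd_desc_ok(const void *p, size_t avail)
{
	const struct pd_desc *d = p;

	/* must at least contain the fixed header ... */
	if (avail < sizeof(*d) || d->bLength < sizeof(*d))
		return false;
	/* ... and the claimed length must cover the variable tail
	 * without running past the bytes actually available */
	return d->bLength >= sizeof(*d) + d->bNrEntities + 2 &&
	       d->bLength <= avail;
}

int main(void)
{
	const uint8_t good[] = { 6, 2, 0xaa, 0xbb, 0x01, 0x00 }; /* 2 + 2 + 2 */
	const uint8_t bad[]  = { 6, 8, 0xaa, 0xbb, 0x01, 0x00 }; /* tail too big */

	printf("good -> %d\n", pd_desc_ok(good, sizeof(good)));
	printf("bad  -> %d\n", pd_desc_ok(bad, sizeof(bad)));
	return 0;
}

Compare with the added validate_uac3_power_domain_unit() above, which applies the same "fixed header first, then count-dependent tail" check; the extra d->bLength <= avail clause here stands in for the buffer-bound checks the kernel's validator framework performs elsewhere.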