From 24ea2de9c342a09dc7c87cc428c09176af5ee063 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Tue, 19 Jan 2021 17:12:08 -0800 Subject: [PATCH 01/58] net: usb: cdc_ncm: don't spew notifications [ Upstream commit de658a195ee23ca6aaffe197d1d2ea040beea0a2 ] RTL8156 sends notifications about every 32ms. Only display/log notifications when something changes. This issue has been reported by others: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1832472 https://lkml.org/lkml/2020/8/27/1083 ... [785962.779840] usb 1-1: new high-speed USB device number 5 using xhci_hcd [785962.929944] usb 1-1: New USB device found, idVendor=0bda, idProduct=8156, bcdDevice=30.00 [785962.929949] usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=6 [785962.929952] usb 1-1: Product: USB 10/100/1G/2.5G LAN [785962.929954] usb 1-1: Manufacturer: Realtek [785962.929956] usb 1-1: SerialNumber: 000000001 [785962.991755] usbcore: registered new interface driver cdc_ether [785963.017068] cdc_ncm 1-1:2.0: MAC-Address: 00:24:27:88:08:15 [785963.017072] cdc_ncm 1-1:2.0: setting rx_max = 16384 [785963.017169] cdc_ncm 1-1:2.0: setting tx_max = 16384 [785963.017682] cdc_ncm 1-1:2.0 usb0: register 'cdc_ncm' at usb-0000:00:14.0-1, CDC NCM, 00:24:27:88:08:15 [785963.019211] usbcore: registered new interface driver cdc_ncm [785963.023856] usbcore: registered new interface driver cdc_wdm [785963.025461] usbcore: registered new interface driver cdc_mbim [785963.038824] cdc_ncm 1-1:2.0 enx002427880815: renamed from usb0 [785963.089586] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected [785963.121673] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected [785963.153682] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected ... This is about 2KB per second and will overwrite all contents of a 1MB dmesg buffer in under 10 minutes rendering them useless for debugging many kernel problems. This is also an extra 180 MB/day in /var/logs (or 1GB per week) rendering the majority of those logs useless too. When the link is up (expected state), spew amount is >2x higher: ... [786139.600992] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.632997] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink [786139.665097] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.697100] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink [786139.729094] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.761108] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink ... Chrome OS cannot support RTL8156 until this is fixed. Signed-off-by: Grant Grundler Reviewed-by: Hayes Wang Link: https://lore.kernel.org/r/20210120011208.3768105-1-grundler@chromium.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/cdc_ncm.c | 12 +++++++++++- include/linux/usb/usbnet.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index faca70c3647d..82ec00a7370d 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1590,6 +1590,15 @@ cdc_ncm_speed_change(struct usbnet *dev, uint32_t rx_speed = le32_to_cpu(data->DLBitRRate); uint32_t tx_speed = le32_to_cpu(data->ULBitRate); + /* if the speed hasn't changed, don't report it. + * RTL8156 shipped before 2021 sends notification about every 32ms. + */ + if (dev->rx_speed == rx_speed && dev->tx_speed == tx_speed) + return; + + dev->rx_speed = rx_speed; + dev->tx_speed = tx_speed; + /* * Currently the USB-NET API does not support reporting the actual * device speed. Do print it instead. @@ -1633,7 +1642,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ - usbnet_link_change(dev, !!event->wValue, 0); + if (netif_carrier_ok(dev->net) != !!event->wValue) + usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index e2ec3582e549..452ca06ed253 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -83,6 +83,8 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 + u32 rx_speed; /* in bps - NOT Mbps */ + u32 tx_speed; /* in bps - NOT Mbps */ }; static inline struct usb_driver *driver_of(struct usb_interface *intf) From 3f9186ee7a306d7f974e759b084245155e5e709a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Sat, 11 Jan 2020 15:47:36 -0600 Subject: [PATCH 02/58] ALSA: usb: update old-style static const declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ff40e0d41af19e36b43693fcb9241b4a6795bb44 ] GCC reports the following warning with W=1 sound/usb/mixer_quirks.c: In function ‘snd_microii_controls_create’: sound/usb/mixer_quirks.c:1694:2: warning: ‘static’ is not at beginning of declaration [-Wold-style-declaration] 1694 | const static usb_mixer_elem_resume_func_t resume_funcs[] = { | ^~~~~ Move static to the beginning of declaration Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200111214736.3002-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/mixer_quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 169679419b39..a74e07eff60c 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1708,7 +1708,7 @@ static struct snd_kcontrol_new snd_microii_mixer_spdif[] = { static int snd_microii_controls_create(struct usb_mixer_interface *mixer) { int err, i; - const static usb_mixer_elem_resume_func_t resume_funcs[] = { + static const usb_mixer_elem_resume_func_t resume_funcs[] = { snd_microii_spdif_default_update, NULL, snd_microii_spdif_switch_update From 1e9cd487b04f6715c4f14ddd81c5740be852aff2 Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Thu, 3 Jun 2021 09:28:52 -0700 Subject: [PATCH 03/58] nl80211: validate key indexes for cfg80211_registered_device commit 2d9463083ce92636a1bdd3e30d1236e3e95d859e upstream syzbot discovered a bug in which an OOB access was being made because an unsuitable key_idx value was wrongly considered to be acceptable while deleting a key in nl80211_del_key(). Since we don't know the cipher at the time of deletion, if cfg80211_validate_key_settings() were to be called directly in nl80211_del_key(), even valid keys would be wrongly determined invalid, and deletion wouldn't occur correctly. For this reason, a new function - cfg80211_valid_key_idx(), has been created, to determine if the key_idx value provided is valid or not. cfg80211_valid_key_idx() is directly called in 2 places - nl80211_del_key(), and cfg80211_validate_key_settings(). Reported-by: syzbot+49d4cab497c2142ee170@syzkaller.appspotmail.com Tested-by: syzbot+49d4cab497c2142ee170@syzkaller.appspotmail.com Suggested-by: Johannes Berg Signed-off-by: Anant Thazhemadam Link: https://lore.kernel.org/r/20201204215825.129879-1-anant.thazhemadam@gmail.com Cc: stable@vger.kernel.org [also disallow IGTK key IDs if no IGTK cipher is supported] Signed-off-by: Johannes Berg Signed-off-by: Zubin Mithra Signed-off-by: Sasha Levin --- net/wireless/core.h | 2 ++ net/wireless/nl80211.c | 7 ++++--- net/wireless/util.c | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/net/wireless/core.h b/net/wireless/core.h index f5d58652108d..5f177dad2fa8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -404,6 +404,8 @@ void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); +bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, + int key_idx, bool pairwise); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5f0605275fa3..04c4fd376e1d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3624,9 +3624,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (key.idx < 0) - return -EINVAL; - if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -3642,6 +3639,10 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) key.type != NL80211_KEYTYPE_GROUP) return -EINVAL; + if (!cfg80211_valid_key_idx(rdev, key.idx, + key.type == NL80211_KEYTYPE_PAIRWISE)) + return -EINVAL; + if (!rdev->ops->del_key) return -EOPNOTSUPP; diff --git a/net/wireless/util.c b/net/wireless/util.c index 6f9cff2ee795..c4536468dfbe 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -214,11 +214,48 @@ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher) return false; } +static bool +cfg80211_igtk_cipher_supported(struct cfg80211_registered_device *rdev) +{ + struct wiphy *wiphy = &rdev->wiphy; + int i; + + for (i = 0; i < wiphy->n_cipher_suites; i++) { + switch (wiphy->cipher_suites[i]) { + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + return true; + } + } + + return false; +} + +bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, + int key_idx, bool pairwise) +{ + int max_key_idx; + + if (pairwise) + max_key_idx = 3; + else if (cfg80211_igtk_cipher_supported(rdev)) + max_key_idx = 5; + else + max_key_idx = 3; + + if (key_idx < 0 || key_idx > max_key_idx) + return false; + + return true; +} + int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr) { - if (key_idx < 0 || key_idx > 5) + if (!cfg80211_valid_key_idx(rdev, key_idx, pairwise)) return -EINVAL; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) From 5fdb418b14e41b33880b949db64b18f1f448916b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 30 Apr 2021 16:22:51 +0200 Subject: [PATCH 04/58] efi: Allow EFI_MEMORY_XP and EFI_MEMORY_RO both to be cleared [ Upstream commit 45add3cc99feaaf57d4b6f01d52d532c16a1caee ] UEFI spec 2.9, p.108, table 4-1 lists the scenario that both attributes are cleared with the description "No memory access protection is possible for Entry". So we can have valid entries where both attributes are cleared, so remove the check. Signed-off-by: Heiner Kallweit Fixes: 10f0d2f577053 ("efi: Implement generic support for the Memory Attributes table") Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/memattr.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c index aac972b056d9..e0889922cc6d 100644 --- a/drivers/firmware/efi/memattr.c +++ b/drivers/firmware/efi/memattr.c @@ -69,11 +69,6 @@ static bool entry_is_valid(const efi_memory_desc_t *in, efi_memory_desc_t *out) return false; } - if (!(in->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))) { - pr_warn("Entry attributes invalid: RO and XP bits both cleared\n"); - return false; - } - if (PAGE_SIZE > EFI_PAGE_SIZE && (!PAGE_ALIGNED(in->phys_addr) || !PAGE_ALIGNED(in->num_pages << EFI_PAGE_SHIFT))) { From dd47a33e11fdd6c9d31570500e6ecc369ba1f08e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 21 Apr 2021 21:46:36 +0200 Subject: [PATCH 05/58] efi: cper: fix snprintf() use in cper_dimm_err_location() [ Upstream commit 942859d969de7f6f7f2659a79237a758b42782da ] snprintf() should be given the full buffer size, not one less. And it guarantees nul-termination, so doing it manually afterwards is pointless. It's even potentially harmful (though probably not in practice because CPER_REC_LEN is 256), due to the "return how much would have been written had the buffer been big enough" semantics. I.e., if the bank and/or device strings are long enough that the "DIMM location ..." output gets truncated, writing to msg[n] is a buffer overflow. Signed-off-by: Rasmus Villemoes Fixes: 3760cd20402d4 ("CPER: Adjust code flow of some functions") Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/cper.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 116989cf3d45..97da083afd32 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -275,8 +275,7 @@ static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) return 0; - n = 0; - len = CPER_REC_LEN - 1; + len = CPER_REC_LEN; dmi_memdev_name(mem->mem_dev_handle, &bank, &device); if (bank && device) n = snprintf(msg, len, "DIMM location: %s %s ", bank, device); @@ -285,7 +284,6 @@ static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) "DIMM location: not present. DMI handle: 0x%.4x ", mem->mem_dev_handle); - msg[n] = '\0'; return n; } From c953aee0d8a162d249dedaa47111e95dde0461cd Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 15 May 2021 10:04:58 +0800 Subject: [PATCH 06/58] vfio/pci: Fix error return code in vfio_ecap_init() [ Upstream commit d1ce2c79156d3baf0830990ab06d296477b93c26 ] The error code returned from vfio_ext_cap_len() is stored in 'len', not in 'ret'. Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Reviewed-by: Max Gurtovoy Message-Id: <20210515020458.6771-1-thunder.leizhen@huawei.com> Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index a1a26465d224..86e917f1cc21 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1579,7 +1579,7 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev) if (len == 0xFF) { len = vfio_ext_cap_len(vdev, ecap, epos); if (len < 0) - return ret; + return len; } } From 2b2ca3ee36e4e40d98c01b41a5819243197d4609 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 15 May 2021 12:08:56 -0700 Subject: [PATCH 07/58] vfio/pci: zap_vma_ptes() needs MMU [ Upstream commit 2a55ca37350171d9b43d561528f23d4130097255 ] zap_vma_ptes() is only available when CONFIG_MMU is set/enabled. Without CONFIG_MMU, vfio_pci.o has build errors, so make VFIO_PCI depend on MMU. riscv64-linux-ld: drivers/vfio/pci/vfio_pci.o: in function `vfio_pci_mmap_open': vfio_pci.c:(.text+0x1ec): undefined reference to `zap_vma_ptes' riscv64-linux-ld: drivers/vfio/pci/vfio_pci.o: in function `.L0 ': vfio_pci.c:(.text+0x165c): undefined reference to `zap_vma_ptes' Fixes: 11c4cd07ba11 ("vfio-pci: Fault mmaps to enable vma tracking") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Alex Williamson Cc: Cornelia Huck Cc: kvm@vger.kernel.org Cc: Jason Gunthorpe Cc: Eric Auger Message-Id: <20210515190856.2130-1-rdunlap@infradead.org> Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 42dc1d3d71cf..fcbfd0aacebc 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,6 +1,7 @@ config VFIO_PCI tristate "VFIO support for PCI devices" depends on VFIO && PCI && EVENTFD + depends on MMU select VFIO_VIRQFD select IRQ_BYPASS_MANAGER help From f12bd9caafedebd8e468cc3ec1dcee1920d7a9d2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 20 May 2021 13:36:41 +0000 Subject: [PATCH 08/58] samples: vfio-mdev: fix error handing in mdpy_fb_probe() [ Upstream commit 752774ce7793a1f8baa55aae31f3b4caac49cbe4 ] Fix to return a negative error code from the framebuffer_alloc() error handling case instead of 0, also release regions in some error handing cases. Fixes: cacade1946a4 ("sample: vfio mdev display - guest driver") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Message-Id: <20210520133641.1421378-1-weiyongjun1@huawei.com> Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- samples/vfio-mdev/mdpy-fb.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c index 2719bb259653..a760e130bd0d 100644 --- a/samples/vfio-mdev/mdpy-fb.c +++ b/samples/vfio-mdev/mdpy-fb.c @@ -117,22 +117,27 @@ static int mdpy_fb_probe(struct pci_dev *pdev, if (format != DRM_FORMAT_XRGB8888) { pci_err(pdev, "format mismatch (0x%x != 0x%x)\n", format, DRM_FORMAT_XRGB8888); - return -EINVAL; + ret = -EINVAL; + goto err_release_regions; } if (width < 100 || width > 10000) { pci_err(pdev, "width (%d) out of range\n", width); - return -EINVAL; + ret = -EINVAL; + goto err_release_regions; } if (height < 100 || height > 10000) { pci_err(pdev, "height (%d) out of range\n", height); - return -EINVAL; + ret = -EINVAL; + goto err_release_regions; } pci_info(pdev, "mdpy found: %dx%d framebuffer\n", width, height); info = framebuffer_alloc(sizeof(struct mdpy_fb_par), &pdev->dev); - if (!info) + if (!info) { + ret = -ENOMEM; goto err_release_regions; + } pci_set_drvdata(pdev, info); par = info->par; From 2bd07ebcb949b17a431aacd0e99fa4558c4a5600 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 18 May 2021 22:21:31 +0300 Subject: [PATCH 09/58] vfio/platform: fix module_put call in error flow [ Upstream commit dc51ff91cf2d1e9a2d941da483602f71d4a51472 ] The ->parent_module is the one that use in try_module_get. It should also be the one the we use in module_put during vfio_platform_open(). Fixes: 32a2d71c4e80 ("vfio: platform: introduce vfio-platform-base module") Signed-off-by: Max Gurtovoy Message-Id: <20210518192133.59195-1-mgurtovoy@nvidia.com> Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/platform/vfio_platform_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 460760d0becf..c29fc6844f84 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -295,7 +295,7 @@ err_irq: vfio_platform_regions_cleanup(vdev); err_reg: mutex_unlock(&driver_lock); - module_put(THIS_MODULE); + module_put(vdev->parent_module); return ret; } From 6895ac910b73f7f886bb292c0a87d6e4d6eb6801 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 24 May 2021 22:54:57 +0300 Subject: [PATCH 10/58] ipvs: ignore IP_VS_SVC_F_HASHED flag when adding service [ Upstream commit 56e4ee82e850026d71223262c07df7d6af3bd872 ] syzbot reported memory leak [1] when adding service with HASHED flag. We should ignore this flag both from sockopt and netlink provided data, otherwise the service is not hashed and not visible while releasing resources. [1] BUG: memory leak unreferenced object 0xffff888115227800 (size 512): comm "syz-executor263", pid 8658, jiffies 4294951882 (age 12.560s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc include/linux/slab.h:556 [inline] [] kzalloc include/linux/slab.h:686 [inline] [] ip_vs_add_service+0x598/0x7c0 net/netfilter/ipvs/ip_vs_ctl.c:1343 [] do_ip_vs_set_ctl+0x810/0xa40 net/netfilter/ipvs/ip_vs_ctl.c:2570 [] nf_setsockopt+0x68/0xa0 net/netfilter/nf_sockopt.c:101 [] ip_setsockopt+0x259/0x1ff0 net/ipv4/ip_sockglue.c:1435 [] raw_setsockopt+0x18c/0x1b0 net/ipv4/raw.c:857 [] __sys_setsockopt+0x1b0/0x360 net/socket.c:2117 [] __do_sys_setsockopt net/socket.c:2128 [inline] [] __se_sys_setsockopt net/socket.c:2125 [inline] [] __x64_sys_setsockopt+0x22/0x30 net/socket.c:2125 [] do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47 [] entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-and-tested-by: syzbot+e562383183e4b1766930@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Julian Anastasov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 3ad1de081e3c..6208fa09fe71 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1269,7 +1269,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); svc->port = u->port; svc->fwmark = u->fwmark; - svc->flags = u->flags; + svc->flags = u->flags & ~IP_VS_SVC_F_HASHED; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; svc->ipvs = ipvs; From 173da500cfcef27f315dd3d1d1e8a6d5b146b227 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 10:47:37 +0800 Subject: [PATCH 11/58] HID: pidff: fix error return code in hid_pidff_init() [ Upstream commit 3dd653c077efda8152f4dd395359617d577a54cd ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 224ee88fe395 ("Input: add force feedback driver for PID devices") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/usbhid/hid-pidff.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c index 08174d341f4a..bc75f1efa0f4 100644 --- a/drivers/hid/usbhid/hid-pidff.c +++ b/drivers/hid/usbhid/hid-pidff.c @@ -1304,6 +1304,7 @@ int hid_pidff_init(struct hid_device *hid) if (pidff->pool[PID_DEVICE_MANAGED_POOL].value && pidff->pool[PID_DEVICE_MANAGED_POOL].value[0] == 0) { + error = -EPERM; hid_notice(hid, "device does not support device managed pool\n"); goto fail; From 85e8c3b43f47a009b0ef924924d5fc35de054d54 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 May 2021 15:58:50 +0200 Subject: [PATCH 12/58] HID: i2c-hid: fix format string mismatch [ Upstream commit dc5f9f55502e13ba05731d5046a14620aa2ff456 ] clang doesn't like printing a 32-bit integer using %hX format string: drivers/hid/i2c-hid/i2c-hid-core.c:994:18: error: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Werror,-Wformat] client->name, hid->vendor, hid->product); ^~~~~~~~~~~ drivers/hid/i2c-hid/i2c-hid-core.c:994:31: error: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Werror,-Wformat] client->name, hid->vendor, hid->product); ^~~~~~~~~~~~ Use an explicit cast to truncate it to the low 16 bits instead. Fixes: 9ee3e06610fd ("HID: i2c-hid: override HID descriptors for certain devices") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/i2c-hid/i2c-hid-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 1f8d403d3db4..19f4b807a5d1 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -1160,8 +1160,8 @@ static int i2c_hid_probe(struct i2c_client *client, hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID); hid->product = le16_to_cpu(ihid->hdesc.wProductID); - snprintf(hid->name, sizeof(hid->name), "%s %04hX:%04hX", - client->name, hid->vendor, hid->product); + snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X", + client->name, (u16)hid->vendor, (u16)hid->product); strlcpy(hid->phys, dev_name(&client->dev), sizeof(hid->phys)); ihid->quirks = i2c_hid_lookup_quirk(hid->vendor, hid->product); From 8aed10cd9497933ebe3fc0afe8d8ddc1af8b0d48 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 28 May 2021 13:45:16 +0200 Subject: [PATCH 13/58] netfilter: nfnetlink_cthelper: hit EBUSY on updates if size mismatches [ Upstream commit 8971ee8b087750a23f3cd4dc55bff2d0303fd267 ] The private helper data size cannot be updated. However, updates that contain NFCTH_PRIV_DATA_LEN might bogusly hit EBUSY even if the size is the same. Fixes: 12f7a505331e ("netfilter: add user-space connection tracking helper infrastructure") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nfnetlink_cthelper.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index ddcb1b607474..c8b0f1122c44 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -381,10 +381,14 @@ static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) { + u32 size; int ret; - if (tb[NFCTH_PRIV_DATA_LEN]) - return -EBUSY; + if (tb[NFCTH_PRIV_DATA_LEN]) { + size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + if (size != helper->data_len) + return -EBUSY; + } if (tb[NFCTH_POLICY]) { ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); From 813a23e0f130c6c4f1aaf84ad0c72b5b1234a409 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 14:25:17 +0800 Subject: [PATCH 14/58] ieee802154: fix error return code in ieee802154_add_iface() [ Upstream commit 79c6b8ed30e54b401c873dbad2511f2a1c525fd5 ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: be51da0f3e34 ("ieee802154: Stop using NLA_PUT*().") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20210508062517.2574-1-thunder.leizhen@huawei.com Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- net/ieee802154/nl-phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index b231e40f006a..ca1dd9ff07ab 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -249,8 +249,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) } if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) || - nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) + nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) { + rc = -EMSGSIZE; goto nla_put_failure; + } dev_put(dev); wpan_phy_put(phy); From 60d59c52235201446c01b131460399b041cfc221 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 19 May 2021 14:16:14 +0000 Subject: [PATCH 15/58] ieee802154: fix error return code in ieee802154_llsec_getparams() [ Upstream commit 373e864cf52403b0974c2f23ca8faf9104234555 ] Fix to return negative error code -ENOBUFS from the error handling case instead of 0, as done elsewhere in this function. Fixes: 3e9c156e2c21 ("ieee802154: add netlink interfaces for llsec") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20210519141614.3040055-1-weiyongjun1@huawei.com Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin --- net/ieee802154/nl-mac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index c0930b9fe848..7531cb1665d2 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -688,8 +688,10 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info) nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) || nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, be32_to_cpu(params.frame_counter)) || - ieee802154_llsec_fill_key_id(msg, ¶ms.out_key)) + ieee802154_llsec_fill_key_id(msg, ¶ms.out_key)) { + rc = -ENOBUFS; goto out_free; + } dev_put(dev); From 9144f434bebd1eab8af91e2de17d30599a5e6525 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 10 May 2021 11:38:53 +0200 Subject: [PATCH 16/58] ixgbevf: add correct exception tracing for XDP [ Upstream commit faae81420d162551b6ef2d804aafc00f4cd68e0e ] Add missing exception tracing to XDP when a number of different errors can occur. The support was only partial. Several errors where not logged which would confuse the user quite a lot not knowing where and why the packets disappeared. Fixes: 21092e9ce8b1 ("ixgbevf: Add support for XDP_TX action") Reported-by: Jesper Dangaard Brouer Signed-off-by: Magnus Karlsson Tested-by: Vishakha Jambekar Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a10756f0b0d8..7f94b445595c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1071,11 +1071,14 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, case XDP_TX: xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp); + if (result == IXGBEVF_XDP_CONSUMED) + goto out_failure; break; default: bpf_warn_invalid_xdp_action(act); /* fallthrough */ case XDP_ABORTED: +out_failure: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); /* fallthrough -- handle aborts by dropping packet */ case XDP_DROP: From 65281d6aeca781b024a3cc83df6b55b987877ae8 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Thu, 25 Mar 2021 08:56:41 +0700 Subject: [PATCH 17/58] tipc: add extack messages for bearer/media failure [ Upstream commit b83e214b2e04204f1fc674574362061492c37245 ] Add extack error messages for -EINVAL errors when enabling bearer, getting/setting properties for a media/bearer Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/bearer.c | 50 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 2649a0a0d45e..8ab17f0da026 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -231,7 +231,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, - struct nlattr *attr[]) + struct nlattr *attr[], + struct netlink_ext_ack *extack) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; @@ -245,17 +246,20 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (!bearer_name_validate(name, &b_names)) { errstr = "illegal name"; + NL_SET_ERR_MSG(extack, "Illegal name"); goto rejected; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { errstr = "illegal priority"; + NL_SET_ERR_MSG(extack, "Illegal priority"); goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { errstr = "media not registered"; + NL_SET_ERR_MSG(extack, "Media not registered"); goto rejected; } @@ -269,6 +273,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, break; if (!strcmp(name, b->name)) { errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } bearer_id++; @@ -280,6 +285,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, name, prio); if (prio == TIPC_MIN_LINK_PRI) { errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); goto rejected; } pr_warn("Bearer <%s>: trying with adjusted priority\n", name); @@ -290,6 +296,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (bearer_id >= MAX_BEARERS) { errstr = "max 3 bearers permitted"; + NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); goto rejected; } @@ -303,6 +310,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { kfree(b); errstr = "failed to enable media"; + NL_SET_ERR_MSG(extack, "Failed to enable media"); goto rejected; } @@ -318,6 +326,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { bearer_disable(net, b); errstr = "failed to create discoverer"; + NL_SET_ERR_MSG(extack, "Failed to create discoverer"); goto rejected; } @@ -795,6 +804,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } @@ -834,8 +844,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); - if (!bearer) + if (!bearer) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } bearer_disable(net, bearer); @@ -893,7 +905,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } - return tipc_enable_bearer(net, bearer, domain, prio, attrs); + return tipc_enable_bearer(net, bearer, domain, prio, attrs, + info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) @@ -932,6 +945,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } @@ -972,8 +986,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); b = tipc_bearer_find(net, name); - if (!b) + if (!b) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; @@ -992,12 +1008,18 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { - if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); return -EINVAL; + } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 - (props[TIPC_NLA_PROP_MTU]))) + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); return -EINVAL; + } b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); #endif @@ -1125,6 +1147,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); media = tipc_media_find(name); if (!media) { + NL_SET_ERR_MSG(info->extack, "Media not found"); err = -EINVAL; goto err_out; } @@ -1161,9 +1184,10 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); m = tipc_media_find(name); - if (!m) + if (!m) { + NL_SET_ERR_MSG(info->extack, "Media not found"); return -EINVAL; - + } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; @@ -1179,12 +1203,18 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { - if (m->type_id != TIPC_MEDIA_TYPE_UDP) + if (m->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); return -EINVAL; + } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 - (props[TIPC_NLA_PROP_MTU]))) + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); return -EINVAL; + } m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); #endif } From a13a42c573a496a05b528ddbe6ea32083f44e9f1 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Thu, 1 Apr 2021 09:30:48 +0700 Subject: [PATCH 18/58] tipc: fix unique bearer names sanity check [ Upstream commit f20a46c3044c3f75232b3d0e2d09af9b25efaf45 ] When enabling a bearer by name, we don't sanity check its name with higher slot in bearer list. This may have the effect that the name of an already enabled bearer bypasses the check. To fix the above issue, we just perform an extra checking with all existing bearers. Fixes: cb30a63384bc9 ("tipc: refactor function tipc_enable_bearer()") Cc: stable@vger.kernel.org Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/bearer.c | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 8ab17f0da026..e1006ed4d90a 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -243,6 +243,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, int bearer_id = 0; int res = -EINVAL; char *errstr = ""; + u32 i; if (!bearer_name_validate(name, &b_names)) { errstr = "illegal name"; @@ -267,31 +268,38 @@ static int tipc_enable_bearer(struct net *net, const char *name, prio = m->priority; /* Check new bearer vs existing ones and find free bearer id if any */ - while (bearer_id < MAX_BEARERS) { - b = rtnl_dereference(tn->bearer_list[bearer_id]); - if (!b) - break; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + while (i-- != 0) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { + bearer_id = i; + continue; + } if (!strcmp(name, b->name)) { errstr = "already enabled"; NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } - bearer_id++; - if (b->priority != prio) - continue; - if (++with_this_prio <= 2) - continue; - pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", - name, prio); - if (prio == TIPC_MIN_LINK_PRI) { - errstr = "cannot adjust to lower"; - NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); - goto rejected; + + if (b->priority == prio && + (++with_this_prio > 2)) { + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); + goto rejected; + } + + pr_warn("Bearer <%s>: trying with adjusted priority\n", + name); + prio--; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + with_this_prio = 1; } - pr_warn("Bearer <%s>: trying with adjusted priority\n", name); - prio--; - bearer_id = 0; - with_this_prio = 1; } if (bearer_id >= MAX_BEARERS) { From 64700748e8a7af4883538c72ada57999d9a78e92 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 25 May 2021 14:39:02 +0200 Subject: [PATCH 19/58] Bluetooth: fix the erroneous flush_work() order commit 6a137caec23aeb9e036cdfd8a46dd8a366460e5d upstream. In the cleanup routine for failed initialization of HCI device, the flush_work(&hdev->rx_work) need to be finished before the flush_work(&hdev->cmd_work). Otherwise, the hci_rx_work() can possibly invoke new cmd_work and cause a bug, like double free, in late processings. This was assigned CVE-2021-3564. This patch reorder the flush_work() to fix this bug. Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Luiz Augusto von Dentz Cc: "David S. Miller" Cc: Jakub Kicinski Cc: linux-bluetooth@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Lin Ma Signed-off-by: Hao Xiong Cc: stable Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 04d6f50798c9..219cdbb476fb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1498,8 +1498,13 @@ static int hci_dev_do_open(struct hci_dev *hdev) } else { /* Init failed, cleanup */ flush_work(&hdev->tx_work); - flush_work(&hdev->cmd_work); + + /* Since hci_rx_work() is possible to awake new cmd_work + * it should be flushed first to avoid unexpected call of + * hci_cmd_work() + */ flush_work(&hdev->rx_work); + flush_work(&hdev->cmd_work); skb_queue_purge(&hdev->cmd_q); skb_queue_purge(&hdev->rx_q); From 2b9e9c2ed0f1910b5201c5d37b355b60201df415 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Sun, 30 May 2021 21:37:43 +0800 Subject: [PATCH 20/58] Bluetooth: use correct lock to prevent UAF of hdev object commit e305509e678b3a4af2b3cfd410f409f7cdaabb52 upstream. The hci_sock_dev_event() function will cleanup the hdev object for sockets even if this object may still be in used within the hci_sock_bound_ioctl() function, result in UAF vulnerability. This patch replace the BH context lock to serialize these affairs and prevent the race condition. Signed-off-by: Lin Ma Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index e506c51ff765..06156de24c50 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -755,7 +755,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) /* Detach sockets from device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - bh_lock_sock_nested(sk); + lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; @@ -764,7 +764,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) hci_dev_put(hdev); } - bh_unlock_sock(sk); + release_sock(sk); } read_unlock(&hci_sk_list.lock); } From ef1461e1198e7ede8cea9b6cb437d5dbd17b5ac7 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 3 Jun 2021 19:38:12 +0300 Subject: [PATCH 21/58] net: caif: added cfserl_release function commit bce130e7f392ddde8cfcb09927808ebd5f9c8669 upstream. Added cfserl_release() function. Cc: stable@vger.kernel.org Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/caif/cfserl.h | 1 + net/caif/cfserl.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h index b5b020f3c72e..bc3fae07a25f 100644 --- a/include/net/caif/cfserl.h +++ b/include/net/caif/cfserl.h @@ -9,4 +9,5 @@ #include struct cflayer *cfserl_create(int instance, bool use_stx); +void cfserl_release(struct cflayer *layer); #endif diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index ce60f06d76de..af1e1e36dc90 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -31,6 +31,11 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid); +void cfserl_release(struct cflayer *layer) +{ + kfree(layer); +} + struct cflayer *cfserl_create(int instance, bool use_stx) { struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC); From 758f725c392ec073f9f1cfbe34323ca1372afbf8 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 3 Jun 2021 19:38:51 +0300 Subject: [PATCH 22/58] net: caif: add proper error handling commit a2805dca5107d5603f4bbc027e81e20d93476e96 upstream. caif_enroll_dev() can fail in some cases. Ingnoring these cases can lead to memory leak due to not assigning link_support pointer to anywhere. Fixes: 7c18d2205ea7 ("caif: Restructure how link caif link layer enroll") Cc: stable@vger.kernel.org Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/caif/caif_dev.h | 2 +- include/net/caif/cfcnfg.h | 2 +- net/caif/caif_dev.c | 8 +++++--- net/caif/cfcnfg.c | 16 +++++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h index 028b754ae9b1..0baf2e21a533 100644 --- a/include/net/caif/caif_dev.h +++ b/include/net/caif/caif_dev.h @@ -119,7 +119,7 @@ void caif_free_client(struct cflayer *adap_layer); * The link_support layer is used to add any Link Layer specific * framing. */ -void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, +int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, struct cflayer *link_support, int head_room, struct cflayer **layer, int (**rcv_func)( struct sk_buff *, struct net_device *, diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h index 70bfd017581f..219094ace893 100644 --- a/include/net/caif/cfcnfg.h +++ b/include/net/caif/cfcnfg.h @@ -62,7 +62,7 @@ void cfcnfg_remove(struct cfcnfg *cfg); * @fcs: Specify if checksum is used in CAIF Framing Layer. * @head_room: Head space needed by link specific protocol. */ -void +int cfcnfg_add_phy_layer(struct cfcnfg *cnfg, struct net_device *dev, struct cflayer *phy_layer, enum cfcnfg_phy_preference pref, diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 711d7156efd8..32452796b90d 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -303,7 +303,7 @@ static void dev_flowctrl(struct net_device *dev, int on) caifd_put(caifd); } -void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, +int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, struct cflayer *link_support, int head_room, struct cflayer **layer, int (**rcv_func)(struct sk_buff *, struct net_device *, @@ -314,11 +314,12 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, enum cfcnfg_phy_preference pref; struct cfcnfg *cfg = get_cfcnfg(dev_net(dev)); struct caif_device_entry_list *caifdevs; + int res; caifdevs = caif_device_list(dev_net(dev)); caifd = caif_device_alloc(dev); if (!caifd) - return; + return -ENOMEM; *layer = &caifd->layer; spin_lock_init(&caifd->flow_lock); @@ -339,7 +340,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, strlcpy(caifd->layer.name, dev->name, sizeof(caifd->layer.name)); caifd->layer.transmit = transmit; - cfcnfg_add_phy_layer(cfg, + res = cfcnfg_add_phy_layer(cfg, dev, &caifd->layer, pref, @@ -349,6 +350,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, mutex_unlock(&caifdevs->lock); if (rcv_func) *rcv_func = receive; + return res; } EXPORT_SYMBOL(caif_enroll_dev); diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 8f00bea093b9..b456b79abd3b 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -450,7 +450,7 @@ unlock: rcu_read_unlock(); } -void +int cfcnfg_add_phy_layer(struct cfcnfg *cnfg, struct net_device *dev, struct cflayer *phy_layer, enum cfcnfg_phy_preference pref, @@ -459,7 +459,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, { struct cflayer *frml; struct cfcnfg_phyinfo *phyinfo = NULL; - int i; + int i, res = 0; u8 phyid; mutex_lock(&cnfg->lock); @@ -473,12 +473,15 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, goto got_phyid; } pr_warn("Too many CAIF Link Layers (max 6)\n"); + res = -EEXIST; goto out; got_phyid: phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC); - if (!phyinfo) + if (!phyinfo) { + res = -ENOMEM; goto out_err; + } phy_layer->id = phyid; phyinfo->pref = pref; @@ -492,8 +495,10 @@ got_phyid: frml = cffrml_create(phyid, fcs); - if (!frml) + if (!frml) { + res = -ENOMEM; goto out_err; + } phyinfo->frm_layer = frml; layer_set_up(frml, cnfg->mux); @@ -511,11 +516,12 @@ got_phyid: list_add_rcu(&phyinfo->node, &cnfg->phys); out: mutex_unlock(&cnfg->lock); - return; + return res; out_err: kfree(phyinfo); mutex_unlock(&cnfg->lock); + return res; } EXPORT_SYMBOL(cfcnfg_add_phy_layer); From 3be863c11cab725add9fef4237ed4e232c3fc3bb Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 3 Jun 2021 19:39:11 +0300 Subject: [PATCH 23/58] net: caif: fix memory leak in caif_device_notify commit b53558a950a89824938e9811eddfc8efcd94e1bb upstream. In case of caif_enroll_dev() fail, allocated link_support won't be assigned to the corresponding structure. So simply free allocated pointer in case of error Fixes: 7c18d2205ea7 ("caif: Restructure how link caif link layer enroll") Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+7ec324747ce876a29db6@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/caif/caif_dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 32452796b90d..cc305d84168f 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -365,6 +365,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, struct cflayer *layer, *link_support; int head_room = 0; struct caif_device_entry_list *caifdevs; + int res; cfg = get_cfcnfg(dev_net(dev)); caifdevs = caif_device_list(dev_net(dev)); @@ -390,8 +391,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, break; } } - caif_enroll_dev(dev, caifdev, link_support, head_room, + res = caif_enroll_dev(dev, caifdev, link_support, head_room, &layer, NULL); + if (res) + cfserl_release(link_support); caifdev->flowctrl = dev_flowctrl; break; From 9ea0ab48e755d8f29fe89eb235fb86176fdb597f Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 3 Jun 2021 19:39:35 +0300 Subject: [PATCH 24/58] net: caif: fix memory leak in cfusbl_device_notify commit 7f5d86669fa4d485523ddb1d212e0a2d90bd62bb upstream. In case of caif_enroll_dev() fail, allocated link_support won't be assigned to the corresponding structure. So simply free allocated pointer in case of error. Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface") Cc: stable@vger.kernel.org Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/caif/caif_usb.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 1a082a946045..76d49a1bc6f6 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -116,6 +116,11 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], return (struct cflayer *) this; } +static void cfusbl_release(struct cflayer *layer) +{ + kfree(layer); +} + static struct packet_type caif_usb_type __read_mostly = { .type = cpu_to_be16(ETH_P_802_EX1), }; @@ -128,6 +133,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, struct cflayer *layer, *link_support; struct usbnet *usbnet; struct usb_device *usbdev; + int res; /* Check whether we have a NCM device, and find its VID/PID. */ if (!(dev->dev.parent && dev->dev.parent->driver && @@ -170,8 +176,11 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, if (dev->num_tx_queues > 1) pr_warn("USB device uses more than one tx queue\n"); - caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN, + res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN, &layer, &caif_usb_type.func); + if (res) + goto err; + if (!pack_added) dev_add_pack(&caif_usb_type); pack_added = true; @@ -179,6 +188,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, strlcpy(layer->name, dev->name, sizeof(layer->name)); return 0; +err: + cfusbl_release(link_support); + return res; } static struct notifier_block caif_device_notifier = { From cb7bb81ac98d806ee47fc0ab3849db22df27471f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?= Date: Mon, 8 Mar 2021 18:42:03 +0100 Subject: [PATCH 25/58] HID: multitouch: require Finger field to mark Win8 reports as MT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a2353e3b26012ff43bcdf81d37a3eaddd7ecdbf3 upstream. This effectively changes collection_is_mt from contact ID in report->field to (device is Win8 => collection is finger) && contact ID in report->field Some devices erroneously report Pen for fingers, and Win8 stylus-on-touchscreen devices report contact ID, but mark the accompanying touchscreen device's collection correctly Cc: stable@vger.kernel.org Signed-off-by: Ahelenia Ziemiańska Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-multitouch.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index ccda72f748ee..c20945ed1dc1 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -588,9 +588,13 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td, if (!(HID_MAIN_ITEM_VARIABLE & field->flags)) continue; - for (n = 0; n < field->report_count; n++) { - if (field->usage[n].hid == HID_DG_CONTACTID) - rdata->is_mt_collection = true; + if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) { + for (n = 0; n < field->report_count; n++) { + if (field->usage[n].hid == HID_DG_CONTACTID) { + rdata->is_mt_collection = true; + break; + } + } } } From db6e9d1cc260e3d333221d05b74029793742d5dc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 2 Jun 2021 13:38:23 +0200 Subject: [PATCH 26/58] ALSA: timer: Fix master timer notification commit 9c1fe96bded935369f8340c2ac2e9e189f697d5d upstream. snd_timer_notify1() calls the notification to each slave for a master event, but it passes a wrong event number. It should be +10 offset, corresponding to SNDRV_TIMER_EVENT_MXXX, but it's incorrectly with +100 offset. Casually this was spotted by UBSAN check via syzkaller. Reported-by: syzbot+d102fa5b35335a7e544e@syzkaller.appspotmail.com Reviewed-by: Jaroslav Kysela Cc: Link: https://lore.kernel.org/r/000000000000e5560e05c3bd1d63@google.com Link: https://lore.kernel.org/r/20210602113823.23777-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index b5dc51030316..b4fe1324b56c 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -500,9 +500,10 @@ static void snd_timer_notify1(struct snd_timer_instance *ti, int event) return; if (timer->hw.flags & SNDRV_TIMER_HW_SLAVE) return; + event += 10; /* convert to SNDRV_TIMER_EVENT_MXXX */ list_for_each_entry(ts, &ti->slave_active_head, active_list) if (ts->ccallback) - ts->ccallback(ts, event + 100, &tstamp, resolution); + ts->ccallback(ts, event, &tstamp, resolution); } /* start/continue a master timer */ From 1294a5d725e8d19d661c8065959128da0ab7ef52 Mon Sep 17 00:00:00 2001 From: Carlos M Date: Mon, 31 May 2021 22:20:26 +0200 Subject: [PATCH 27/58] ALSA: hda: Fix for mute key LED for HP Pavilion 15-CK0xx commit 901be145a46eb79879367d853194346a549e623d upstream. For the HP Pavilion 15-CK0xx, with audio subsystem ID 0x103c:0x841c, adding a line in patch_realtek.c to apply the ALC269_FIXUP_HP_MUTE_LED_MIC3 fix activates the mute key LED. Signed-off-by: Carlos M Cc: Link: https://lore.kernel.org/r/20210531202026.35427-1-carlos.marr.pz@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f9ebbee3824b..42c30fba699f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7095,6 +7095,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x82bf, "HP G3 mini", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x841c, "HP Pavilion 15-CK0xx", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_LED), From 569496aa3776eea1ff0d49d0174ac1b7e861e107 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 6 May 2021 22:10:42 +0800 Subject: [PATCH 28/58] ext4: fix bug on in ext4_es_cache_extent as ext4_split_extent_at failed commit 082cd4ec240b8734a82a89ffb890216ac98fec68 upstream. We got follow bug_on when run fsstress with injecting IO fault: [130747.323114] kernel BUG at fs/ext4/extents_status.c:762! [130747.323117] Internal error: Oops - BUG: 0 [#1] SMP ...... [130747.334329] Call trace: [130747.334553] ext4_es_cache_extent+0x150/0x168 [ext4] [130747.334975] ext4_cache_extents+0x64/0xe8 [ext4] [130747.335368] ext4_find_extent+0x300/0x330 [ext4] [130747.335759] ext4_ext_map_blocks+0x74/0x1178 [ext4] [130747.336179] ext4_map_blocks+0x2f4/0x5f0 [ext4] [130747.336567] ext4_mpage_readpages+0x4a8/0x7a8 [ext4] [130747.336995] ext4_readpage+0x54/0x100 [ext4] [130747.337359] generic_file_buffered_read+0x410/0xae8 [130747.337767] generic_file_read_iter+0x114/0x190 [130747.338152] ext4_file_read_iter+0x5c/0x140 [ext4] [130747.338556] __vfs_read+0x11c/0x188 [130747.338851] vfs_read+0x94/0x150 [130747.339110] ksys_read+0x74/0xf0 This patch's modification is according to Jan Kara's suggestion in: https://patchwork.ozlabs.org/project/linux-ext4/patch/20210428085158.3728201-1-yebin10@huawei.com/ "I see. Now I understand your patch. Honestly, seeing how fragile is trying to fix extent tree after split has failed in the middle, I would probably go even further and make sure we fix the tree properly in case of ENOSPC and EDQUOT (those are easily user triggerable). Anything else indicates a HW problem or fs corruption so I'd rather leave the extent tree as is and don't try to fix it (which also means we will not create overlapping extents)." Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210506141042.3298679-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 36708d9d71cb..093cb675841b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3263,7 +3263,10 @@ static int ext4_split_extent_at(handle_t *handle, ext4_ext_mark_unwritten(ex2); err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); - if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + if (err != -ENOSPC && err != -EDQUOT) + goto out; + + if (EXT4_EXT_MAY_ZEROOUT & split_flag) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { if (split_flag & EXT4_EXT_DATA_VALID1) { err = ext4_ext_zeroout(inode, ex2); @@ -3289,30 +3292,30 @@ static int ext4_split_extent_at(handle_t *handle, ext4_ext_pblock(&orig_ex)); } - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_len = cpu_to_le16(ee_len); - ext4_ext_try_to_merge(handle, inode, path, ex); - err = ext4_ext_dirty(handle, inode, path + path->p_depth); - if (err) - goto fix_extent_len; - - /* update extent status tree */ - err = ext4_zeroout_es(inode, &zero_ex); - - goto out; - } else if (err) - goto fix_extent_len; - -out: - ext4_ext_show_leaf(inode, path); - return err; + if (!err) { + /* update the extent length and mark as initialized */ + ex->ee_len = cpu_to_le16(ee_len); + ext4_ext_try_to_merge(handle, inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + path->p_depth); + if (!err) + /* update extent status tree */ + err = ext4_zeroout_es(inode, &zero_ex); + /* If we failed at this point, we don't know in which + * state the extent tree exactly is so don't try to fix + * length of the original extent as it may do even more + * damage. + */ + goto out; + } + } fix_extent_len: ex->ee_len = orig_ex.ee_len; ext4_ext_dirty(handle, inode, path + path->p_depth); return err; +out: + ext4_ext_show_leaf(inode, path); + return err; } /* From 2132a28807cf8eda722f96628b3ab7709a236bb8 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Tue, 8 Jun 2021 13:00:49 +0100 Subject: [PATCH 29/58] usb: dwc2: Fix build in periphal-only mode In branches to which 24d209dba5a3 ("usb: dwc2: Fix hibernation between host and device modes.") has been back-ported, the bus_suspended member of struct dwc2_hsotg is only present in builds that support host-mode. To avoid having to pull in several more non-Fix commits in order to get it to compile, wrap the usage of the member in a macro conditional. Fixes: 24d209dba5a3 ("usb: dwc2: Fix hibernation between host and device modes.") Signed-off-by: Phil Elwell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core_intr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index af26a8a20e0b..5919ecb7d4b7 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -700,7 +700,11 @@ static inline void dwc_handle_gpwrdn_disc_det(struct dwc2_hsotg *hsotg, dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN); hsotg->hibernated = 0; + +#if IS_ENABLED(CONFIG_USB_DWC2_HOST) || \ + IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE) hsotg->bus_suspended = 0; +#endif if (gpwrdn & GPWRDN_IDSTS) { hsotg->op_state = OTG_STATE_B_PERIPHERAL; From d106f05432e60f9f62d456ef017687f5c73cb414 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 4 Jun 2021 20:01:14 -0700 Subject: [PATCH 30/58] pid: take a reference when initializing `cad_pid` commit 0711f0d7050b9e07c44bc159bbc64ac0a1022c7f upstream. During boot, kernel_init_freeable() initializes `cad_pid` to the init task's struct pid. Later on, we may change `cad_pid` via a sysctl, and when this happens proc_do_cad_pid() will increment the refcount on the new pid via get_pid(), and will decrement the refcount on the old pid via put_pid(). As we never called get_pid() when we initialized `cad_pid`, we decrement a reference we never incremented, can therefore free the init task's struct pid early. As there can be dangling references to the struct pid, we can later encounter a use-after-free (e.g. when delivering signals). This was spotted when fuzzing v5.13-rc3 with Syzkaller, but seems to have been around since the conversion of `cad_pid` to struct pid in commit 9ec52099e4b8 ("[PATCH] replace cad_pid by a struct pid") from the pre-KASAN stone age of v2.6.19. Fix this by getting a reference to the init task's struct pid when we assign it to `cad_pid`. Full KASAN splat below. ================================================================== BUG: KASAN: use-after-free in ns_of_pid include/linux/pid.h:153 [inline] BUG: KASAN: use-after-free in task_active_pid_ns+0xc0/0xc8 kernel/pid.c:509 Read of size 4 at addr ffff23794dda0004 by task syz-executor.0/273 CPU: 1 PID: 273 Comm: syz-executor.0 Not tainted 5.12.0-00001-g9aef892b2d15 #1 Hardware name: linux,dummy-virt (DT) Call trace: ns_of_pid include/linux/pid.h:153 [inline] task_active_pid_ns+0xc0/0xc8 kernel/pid.c:509 do_notify_parent+0x308/0xe60 kernel/signal.c:1950 exit_notify kernel/exit.c:682 [inline] do_exit+0x2334/0x2bd0 kernel/exit.c:845 do_group_exit+0x108/0x2c8 kernel/exit.c:922 get_signal+0x4e4/0x2a88 kernel/signal.c:2781 do_signal arch/arm64/kernel/signal.c:882 [inline] do_notify_resume+0x300/0x970 arch/arm64/kernel/signal.c:936 work_pending+0xc/0x2dc Allocated by task 0: slab_post_alloc_hook+0x50/0x5c0 mm/slab.h:516 slab_alloc_node mm/slub.c:2907 [inline] slab_alloc mm/slub.c:2915 [inline] kmem_cache_alloc+0x1f4/0x4c0 mm/slub.c:2920 alloc_pid+0xdc/0xc00 kernel/pid.c:180 copy_process+0x2794/0x5e18 kernel/fork.c:2129 kernel_clone+0x194/0x13c8 kernel/fork.c:2500 kernel_thread+0xd4/0x110 kernel/fork.c:2552 rest_init+0x44/0x4a0 init/main.c:687 arch_call_rest_init+0x1c/0x28 start_kernel+0x520/0x554 init/main.c:1064 0x0 Freed by task 270: slab_free_hook mm/slub.c:1562 [inline] slab_free_freelist_hook+0x98/0x260 mm/slub.c:1600 slab_free mm/slub.c:3161 [inline] kmem_cache_free+0x224/0x8e0 mm/slub.c:3177 put_pid.part.4+0xe0/0x1a8 kernel/pid.c:114 put_pid+0x30/0x48 kernel/pid.c:109 proc_do_cad_pid+0x190/0x1b0 kernel/sysctl.c:1401 proc_sys_call_handler+0x338/0x4b0 fs/proc/proc_sysctl.c:591 proc_sys_write+0x34/0x48 fs/proc/proc_sysctl.c:617 call_write_iter include/linux/fs.h:1977 [inline] new_sync_write+0x3ac/0x510 fs/read_write.c:518 vfs_write fs/read_write.c:605 [inline] vfs_write+0x9c4/0x1018 fs/read_write.c:585 ksys_write+0x124/0x240 fs/read_write.c:658 __do_sys_write fs/read_write.c:670 [inline] __se_sys_write fs/read_write.c:667 [inline] __arm64_sys_write+0x78/0xb0 fs/read_write.c:667 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall arch/arm64/kernel/syscall.c:49 [inline] el0_svc_common.constprop.1+0x16c/0x388 arch/arm64/kernel/syscall.c:129 do_el0_svc+0xf8/0x150 arch/arm64/kernel/syscall.c:168 el0_svc+0x28/0x38 arch/arm64/kernel/entry-common.c:416 el0_sync_handler+0x134/0x180 arch/arm64/kernel/entry-common.c:432 el0_sync+0x154/0x180 arch/arm64/kernel/entry.S:701 The buggy address belongs to the object at ffff23794dda0000 which belongs to the cache pid of size 224 The buggy address is located 4 bytes inside of 224-byte region [ffff23794dda0000, ffff23794dda00e0) The buggy address belongs to the page: page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4dda0 head:(____ptrval____) order:1 compound_mapcount:0 flags: 0x3fffc0000010200(slab|head) raw: 03fffc0000010200 dead000000000100 dead000000000122 ffff23794d40d080 raw: 0000000000000000 0000000000190019 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff23794dd9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff23794dd9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff23794dda0000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff23794dda0080: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff23794dda0100: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00 ================================================================== Link: https://lkml.kernel.org/r/20210524172230.38715-1-mark.rutland@arm.com Fixes: 9ec52099e4b8678a ("[PATCH] replace cad_pid by a struct pid") Signed-off-by: Mark Rutland Acked-by: Christian Brauner Cc: Cedric Le Goater Cc: Christian Brauner Cc: Eric W. Biederman Cc: Kees Cook Cc: Paul Mackerras Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index fdfef08da0c4..7baad67c2e93 100644 --- a/init/main.c +++ b/init/main.c @@ -1124,7 +1124,7 @@ static noinline void __init kernel_init_freeable(void) */ set_mems_allowed(node_states[N_MEMORY]); - cad_pid = task_pid(current); + cad_pid = get_pid(task_pid(current)); smp_prepare_cpus(setup_max_cpus); From cec4e857ffaa8c447f51cd8ab4e72350077b6770 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 4 Jun 2021 20:01:42 -0700 Subject: [PATCH 31/58] ocfs2: fix data corruption by fallocate commit 6bba4471f0cc1296fe3c2089b9e52442d3074b2e upstream. When fallocate punches holes out of inode size, if original isize is in the middle of last cluster, then the part from isize to the end of the cluster will be zeroed with buffer write, at that time isize is not yet updated to match the new size, if writeback is kicked in, it will invoke ocfs2_writepage()->block_write_full_page() where the pages out of inode size will be dropped. That will cause file corruption. Fix this by zero out eof blocks when extending the inode size. Running the following command with qemu-image 4.2.1 can get a corrupted coverted image file easily. qemu-img convert -p -t none -T none -f qcow2 $qcow_image \ -O qcow2 -o compat=1.1 $qcow_image.conv The usage of fallocate in qemu is like this, it first punches holes out of inode size, then extend the inode size. fallocate(11, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 2276196352, 65536) = 0 fallocate(11, 0, 2276196352, 65536) = 0 v1: https://www.spinics.net/lists/linux-fsdevel/msg193999.html v2: https://lore.kernel.org/linux-fsdevel/20210525093034.GB4112@quack2.suse.cz/T/ Link: https://lkml.kernel.org/r/20210528210648.9124-1-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Jan Kara Cc: Mark Fasheh Cc: Joel Becker Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/file.c | 55 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5c507569ef70..94df697e2638 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1863,6 +1863,45 @@ out: return ret; } +/* + * zero out partial blocks of one cluster. + * + * start: file offset where zero starts, will be made upper block aligned. + * len: it will be trimmed to the end of current cluster if "start + len" + * is bigger than it. + */ +static int ocfs2_zeroout_partial_cluster(struct inode *inode, + u64 start, u64 len) +{ + int ret; + u64 start_block, end_block, nr_blocks; + u64 p_block, offset; + u32 cluster, p_cluster, nr_clusters; + struct super_block *sb = inode->i_sb; + u64 end = ocfs2_align_bytes_to_clusters(sb, start); + + if (start + len < end) + end = start + len; + + start_block = ocfs2_blocks_for_bytes(sb, start); + end_block = ocfs2_blocks_for_bytes(sb, end); + nr_blocks = end_block - start_block; + if (!nr_blocks) + return 0; + + cluster = ocfs2_bytes_to_clusters(sb, start); + ret = ocfs2_get_clusters(inode, cluster, &p_cluster, + &nr_clusters, NULL); + if (ret) + return ret; + if (!p_cluster) + return 0; + + offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); + p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; + return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); +} + /* * Parts of this function taken from xfs_change_file_space() */ @@ -1873,7 +1912,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, { int ret; s64 llen; - loff_t size; + loff_t size, orig_isize; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct buffer_head *di_bh = NULL; handle_t *handle; @@ -1904,6 +1943,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, goto out_inode_unlock; } + orig_isize = i_size_read(inode); switch (sr->l_whence) { case 0: /*SEEK_SET*/ break; @@ -1911,7 +1951,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, sr->l_start += f_pos; break; case 2: /*SEEK_END*/ - sr->l_start += i_size_read(inode); + sr->l_start += orig_isize; break; default: ret = -EINVAL; @@ -1965,6 +2005,14 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, default: ret = -EINVAL; } + + /* zeroout eof blocks in the cluster. */ + if (!ret && change_size && orig_isize < size) { + ret = ocfs2_zeroout_partial_cluster(inode, orig_isize, + size - orig_isize); + if (!ret) + i_size_write(inode, size); + } up_write(&OCFS2_I(inode)->ip_alloc_sem); if (ret) { mlog_errno(ret); @@ -1981,9 +2029,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, goto out_inode_unlock; } - if (change_size && i_size_read(inode) < size) - i_size_write(inode, size); - inode->i_ctime = inode->i_mtime = current_time(inode); ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); if (ret < 0) From 93e4ac2a9979a9a4ecc158409ed9c3044dc0ae1f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 31 May 2021 09:21:38 +0200 Subject: [PATCH 32/58] nfc: fix NULL ptr dereference in llcp_sock_getname() after failed connect commit 4ac06a1e013cf5fdd963317ffd3b968560f33bba upstream. It's possible to trigger NULL pointer dereference by local unprivileged user, when calling getsockname() after failed bind() (e.g. the bind fails because LLCP_SAP_MAX used as SAP): BUG: kernel NULL pointer dereference, address: 0000000000000000 CPU: 1 PID: 426 Comm: llcp_sock_getna Not tainted 5.13.0-rc2-next-20210521+ #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014 Call Trace: llcp_sock_getname+0xb1/0xe0 __sys_getpeername+0x95/0xc0 ? lockdep_hardirqs_on_prepare+0xd5/0x180 ? syscall_enter_from_user_mode+0x1c/0x40 __x64_sys_getpeername+0x11/0x20 do_syscall_64+0x36/0x70 entry_SYSCALL_64_after_hwframe+0x44/0xae This can be reproduced with Syzkaller C repro (bind followed by getpeername): https://syzkaller.appspot.com/x/repro.c?x=14def446e00000 Cc: Fixes: d646960f7986 ("NFC: Initial LLCP support") Reported-by: syzbot+80fb126e7f7d8b1a5914@syzkaller.appspotmail.com Reported-by: butt3rflyh4ck Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20210531072138.5219-1-krzysztof.kozlowski@canonical.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/nfc/llcp_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 59de4f54dd18..23f7116d122a 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -122,6 +122,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) if (!llcp_sock->service_name) { nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; + llcp_sock->dev = NULL; ret = -ENOMEM; goto put_dev; } @@ -131,6 +132,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) llcp_sock->local = NULL; kfree(llcp_sock->service_name); llcp_sock->service_name = NULL; + llcp_sock->dev = NULL; ret = -EADDRINUSE; goto put_dev; } From 7e25cb1b22f81239ae3332e14a1d0cff7014bccd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 May 2021 13:08:41 +0200 Subject: [PATCH 33/58] x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing commit 7d65f9e80646c595e8c853640a9d0768a33e204c upstream. PIC interrupts do not support affinity setting and they can end up on any online CPU. Therefore, it's required to mark the associated vectors as system-wide reserved. Otherwise, the corresponding irq descriptors are copied to the secondary CPUs but the vectors are not marked as assigned or reserved. This works correctly for the IO/APIC case. When the IO/APIC is disabled via config, kernel command line or lack of enumeration then all legacy interrupts are routed through the PIC, but nothing marks them as system-wide reserved vectors. As a consequence, a subsequent allocation on a secondary CPU can result in allocating one of these vectors, which triggers the BUG() in apic_update_vector() because the interrupt descriptor slot is not empty. Imran tried to work around that by marking those interrupts as allocated when a CPU comes online. But that's wrong in case that the IO/APIC is available and one of the legacy interrupts, e.g. IRQ0, has been switched to PIC mode because then marking them as allocated will fail as they are already marked as system vectors. Stay consistent and update the legacy vectors after attempting IO/APIC initialization and mark them as system vectors in case that no IO/APIC is available. Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment") Reported-by: Imran Khan Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210519233928.2157496-1-imran.f.khan@oracle.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/apic.h | 1 + arch/x86/kernel/apic/apic.c | 1 + arch/x86/kernel/apic/vector.c | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b5354e216b07..163c2af44a44 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -172,6 +172,7 @@ static inline int apic_is_clustered_box(void) extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index da6b52c70964..9791828f3fcd 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2507,6 +2507,7 @@ void __init apic_bsp_setup(bool upmode) end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); + lapic_update_legacy_vectors(); } #ifdef CONFIG_UP_LATE_INIT diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f0d0535e8f34..dc7c759442f1 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -682,6 +682,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace) irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); } +void __init lapic_update_legacy_vectors(void) +{ + unsigned int i; + + if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0) + return; + + /* + * If the IO/APIC is disabled via config, kernel command line or + * lack of enumeration then all legacy interrupts are routed + * through the PIC. Make sure that they are marked as legacy + * vectors. PIC_CASCADE_IRQ has already been marked in + * lapic_assign_system_vectors(). + */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + lapic_assign_legacy_vector(i, true); + } +} + void __init lapic_assign_system_vectors(void) { unsigned int i, vector = 0; From 543a6f5284b4cc0591e5497d1912300378043561 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 May 2021 09:38:27 -0400 Subject: [PATCH 34/58] btrfs: mark ordered extent and inode with error if we fail to finish commit d61bec08b904cf171835db98168f82bc338e92e4 upstream. While doing error injection testing I saw that sometimes we'd get an abort that wouldn't stop the current transaction commit from completing. This abort was coming from finish ordered IO, but at this point in the transaction commit we should have gotten an error and stopped. It turns out the abort came from finish ordered io while trying to write out the free space cache. It occurred to me that any failure inside of finish_ordered_io isn't actually raised to the person doing the writing, so we could have any number of failures in this path and think the ordered extent completed successfully and the inode was fine. Fix this by marking the ordered extent with BTRFS_ORDERED_IOERR, and marking the mapping of the inode with mapping_set_error, so any callers that simply call fdatawait will also get the error. With this we're seeing the IO error on the free space inode when we fail to do the finish_ordered_io. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8c6f619c9ee6..bf0e0e3e09c5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3162,6 +3162,18 @@ out: if (ret || truncated) { u64 start, end; + /* + * If we failed to finish this ordered extent for any reason we + * need to make sure BTRFS_ORDERED_IOERR is set on the ordered + * extent, and mark the inode with the error if it wasn't + * already set. Any error during writeback would have already + * set the mapping error, so we need to set it if we're the ones + * marking this ordered extent as failed. + */ + if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR, + &ordered_extent->flags)) + mapping_set_error(ordered_extent->inode->i_mapping, -EIO); + if (truncated) start = ordered_extent->file_offset + logical_len; else From 8299bb94fae9393c922251ff90508f0a8b058ff4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 May 2021 10:52:45 -0400 Subject: [PATCH 35/58] btrfs: fix error handling in btrfs_del_csums commit b86652be7c83f70bf406bed18ecf55adb9bfb91b upstream. Error injection stress would sometimes fail with checksums on disk that did not have a corresponding extent. This occurred because the pattern in btrfs_del_csums was while (1) { ret = btrfs_search_slot(); if (ret < 0) break; } ret = 0; out: btrfs_free_path(path); return ret; If we got an error from btrfs_search_slot we'd clear the error because we were breaking instead of goto out. Instead of using goto out, simply handle the cases where we may leave a random value in ret, and get rid of the ret = 0; out: pattern and simply allow break to have the proper error reporting. With this fix we properly abort the transaction and do not commit thinking we successfully deleted the csum. Reviewed-by: Qu Wenruo CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file-item.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 1b8a04b767ff..40db31b69ef7 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -586,7 +586,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, u64 end_byte = bytenr + len; u64 csum_end; struct extent_buffer *leaf; - int ret; + int ret = 0; u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); int blocksize_bits = fs_info->sb->s_blocksize_bits; @@ -605,6 +605,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) { + ret = 0; if (path->slots[0] == 0) break; path->slots[0]--; @@ -661,7 +662,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, root, path, path->slots[0], del_nr); if (ret) - goto out; + break; if (key.offset == bytenr) break; } else if (key.offset < bytenr && csum_end > end_byte) { @@ -705,8 +706,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, ret = btrfs_split_item(trans, root, path, &key, offset); if (ret && ret != -EAGAIN) { btrfs_abort_transaction(trans, ret); - goto out; + break; } + ret = 0; key.offset = end_byte - 1; } else { @@ -716,8 +718,6 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, } btrfs_release_path(path); } - ret = 0; -out: btrfs_free_path(path); return ret; } From adaafc32d8b0d896c71348775eee3d0fbe743052 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 May 2021 10:52:46 -0400 Subject: [PATCH 36/58] btrfs: return errors from btrfs_del_csums in cleanup_ref_head commit 856bd270dc4db209c779ce1e9555c7641ffbc88e upstream. We are unconditionally returning 0 in cleanup_ref_head, despite the fact that btrfs_del_csums could fail. We need to return the error so the transaction gets aborted properly, fix this by returning ret from btrfs_del_csums in cleanup_ref_head. Reviewed-by: Qu Wenruo CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ce5e0f6c6af4..014f956be66a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2501,7 +2501,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, head->qgroup_reserved); btrfs_delayed_ref_unlock(head); btrfs_put_delayed_ref_head(head); - return 0; + return ret; } /* From c5cfa81562174e585ecc40bf8766835abd058572 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 May 2021 13:13:15 -0400 Subject: [PATCH 37/58] btrfs: fixup error handling in fixup_inode_link_counts commit 011b28acf940eb61c000059dd9e2cfcbf52ed96b upstream. This function has the following pattern while (1) { ret = whatever(); if (ret) goto out; } ret = 0 out: return ret; However several places in this while loop we simply break; when there's a problem, thus clearing the return value, and in one case we do a return -EIO, and leak the memory for the path. Fix this by re-arranging the loop to deal with ret == 1 coming from btrfs_search_slot, and then simply delete the ret = 0; out: bit so everybody can break if there is an error, which will allow for proper error handling to occur. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1cd610ddbb24..93e59ce00174 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1699,6 +1699,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; if (ret == 1) { + ret = 0; if (path->slots[0] == 0) break; path->slots[0]--; @@ -1711,17 +1712,19 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + break; btrfs_release_path(path); inode = read_one_inode(root, key.offset); - if (!inode) - return -EIO; + if (!inode) { + ret = -EIO; + break; + } ret = fixup_inode_link_count(trans, root, inode); iput(inode); if (ret) - goto out; + break; /* * fixup on a directory may create new entries, @@ -1730,8 +1733,6 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, */ key.offset = (u64)-1; } - ret = 0; -out: btrfs_release_path(path); return ret; } From 7de60c2d5a2a66ef2c5d76952a5a3a9a4ea4d436 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 4 Jun 2021 20:01:36 -0700 Subject: [PATCH 38/58] mm, hugetlb: fix simple resv_huge_pages underflow on UFFDIO_COPY [ Upstream commit d84cf06e3dd8c5c5b547b5d8931015fc536678e5 ] The userfaultfd hugetlb tests cause a resv_huge_pages underflow. This happens when hugetlb_mcopy_atomic_pte() is called with !is_continue on an index for which we already have a page in the cache. When this happens, we allocate a second page, double consuming the reservation, and then fail to insert the page into the cache and return -EEXIST. To fix this, we first check if there is a page in the cache which already consumed the reservation, and return -EEXIST immediately if so. There is still a rare condition where we fail to copy the page contents AND race with a call for hugetlb_no_page() for this index and again we will underflow resv_huge_pages. That is fixed in a more complicated patch not targeted for -stable. Test: Hacked the code locally such that resv_huge_pages underflows produce a warning, then: ./tools/testing/selftests/vm/userfaultfd hugetlb_shared 10 2 /tmp/kokonut_test/huge/userfaultfd_test && echo test success ./tools/testing/selftests/vm/userfaultfd hugetlb 10 2 /tmp/kokonut_test/huge/userfaultfd_test && echo test success Both tests succeed and produce no warnings. After the test runs number of free/resv hugepages is correct. [mike.kravetz@oracle.com: changelog fixes] Link: https://lkml.kernel.org/r/20210528004649.85298-1-almasrymina@google.com Fixes: 8fb5debc5fcd ("userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for userfaultfd support") Signed-off-by: Mina Almasry Reviewed-by: Mike Kravetz Cc: Axel Rasmussen Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/hugetlb.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2f769a661568..c69f12e4c149 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4154,10 +4154,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, struct page *page; if (!*pagep) { - ret = -ENOMEM; - page = alloc_huge_page(dst_vma, dst_addr, 0); - if (IS_ERR(page)) + /* If a page already exists, then it's UFFDIO_COPY for + * a non-missing case. Return -EEXIST. + */ + if (vm_shared && + hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) { + ret = -EEXIST; goto out; + } + + page = alloc_huge_page(dst_vma, dst_addr, 0); + if (IS_ERR(page)) { + ret = -ENOMEM; + goto out; + } ret = copy_huge_page_from_user(page, (const void __user *) src_addr, From 1b5c4b0669e0a63c9e5345c42ff6329a9001c207 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 2 Jun 2021 11:27:45 +0800 Subject: [PATCH 39/58] bpf: fix test suite to enable all unpriv program types commit 36641ad61db5ce9befd5eb0071abb36eaff16cfc upstream Given BPF_PROG_TYPE_CGROUP_SKB program types are also valid in an unprivileged setting, lets not omit these tests and potentially have issues fall through the cracks. Make this more obvious by adding a small test_as_unpriv() helper. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e1e4b6ab83f7..d33d1d70722e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -4798,6 +4798,7 @@ static struct bpf_test tests[] = { .fixup_cgroup_storage = { 1 }, .result = REJECT, .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { @@ -12963,6 +12964,13 @@ static void get_unpriv_disabled() fclose(fd); } +static bool test_as_unpriv(struct bpf_test *test) +{ + return !test->prog_type || + test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER || + test->prog_type == BPF_PROG_TYPE_CGROUP_SKB; +} + static int do_test(bool unpriv, unsigned int from, unsigned int to) { int i, passes = 0, errors = 0, skips = 0; @@ -12973,10 +12981,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) /* Program types that are not supported by non-root we * skip right away. */ - if (!test->prog_type && unpriv_disabled) { + if (test_as_unpriv(test) && unpriv_disabled) { printf("#%d/u %s SKIP\n", i, test->descr); skips++; - } else if (!test->prog_type) { + } else if (test_as_unpriv(test)) { if (!unpriv) set_admin(false); printf("#%d/u %s ", i, test->descr); From ac0985c8a2d8f829e7f6dfcdd543f32dd29747b7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 2 Jun 2021 11:27:46 +0800 Subject: [PATCH 40/58] bpf: test make sure to run unpriv test cases in test_verifier commit 832c6f2c29ec519b766923937f4f93fb1008b47d upstream Right now unprivileged tests are never executed as a BPF test run, only loaded. Allow for running them as well so that we can check the outcome and probe for regressions. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 71 ++++++++++++--------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index d33d1d70722e..38dd1299bfcf 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -70,7 +70,7 @@ struct bpf_test { int fixup_cgroup_storage[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; - uint32_t retval; + uint32_t retval, retval_unpriv; enum { UNDEF, ACCEPT, @@ -2986,6 +2986,8 @@ static struct bpf_test tests[] = { .fixup_prog1 = { 2 }, .result = ACCEPT, .retval = 42, + /* Verifier rewrite for unpriv skips tail call here. */ + .retval_unpriv = 2, }, { "stack pointer arithmetic", @@ -12811,6 +12813,33 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } } +static int set_admin(bool admin) +{ + cap_t caps; + const cap_value_t cap_val = CAP_SYS_ADMIN; + int ret = -1; + + caps = cap_get_proc(); + if (!caps) { + perror("cap_get_proc"); + return -1; + } + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val, + admin ? CAP_SET : CAP_CLEAR)) { + perror("cap_set_flag"); + goto out; + } + if (cap_set_proc(caps)) { + perror("cap_set_proc"); + goto out; + } + ret = 0; +out: + if (cap_free(caps)) + perror("cap_free"); + return ret; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { @@ -12819,6 +12848,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, struct bpf_insn *prog = test->insns; int map_fds[MAX_NR_MAPS]; const char *expected_err; + uint32_t expected_val; uint32_t retval; int i, err; @@ -12836,6 +12866,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; + expected_val = unpriv && test->retval_unpriv ? + test->retval_unpriv : test->retval; reject_from_alignment = fd_prog < 0 && (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && @@ -12869,16 +12901,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv, __u8 tmp[TEST_DATA_LEN << 2]; __u32 size_tmp = sizeof(tmp); + if (unpriv) + set_admin(true); err = bpf_prog_test_run(fd_prog, 1, test->data, sizeof(test->data), tmp, &size_tmp, &retval, NULL); + if (unpriv) + set_admin(false); if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { printf("Unexpected bpf_prog_test_run error\n"); goto fail_log; } - if (!err && retval != test->retval && - test->retval != POINTER_VALUE) { - printf("FAIL retval %d != %d\n", retval, test->retval); + if (!err && retval != expected_val && + expected_val != POINTER_VALUE) { + printf("FAIL retval %d != %d\n", retval, expected_val); goto fail_log; } } @@ -12921,33 +12957,6 @@ static bool is_admin(void) return (sysadmin == CAP_SET); } -static int set_admin(bool admin) -{ - cap_t caps; - const cap_value_t cap_val = CAP_SYS_ADMIN; - int ret = -1; - - caps = cap_get_proc(); - if (!caps) { - perror("cap_get_proc"); - return -1; - } - if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val, - admin ? CAP_SET : CAP_CLEAR)) { - perror("cap_set_flag"); - goto out; - } - if (cap_set_proc(caps)) { - perror("cap_set_proc"); - goto out; - } - ret = 0; -out: - if (cap_free(caps)) - perror("cap_free"); - return ret; -} - static void get_unpriv_disabled() { char buf[2]; From b6c9e3b46c3a4c78799ac550176856e7ff5e313c Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 2 Jun 2021 11:27:47 +0800 Subject: [PATCH 41/58] selftests/bpf: Generalize dummy program types commit 0c586079f852187d19fea60c9a4981ad29e22ba8 upstream Don't hardcode the dummy program types to SOCKET_FILTER type, as this prevents testing bpf_tail_call in conjunction with other program types. Instead, use the program type specified in the test case. Signed-off-by: Joe Stringer Signed-off-by: Daniel Borkmann Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 31 +++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 38dd1299bfcf..809d8e9ac356 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -12631,18 +12631,18 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } -static int create_prog_dummy1(void) +static int create_prog_dummy1(enum bpf_map_type prog_type) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_0, 42), BPF_EXIT_INSN(), }; - return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + return bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_dummy2(int mfd, int idx) +static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_3, idx), @@ -12653,11 +12653,12 @@ static int create_prog_dummy2(int mfd, int idx) BPF_EXIT_INSN(), }; - return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + return bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_array(uint32_t max_elem, int p1key) +static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem, + int p1key) { int p2key = 1; int mfd, p1fd, p2fd; @@ -12669,8 +12670,8 @@ static int create_prog_array(uint32_t max_elem, int p1key) return -1; } - p1fd = create_prog_dummy1(); - p2fd = create_prog_dummy2(mfd, p2key); + p1fd = create_prog_dummy1(prog_type); + p2fd = create_prog_dummy2(prog_type, mfd, p2key); if (p1fd < 0 || p2fd < 0) goto out; if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0) @@ -12725,8 +12726,8 @@ static int create_cgroup_storage(void) static char bpf_vlog[UINT_MAX >> 8]; -static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, - int *map_fds) +static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, + struct bpf_insn *prog, int *map_fds) { int *fixup_map1 = test->fixup_map1; int *fixup_map2 = test->fixup_map2; @@ -12781,7 +12782,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_prog1) { - map_fds[4] = create_prog_array(4, 0); + map_fds[4] = create_prog_array(prog_type, 4, 0); do { prog[*fixup_prog1].imm = map_fds[4]; fixup_prog1++; @@ -12789,7 +12790,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_prog2) { - map_fds[5] = create_prog_array(8, 7); + map_fds[5] = create_prog_array(prog_type, 8, 7); do { prog[*fixup_prog2].imm = map_fds[5]; fixup_prog2++; @@ -12855,11 +12856,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv, for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; - do_test_fixup(test, prog, map_fds); + if (!prog_type) + prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + do_test_fixup(test, prog_type, prog, map_fds); prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + fd_prog = bpf_verify_program(prog_type, prog, prog_len, + test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? From 878470e7f5edb0f8d78f863d25d65e460da3593f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Jun 2021 11:27:48 +0800 Subject: [PATCH 42/58] bpf: Add BPF_F_ANY_ALIGNMENT. commit e9ee9efc0d176512cdce9d27ff8549d7ffa2bfcd upstream Often we want to write tests cases that check things like bad context offset accesses. And one way to do this is to use an odd offset on, for example, a 32-bit load. This unfortunately triggers the alignment checks first on platforms that do not set CONFIG_EFFICIENT_UNALIGNED_ACCESS. So the test case see the alignment failure rather than what it was testing for. It is often not completely possible to respect the original intention of the test, or even test the same exact thing, while solving the alignment issue. Another option could have been to check the alignment after the context and other validations are performed by the verifier, but that is a non-trivial change to the verifier. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/bpf.h | 14 ++++++++++++++ kernel/bpf/syscall.c | 7 ++++++- kernel/bpf/verifier.c | 3 +++ tools/include/uapi/linux/bpf.h | 14 ++++++++++++++ tools/lib/bpf/bpf.c | 8 ++++---- tools/lib/bpf/bpf.h | 2 +- tools/testing/selftests/bpf/test_align.c | 4 ++-- tools/testing/selftests/bpf/test_verifier.c | 3 ++- 8 files changed, 46 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 71ca8c4dc290..8481fc7676c0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -228,6 +228,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 21a366a661ac..353a8d672302 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1367,9 +1367,14 @@ static int bpf_prog_load(union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; - if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT) + if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT)) return -EINVAL; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && + (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* copy eBPF program license from user space */ if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1f4c88ce58de..4ce032c4acd0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6440,6 +6440,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; + if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) + env->strict_alignment = false; + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 13944978ada5..9e060c6a01ac 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -226,6 +226,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 482025b72839..f28ae6a68697 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -261,9 +261,9 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, } int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, int log_level) + size_t insns_cnt, __u32 prog_flags, const char *license, + __u32 kern_version, char *log_buf, size_t log_buf_sz, + int log_level) { union bpf_attr attr; @@ -277,7 +277,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_level = log_level; log_buf[0] = 0; attr.kern_version = kern_version; - attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + attr.prog_flags = prog_flags; return sys_bpf_prog_load(&attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index c3145ab3bdca..7f2e947d940c 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -79,7 +79,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, __u32 kern_version, char *log_buf, size_t log_buf_sz); int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, + size_t insns_cnt, __u32 prog_flags, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz, int log_level); diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 5f377ec53f2f..3c789d03b629 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -620,8 +620,8 @@ static int do_test_single(struct bpf_align_test *test) prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, 1, "GPL", 0, - bpf_vlog, sizeof(bpf_vlog), 2); + prog, prog_len, BPF_F_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); if (fd_prog < 0 && test->result != REJECT) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 809d8e9ac356..919f97a20203 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -12862,7 +12862,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type, prog, prog_len, - test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + test->flags & F_LOAD_WITH_STRICT_ALIGNMENT ? + BPF_F_STRICT_ALIGNMENT : 0, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? From 1e7ee04b035a268716c98a1ee22634419f30d25d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Jun 2021 11:27:49 +0800 Subject: [PATCH 43/58] bpf: Adjust F_NEEDS_EFFICIENT_UNALIGNED_ACCESS handling in test_verifier.c commit c7665702d3208b77b8e00f0699b6b88241b04360 upstream Make it set the flag argument to bpf_verify_program() which will relax the alignment restrictions. Now all such test cases will go properly through the verifier even on inefficient unaligned access architectures. On inefficient unaligned access architectures do not try to run such programs, instead mark the test case as passing but annotate the result similarly to how it is done now in the presence of this flag. So, we get complete full coverage for all REJECT test cases, and at least verifier level coverage for ACCEPT test cases. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 42 ++++++++++++--------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 919f97a20203..686f5d1ac74a 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -12844,13 +12844,14 @@ out: static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, reject_from_alignment; + int fd_prog, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; int map_fds[MAX_NR_MAPS]; const char *expected_err; uint32_t expected_val; uint32_t retval; + __u32 pflags; int i, err; for (i = 0; i < MAX_NR_MAPS; i++) @@ -12861,9 +12862,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, do_test_fixup(test, prog_type, prog, map_fds); prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type, prog, prog_len, - test->flags & F_LOAD_WITH_STRICT_ALIGNMENT ? - BPF_F_STRICT_ALIGNMENT : 0, + pflags = 0; + if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) + pflags |= BPF_F_STRICT_ALIGNMENT; + if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) + pflags |= BPF_F_ANY_ALIGNMENT; + fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? @@ -12873,28 +12877,27 @@ static void do_test_single(struct bpf_test *test, bool unpriv, expected_val = unpriv && test->retval_unpriv ? test->retval_unpriv : test->retval; - reject_from_alignment = fd_prog < 0 && - (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "misaligned"); -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (reject_from_alignment) { - printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", - strerror(errno)); - goto fail_log; - } -#endif + alignment_prevented_execution = 0; + if (expected_ret == ACCEPT) { - if (fd_prog < 0 && !reject_from_alignment) { + if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (fd_prog >= 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) { + alignment_prevented_execution = 1; + goto test_ok; + } +#endif } else { if (fd_prog >= 0) { printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { + if (!strstr(bpf_vlog, expected_err)) { printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", expected_err, bpf_vlog); goto fail_log; @@ -12922,9 +12925,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto fail_log; } } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +test_ok: +#endif (*passes)++; - printf("OK%s\n", reject_from_alignment ? - " (NOTE: reject due to unknown alignment)" : ""); + printf("OK%s\n", alignment_prevented_execution ? + " (NOTE: not executed due to unknown alignment)" : ""); close_fds: close(fd_prog); for (i = 0; i < MAX_NR_MAPS; i++) From f22c1cd341cd91296967f143cd7b2068779751a2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Jun 2021 11:27:50 +0800 Subject: [PATCH 44/58] bpf: Make more use of 'any' alignment in test_verifier.c commit 2acc5fd5b8c25df0de7f3c8b8e385f5c6f8202ec upstream Use F_NEEDS_EFFICIENT_UNALIGNED_ACCESS in more tests where the expected result is REJECT. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 686f5d1ac74a..a402121e9812 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1799,6 +1799,7 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet read for SK_MSG", @@ -2191,6 +2192,7 @@ static struct bpf_test tests[] = { }, .errstr = "invalid bpf_context access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check cb access: half, wrong type", @@ -3151,6 +3153,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R0 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs corruption 2", @@ -3181,6 +3184,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R3 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs + data", @@ -3680,6 +3684,7 @@ static struct bpf_test tests[] = { .errstr = "R2 invalid mem access 'inv'", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test16 (arith on data_end)", @@ -3863,6 +3868,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test24 (x += pkt_ptr, 5)", @@ -4767,6 +4773,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid access to map value, value_size=64 off=-2 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid cgroup storage access 5", @@ -6433,6 +6440,7 @@ static struct bpf_test tests[] = { .errstr = "invalid mem access 'inv'", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value illegal alu op, 5", @@ -6455,6 +6463,7 @@ static struct bpf_test tests[] = { .fixup_map2 = { 3 }, .errstr = "R0 invalid mem access 'inv'", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value is preserved across register spilling", @@ -8951,6 +8960,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', good access", @@ -8989,6 +8999,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', bad access 2", @@ -9007,6 +9018,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, good access", @@ -9045,6 +9057,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, bad access 2", @@ -9063,6 +9076,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', good access", @@ -9117,6 +9131,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, good access", @@ -9153,6 +9168,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, bad access 2", @@ -9228,6 +9244,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, good access", @@ -9284,6 +9301,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', good access", @@ -9320,6 +9338,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', bad access 2", @@ -9393,6 +9412,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', good access", @@ -9431,6 +9451,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', bad access 2", @@ -9449,6 +9470,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, good access", @@ -9487,6 +9509,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, bad access 2", @@ -9505,6 +9528,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', good access", @@ -9559,6 +9583,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, good access", @@ -9595,6 +9620,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", @@ -9670,6 +9696,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, good access", @@ -9726,6 +9753,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', good access", @@ -9762,6 +9790,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', bad access 2", @@ -9876,6 +9905,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "dereference of modified ctx ptr", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 8", @@ -9890,6 +9920,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "dereference of modified ctx ptr", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 9", @@ -10365,6 +10396,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R6 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls with args", @@ -11230,6 +11262,7 @@ static struct bpf_test tests[] = { .fixup_map1 = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", @@ -11373,6 +11406,7 @@ static struct bpf_test tests[] = { .fixup_map1 = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value_ptr_or_null via arg. test1", @@ -11544,6 +11578,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = POINTER_VALUE, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 2", @@ -11575,6 +11610,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 3", @@ -11677,6 +11713,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "same insn cannot be used with different", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 6", @@ -11712,6 +11749,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 7", @@ -11746,6 +11784,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 8", @@ -11827,6 +11866,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: caller stack init to zero or map_value_or_null", @@ -12192,6 +12232,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "BPF_XADD stores into R2 packet", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "xadd/w check whether src/dst got mangled, 1", From 3789f9c3a4f55dff424424fd93c5aad829647dce Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Jun 2021 11:27:51 +0800 Subject: [PATCH 45/58] bpf: Apply F_NEEDS_EFFICIENT_UNALIGNED_ACCESS to more ACCEPT test cases. commit 0a68632488aa0129ed530af9ae9e8573f5650812 upstream If a testcase has alignment problems but is expected to be ACCEPT, verify it using F_NEEDS_EFFICIENT_UNALIGNED_ACCESS too. Maybe in the future if we add some architecture specific code to elide the unaligned memory access warnings during the test, we can execute these as well. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a402121e9812..2241e4eefde3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3787,6 +3787,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test21 (x += pkt_ptr, 2)", @@ -3812,6 +3813,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test22 (x += pkt_ptr, 3)", @@ -3842,6 +3844,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test23 (x += pkt_ptr, 4)", @@ -3894,6 +3897,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test25 (marking on <, good access)", @@ -6957,6 +6961,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = 0 /* csum_diff of 64-byte packet */, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -8923,6 +8928,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' > pkt_end, bad access 1", @@ -9094,6 +9100,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', bad access 1", @@ -9206,6 +9213,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end >= pkt_data', bad access 1", @@ -9263,6 +9271,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, bad access 1", @@ -9375,6 +9384,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' > pkt_data, bad access 1", @@ -9546,6 +9556,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', bad access 1", @@ -9658,6 +9669,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data >= pkt_meta', bad access 1", @@ -9715,6 +9727,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", @@ -11646,6 +11659,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 4", @@ -11680,6 +11694,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 5", @@ -11825,6 +11840,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 9", From 0c4acb93972e9de3c3dfaa60a532a7fa529ac708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 2 Jun 2021 11:27:52 +0800 Subject: [PATCH 46/58] selftests/bpf: add "any alignment" annotation for some tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e2c6f50e48849298bed694de03cceb537d95cdc4 upstream RISC-V does, in-general, not have "efficient unaligned access". When testing the RISC-V BPF JIT, some selftests failed in the verification due to misaligned access. Annotate these tests with the F_NEEDS_EFFICIENT_UNALIGNED_ACCESS flag. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2241e4eefde3..b0fd67ce48bd 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -963,6 +963,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "attempt to corrupt spilled", .errstr = "corrupted spill", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid src register in STX", @@ -1777,6 +1778,7 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid read past end of SK_MSG", @@ -2176,6 +2178,7 @@ static struct bpf_test tests[] = { }, .errstr = "invalid bpf_context access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check skb->hash half load not permitted, unaligned 3", From 1d80154040c27c5363d04404dcf46d738919f575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 2 Jun 2021 11:27:53 +0800 Subject: [PATCH 47/58] selftests/bpf: Avoid running unprivileged tests with alignment requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c77b0589ca29ad1859fe7d7c1ecd63c0632379fa upstream Some architectures have strict alignment requirements. In that case, the BPF verifier detects if a program has unaligned accesses and rejects them. A user can pass BPF_F_ANY_ALIGNMENT to a program to override this check. That, however, will only work when a privileged user loads a program. An unprivileged user loading a program with this flag will be rejected prior entering the verifier. Hence, it does not make sense to load unprivileged programs without strict alignment when testing the verifier. This patch avoids exactly that. Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Acked-by: Luke Nelson Link: https://lore.kernel.org/bpf/20201118071640.83773-3-bjorn.topel@gmail.com Signed-off-by: Tiezhu Yang Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b0fd67ce48bd..b44324530948 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -13045,6 +13045,19 @@ static void get_unpriv_disabled() static bool test_as_unpriv(struct bpf_test *test) { +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* Some architectures have strict alignment requirements. In + * that case, the BPF verifier detects if a program has + * unaligned accesses and rejects them. A user can pass + * BPF_F_ANY_ALIGNMENT to a program to override this + * check. That, however, will only work when a privileged user + * loads a program. An unprivileged user loading a program + * with this flag will be rejected prior entering the + * verifier. + */ + if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) + return false; +#endif return !test->prog_type || test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER || test->prog_type == BPF_PROG_TYPE_CGROUP_SKB; From c35461390b486a1f34340e969d333b04a348e599 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 27 Jan 2020 04:56:15 -0500 Subject: [PATCH 48/58] bnxt_en: Remove the setting of dev_port. commit 1d86859fdf31a0d50cc82b5d0d6bfb5fe98f6c00 upstream. The dev_port is meant to distinguish the network ports belonging to the same PCI function. Our devices only have one network port associated with each PCI function and so we should not set it for correctness. Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6033970fb667..ebcf4ea66385 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5252,7 +5252,6 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) pf->fw_fid = le16_to_cpu(resp->fid); pf->port_id = le16_to_cpu(resp->port_id); - bp->dev->dev_port = pf->port_id; memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN); pf->first_vf_id = le16_to_cpu(resp->first_vf_id); pf->max_vfs = le16_to_cpu(resp->max_vfs); From 0b4c9255a1d0d08fa8a2d8bc97302756d0e9e98d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 1 Jun 2019 01:27:22 -0700 Subject: [PATCH 49/58] perf/cgroups: Don't rotate events for cgroups unnecessarily commit fd7d55172d1e2e501e6da0a5c1de25f06612dc2e upstream. Currently perf_rotate_context assumes that if the context's nr_events != nr_active a rotation is necessary for perf event multiplexing. With cgroups, nr_events is the total count of events for all cgroups and nr_active will not include events in a cgroup other than the current task's. This makes rotation appear necessary for cgroups when it is not. Add a perf_event_context flag that is set when rotation is necessary. Clear the flag during sched_out and set it when a flexible sched_in fails due to resources. Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/20190601082722.44543-1-irogers@google.com Signed-off-by: Ingo Molnar Signed-off-by: Wen Yang Signed-off-by: Greg Kroah-Hartman --- include/linux/perf_event.h | 5 +++++ kernel/events/core.c | 42 ++++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d8b4d31acd18..efe30b9b1190 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -747,6 +747,11 @@ struct perf_event_context { int nr_stat; int nr_freq; int rotate_disable; + /* + * Set when nr_events != nr_active, except tolerant to events not + * necessary to be active due to scheduling constraints, such as cgroups. + */ + int rotate_necessary; atomic_t refcount; struct task_struct *task; diff --git a/kernel/events/core.c b/kernel/events/core.c index b8b74a4a524c..56e3789b5c76 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2952,6 +2952,12 @@ static void ctx_sched_out(struct perf_event_context *ctx, if (!ctx->nr_active || !(is_active & EVENT_ALL)) return; + /* + * If we had been multiplexing, no rotations are necessary, now no events + * are active. + */ + ctx->rotate_necessary = 0; + perf_pmu_disable(ctx->pmu); if (is_active & EVENT_PINNED) { list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list) @@ -3319,10 +3325,13 @@ static int flexible_sched_in(struct perf_event *event, void *data) return 0; if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) { - if (!group_sched_in(event, sid->cpuctx, sid->ctx)) - list_add_tail(&event->active_list, &sid->ctx->flexible_active); - else + int ret = group_sched_in(event, sid->cpuctx, sid->ctx); + if (ret) { sid->can_add_hw = 0; + sid->ctx->rotate_necessary = 1; + return 0; + } + list_add_tail(&event->active_list, &sid->ctx->flexible_active); } return 0; @@ -3690,24 +3699,17 @@ ctx_first_active(struct perf_event_context *ctx) static bool perf_rotate_context(struct perf_cpu_context *cpuctx) { struct perf_event *cpu_event = NULL, *task_event = NULL; - bool cpu_rotate = false, task_rotate = false; - struct perf_event_context *ctx = NULL; + struct perf_event_context *task_ctx = NULL; + int cpu_rotate, task_rotate; /* * Since we run this from IRQ context, nobody can install new * events, thus the event count values are stable. */ - if (cpuctx->ctx.nr_events) { - if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) - cpu_rotate = true; - } - - ctx = cpuctx->task_ctx; - if (ctx && ctx->nr_events) { - if (ctx->nr_events != ctx->nr_active) - task_rotate = true; - } + cpu_rotate = cpuctx->ctx.rotate_necessary; + task_ctx = cpuctx->task_ctx; + task_rotate = task_ctx ? task_ctx->rotate_necessary : 0; if (!(cpu_rotate || task_rotate)) return false; @@ -3716,7 +3718,7 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx) perf_pmu_disable(cpuctx->ctx.pmu); if (task_rotate) - task_event = ctx_first_active(ctx); + task_event = ctx_first_active(task_ctx); if (cpu_rotate) cpu_event = ctx_first_active(&cpuctx->ctx); @@ -3724,17 +3726,17 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx) * As per the order given at ctx_resched() first 'pop' task flexible * and then, if needed CPU flexible. */ - if (task_event || (ctx && cpu_event)) - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); + if (task_event || (task_ctx && cpu_event)) + ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE); if (cpu_event) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (task_event) - rotate_ctx(ctx, task_event); + rotate_ctx(task_ctx, task_event); if (cpu_event) rotate_ctx(&cpuctx->ctx, cpu_event); - perf_event_sched_in(cpuctx, ctx, current); + perf_event_sched_in(cpuctx, task_ctx, current); perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); From c31789645b7bdcf6f55969974d55336831652055 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 8 Oct 2019 09:59:49 -0700 Subject: [PATCH 50/58] perf/core: Fix corner case in perf_rotate_context() commit 7fa343b7fdc4f351de4e3f28d5c285937dd1f42f upstream. In perf_rotate_context(), when the first cpu flexible event fail to schedule, cpu_rotate is 1, while cpu_event is NULL. Since cpu_event is NULL, perf_rotate_context will _NOT_ call cpu_ctx_sched_out(), thus cpuctx->ctx.is_active will have EVENT_FLEXIBLE set. Then, the next perf_event_sched_in() will skip all cpu flexible events because of the EVENT_FLEXIBLE bit. In the next call of perf_rotate_context(), cpu_rotate stays 1, and cpu_event stays NULL, so this process repeats. The end result is, flexible events on this cpu will not be scheduled (until another event being added to the cpuctx). Here is an easy repro of this issue. On Intel CPUs, where ref-cycles could only use one counter, run one pinned event for ref-cycles, one flexible event for ref-cycles, and one flexible event for cycles. The flexible ref-cycles is never scheduled, which is expected. However, because of this issue, the cycles event is never scheduled either. $ perf stat -e ref-cycles:D,ref-cycles,cycles -C 5 -I 1000 time counts unit events 1.000152973 15,412,480 ref-cycles:D 1.000152973 ref-cycles (0.00%) 1.000152973 cycles (0.00%) 2.000486957 18,263,120 ref-cycles:D 2.000486957 ref-cycles (0.00%) 2.000486957 cycles (0.00%) To fix this, when the flexible_active list is empty, try rotate the first event in the flexible_groups. Also, rename ctx_first_active() to ctx_event_to_rotate(), which is more accurate. Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sasha Levin Cc: Thomas Gleixner Fixes: 8d5bce0c37fa ("perf/core: Optimize perf_rotate_context() event scheduling") Link: https://lkml.kernel.org/r/20191008165949.920548-1-songliubraving@fb.com Signed-off-by: Ingo Molnar Signed-off-by: Wen Yang Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 56e3789b5c76..d3be2cd57af1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3689,11 +3689,23 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event) perf_event_groups_insert(&ctx->flexible_groups, event); } +/* pick an event from the flexible_groups to rotate */ static inline struct perf_event * -ctx_first_active(struct perf_event_context *ctx) +ctx_event_to_rotate(struct perf_event_context *ctx) { - return list_first_entry_or_null(&ctx->flexible_active, - struct perf_event, active_list); + struct perf_event *event; + + /* pick the first active flexible event */ + event = list_first_entry_or_null(&ctx->flexible_active, + struct perf_event, active_list); + + /* if no active flexible event, pick the first event */ + if (!event) { + event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree), + typeof(*event), group_node); + } + + return event; } static bool perf_rotate_context(struct perf_cpu_context *cpuctx) @@ -3718,9 +3730,9 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx) perf_pmu_disable(cpuctx->ctx.pmu); if (task_rotate) - task_event = ctx_first_active(task_ctx); + task_event = ctx_event_to_rotate(task_ctx); if (cpu_rotate) - cpu_event = ctx_first_active(&cpuctx->ctx); + cpu_event = ctx_event_to_rotate(&cpuctx->ctx); /* * As per the order given at ctx_resched() first 'pop' task flexible From 6b678e02e63e27885b750ab8ccf7c280e34530a0 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 30 Apr 2021 19:59:51 +0800 Subject: [PATCH 51/58] btrfs: fix unmountable seed device after fstrim commit 5e753a817b2d5991dfe8a801b7b1e8e79a1c5a20 upstream. The following test case reproduces an issue of wrongly freeing in-use blocks on the readonly seed device when fstrim is called on the rw sprout device. As shown below. Create a seed device and add a sprout device to it: $ mkfs.btrfs -fq -dsingle -msingle /dev/loop0 $ btrfstune -S 1 /dev/loop0 $ mount /dev/loop0 /btrfs $ btrfs dev add -f /dev/loop1 /btrfs BTRFS info (device loop0): relocating block group 290455552 flags system BTRFS info (device loop0): relocating block group 1048576 flags system BTRFS info (device loop0): disk added /dev/loop1 $ umount /btrfs Mount the sprout device and run fstrim: $ mount /dev/loop1 /btrfs $ fstrim /btrfs $ umount /btrfs Now try to mount the seed device, and it fails: $ mount /dev/loop0 /btrfs mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error. Block 5292032 is missing on the readonly seed device: $ dmesg -kt | tail BTRFS error (device loop0): bad tree block start, want 5292032 have 0 BTRFS warning (device loop0): couldn't read-tree root BTRFS error (device loop0): open_ctree failed >From the dump-tree of the seed device (taken before the fstrim). Block 5292032 belonged to the block group starting at 5242880: $ btrfs inspect dump-tree -e /dev/loop0 | grep -A1 BLOCK_GROUP item 3 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16169 itemsize 24 block group used 114688 chunk_objectid 256 flags METADATA >From the dump-tree of the sprout device (taken before the fstrim). fstrim used block-group 5242880 to find the related free space to free: $ btrfs inspect dump-tree -e /dev/loop1 | grep -A1 BLOCK_GROUP item 1 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16226 itemsize 24 block group used 32768 chunk_objectid 256 flags METADATA BPF kernel tracing the fstrim command finds the missing block 5292032 within the range of the discarded blocks as below: kprobe:btrfs_discard_extent { printf("freeing start %llu end %llu num_bytes %llu:\n", arg1, arg1+arg2, arg2); } freeing start 5259264 end 5406720 num_bytes 147456 Fix this by avoiding the discard command to the readonly seed device. Reported-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 014f956be66a..bf46ed74eae6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1984,16 +1984,20 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, for (i = 0; i < bbio->num_stripes; i++, stripe++) { u64 bytes; struct request_queue *req_q; + struct btrfs_device *device = stripe->dev; - if (!stripe->dev->bdev) { + if (!device->bdev) { ASSERT(btrfs_test_opt(fs_info, DEGRADED)); continue; } - req_q = bdev_get_queue(stripe->dev->bdev); + req_q = bdev_get_queue(device->bdev); if (!blk_queue_discard(req_q)) continue; - ret = btrfs_issue_discard(stripe->dev->bdev, + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) + continue; + + ret = btrfs_issue_discard(device->bdev, stripe->physical, stripe->length, &bytes); From 982903d43ecb2a3b2550af0560c0c66482a6d99f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 21 Apr 2021 19:21:22 -0700 Subject: [PATCH 52/58] KVM: SVM: Truncate GPR value for DR and CR accesses in !64-bit mode commit 0884335a2e653b8a045083aa1d57ce74269ac81d upstream. Drop bits 63:32 on loads/stores to/from DRs and CRs when the vCPU is not in 64-bit mode. The APM states bits 63:32 are dropped for both DRs and CRs: In 64-bit mode, the operand size is fixed at 64 bits without the need for a REX prefix. In non-64-bit mode, the operand size is fixed at 32 bits and the upper 32 bits of the destination are forced to 0. Fixes: 7ff76d58a9dc ("KVM: SVM: enhance MOV CR intercept handler") Fixes: cae3797a4639 ("KVM: SVM: enhance mov DR intercept handler") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210422022128.3464144-4-seanjc@google.com> Signed-off-by: Paolo Bonzini [sudip: manual backport to old file] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8cb9277aa6ff..ad24e6777277 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4033,7 +4033,7 @@ static int cr_interception(struct vcpu_svm *svm) err = 0; if (cr >= 16) { /* mov to cr */ cr -= 16; - val = kvm_register_read(&svm->vcpu, reg); + val = kvm_register_readl(&svm->vcpu, reg); switch (cr) { case 0: if (!check_selective_cr0_intercepted(svm, val)) @@ -4078,7 +4078,7 @@ static int cr_interception(struct vcpu_svm *svm) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } - kvm_register_write(&svm->vcpu, reg, val); + kvm_register_writel(&svm->vcpu, reg, val); } return kvm_complete_insn_gp(&svm->vcpu, err); } @@ -4108,13 +4108,13 @@ static int dr_interception(struct vcpu_svm *svm) if (dr >= 16) { /* mov to DRn */ if (!kvm_require_dr(&svm->vcpu, dr - 16)) return 1; - val = kvm_register_read(&svm->vcpu, reg); + val = kvm_register_readl(&svm->vcpu, reg); kvm_set_dr(&svm->vcpu, dr - 16, val); } else { if (!kvm_require_dr(&svm->vcpu, dr)) return 1; kvm_get_dr(&svm->vcpu, dr, &val); - kvm_register_write(&svm->vcpu, reg, val); + kvm_register_writel(&svm->vcpu, reg, val); } return kvm_skip_emulated_instruction(&svm->vcpu); From 2ca5f9f13a3ec1f4cb75a6383a1294e384667ece Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 May 2021 09:05:41 +0100 Subject: [PATCH 53/58] KVM: arm64: Fix debug register indexing commit cb853ded1d25e5b026ce115dbcde69e3d7e2e831 upstream. Commit 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on reset") flipped the register number to 0 for all the debug registers in the sysreg table, hereby indicating that these registers live in a separate shadow structure. However, the author of this patch failed to realise that all the accessors are using that particular index instead of the register encoding, resulting in all the registers hitting index 0. Not quite a valid implementation of the architecture... Address the issue by fixing all the accessors to use the CRm field of the encoding, which contains the debug register index. Fixes: 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on reset") Reported-by: Ricardo Koller Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/sys_regs.c | 42 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index fe97b2ad82b9..98e8bc919583 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -426,14 +426,14 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, dbg_reg); else dbg_to_reg(vcpu, p, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -441,7 +441,7 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -451,7 +451,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -461,21 +461,21 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val; } static bool trap_bcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, dbg_reg); else dbg_to_reg(vcpu, p, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -483,7 +483,7 @@ static bool trap_bcr(struct kvm_vcpu *vcpu, static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -494,7 +494,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -504,22 +504,22 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val; } static bool trap_wvr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, dbg_reg); else dbg_to_reg(vcpu, p, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); + trace_trap_reg(__func__, rd->CRm, p->is_write, + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]); return true; } @@ -527,7 +527,7 @@ static bool trap_wvr(struct kvm_vcpu *vcpu, static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -537,7 +537,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -547,21 +547,21 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val; } static bool trap_wcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, dbg_reg); else dbg_to_reg(vcpu, p, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -569,7 +569,7 @@ static bool trap_wcr(struct kvm_vcpu *vcpu, static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -579,7 +579,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -589,7 +589,7 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val; } static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) From eeb48f5ca3ab66d793a911917712929875385059 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Wed, 17 Oct 2018 15:41:21 -0700 Subject: [PATCH 54/58] ACPI: probe ECDT before loading AML tables regardless of module-level code flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d737f333b211361b6e239fc753b84c3be2634aaa upstream. It was discovered that AML tables were loaded before or after the ECDT depending on acpi_gbl_execute_tables_as_methods. According to the ACPI spec, the ECDT should be loaded before the namespace is populated by loading AML tables (DSDT and SSDT). Since the ECDT should be loaded early in the boot process, this change moves the ECDT probing to acpi_early_init. Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki Cc: Laurențiu Păncescu Cc: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/bus.c | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 92a146861086..1063b762dda2 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1054,15 +1054,17 @@ void __init acpi_early_init(void) goto error0; } - if (!acpi_gbl_execute_tables_as_methods && - acpi_gbl_group_module_level_code) { - status = acpi_load_tables(); - if (ACPI_FAILURE(status)) { - printk(KERN_ERR PREFIX - "Unable to load the System Description Tables\n"); - goto error0; - } - } + /* + * ACPI 2.0 requires the EC driver to be loaded and work before + * the EC device is found in the namespace (i.e. before + * acpi_load_tables() is called). + * + * This is accomplished by looking for the ECDT table, and getting + * the EC parameters out of that. + * + * Ignore the result. Not having an ECDT is not fatal. + */ + status = acpi_ec_ecdt_probe(); #ifdef CONFIG_X86 if (!acpi_ioapic) { @@ -1133,25 +1135,11 @@ static int __init acpi_bus_init(void) acpi_os_initialize1(); - /* - * ACPI 2.0 requires the EC driver to be loaded and work before - * the EC device is found in the namespace (i.e. before - * acpi_load_tables() is called). - * - * This is accomplished by looking for the ECDT table, and getting - * the EC parameters out of that. - */ - status = acpi_ec_ecdt_probe(); - /* Ignore result. Not having an ECDT is not fatal. */ - - if (acpi_gbl_execute_tables_as_methods || - !acpi_gbl_group_module_level_code) { - status = acpi_load_tables(); - if (ACPI_FAILURE(status)) { - printk(KERN_ERR PREFIX - "Unable to load the System Description Tables\n"); - goto error1; - } + status = acpi_load_tables(); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR PREFIX + "Unable to load the System Description Tables\n"); + goto error1; } status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE); From f47c2c06982efbeb8f3aaa418fca1e3cf3b9792c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Jan 2019 00:34:37 +0100 Subject: [PATCH 55/58] ACPI: EC: Look for ECDT EC after calling acpi_load_tables() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b1c0330823fe842dbb34641f1410f0afa51c29d3 upstream. Some systems have had functional issues since commit 5a8361f7ecce (ACPICA: Integrate package handling with module-level code) that, among other things, changed the initial values of the acpi_gbl_group_module_level_code and acpi_gbl_parse_table_as_term_list global flags in ACPICA which implicitly caused acpi_ec_ecdt_probe() to be called before acpi_load_tables() on the vast majority of platforms. Namely, before commit 5a8361f7ecce, acpi_load_tables() was called from acpi_early_init() if acpi_gbl_parse_table_as_term_list was FALSE and acpi_gbl_group_module_level_code was TRUE, which almost always was the case as FALSE and TRUE were their initial values, respectively. The acpi_gbl_parse_table_as_term_list value would be changed to TRUE for a couple of platforms in acpi_quirks_dmi_table[], but it remained FALSE in the vast majority of cases. After commit 5a8361f7ecce, the initial values of the two flags have been reversed, so in effect acpi_load_tables() has not been called from acpi_early_init() any more. That, in turn, affects acpi_ec_ecdt_probe() which is invoked before acpi_load_tables() now and it is not possible to evaluate the _REG method for the EC address space handler installed by it. That effectively causes the EC address space to be inaccessible to AML on platforms with an ECDT matching the EC device definition in the DSDT and functional problems ensue in there. Because the default behavior before commit 5a8361f7ecce was to call acpi_ec_ecdt_probe() after acpi_load_tables(), it should be safe to do that again. Moreover, the EC address space handler installed by acpi_ec_ecdt_probe() is only needed for AML to be able to access the EC address space and the only AML that can run during acpi_load_tables() is module-level code which only is allowed to access address spaces with default handlers (memory, I/O and PCI config space). For this reason, move the acpi_ec_ecdt_probe() invocation back to acpi_bus_init(), from where it was taken away by commit d737f333b211 (ACPI: probe ECDT before loading AML tables regardless of module-level code flag), and put it after the invocation of acpi_load_tables() to restore the original code ordering from before commit 5a8361f7ecce. Fixes: 5a8361f7ecce ("ACPICA: Integrate package handling with module-level code") Link: https://bugzilla.kernel.org/show_bug.cgi?id=199981 Reported-by: step-ali Reported-by: Charles Stanhope Tested-by: Charles Stanhope Reported-by: Paulo Nascimento Reported-by: David Purton Reported-by: Adam Harvey Reported-by: Zhang Rui Tested-by: Zhang Rui Tested-by: Jean-Marc Lenoir Signed-off-by: Rafael J. Wysocki Cc: Laurențiu Păncescu Cc: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/bus.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 1063b762dda2..d60e57d14c85 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1054,18 +1054,6 @@ void __init acpi_early_init(void) goto error0; } - /* - * ACPI 2.0 requires the EC driver to be loaded and work before - * the EC device is found in the namespace (i.e. before - * acpi_load_tables() is called). - * - * This is accomplished by looking for the ECDT table, and getting - * the EC parameters out of that. - * - * Ignore the result. Not having an ECDT is not fatal. - */ - status = acpi_ec_ecdt_probe(); - #ifdef CONFIG_X86 if (!acpi_ioapic) { /* compatible (0) means level (3) */ @@ -1142,6 +1130,18 @@ static int __init acpi_bus_init(void) goto error1; } + /* + * ACPI 2.0 requires the EC driver to be loaded and work before the EC + * device is found in the namespace. + * + * This is accomplished by looking for the ECDT table and getting the EC + * parameters out of that. + * + * Do that before calling acpi_initialize_objects() which may trigger EC + * address space accesses. + */ + acpi_ec_ecdt_probe(); + status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX From b5cd7f229609cf328664aedb056a6fd744e0068b Mon Sep 17 00:00:00 2001 From: Cheng Jian Date: Fri, 13 Dec 2019 10:45:30 +0800 Subject: [PATCH 56/58] sched/fair: Optimize select_idle_cpu commit 60588bfa223ff675b95f866249f90616613fbe31 upstream. select_idle_cpu() will scan the LLC domain for idle CPUs, it's always expensive. so the next commit : 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()") introduces a way to limit how many CPUs we scan. But it consume some CPUs out of 'nr' that are not allowed for the task and thus waste our attempts. The function always return nr_cpumask_bits, and we can't find a CPU which our task is allowed to run. Cpumask may be too big, similar to select_idle_core(), use per_cpu_ptr 'select_idle_mask' to prevent stack overflow. Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()") Signed-off-by: Cheng Jian Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Srikar Dronamraju Reviewed-by: Vincent Guittot Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20191213024530.28052-1-cj.chengjian@huawei.com Signed-off-by: Yang Wei Tested-by: Yang Wei Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 80392cdd5f3b..f06687053f96 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6154,6 +6154,7 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd */ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target) { + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); struct sched_domain *this_sd; u64 avg_cost, avg_idle; u64 time, cost; @@ -6184,11 +6185,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t time = local_clock(); - for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); + + for_each_cpu_wrap(cpu, cpus, target) { if (!--nr) return -1; - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) - continue; if (available_idle_cpu(cpu)) break; } From ed6a024f4888df6fa2b8a3e92dea5f6a63649b84 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 18 May 2021 18:13:42 +0200 Subject: [PATCH 57/58] xen-pciback: redo VF placement in the virtual topology The commit referenced below was incomplete: It merely affected what would get written to the vdev- xenstore node. The guest would still find the function at the original function number as long as __xen_pcibk_get_pci_dev() wouldn't be in sync. The same goes for AER wrt __xen_pcibk_get_pcifront_dev(). Undo overriding the function to zero and instead make sure that VFs at function zero remain alone in their slot. This has the added benefit of improving overall capacity, considering that there's only a total of 32 slots available right now (PCI segment and bus can both only ever be zero at present). This is upstream commit 4ba50e7c423c29639878c00573288869aa627068. Fixes: 8a5248fe10b1 ("xen PV passthru: assign SR-IOV virtual functions to separate virtual slots") Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/8def783b-404c-3452-196d-3f3fd4d72c9e@suse.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-pciback/vpci.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index f6ba18191c0f..30313084f06c 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -69,7 +69,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, int devid, publish_pci_dev_cb publish_cb) { - int err = 0, slot, func = -1; + int err = 0, slot, func = PCI_FUNC(dev->devfn); struct pci_dev_entry *t, *dev_entry; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; @@ -94,23 +94,26 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, /* * Keep multi-function devices together on the virtual PCI bus, except - * virtual functions. + * that we want to keep virtual functions at func 0 on their own. They + * aren't multi-function devices and hence their presence at func 0 + * may cause guests to not scan the other functions. */ - if (!dev->is_virtfn) { + if (!dev->is_virtfn || func) { for (slot = 0; slot < PCI_SLOT_MAX; slot++) { if (list_empty(&vpci_dev->dev_list[slot])) continue; t = list_entry(list_first(&vpci_dev->dev_list[slot]), struct pci_dev_entry, list); + if (t->dev->is_virtfn && !PCI_FUNC(t->dev->devfn)) + continue; if (match_slot(dev, t->dev)) { pr_info("vpci: %s: assign to virtual slot %d func %d\n", pci_name(dev), slot, - PCI_FUNC(dev->devfn)); + func); list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]); - func = PCI_FUNC(dev->devfn); goto unlock; } } @@ -123,7 +126,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, pci_name(dev), slot); list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]); - func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn); goto unlock; } } From 9a2dc0e6c531d595bcdf2c66d0be131679bd02df Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Jun 2021 13:24:09 +0200 Subject: [PATCH 58/58] Linux 4.19.194 Link: https://lore.kernel.org/r/20210608175932.263480586@linuxfoundation.org Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20210609062858.532803536@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Jason Self Tested-by: Guenter Roeck Tested-by: Pavel Machek (CIP) Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e5d41b6792d7..8ea26b64b334 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 193 +SUBLEVEL = 194 EXTRAVERSION = NAME = "People's Front"