From 9552c3a4a06aedae1a36a6832567195229c53d87 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Nov 2022 17:22:59 -0300 Subject: [PATCH 01/80] ARM: dts: imx6qdl-gw560x: Remove incorrect 'uart-has-rtscts' [ Upstream commit 9dfbc72256b5de608ad10989bcbafdbbd1ac8d4e ] The following build warning is seen when running: make dtbs_check DT_SCHEMA_FILES=fsl-imx-uart.yaml arch/arm/boot/dts/imx6dl-gw560x.dtb: serial@2020000: rts-gpios: False schema does not allow [[20, 1, 0]] From schema: Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml The imx6qdl-gw560x board does not expose the UART RTS and CTS as native UART pins, so 'uart-has-rtscts' should not be used. Using 'uart-has-rtscts' with 'rts-gpios' is an invalid combination detected by serial.yaml. Fix the problem by removing the incorrect 'uart-has-rtscts' property. Fixes: b8a559feffb2 ("ARM: dts: imx: add Gateworks Ventana GW5600 support") Signed-off-by: Fabio Estevam Acked-by: Tim Harvey Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl-gw560x.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi index b5986efe1090..143d249b821e 100644 --- a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi @@ -463,7 +463,6 @@ &uart1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1>; - uart-has-rtscts; rts-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>; status = "okay"; }; From cc906a3a4432da143ab3d2e894f99ddeff500cd3 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 22 Nov 2022 21:48:23 +0800 Subject: [PATCH 02/80] HID: intel_ish-hid: Add check for ishtp_dma_tx_map [ Upstream commit b3d40c3ec3dc4ad78017de6c3a38979f57aaaab8 ] As the kcalloc may return NULL pointer, it should be better to check the ishtp_dma_tx_map before use in order to avoid NULL pointer dereference. Fixes: 3703f53b99e4 ("HID: intel_ish-hid: ISH Transport layer") Signed-off-by: Jiasheng Jiang Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-ish-hid/ishtp/dma-if.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ishtp/dma-if.c b/drivers/hid/intel-ish-hid/ishtp/dma-if.c index 2783f3666114..ff4419c8ed4f 100644 --- a/drivers/hid/intel-ish-hid/ishtp/dma-if.c +++ b/drivers/hid/intel-ish-hid/ishtp/dma-if.c @@ -113,6 +113,11 @@ void *ishtp_cl_get_dma_send_buf(struct ishtp_device *dev, int required_slots = (size / DMA_SLOT_SIZE) + 1 * (size % DMA_SLOT_SIZE != 0); + if (!dev->ishtp_dma_tx_map) { + dev_err(dev->devc, "Fail to allocate Tx map\n"); + return NULL; + } + spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags); for (i = 0; i <= (dev->ishtp_dma_num_slots - required_slots); i++) { free = 1; @@ -159,6 +164,11 @@ void ishtp_cl_release_dma_acked_mem(struct ishtp_device *dev, return; } + if (!dev->ishtp_dma_tx_map) { + dev_err(dev->devc, "Fail to allocate Tx map\n"); + return; + } + i = (msg_addr - dev->ishtp_host_dma_tx_buf) / DMA_SLOT_SIZE; spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags); for (j = 0; j < acked_slots; j++) { From 0db40e23b56d217eebd385bebb64057ef764b2c7 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 09:48:24 +0400 Subject: [PATCH 03/80] EDAC/highbank: Fix memory leak in highbank_mc_probe() [ Upstream commit e7a293658c20a7945014570e1921bf7d25d68a36 ] When devres_open_group() fails, it returns -ENOMEM without freeing memory allocated by edac_mc_alloc(). Call edac_mc_free() on the error handling path to avoid a memory leak. [ bp: Massage commit message. ] Fixes: a1b01edb2745 ("edac: add support for Calxeda highbank memory controller") Signed-off-by: Miaoqian Lin Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20221229054825.1361993-1-linmq006@gmail.com Signed-off-by: Sasha Levin --- drivers/edac/highbank_mc_edac.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 6092e61be605..bcf41601a977 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -185,8 +185,10 @@ static int highbank_mc_probe(struct platform_device *pdev) drvdata = mci->pvt_info; platform_set_drvdata(pdev, mci); - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) - return -ENOMEM; + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + res = -ENOMEM; + goto free; + } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) { @@ -254,6 +256,7 @@ err2: edac_mc_del_mc(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); +free: edac_mc_free(mci); return res; } From 89115a857655748e9d86421382b7208db797d9b6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jan 2023 16:47:41 +0900 Subject: [PATCH 04/80] tomoyo: fix broken dependency on *.conf.default [ Upstream commit eaf2213ba563b2d74a1f2c13a6b258273f689802 ] If *.conf.default is updated, builtin-policy.h should be rebuilt, but this does not work when compiled with O= option. [Without this commit] $ touch security/tomoyo/policy/exception_policy.conf.default $ make O=/tmp security/tomoyo/ make[1]: Entering directory '/tmp' GEN Makefile CALL /home/masahiro/ref/linux/scripts/checksyscalls.sh DESCEND objtool make[1]: Leaving directory '/tmp' [With this commit] $ touch security/tomoyo/policy/exception_policy.conf.default $ make O=/tmp security/tomoyo/ make[1]: Entering directory '/tmp' GEN Makefile CALL /home/masahiro/ref/linux/scripts/checksyscalls.sh DESCEND objtool POLICY security/tomoyo/builtin-policy.h CC security/tomoyo/common.o AR security/tomoyo/built-in.a make[1]: Leaving directory '/tmp' $(srctree)/ is essential because $(wildcard ) does not follow VPATH. Fixes: f02dee2d148b ("tomoyo: Do not generate empty policy files") Signed-off-by: Masahiro Yamada Signed-off-by: Tetsuo Handa Signed-off-by: Sasha Levin --- security/tomoyo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index cca5a3012fee..221eaadffb09 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -10,7 +10,7 @@ endef quiet_cmd_policy = POLICY $@ cmd_policy = ($(call do_policy,profile); $(call do_policy,exception_policy); $(call do_policy,domain_policy); $(call do_policy,manager); $(call do_policy,stat)) >$@ -$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(src)/policy/*.conf.default) FORCE +$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(srctree)/$(src)/policy/*.conf.default) FORCE $(call if_changed,policy) $(obj)/common.o: $(obj)/builtin-policy.h From fede0e6fe397f8c5cadc8635fcbd8cada6e21aa0 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:11 -0500 Subject: [PATCH 05/80] IB/hfi1: Reject a zero-length user expected buffer [ Upstream commit 0a0a6e80472c98947d73c3d13bcd7d101895f55d ] A zero length user buffer makes no sense and the code does not handle it correctly. Instead, reject a zero length as invalid. Fixes: 97736f36dbeb ("IB/hfi1: Validate page aligned for a given virtual addres") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328547120.1472310.6362802432127399257.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 4e417ed08b09..4e0f7cd14c57 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -325,6 +325,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, if (!PAGE_ALIGNED(tinfo->vaddr)) return -EINVAL; + if (tinfo->length == 0) + return -EINVAL; tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); if (!tidbuf) From d426437b18431f18cbaa3d595e100d2f8024721d Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:16 -0500 Subject: [PATCH 06/80] IB/hfi1: Reserve user expected TIDs [ Upstream commit ecf91551cdd2925ed6d9a9d99074fa5f67b90596 ] To avoid a race, reserve the number of user expected TIDs before setup. Fixes: 7e7a436ecb6e ("staging/hfi1: Add TID entry program function body") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328547636.1472310.7419712824785353905.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 4e0f7cd14c57..0a118f2963fb 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -351,16 +351,13 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, /* Find sets of physically contiguous pages */ tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); - /* - * We don't need to access this under a lock since tid_used is per - * process and the same process cannot be in hfi1_user_exp_rcv_clear() - * and hfi1_user_exp_rcv_setup() at the same time. - */ + /* Reserve the number of expected tids to be used. */ spin_lock(&fd->tid_lock); if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) pageset_count = fd->tid_limit - fd->tid_used; else pageset_count = tidbuf->n_psets; + fd->tid_used += pageset_count; spin_unlock(&fd->tid_lock); if (!pageset_count) @@ -469,10 +466,11 @@ unlock: nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); + /* adjust reserved tid_used to actual count */ + spin_lock(&fd->tid_lock); + fd->tid_used -= pageset_count - tididx; + spin_unlock(&fd->tid_lock); if (tididx) { - spin_lock(&fd->tid_lock); - fd->tid_used += tididx; - spin_unlock(&fd->tid_lock); tinfo->tidcnt = tididx; tinfo->length = mapped_pages * PAGE_SIZE; From 5f38dcf32c69113bf4cce7ada1d9e172f4c2ce56 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:21 -0500 Subject: [PATCH 07/80] IB/hfi1: Fix expected receive setup error exit issues [ Upstream commit e0c4a422f5246abefbf7c178ef99a1f2dc3c5f62 ] Fix three error exit issues in expected receive setup. Re-arrange error exits to increase readability. Issues and fixes: 1. Possible missed page unpin if tidlist copyout fails and not all pinned pages where made part of a TID. Fix: Unpin the unused pages. 2. Return success with unset return values tidcnt and length when no pages were pinned. Fix: Return -ENOSPC if no pages were pinned. 3. Return success with unset return values tidcnt and length when no rcvarray entries available. Fix: Return -ENOSPC if no rcvarray entries are available. Fixes: 7e7a436ecb6e ("staging/hfi1: Add TID entry program function body") Fixes: 97736f36dbeb ("IB/hfi1: Validate page aligned for a given virtual addres") Fixes: f404ca4c7ea8 ("IB/hfi1: Refactor hfi_user_exp_rcv_setup() IOCTL") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328548150.1472310.1492305874804187634.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 87 ++++++++++++++--------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0a118f2963fb..dab823aac95e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -337,15 +337,14 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), GFP_KERNEL); if (!tidbuf->psets) { - kfree(tidbuf); - return -ENOMEM; + ret = -ENOMEM; + goto fail_release_mem; } pinned = pin_rcv_pages(fd, tidbuf); if (pinned <= 0) { - kfree(tidbuf->psets); - kfree(tidbuf); - return pinned; + ret = (pinned < 0) ? pinned : -ENOSPC; + goto fail_unpin; } /* Find sets of physically contiguous pages */ @@ -360,14 +359,16 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, fd->tid_used += pageset_count; spin_unlock(&fd->tid_lock); - if (!pageset_count) - goto bail; + if (!pageset_count) { + ret = -ENOSPC; + goto fail_unreserve; + } ngroups = pageset_count / dd->rcv_entries.group_size; tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); if (!tidlist) { ret = -ENOMEM; - goto nomem; + goto fail_unreserve; } tididx = 0; @@ -463,44 +464,60 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, } unlock: mutex_unlock(&uctxt->exp_mutex); -nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); + + /* fail if nothing was programmed, set error if none provided */ + if (tididx == 0) { + if (ret >= 0) + ret = -ENOSPC; + goto fail_unreserve; + } + /* adjust reserved tid_used to actual count */ spin_lock(&fd->tid_lock); fd->tid_used -= pageset_count - tididx; spin_unlock(&fd->tid_lock); - if (tididx) { - tinfo->tidcnt = tididx; - tinfo->length = mapped_pages * PAGE_SIZE; - if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), - tidlist, sizeof(tidlist[0]) * tididx)) { - /* - * On failure to copy to the user level, we need to undo - * everything done so far so we don't leak resources. - */ - tinfo->tidlist = (unsigned long)&tidlist; - hfi1_user_exp_rcv_clear(fd, tinfo); - tinfo->tidlist = 0; - ret = -EFAULT; - goto bail; - } + /* unpin all pages not covered by a TID */ + unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, + false); + + tinfo->tidcnt = tididx; + tinfo->length = mapped_pages * PAGE_SIZE; + + if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), + tidlist, sizeof(tidlist[0]) * tididx)) { + ret = -EFAULT; + goto fail_unprogram; } - /* - * If not everything was mapped (due to insufficient RcvArray entries, - * for example), unpin all unmapped pages so we can pin them nex time. - */ - if (mapped_pages != pinned) - unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, - (pinned - mapped_pages), false); -bail: - kfree(tidbuf->psets); - kfree(tidlist); kfree(tidbuf->pages); + kfree(tidbuf->psets); kfree(tidbuf); - return ret > 0 ? 0 : ret; + kfree(tidlist); + return 0; + +fail_unprogram: + /* unprogram, unmap, and unpin all allocated TIDs */ + tinfo->tidlist = (unsigned long)tidlist; + hfi1_user_exp_rcv_clear(fd, tinfo); + tinfo->tidlist = 0; + pinned = 0; /* nothing left to unpin */ + pageset_count = 0; /* nothing left reserved */ +fail_unreserve: + spin_lock(&fd->tid_lock); + fd->tid_used -= pageset_count; + spin_unlock(&fd->tid_lock); +fail_unpin: + if (pinned > 0) + unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); +fail_release_mem: + kfree(tidbuf->pages); + kfree(tidbuf->psets); + kfree(tidbuf); + kfree(tidlist); + return ret; } int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, From a89bb9bc739eef64dd56a997d34fd919024c0feb Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 10 Jan 2023 13:49:30 +0100 Subject: [PATCH 08/80] affs: initialize fsdata in affs_truncate() [ Upstream commit eef034ac6690118c88f357b00e2b3239c9d8575d ] When aops->write_begin() does not initialize fsdata, KMSAN may report an error passing the latter to aops->write_end(). Fix this by unconditionally initializing fsdata. Fixes: f2b6a16eb8f5 ("fs: affs convert to new aops") Suggested-by: Eric Biggers Signed-off-by: Alexander Potapenko Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/affs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index ba084b0b214b..82bb38370aa9 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -878,7 +878,7 @@ affs_truncate(struct inode *inode) if (inode->i_size > AFFS_I(inode)->mmu_private) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; loff_t isize = inode->i_size; int res; From 423e5d547e7a537d9ff66778c9f3b7caa0f7289d Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 11 Jan 2023 22:58:51 +0530 Subject: [PATCH 09/80] amd-xgbe: TX Flow Ctrl Registers are h/w ver dependent [ Upstream commit 579923d84b04abb6cd4cd1fd9974096a2dd1832b ] There is difference in the TX Flow Control registers (TFCR) between the revisions of the hardware. The older revisions of hardware used to have single register per queue. Whereas, the newer revision of hardware (from ver 30H onwards) have one register per priority. Update the driver to use the TFCR based on the reported version of the hardware. Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver") Co-developed-by: Ajith Nayak Signed-off-by: Ajith Nayak Signed-off-by: Raju Rangoju Acked-by: Shyam Sundar S K Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 4666084eda16..9d6fe5a892d9 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -524,19 +524,28 @@ static void xgbe_disable_vxlan(struct xgbe_prv_data *pdata) netif_dbg(pdata, drv, pdata->netdev, "VXLAN acceleration disabled\n"); } +static unsigned int xgbe_get_fc_queue_count(struct xgbe_prv_data *pdata) +{ + unsigned int max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; + + /* From MAC ver 30H the TFCR is per priority, instead of per queue */ + if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) >= 0x30) + return max_q_count; + else + return min_t(unsigned int, pdata->tx_q_count, max_q_count); +} + static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata) { - unsigned int max_q_count, q_count; unsigned int reg, reg_val; - unsigned int i; + unsigned int i, q_count; /* Clear MTL flow control */ for (i = 0; i < pdata->rx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0); /* Clear MAC flow control */ - max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; - q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); + q_count = xgbe_get_fc_queue_count(pdata); reg = MAC_Q0TFCR; for (i = 0; i < q_count; i++) { reg_val = XGMAC_IOREAD(pdata, reg); @@ -553,9 +562,8 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) { struct ieee_pfc *pfc = pdata->pfc; struct ieee_ets *ets = pdata->ets; - unsigned int max_q_count, q_count; unsigned int reg, reg_val; - unsigned int i; + unsigned int i, q_count; /* Set MTL flow control */ for (i = 0; i < pdata->rx_q_count; i++) { @@ -579,8 +587,7 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) } /* Set MAC flow control */ - max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; - q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); + q_count = xgbe_get_fc_queue_count(pdata); reg = MAC_Q0TFCR; for (i = 0; i < q_count; i++) { reg_val = XGMAC_IOREAD(pdata, reg); From 5ed914020d306aae4fe155a34df219bd5fcd6396 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 11 Jan 2023 22:58:52 +0530 Subject: [PATCH 10/80] amd-xgbe: Delay AN timeout during KR training [ Upstream commit 926446ae24c03311a480fb96eb78f0ce7ea6d091 ] AN restart triggered during KR training not only aborts the KR training process but also move the HW to unstable state. Driver has to wait upto 500ms or until the KR training is completed before restarting AN cycle. Fixes: 7c12aa08779c ("amd-xgbe: Move the PHY support into amd-xgbe") Co-developed-by: Sudheesh Mavila Signed-off-by: Sudheesh Mavila Signed-off-by: Raju Rangoju Acked-by: Shyam Sundar S K Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 24 +++++++++++++++++++++++ drivers/net/ethernet/amd/xgbe/xgbe.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 97167fc9bebe..7840eb4cdb8d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -496,6 +496,7 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata, reg |= XGBE_KR_TRAINING_ENABLE; reg |= XGBE_KR_TRAINING_START; XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); + pdata->kr_start_time = jiffies; netif_dbg(pdata, link, pdata->netdev, "KR training initiated\n"); @@ -632,6 +633,8 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata) xgbe_switch_mode(pdata); + pdata->an_result = XGBE_AN_READY; + xgbe_an_restart(pdata); return XGBE_AN_INCOMPAT_LINK; @@ -1275,9 +1278,30 @@ static bool xgbe_phy_aneg_done(struct xgbe_prv_data *pdata) static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata) { unsigned long link_timeout; + unsigned long kr_time; + int wait; link_timeout = pdata->link_check + (XGBE_LINK_TIMEOUT * HZ); if (time_after(jiffies, link_timeout)) { + if ((xgbe_cur_mode(pdata) == XGBE_MODE_KR) && + pdata->phy.autoneg == AUTONEG_ENABLE) { + /* AN restart should not happen while KR training is in progress. + * The while loop ensures no AN restart during KR training, + * waits up to 500ms and AN restart is triggered only if KR + * training is failed. + */ + wait = XGBE_KR_TRAINING_WAIT_ITER; + while (wait--) { + kr_time = pdata->kr_start_time + + msecs_to_jiffies(XGBE_AN_MS_TIMEOUT); + if (time_after(jiffies, kr_time)) + break; + /* AN restart is not required, if AN result is COMPLETE */ + if (pdata->an_result == XGBE_AN_COMPLETE) + return; + usleep_range(10000, 11000); + } + } netif_dbg(pdata, link, pdata->netdev, "AN link timeout\n"); xgbe_phy_config_aneg(pdata); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 0c93a552b921..729307a96c50 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -290,6 +290,7 @@ /* Auto-negotiation */ #define XGBE_AN_MS_TIMEOUT 500 #define XGBE_LINK_TIMEOUT 5 +#define XGBE_KR_TRAINING_WAIT_ITER 50 #define XGBE_SGMII_AN_LINK_STATUS BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) @@ -1266,6 +1267,7 @@ struct xgbe_prv_data { unsigned int parallel_detect; unsigned int fec_ability; unsigned long an_start; + unsigned long kr_start_time; enum xgbe_an_mode an_mode; /* I2C support */ From aae109414a57ab4164218f36e2e4a17f027fcaaa Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Mon, 9 Jan 2023 16:05:46 +0100 Subject: [PATCH 11/80] bpf: Fix pointer-leak due to insufficient speculative store bypass mitigation [ Upstream commit e4f4db47794c9f474b184ee1418f42e6a07412b6 ] To mitigate Spectre v4, 2039f26f3aca ("bpf: Fix leakage due to insufficient speculative store bypass mitigation") inserts lfence instructions after 1) initializing a stack slot and 2) spilling a pointer to the stack. However, this does not cover cases where a stack slot is first initialized with a pointer (subject to sanitization) but then overwritten with a scalar (not subject to sanitization because the slot was already initialized). In this case, the second write may be subject to speculative store bypass (SSB) creating a speculative pointer-as-scalar type confusion. This allows the program to subsequently leak the numerical pointer value using, for example, a branch-based cache side channel. To fix this, also sanitize scalars if they write a stack slot that previously contained a pointer. Assuming that pointer-spills are only generated by LLVM on register-pressure, the performance impact on most real-world BPF programs should be small. The following unprivileged BPF bytecode drafts a minimal exploit and the mitigation: [...] // r6 = 0 or 1 (skalar, unknown user input) // r7 = accessible ptr for side channel // r10 = frame pointer (fp), to be leaked // r9 = r10 # fp alias to encourage ssb *(u64 *)(r9 - 8) = r10 // fp[-8] = ptr, to be leaked // lfence added here because of pointer spill to stack. // // Ommitted: Dummy bpf_ringbuf_output() here to train alias predictor // for no r9-r10 dependency. // *(u64 *)(r10 - 8) = r6 // fp[-8] = scalar, overwrites ptr // 2039f26f3aca: no lfence added because stack slot was not STACK_INVALID, // store may be subject to SSB // // fix: also add an lfence when the slot contained a ptr // r8 = *(u64 *)(r9 - 8) // r8 = architecturally a scalar, speculatively a ptr // // leak ptr using branch-based cache side channel: r8 &= 1 // choose bit to leak if r8 == 0 goto SLOW // no mispredict // architecturally dead code if input r6 is 0, // only executes speculatively iff ptr bit is 1 r8 = *(u64 *)(r7 + 0) # encode bit in cache (0: slow, 1: fast) SLOW: [...] After running this, the program can time the access to *(r7 + 0) to determine whether the chosen pointer bit was 0 or 1. Repeat this 64 times to recover the whole address on amd64. In summary, sanitization can only be skipped if one scalar is overwritten with another scalar. Scalar-confusion due to speculative store bypass can not lead to invalid accesses because the pointer bounds deducted during verification are enforced using branchless logic. See 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") for details. Do not make the mitigation depend on !env->allow_{uninit_stack,ptr_leaks} because speculative leaks are likely unexpected if these were enabled. For example, leaking the address to a protected log file may be acceptable while disabling the mitigation might unintentionally leak the address into the cached-state of a map that is accessible to unprivileged processes. Fixes: 2039f26f3aca ("bpf: Fix leakage due to insufficient speculative store bypass mitigation") Signed-off-by: Luis Gerhorst Signed-off-by: Daniel Borkmann Acked-by: Henriette Hofmeier Link: https://lore.kernel.org/bpf/edc95bad-aada-9cfc-ffe2-fa9bb206583c@cs.fau.de Link: https://lore.kernel.org/bpf/20230109150544.41465-1-gerhorst@cs.fau.de Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 694ee0b1fefe..61f3a31abc1a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1012,7 +1012,9 @@ static int check_stack_write(struct bpf_verifier_env *env, bool sanitize = reg && is_spillable_regtype(reg->type); for (i = 0; i < size; i++) { - if (state->stack[spi].slot_type[i] == STACK_INVALID) { + u8 type = state->stack[spi].slot_type[i]; + + if (type != STACK_MISC && type != STACK_ZERO) { sanitize = true; break; } From 7deac1fad44c279386162591307dc1485c643115 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 5 Dec 2022 19:58:23 +0800 Subject: [PATCH 12/80] phy: rockchip-inno-usb2: Fix missing clk_disable_unprepare() in rockchip_usb2phy_power_on() [ Upstream commit 5daba914da0e48950e9407ea4d75fa57029c9adc ] The clk_disable_unprepare() should be called in the error handling of rockchip_usb2phy_power_on(). Fixes: 0e08d2a727e6 ("phy: rockchip-inno-usb2: add a new driver for Rockchip usb2phy") Signed-off-by: Shang XiaoJing Link: https://lore.kernel.org/r/20221205115823.16957-1-shangxiaojing@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/rockchip/phy-rockchip-inno-usb2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c index 5049dac79bd0..77c1c3ffaed7 100644 --- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c @@ -477,8 +477,10 @@ static int rockchip_usb2phy_power_on(struct phy *phy) return ret; ret = property_enable(base, &rport->port_cfg->phy_sus, false); - if (ret) + if (ret) { + clk_disable_unprepare(rphy->clk480m); return ret; + } /* waiting for the utmi_clk to become stable */ usleep_range(1500, 2000); From 54f7be61584b8ec4c6df405f479495b9397bae4a Mon Sep 17 00:00:00 2001 From: Jisoo Jang Date: Wed, 11 Jan 2023 22:19:14 +0900 Subject: [PATCH 13/80] net: nfc: Fix use-after-free in local_cleanup() [ Upstream commit 4bb4db7f3187c6e3de6b229ffc87cdb30a2d22b6 ] Fix a use-after-free that occurs in kfree_skb() called from local_cleanup(). This could happen when killing nfc daemon (e.g. neard) after detaching an nfc device. When detaching an nfc device, local_cleanup() called from nfc_llcp_unregister_device() frees local->rx_pending and decreases local->ref by kref_put() in nfc_llcp_local_put(). In the terminating process, nfc daemon releases all sockets and it leads to decreasing local->ref. After the last release of local->ref, local_cleanup() called from local_release() frees local->rx_pending again, which leads to the bug. Setting local->rx_pending to NULL in local_cleanup() could prevent use-after-free when local_cleanup() is called twice. Found by a modified version of syzkaller. BUG: KASAN: use-after-free in kfree_skb() Call Trace: dump_stack_lvl (lib/dump_stack.c:106) print_address_description.constprop.0.cold (mm/kasan/report.c:306) kasan_check_range (mm/kasan/generic.c:189) kfree_skb (net/core/skbuff.c:955) local_cleanup (net/nfc/llcp_core.c:159) nfc_llcp_local_put.part.0 (net/nfc/llcp_core.c:172) nfc_llcp_local_put (net/nfc/llcp_core.c:181) llcp_sock_destruct (net/nfc/llcp_sock.c:959) __sk_destruct (net/core/sock.c:2133) sk_destruct (net/core/sock.c:2181) __sk_free (net/core/sock.c:2192) sk_free (net/core/sock.c:2203) llcp_sock_release (net/nfc/llcp_sock.c:646) __sock_release (net/socket.c:650) sock_close (net/socket.c:1365) __fput (fs/file_table.c:306) task_work_run (kernel/task_work.c:179) ptrace_notify (kernel/signal.c:2354) syscall_exit_to_user_mode_prepare (kernel/entry/common.c:278) syscall_exit_to_user_mode (kernel/entry/common.c:296) do_syscall_64 (arch/x86/entry/common.c:86) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:106) Allocated by task 4719: kasan_save_stack (mm/kasan/common.c:45) __kasan_slab_alloc (mm/kasan/common.c:325) slab_post_alloc_hook (mm/slab.h:766) kmem_cache_alloc_node (mm/slub.c:3497) __alloc_skb (net/core/skbuff.c:552) pn533_recv_response (drivers/nfc/pn533/usb.c:65) __usb_hcd_giveback_urb (drivers/usb/core/hcd.c:1671) usb_giveback_urb_bh (drivers/usb/core/hcd.c:1704) tasklet_action_common.isra.0 (kernel/softirq.c:797) __do_softirq (kernel/softirq.c:571) Freed by task 1901: kasan_save_stack (mm/kasan/common.c:45) kasan_set_track (mm/kasan/common.c:52) kasan_save_free_info (mm/kasan/genericdd.c:518) __kasan_slab_free (mm/kasan/common.c:236) kmem_cache_free (mm/slub.c:3809) kfree_skbmem (net/core/skbuff.c:874) kfree_skb (net/core/skbuff.c:931) local_cleanup (net/nfc/llcp_core.c:159) nfc_llcp_unregister_device (net/nfc/llcp_core.c:1617) nfc_unregister_device (net/nfc/core.c:1179) pn53x_unregister_nfc (drivers/nfc/pn533/pn533.c:2846) pn533_usb_disconnect (drivers/nfc/pn533/usb.c:579) usb_unbind_interface (drivers/usb/core/driver.c:458) device_release_driver_internal (drivers/base/dd.c:1279) bus_remove_device (drivers/base/bus.c:529) device_del (drivers/base/core.c:3665) usb_disable_device (drivers/usb/core/message.c:1420) usb_disconnect (drivers/usb/core.c:2261) hub_event (drivers/usb/core/hub.c:5833) process_one_work (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:212 include/trace/events/workqueue.h:108 kernel/workqueue.c:2281) worker_thread (include/linux/list.h:282 kernel/workqueue.c:2423) kthread (kernel/kthread.c:319) ret_from_fork (arch/x86/entry/entry_64.S:301) Fixes: 3536da06db0b ("NFC: llcp: Clean local timers and works when removing a device") Signed-off-by: Jisoo Jang Link: https://lore.kernel.org/r/20230111131914.3338838-1-jisoo.jang@yonsei.ac.kr Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/nfc/llcp_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 4fa015208aab..3290f2275b85 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -171,6 +171,7 @@ static void local_cleanup(struct nfc_llcp_local *local) cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); + local->rx_pending = NULL; del_timer_sync(&local->sdreq_timer); cancel_work_sync(&local->sdreq_timeout_work); nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); From b4cc9d7ae9bed976de5463958afea2983b4ca57f Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Wed, 11 Jan 2023 18:50:31 +0100 Subject: [PATCH 14/80] wifi: rndis_wlan: Prevent buffer overflow in rndis_query_oid [ Upstream commit b870e73a56c4cccbec33224233eaf295839f228c ] Since resplen and respoffs are signed integers sufficiently large values of unsigned int len and offset members of RNDIS response will result in negative values of prior variables. This may be utilized to bypass implemented security checks to either extract memory contents by manipulating offset or overflow the data buffer via memcpy by manipulating both offset and len. Additionally assure that sum of resplen and respoffs does not overflow so buffer boundaries are kept. Fixes: 80f8c5b434f9 ("rndis_wlan: copy only useful data from rndis_command respond") Signed-off-by: Szymon Heidrich Reviewed-by: Alexander Duyck Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230111175031.7049-1-szymon.heidrich@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/rndis_wlan.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 51e4e92d95a0..0bbeb61ec3a3 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -712,8 +712,8 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) struct rndis_query *get; struct rndis_query_c *get_c; } u; - int ret, buflen; - int resplen, respoffs, copylen; + int ret; + size_t buflen, resplen, respoffs, copylen; buflen = *len + sizeof(*u.get); if (buflen < CONTROL_BUFFER_SIZE) @@ -748,22 +748,15 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) if (respoffs > buflen) { /* Device returned data offset outside buffer, error. */ - netdev_dbg(dev->net, "%s(%s): received invalid " - "data offset: %d > %d\n", __func__, - oid_to_string(oid), respoffs, buflen); + netdev_dbg(dev->net, + "%s(%s): received invalid data offset: %zu > %zu\n", + __func__, oid_to_string(oid), respoffs, buflen); ret = -EINVAL; goto exit_unlock; } - if ((resplen + respoffs) > buflen) { - /* Device would have returned more data if buffer would - * have been big enough. Copy just the bits that we got. - */ - copylen = buflen - respoffs; - } else { - copylen = resplen; - } + copylen = min(resplen, buflen - respoffs); if (copylen > *len) copylen = *len; From a264cc6ded2d404f5e93642e39731972cd67b7df Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Sat, 14 Jan 2023 19:23:26 +0100 Subject: [PATCH 15/80] net: usb: sr9700: Handle negative len [ Upstream commit ecf7cf8efb59789e2b21d2f9ab926142579092b2 ] Packet len computed as difference of length word extracted from skb data and four may result in a negative value. In such case processing of the buffer should be interrupted rather than setting sr_skb->len to an unexpectedly large value (due to cast from signed to unsigned integer) and passing sr_skb to usbnet_skb_return. Fixes: e9da0b56fe27 ("sr9700: sanity check for packet length") Signed-off-by: Szymon Heidrich Link: https://lore.kernel.org/r/20230114182326.30479-1-szymon.heidrich@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/sr9700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 83640628c47d..8bee8286e41a 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -410,7 +410,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* ignore the CRC length */ len = (skb->data[1] | (skb->data[2] << 8)) - 4; - if (len > ETH_FRAME_LEN || len > skb->len) + if (len > ETH_FRAME_LEN || len > skb->len || len < 0) return 0; /* the last packet of current skb */ From c431a3d642593bbdb99e8a9e3eed608b730db6f8 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 15 Jan 2023 11:54:06 +0100 Subject: [PATCH 16/80] net: mdio: validate parameter addr in mdiobus_get_phy() [ Upstream commit 867dbe784c5010a466f00a7d1467c1c5ea569c75 ] The caller may pass any value as addr, what may result in an out-of-bounds access to array mdio_map. One existing case is stmmac_init_phy() that may pass -1 as addr. Therefore validate addr before using it. Fixes: 7f854420fbfe ("phy: Add API for {un}registering an mdio device to a bus.") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/cdf664ea-3312-e915-73f8-021678d08887@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/mdio_bus.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 1d1fbd7bd6fc..550806351049 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -103,7 +103,12 @@ EXPORT_SYMBOL(mdiobus_unregister_device); struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr) { - struct mdio_device *mdiodev = bus->mdio_map[addr]; + struct mdio_device *mdiodev; + + if (addr < 0 || addr >= ARRAY_SIZE(bus->mdio_map)) + return NULL; + + mdiodev = bus->mdio_map[addr]; if (!mdiodev) return NULL; From f958da03d9a71808548b2e5418d95482b106eb9a Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Mon, 16 Jan 2023 11:11:24 +0000 Subject: [PATCH 17/80] HID: check empty report_list in hid_validate_values() [ Upstream commit b12fece4c64857e5fab4290bf01b2e0317a88456 ] Add a check for empty report_list in hid_validate_values(). The missing check causes a type confusion when issuing a list_entry() on an empty report_list. The problem is caused by the assumption that the device must have valid report_list. While this will be true for all normal HID devices, a suitably malicious device can violate the assumption. Fixes: 1b15d2e5b807 ("HID: core: fix validation of report id 0") Signed-off-by: Pietro Borrello Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 62656636d30c..8cc79d0d11fb 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -980,8 +980,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid, * Validating on id 0 means we should examine the first * report in the list. */ - report = list_entry( - hid->report_enum[type].report_list.next, + report = list_first_entry_or_null( + &hid->report_enum[type].report_list, struct hid_report, list); } else { report = hid->report_enum[type].report_id_hash[id]; From e9036e951f93fb8d7b5e9d6e2c7f94a4da312ae4 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 15 Dec 2022 10:59:05 +0530 Subject: [PATCH 18/80] usb: gadget: f_fs: Prevent race during ffs_ep0_queue_wait [ Upstream commit 6a19da111057f69214b97c62fb0ac59023970850 ] While performing fast composition switch, there is a possibility that the process of ffs_ep0_write/ffs_ep0_read get into a race condition due to ep0req being freed up from functionfs_unbind. Consider the scenario that the ffs_ep0_write calls the ffs_ep0_queue_wait by taking a lock &ffs->ev.waitq.lock. However, the functionfs_unbind isn't bounded so it can go ahead and mark the ep0req to NULL, and since there is no NULL check in ffs_ep0_queue_wait we will end up in use-after-free. Fix this by making a serialized execution between the two functions using a mutex_lock(ffs->mutex). Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Udipto Goswami Tested-by: Krishna Kurapati Link: https://lore.kernel.org/r/20221215052906.8993-2-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_fs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 49eb4e3c760f..b3489a3449f6 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -271,6 +271,9 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len) struct usb_request *req = ffs->ep0req; int ret; + if (!req) + return -EINVAL; + req->zero = len < le16_to_cpu(ffs->ev.setup.wLength); spin_unlock_irq(&ffs->ev.waitq.lock); @@ -1807,10 +1810,12 @@ static void functionfs_unbind(struct ffs_data *ffs) ENTER(); if (!WARN_ON(!ffs->gadget)) { + mutex_lock(&ffs->mutex); usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); ffs->ep0req = NULL; ffs->gadget = NULL; clear_bit(FFS_FL_BOUND, &ffs->flags); + mutex_unlock(&ffs->mutex); ffs_data_put(ffs); } } From bf7ddf96c12beac7058af27fe7ce6e2e499724c5 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 15 Dec 2022 10:59:06 +0530 Subject: [PATCH 19/80] usb: gadget: f_fs: Ensure ep0req is dequeued before free_request [ Upstream commit ce405d561b020e5a46340eb5146805a625dcacee ] As per the documentation, function usb_ep_free_request guarantees the request will not be queued or no longer be re-queued (or otherwise used). However, with the current implementation it doesn't make sure that the request in ep0 isn't reused. Fix this by dequeuing the ep0req on functionfs_unbind before freeing the request to align with the definition. Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Udipto Goswami Tested-by: Krishna Kurapati Link: https://lore.kernel.org/r/20221215052906.8993-3-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index b3489a3449f6..48bdb2a3972b 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1810,6 +1810,8 @@ static void functionfs_unbind(struct ffs_data *ffs) ENTER(); if (!WARN_ON(!ffs->gadget)) { + /* dequeue before freeing ep0req */ + usb_ep_dequeue(ffs->gadget->ep0, ffs->ep0req); mutex_lock(&ffs->mutex); usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); ffs->ep0req = NULL; From bbae142fbe2352c4a6145ddc80377f9ea9ddcc72 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 30 Aug 2022 20:12:29 -0700 Subject: [PATCH 20/80] net: mlx5: eliminate anonymous module_init & module_exit [ Upstream commit 2c1e1b949024989e20907b84e11a731a50778416 ] Eliminate anonymous module_init() and module_exit(), which can lead to confusion or ambiguity when reading System.map, crashes/oops/bugs, or an initcall_debug log. Give each of these init and exit functions unique driver-specific names to eliminate the anonymous names. Example 1: (System.map) ffffffff832fc78c t init ffffffff832fc79e t init ffffffff832fc8f8 t init Example 2: (initcall_debug log) calling init+0x0/0x12 @ 1 initcall init+0x0/0x12 returned 0 after 15 usecs calling init+0x0/0x60 @ 1 initcall init+0x0/0x60 returned 0 after 2 usecs calling init+0x0/0x9a @ 1 initcall init+0x0/0x9a returned 0 after 74 usecs Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Randy Dunlap Cc: Eli Cohen Cc: Saeed Mahameed Cc: Leon Romanovsky Cc: linux-rdma@vger.kernel.org Reviewed-by: Ira Weiny Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a2b25afa2472..e09bd059984e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1683,7 +1683,7 @@ static void mlx5_core_verify_params(void) } } -static int __init init(void) +static int __init mlx5_init(void) { int err; @@ -1708,7 +1708,7 @@ err_debug: return err; } -static void __exit cleanup(void) +static void __exit mlx5_cleanup(void) { #ifdef CONFIG_MLX5_CORE_EN mlx5e_cleanup(); @@ -1717,5 +1717,5 @@ static void __exit cleanup(void) mlx5_unregister_debugfs(); } -module_init(init); -module_exit(cleanup); +module_init(mlx5_init); +module_exit(mlx5_cleanup); From c6221afe573413fd2981e291f7df4a58283e0654 Mon Sep 17 00:00:00 2001 From: Koba Ko Date: Thu, 1 Dec 2022 11:00:50 +0800 Subject: [PATCH 21/80] dmaengine: Fix double increment of client_count in dma_chan_get() [ Upstream commit f3dc1b3b4750851a94212dba249703dd0e50bb20 ] The first time dma_chan_get() is called for a channel the channel client_count is incorrectly incremented twice for public channels, first in balance_ref_count(), and again prior to returning. This results in an incorrect client count which will lead to the channel resources not being freed when they should be. A simple test of repeated module load and unload of async_tx on a Dell Power Edge R7425 also shows this resulting in a kref underflow warning. [ 124.329662] async_tx: api initialized (async) [ 129.000627] async_tx: api initialized (async) [ 130.047839] ------------[ cut here ]------------ [ 130.052472] refcount_t: underflow; use-after-free. [ 130.057279] WARNING: CPU: 3 PID: 19364 at lib/refcount.c:28 refcount_warn_saturate+0xba/0x110 [ 130.065811] Modules linked in: async_tx(-) rfkill intel_rapl_msr intel_rapl_common amd64_edac edac_mce_amd ipmi_ssif kvm_amd dcdbas kvm mgag200 drm_shmem_helper acpi_ipmi irqbypass drm_kms_helper ipmi_si syscopyarea sysfillrect rapl pcspkr ipmi_devintf sysimgblt fb_sys_fops k10temp i2c_piix4 ipmi_msghandler acpi_power_meter acpi_cpufreq vfat fat drm fuse xfs libcrc32c sd_mod t10_pi sg ahci crct10dif_pclmul libahci crc32_pclmul crc32c_intel ghash_clmulni_intel igb megaraid_sas i40e libata i2c_algo_bit ccp sp5100_tco dca dm_mirror dm_region_hash dm_log dm_mod [last unloaded: async_tx] [ 130.117361] CPU: 3 PID: 19364 Comm: modprobe Kdump: loaded Not tainted 5.14.0-185.el9.x86_64 #1 [ 130.126091] Hardware name: Dell Inc. PowerEdge R7425/02MJ3T, BIOS 1.18.0 01/17/2022 [ 130.133806] RIP: 0010:refcount_warn_saturate+0xba/0x110 [ 130.139041] Code: 01 01 e8 6d bd 55 00 0f 0b e9 72 9d 8a 00 80 3d 26 18 9c 01 00 75 85 48 c7 c7 f8 a3 03 9d c6 05 16 18 9c 01 01 e8 4a bd 55 00 <0f> 0b e9 4f 9d 8a 00 80 3d 01 18 9c 01 00 0f 85 5e ff ff ff 48 c7 [ 130.157807] RSP: 0018:ffffbf98898afe68 EFLAGS: 00010286 [ 130.163036] RAX: 0000000000000000 RBX: ffff9da06028e598 RCX: 0000000000000000 [ 130.170172] RDX: ffff9daf9de26480 RSI: ffff9daf9de198a0 RDI: ffff9daf9de198a0 [ 130.177316] RBP: ffff9da7cddf3970 R08: 0000000000000000 R09: 00000000ffff7fff [ 130.184459] R10: ffffbf98898afd00 R11: ffffffff9d9e8c28 R12: ffff9da7cddf1970 [ 130.191596] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 130.198739] FS: 00007f646435c740(0000) GS:ffff9daf9de00000(0000) knlGS:0000000000000000 [ 130.206832] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 130.212586] CR2: 00007f6463b214f0 CR3: 00000008ab98c000 CR4: 00000000003506e0 [ 130.219729] Call Trace: [ 130.222192] [ 130.224305] dma_chan_put+0x10d/0x110 [ 130.227988] dmaengine_put+0x7a/0xa0 [ 130.231575] __do_sys_delete_module.constprop.0+0x178/0x280 [ 130.237157] ? syscall_trace_enter.constprop.0+0x145/0x1d0 [ 130.242652] do_syscall_64+0x5c/0x90 [ 130.246240] ? exc_page_fault+0x62/0x150 [ 130.250178] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 130.255243] RIP: 0033:0x7f6463a3f5ab [ 130.258830] Code: 73 01 c3 48 8b 0d 75 a8 1b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 45 a8 1b 00 f7 d8 64 89 01 48 [ 130.277591] RSP: 002b:00007fff22f972c8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 130.285164] RAX: ffffffffffffffda RBX: 000055b6786edd40 RCX: 00007f6463a3f5ab [ 130.292303] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055b6786edda8 [ 130.299443] RBP: 000055b6786edd40 R08: 0000000000000000 R09: 0000000000000000 [ 130.306584] R10: 00007f6463b9eac0 R11: 0000000000000206 R12: 000055b6786edda8 [ 130.313731] R13: 0000000000000000 R14: 000055b6786edda8 R15: 00007fff22f995f8 [ 130.320875] [ 130.323081] ---[ end trace eff7156d56b5cf25 ]--- cat /sys/class/dma/dma0chan*/in_use would get the wrong result. 2 2 2 Fixes: d2f4f99db3e9 ("dmaengine: Rework dma_chan_get") Signed-off-by: Koba Ko Reviewed-by: Jie Hai Test-by: Jie Hai Reviewed-by: Jerry Snitselaar Reviewed-by: Dave Jiang Tested-by: Joel Savitz Link: https://lore.kernel.org/r/20221201030050.978595-1-koba.ko@canonical.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dmaengine.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 8a52a5efee4f..e1cf7c250c33 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -223,7 +223,8 @@ static int dma_chan_get(struct dma_chan *chan) /* The channel is already in use, update client count */ if (chan->client_count) { __module_get(owner); - goto out; + chan->client_count++; + return 0; } if (!try_module_get(owner)) @@ -236,11 +237,11 @@ static int dma_chan_get(struct dma_chan *chan) goto err_out; } + chan->client_count++; + if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask)) balance_ref_count(chan); -out: - chan->client_count++; return 0; err_out: From 8ee28f88112d83a9189c62d583f1251d6da0bc3e Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Mon, 16 Jan 2023 15:41:33 -0600 Subject: [PATCH 22/80] net: macb: fix PTP TX timestamp failure due to packet padding [ Upstream commit 7b90f5a665acd46efbbfa677a3a3a18d01ad6487 ] PTP TX timestamp handling was observed to be broken with this driver when using the raw Layer 2 PTP encapsulation. ptp4l was not receiving the expected TX timestamp after transmitting a packet, causing it to enter a failure state. The problem appears to be due to the way that the driver pads packets which are smaller than the Ethernet minimum of 60 bytes. If headroom space was available in the SKB, this caused the driver to move the data back to utilize it. However, this appears to cause other data references in the SKB to become inconsistent. In particular, this caused the ptp_one_step_sync function to later (in the TX completion path) falsely detect the packet as a one-step SYNC packet, even when it was not, which caused the TX timestamp to not be processed when it should be. Using the headroom for this purpose seems like an unnecessary complexity as this is not a hot path in the driver, and in most cases it appears that there is sufficient tailroom to not require using the headroom anyway. Remove this usage of headroom to prevent this inconsistency from occurring and causing other problems. Fixes: 653e92a9175e ("net: macb: add support for padding and fcs computation") Signed-off-by: Robert Hancock Reviewed-by: Jacob Keller Tested-by: Claudiu Beznea # on SAMA7G5 Reviewed-by: Claudiu Beznea Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 50331b202f73..324d81516832 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1738,7 +1738,6 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) || skb_is_nonlinear(*skb); int padlen = ETH_ZLEN - (*skb)->len; - int headroom = skb_headroom(*skb); int tailroom = skb_tailroom(*skb); struct sk_buff *nskb; u32 fcs; @@ -1752,9 +1751,6 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) /* FCS could be appeded to tailroom. */ if (tailroom >= ETH_FCS_LEN) goto add_fcs; - /* FCS could be appeded by moving data to headroom. */ - else if (!cloned && headroom + tailroom >= ETH_FCS_LEN) - padlen = 0; /* No room for FCS, need to reallocate skb. */ else padlen = ETH_FCS_LEN; @@ -1763,10 +1759,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) padlen += ETH_FCS_LEN; } - if (!cloned && headroom + tailroom >= padlen) { - (*skb)->data = memmove((*skb)->head, (*skb)->data, (*skb)->len); - skb_set_tail_pointer(*skb, (*skb)->len); - } else { + if (cloned || tailroom < padlen) { nskb = skb_copy_expand(*skb, 0, padlen, GFP_ATOMIC); if (!nskb) return -ENOMEM; From 7317326f685824c7c29bd80841fd18041af6bb73 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Wed, 11 Jan 2023 18:12:16 +0000 Subject: [PATCH 23/80] HID: betop: check shape of output reports [ Upstream commit 3782c0d6edf658b71354a64d60aa7a296188fc90 ] betopff_init() only checks the total sum of the report counts for each report field to be at least 4, but hid_betopff_play() expects 4 report fields. A device advertising an output report with one field and 4 report counts would pass the check but crash the kernel with a NULL pointer dereference in hid_betopff_play(). Fixes: 52cd7785f3cd ("HID: betop: add drivers/hid/hid-betopff.c") Signed-off-by: Pietro Borrello Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-betopff.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-betopff.c b/drivers/hid/hid-betopff.c index 9b60efe6ec44..ba386e5aa055 100644 --- a/drivers/hid/hid-betopff.c +++ b/drivers/hid/hid-betopff.c @@ -63,7 +63,6 @@ static int betopff_init(struct hid_device *hid) struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev; - int field_count = 0; int error; int i, j; @@ -89,19 +88,21 @@ static int betopff_init(struct hid_device *hid) * ----------------------------------------- * Do init them with default value. */ + if (report->maxfield < 4) { + hid_err(hid, "not enough fields in the report: %d\n", + report->maxfield); + return -ENODEV; + } for (i = 0; i < report->maxfield; i++) { + if (report->field[i]->report_count < 1) { + hid_err(hid, "no values in the field\n"); + return -ENODEV; + } for (j = 0; j < report->field[i]->report_count; j++) { report->field[i]->value[j] = 0x00; - field_count++; } } - if (field_count < 4) { - hid_err(hid, "not enough fields in the report: %d\n", - field_count); - return -ENODEV; - } - betopff = kzalloc(sizeof(*betopff), GFP_KERNEL); if (!betopff) return -ENOMEM; From 50e32641c4d024fb264fa622837c19dd270687d5 Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Tue, 20 Nov 2018 16:31:45 +0100 Subject: [PATCH 24/80] dmaengine: xilinx_dma: commonize DMA copy size calculation [ Upstream commit 616f0f81d857e248a72b5af45ab185196556ae2e ] This patch removes a bit of duplicated code by introducing a new function that implements calculations for DMA copy size, and prepares for changes to the copy size calculation that will happen in following patches. Suggested-by: Vinod Koul Signed-off-by: Andrea Merello Reviewed-by: Radhey Shyam Pandey Signed-off-by: Vinod Koul Stable-dep-of: 596b53ccc36a ("dmaengine: xilinx_dma: call of_node_put() when breaking out of for_each_child_of_node()") Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 39 ++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 0ba70be4ea85..e4ea94b93400 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -428,6 +428,7 @@ struct xilinx_dma_config { * @rxs_clk: DMA s2mm stream clock * @nr_channels: Number of channels DMA device supports * @chan_id: DMA channel identifier + * @max_buffer_len: Max buffer length */ struct xilinx_dma_device { void __iomem *regs; @@ -447,6 +448,7 @@ struct xilinx_dma_device { struct clk *rxs_clk; u32 nr_channels; u32 chan_id; + u32 max_buffer_len; }; /* Macros */ @@ -969,6 +971,25 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) return 0; } +/** + * xilinx_dma_calc_copysize - Calculate the amount of data to copy + * @chan: Driver specific DMA channel + * @size: Total data that needs to be copied + * @done: Amount of data that has been already copied + * + * Return: Amount of data that has to be copied + */ +static int xilinx_dma_calc_copysize(struct xilinx_dma_chan *chan, + int size, int done) +{ + size_t copy; + + copy = min_t(size_t, size - done, + chan->xdev->max_buffer_len); + + return copy; +} + /** * xilinx_dma_tx_status - Get DMA transaction status * @dchan: DMA channel @@ -1002,7 +1023,7 @@ static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan, list_for_each_entry(segment, &desc->segments, node) { hw = &segment->hw; residue += (hw->control - hw->status) & - XILINX_DMA_MAX_TRANS_LEN; + chan->xdev->max_buffer_len; } } spin_unlock_irqrestore(&chan->lock, flags); @@ -1262,7 +1283,7 @@ static void xilinx_cdma_start_transfer(struct xilinx_dma_chan *chan) /* Start the transfer */ dma_ctrl_write(chan, XILINX_DMA_REG_BTT, - hw->control & XILINX_DMA_MAX_TRANS_LEN); + hw->control & chan->xdev->max_buffer_len); } list_splice_tail_init(&chan->pending_list, &chan->active_list); @@ -1365,7 +1386,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) /* Start the transfer */ dma_ctrl_write(chan, XILINX_DMA_REG_BTT, - hw->control & XILINX_DMA_MAX_TRANS_LEN); + hw->control & chan->xdev->max_buffer_len); } list_splice_tail_init(&chan->pending_list, &chan->active_list); @@ -1729,7 +1750,7 @@ xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, struct xilinx_cdma_tx_segment *segment; struct xilinx_cdma_desc_hw *hw; - if (!len || len > XILINX_DMA_MAX_TRANS_LEN) + if (!len || len > chan->xdev->max_buffer_len) return NULL; desc = xilinx_dma_alloc_tx_descriptor(chan); @@ -1819,8 +1840,8 @@ static struct dma_async_tx_descriptor *xilinx_dma_prep_slave_sg( * Calculate the maximum number of bytes to transfer, * making sure it is less than the hw limit */ - copy = min_t(size_t, sg_dma_len(sg) - sg_used, - XILINX_DMA_MAX_TRANS_LEN); + copy = xilinx_dma_calc_copysize(chan, sg_dma_len(sg), + sg_used); hw = &segment->hw; /* Fill in the descriptor */ @@ -1924,8 +1945,8 @@ static struct dma_async_tx_descriptor *xilinx_dma_prep_dma_cyclic( * Calculate the maximum number of bytes to transfer, * making sure it is less than the hw limit */ - copy = min_t(size_t, period_len - sg_used, - XILINX_DMA_MAX_TRANS_LEN); + copy = xilinx_dma_calc_copysize(chan, period_len, + sg_used); hw = &segment->hw; xilinx_axidma_buf(chan, hw, buf_addr, sg_used, period_len * i); @@ -2645,6 +2666,8 @@ static int xilinx_dma_probe(struct platform_device *pdev) /* Retrieve the DMA engine properties from the device tree */ xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg"); + xdev->max_buffer_len = XILINX_DMA_MAX_TRANS_LEN; + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) xdev->mcdma = of_property_read_bool(node, "xlnx,mcdma"); From 2690edc3b4ca721042f63fe7c25f595b8baa8360 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Tue, 20 Nov 2018 16:31:48 +0100 Subject: [PATCH 25/80] dmaengine: xilinx_dma: program hardware supported buffer length [ Upstream commit ae809690b46a71dc56cda5b3b8884c8c41a0df15 ] AXI-DMA IP supports configurable (c_sg_length_width) buffer length register width, hence read buffer length (xlnx,sg-length-width) DT property and ensure that driver doesn't program buffer length exceeding the supported limit. For VDMA and CDMA there is no change. Cc: Rob Herring Cc: Mark Rutland Cc: devicetree@vger.kernel.org Signed-off-by: Radhey Shyam Pandey Signed-off-by: Michal Simek Signed-off-by: Andrea Merello [rebase, reword] Signed-off-by: Vinod Koul Stable-dep-of: 596b53ccc36a ("dmaengine: xilinx_dma: call of_node_put() when breaking out of for_each_child_of_node()") Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index e4ea94b93400..6687cee26843 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -164,7 +164,9 @@ #define XILINX_DMA_REG_BTT 0x28 /* AXI DMA Specific Masks/Bit fields */ -#define XILINX_DMA_MAX_TRANS_LEN GENMASK(22, 0) +#define XILINX_DMA_MAX_TRANS_LEN_MIN 8 +#define XILINX_DMA_MAX_TRANS_LEN_MAX 23 +#define XILINX_DMA_V2_MAX_TRANS_LEN_MAX 26 #define XILINX_DMA_CR_COALESCE_MAX GENMASK(23, 16) #define XILINX_DMA_CR_CYCLIC_BD_EN_MASK BIT(4) #define XILINX_DMA_CR_COALESCE_SHIFT 16 @@ -2634,7 +2636,7 @@ static int xilinx_dma_probe(struct platform_device *pdev) struct xilinx_dma_device *xdev; struct device_node *child, *np = pdev->dev.of_node; struct resource *io; - u32 num_frames, addr_width; + u32 num_frames, addr_width, len_width; int i, err; /* Allocate and initialize the DMA engine structure */ @@ -2666,10 +2668,24 @@ static int xilinx_dma_probe(struct platform_device *pdev) /* Retrieve the DMA engine properties from the device tree */ xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg"); - xdev->max_buffer_len = XILINX_DMA_MAX_TRANS_LEN; + xdev->max_buffer_len = GENMASK(XILINX_DMA_MAX_TRANS_LEN_MAX - 1, 0); - if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { xdev->mcdma = of_property_read_bool(node, "xlnx,mcdma"); + if (!of_property_read_u32(node, "xlnx,sg-length-width", + &len_width)) { + if (len_width < XILINX_DMA_MAX_TRANS_LEN_MIN || + len_width > XILINX_DMA_V2_MAX_TRANS_LEN_MAX) { + dev_warn(xdev->dev, + "invalid xlnx,sg-length-width property value. Using default width\n"); + } else { + if (len_width > XILINX_DMA_MAX_TRANS_LEN_MAX) + dev_warn(xdev->dev, "Please ensure that IP supports buffer length > 23 bits\n"); + xdev->max_buffer_len = + GENMASK(len_width - 1, 0); + } + } + } if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { err = of_property_read_u32(node, "xlnx,num-fstores", From bb73b985d272148e6599bb32691e8316d71108cf Mon Sep 17 00:00:00 2001 From: Swati Agarwal Date: Wed, 17 Aug 2022 11:41:23 +0530 Subject: [PATCH 26/80] dmaengine: xilinx_dma: Fix devm_platform_ioremap_resource error handling [ Upstream commit 91df7751eb890e970afc08f50b8f0fa5ea39e03d ] Add missing cleanup in devm_platform_ioremap_resource(). When probe fails remove dma channel resources and disable clocks in accordance with the order of resources allocated . Signed-off-by: Swati Agarwal Link: https://lore.kernel.org/r/20220817061125.4720-2-swati.agarwal@xilinx.com Signed-off-by: Vinod Koul Stable-dep-of: 596b53ccc36a ("dmaengine: xilinx_dma: call of_node_put() when breaking out of for_each_child_of_node()") Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 6687cee26843..c56ce7cd1f6f 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2663,9 +2663,10 @@ static int xilinx_dma_probe(struct platform_device *pdev) /* Request and map I/O memory */ io = platform_get_resource(pdev, IORESOURCE_MEM, 0); xdev->regs = devm_ioremap_resource(&pdev->dev, io); - if (IS_ERR(xdev->regs)) - return PTR_ERR(xdev->regs); - + if (IS_ERR(xdev->regs)) { + err = PTR_ERR(xdev->regs); + goto disable_clks; + } /* Retrieve the DMA engine properties from the device tree */ xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg"); xdev->max_buffer_len = GENMASK(XILINX_DMA_MAX_TRANS_LEN_MAX - 1, 0); @@ -2759,7 +2760,7 @@ static int xilinx_dma_probe(struct platform_device *pdev) for_each_child_of_node(node, child) { err = xilinx_dma_child_probe(xdev, child); if (err < 0) - goto disable_clks; + goto error; } if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { @@ -2792,12 +2793,12 @@ static int xilinx_dma_probe(struct platform_device *pdev) return 0; -disable_clks: - xdma_disable_allclks(xdev); error: for (i = 0; i < xdev->nr_channels; i++) if (xdev->chan[i]) xilinx_dma_chan_remove(xdev->chan[i]); +disable_clks: + xdma_disable_allclks(xdev); return err; } From 9714a6c69a38beda93b577df9b337a62d034656e Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 22 Nov 2022 10:16:12 +0800 Subject: [PATCH 27/80] dmaengine: xilinx_dma: call of_node_put() when breaking out of for_each_child_of_node() [ Upstream commit 596b53ccc36a546ab28e8897315c5b4d1d5a0200 ] Since for_each_child_of_node() will increase the refcount of node, we need to call of_node_put() manually when breaking out of the iteration. Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support") Signed-off-by: Liu Shixin Acked-by: Peter Korsgaard Link: https://lore.kernel.org/r/20221122021612.1908866-1-liushixin2@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index c56ce7cd1f6f..5f9945651e95 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2759,8 +2759,10 @@ static int xilinx_dma_probe(struct platform_device *pdev) /* Initialize the channels */ for_each_child_of_node(node, child) { err = xilinx_dma_child_probe(xdev, child); - if (err < 0) + if (err < 0) { + of_node_put(child); goto error; + } } if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { From 48261e4654f4885f0faaabf2e4acce33e304c15f Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 18 Jan 2023 09:59:41 +0800 Subject: [PATCH 28/80] tcp: avoid the lookup process failing to get sk in ehash table [ Upstream commit 3f4ca5fafc08881d7a57daa20449d171f2887043 ] While one cpu is working on looking up the right socket from ehash table, another cpu is done deleting the request socket and is about to add (or is adding) the big socket from the table. It means that we could miss both of them, even though it has little chance. Let me draw a call trace map of the server side. CPU 0 CPU 1 ----- ----- tcp_v4_rcv() syn_recv_sock() inet_ehash_insert() -> sk_nulls_del_node_init_rcu(osk) __inet_lookup_established() -> __sk_nulls_add_node_rcu(sk, list) Notice that the CPU 0 is receiving the data after the final ack during 3-way shakehands and CPU 1 is still handling the final ack. Why could this be a real problem? This case is happening only when the final ack and the first data receiving by different CPUs. Then the server receiving data with ACK flag tries to search one proper established socket from ehash table, but apparently it fails as my map shows above. After that, the server fetches a listener socket and then sends a RST because it finds a ACK flag in the skb (data), which obeys RST definition in RFC 793. Besides, Eric pointed out there's one more race condition where it handles tw socket hashdance. Only by adding to the tail of the list before deleting the old one can we avoid the race if the reader has already begun the bucket traversal and it would possibly miss the head. Many thanks to Eric for great help from beginning to end. Fixes: 5e0724d027f0 ("tcp/dccp: fix hashdance race for passive sessions") Suggested-by: Eric Dumazet Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/lkml/20230112065336.41034-1-kerneljasonxing@gmail.com/ Link: https://lore.kernel.org/r/20230118015941.1313-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 17 +++++++++++++++-- net/ipv4/inet_timewait_sock.c | 8 ++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c58019f0718..d64522af9c3a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -579,8 +579,20 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); - ret = sk_nulls_del_node_init_rcu(osk); - } else if (found_dup_sk) { + ret = sk_hashed(osk); + if (ret) { + /* Before deleting the node, we insert a new one to make + * sure that the look-up-sk process would not miss either + * of them and that at least one node would exist in ehash + * table all the time. Otherwise there's a tiny chance + * that lookup process could find nothing in ehash table. + */ + __sk_nulls_add_node_tail_rcu(sk, list); + sk_nulls_del_node_init_rcu(osk); + } + goto unlock; + } + if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) ret = false; @@ -589,6 +601,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ret) __sk_nulls_add_node_rcu(sk, list); +unlock: spin_unlock(lock); return ret; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 88c5069b5d20..fedd19c22b39 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -80,10 +80,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(inet_twsk_put); -static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, - struct hlist_nulls_head *list) +static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) { - hlist_nulls_add_head_rcu(&tw->tw_node, list); + hlist_nulls_add_tail_rcu(&tw->tw_node, list); } static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -119,7 +119,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); - inet_twsk_add_node_rcu(tw, &ehead->chain); + inet_twsk_add_node_tail_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) From 7dbec351013db6358b9de7a032c9ff1fb57c6819 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 5 Dec 2022 16:04:34 +0800 Subject: [PATCH 29/80] w1: fix deadloop in __w1_remove_master_device() [ Upstream commit 25d5648802f12ae486076ceca5d7ddf1fef792b2 ] I got a deadloop report while doing device(ds2482) add/remove test: [ 162.241881] w1_master_driver w1_bus_master1: Waiting for w1_bus_master1 to become free: refcnt=1. [ 163.272251] w1_master_driver w1_bus_master1: Waiting for w1_bus_master1 to become free: refcnt=1. [ 164.296157] w1_master_driver w1_bus_master1: Waiting for w1_bus_master1 to become free: refcnt=1. ... __w1_remove_master_device() can't return, because the dev->refcnt is not zero. w1_add_master_device() | w1_alloc_dev() | atomic_set(&dev->refcnt, 2) | kthread_run() | |__w1_remove_master_device() | kthread_stop() // KTHREAD_SHOULD_STOP is set, | // threadfn(w1_process) won't be | // called. | kthread() | | // refcnt will never be 0, it's deadloop. | while (atomic_read(&dev->refcnt)) {...} After calling w1_add_master_device(), w1_process() is not really invoked, before w1_process() starting, if kthread_stop() is called in __w1_remove_master_device(), w1_process() will never be called, the refcnt can not be decreased, then it causes deadloop in remove function because of non-zero refcnt. We need to make sure w1_process() is really started, so move the set refcnt into w1_process() to fix this problem. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221205080434.3149205-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/w1/w1.c | 2 ++ drivers/w1/w1_int.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 890c038c25f8..c1d0e723fabc 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -1140,6 +1140,8 @@ int w1_process(void *data) /* remainder if it woke up early */ unsigned long jremain = 0; + atomic_inc(&dev->refcnt); + for (;;) { if (!jremain && dev->search_count) { diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 1c776178f598..eb851eb44300 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -60,10 +60,9 @@ static struct w1_master *w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->search_count = w1_search_count; dev->enable_pullup = w1_enable_pullup; - /* 1 for w1_process to decrement - * 1 for __w1_remove_master_device to decrement + /* For __w1_remove_master_device to decrement */ - atomic_set(&dev->refcnt, 2); + atomic_set(&dev->refcnt, 1); INIT_LIST_HEAD(&dev->slist); INIT_LIST_HEAD(&dev->async_list); From bccd6df4c177b1ad766f16565ccc298653d027d0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 5 Dec 2022 18:15:58 +0800 Subject: [PATCH 30/80] w1: fix WARNING after calling w1_process() [ Upstream commit 36225a7c72e9e3e1ce4001b6ce72849f5c9a2d3b ] I got the following WARNING message while removing driver(ds2482): ------------[ cut here ]------------ do not call blocking ops when !TASK_RUNNING; state=1 set at [<000000002d50bfb6>] w1_process+0x9e/0x1d0 [wire] WARNING: CPU: 0 PID: 262 at kernel/sched/core.c:9817 __might_sleep+0x98/0xa0 CPU: 0 PID: 262 Comm: w1_bus_master1 Tainted: G N 6.1.0-rc3+ #307 RIP: 0010:__might_sleep+0x98/0xa0 Call Trace: exit_signals+0x6c/0x550 do_exit+0x2b4/0x17e0 kthread_exit+0x52/0x60 kthread+0x16d/0x1e0 ret_from_fork+0x1f/0x30 The state of task is set to TASK_INTERRUPTIBLE in loop in w1_process(), set it to TASK_RUNNING when it breaks out of the loop to avoid the warning. Fixes: 3c52e4e62789 ("W1: w1_process, block or sleep") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221205101558.3599162-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/w1/w1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index c1d0e723fabc..cb3650efc29c 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -1169,8 +1169,10 @@ int w1_process(void *data) */ mutex_unlock(&dev->list_mutex); - if (kthread_should_stop()) + if (kthread_should_stop()) { + __set_current_state(TASK_RUNNING); break; + } /* Only sleep when the search is active. */ if (dev->search_count) { From 01687e35df44dd09cc6943306db35d9efc507907 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jun 2021 12:36:42 +0200 Subject: [PATCH 31/80] netfilter: conntrack: do not renew entry stuck in tcp SYN_SENT state [ Upstream commit e15d4cdf27cb0c1e977270270b2cea12e0955edd ] Consider: client -----> conntrack ---> Host client sends a SYN, but $Host is unreachable/silent. Client eventually gives up and the conntrack entry will time out. However, if the client is restarted with same addr/port pair, it may prevent the conntrack entry from timing out. This is noticeable when the existing conntrack entry has no NAT transformation or an outdated one and port reuse happens either on client or due to a NAT middlebox. This change prevents refresh of the timeout for SYN retransmits, so entry is going away after nf_conntrack_tcp_timeout_syn_sent seconds (default: 60). Entry will be re-created on next connection attempt, but then nat rules will be evaluated again. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 66cda5e2d6b9..955b73a9a05e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1094,6 +1094,16 @@ static int tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + + if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) { + /* do not renew timeout on SYN retransmit. + * + * Else port reuse by client or NAT middlebox can keep + * entry alive indefinitely (including nat info). + */ + return NF_ACCEPT; + } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ From cc42c752fee620e43d34f000c23037d463a3f488 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Mar 2022 19:00:56 +0100 Subject: [PATCH 32/80] block: fix and cleanup bio_check_ro commit 57e95e4670d1126c103305bcf34a9442f49f6d6a upstream. Don't use a WARN_ON when printing a potentially user triggered condition. Also don't print the partno when the block device name already includes it, and use the %pg specifier to simplify printing the block device name. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220304180105.409765-2-hch@lst.de Signed-off-by: Jens Axboe Signed-off-by: Fedor Pchelkin Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 80f3e729fdd4..4fbf915d9cb0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2179,10 +2179,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return false; - - WARN_ONCE(1, - "generic_make_request: Trying to write " - "to read-only block-device %s (partno %d)\n", + pr_warn("Trying to write to read-only block-device %s (partno %d)\n", bio_devname(bio, b), part->partno); /* Older lvm-tools actually trigger this */ return false; From db73f35365bf9928df8ee68269a0c6cc8b2b983e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2020 11:23:03 -0300 Subject: [PATCH 33/80] perf env: Do not return pointers to local variables commit ebcb9464a2ae3a547e97de476575c82ece0e93e2 upstream. It is possible to return a pointer to a local variable when looking up the architecture name for the running system and no normalization is done on that value, i.e. we may end up returning the uts.machine local variable. While this doesn't happen on most arches, as normalization takes place, lets fix this by making that a static variable and optimize it a bit by not always running uname(), only the first time. Noticed in fedora rawhide running with: [perfbuilder@a5ff49d6e6e4 ~]$ gcc --version gcc (GCC) 10.0.1 20200216 (Red Hat 10.0.1-0.8) Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/env.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 4c23779e271a..3641a96a2de4 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -163,11 +163,11 @@ static const char *normalize_arch(char *arch) const char *perf_env__arch(struct perf_env *env) { - struct utsname uts; char *arch_name; if (!env || !env->arch) { /* Assume local operation */ - if (uname(&uts) < 0) + static struct utsname uts = { .machine[0] = '\0', }; + if (uts.machine[0] == '\0' && uname(&uts) < 0) return NULL; arch_name = uts.machine; } else From 23d5769a674cb80808239081b35699d347d88ee3 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 27 Oct 2021 22:34:41 +0800 Subject: [PATCH 34/80] fs: reiserfs: remove useless new_opts in reiserfs_remount commit 81dedaf10c20959bdf5624f9783f408df26ba7a4 upstream. Since the commit c3d98ea08291 ("VFS: Don't use save/replace_mount_options if not using generic_show_options") eliminates replace_mount_options in reiserfs_remount, but does not handle the allocated new_opts, it will cause memory leak in the reiserfs_remount. Because new_opts is useless in reiserfs_mount, so we fix this bug by removing the useless new_opts in reiserfs_remount. Fixes: c3d98ea08291 ("VFS: Don't use save/replace_mount_options if not using generic_show_options") Link: https://lore.kernel.org/r/20211027143445.4156459-1-mudongliangabcd@gmail.com Signed-off-by: Dongliang Mu Signed-off-by: Jan Kara Signed-off-by: Fedor Pchelkin Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/super.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 831a542c22c6..e5be1d747c03 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1443,7 +1443,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); - char *new_opts; int err; char *qf_names[REISERFS_MAXQUOTAS]; unsigned int qfmt = 0; @@ -1451,10 +1450,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif - new_opts = kstrdup(arg, GFP_KERNEL); - if (arg && !new_opts) - return -ENOMEM; - sync_filesystem(s); reiserfs_write_lock(s); @@ -1605,7 +1600,6 @@ out_ok_unlocked: out_err_unlock: reiserfs_write_unlock(s); out_err: - kfree(new_opts); return err; } From 14b7058f0d123ab9a814a39ba7cbdc1c56cc5702 Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Thu, 26 Jan 2023 16:38:17 +0300 Subject: [PATCH 35/80] Bluetooth: hci_sync: cancel cmd_timer if hci_open failed commit 97dfaf073f5881c624856ef293be307b6166115c upstream. If a command is already sent, we take care of freeing it, but we also need to cancel the timeout as well. Signed-off-by: Archie Pusaka Reviewed-by: Abhishek Pandit-Subedi Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Fedor Pchelkin Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e87777255c47..497c8ac140d1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1519,6 +1519,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) hdev->flush(hdev); if (hdev->sent_cmd) { + cancel_delayed_work_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } From 13c3fd34b9afffa4638987a76ed9da1773a667ec Mon Sep 17 00:00:00 2001 From: "Alexey V. Vissarionov" Date: Wed, 18 Jan 2023 06:12:55 +0300 Subject: [PATCH 36/80] scsi: hpsa: Fix allocation size for scsi_host_alloc() [ Upstream commit bbbd25499100c810ceaf5193c3cfcab9f7402a33 ] The 'h' is a pointer to struct ctlr_info, so it's just 4 or 8 bytes, while the structure itself is much bigger. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: edd163687ea5 ("hpsa: add driver for HP Smart Array controllers.") Link: https://lore.kernel.org/r/20230118031255.GE15213@altlinux.org Signed-off-by: Alexey V. Vissarionov Acked-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 13931c5c0eff..25d9bdd4bc69 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5771,7 +5771,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) { struct Scsi_Host *sh; - sh = scsi_host_alloc(&hpsa_driver_template, sizeof(h)); + sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info)); if (sh == NULL) { dev_err(&h->pdev->dev, "scsi_host_alloc failed\n"); return -ENOMEM; From 008368a9b4e5fb433ff344693428cdf1370a2108 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 5 Dec 2022 11:35:57 +0100 Subject: [PATCH 37/80] module: Don't wait for GOING modules commit 0254127ab977e70798707a7a2b757c9f3c971210 upstream. During a system boot, it can happen that the kernel receives a burst of requests to insert the same module but loading it eventually fails during its init call. For instance, udev can make a request to insert a frequency module for each individual CPU when another frequency module is already loaded which causes the init function of the new module to return an error. Since commit 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for modules that have finished loading"), the kernel waits for modules in MODULE_STATE_GOING state to finish unloading before making another attempt to load the same module. This creates unnecessary work in the described scenario and delays the boot. In the worst case, it can prevent udev from loading drivers for other devices and might cause timeouts of services waiting on them and subsequently a failed boot. This patch attempts a different solution for the problem 6e6de3dee51a was trying to solve. Rather than waiting for the unloading to complete, it returns a different error code (-EBUSY) for modules in the GOING state. This should avoid the error situation that was described in 6e6de3dee51a (user space attempting to load a dependent module because the -EEXIST error code would suggest to user space that the first module had been loaded successfully), while avoiding the delay situation too. This has been tested on linux-next since December 2022 and passes all kmod selftests except test 0009 with module compression enabled but it has been confirmed that this issue has existed and has gone unnoticed since prior to this commit and can also be reproduced without module compression with a simple usleep(5000000) on tools/modprobe.c [0]. These failures are caused by hitting the kernel mod_concurrent_max and can happen either due to a self inflicted kernel module auto-loead DoS somehow or on a system with large CPU count and each CPU count incorrectly triggering many module auto-loads. Both of those issues need to be fixed in-kernel. [0] https://lore.kernel.org/all/Y9A4fiobL6IHp%2F%2FP@bombadil.infradead.org/ Fixes: 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for modules that have finished loading") Co-developed-by: Martin Wilck Signed-off-by: Martin Wilck Signed-off-by: Petr Pavlu Cc: stable@vger.kernel.org Reviewed-by: Petr Mladek [mcgrof: enhance commit log with testing and kmod test result interpretation ] Signed-off-by: Luis Chamberlain Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 42a604401c4d..6ec0b2e0f01f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3478,7 +3478,8 @@ static bool finished_loading(const char *name) sched_annotate_sleep(); mutex_lock(&module_mutex); mod = find_module_all(name, strlen(name), true); - ret = !mod || mod->state == MODULE_STATE_LIVE; + ret = !mod || mod->state == MODULE_STATE_LIVE + || mod->state == MODULE_STATE_GOING; mutex_unlock(&module_mutex); return ret; @@ -3632,20 +3633,35 @@ static int add_unformed_module(struct module *mod) mod->state = MODULE_STATE_UNFORMED; -again: mutex_lock(&module_mutex); old = find_module_all(mod->name, strlen(mod->name), true); if (old != NULL) { - if (old->state != MODULE_STATE_LIVE) { + if (old->state == MODULE_STATE_COMING + || old->state == MODULE_STATE_UNFORMED) { /* Wait in case it fails to load. */ mutex_unlock(&module_mutex); err = wait_event_interruptible(module_wq, finished_loading(mod->name)); if (err) goto out_unlocked; - goto again; + + /* The module might have gone in the meantime. */ + mutex_lock(&module_mutex); + old = find_module_all(mod->name, strlen(mod->name), + true); } - err = -EEXIST; + + /* + * We are here only when the same module was being loaded. Do + * not try to load it again right now. It prevents long delays + * caused by serialized module load failures. It might happen + * when more devices of the same type trigger load of + * a particular module. + */ + if (old && old->state == MODULE_STATE_LIVE) + err = -EEXIST; + else + err = -EBUSY; goto out; } mod_update_bounds(mod); From b94d7c7654356860dd7719120c7d15ba38b6162a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 4 Jan 2023 16:14:12 -0500 Subject: [PATCH 38/80] tracing: Make sure trace_printk() can output as soon as it can be used commit 3bb06eb6e9acf7c4a3e1b5bc87aed398ff8e2253 upstream. Currently trace_printk() can be used as soon as early_trace_init() is called from start_kernel(). But if a crash happens, and "ftrace_dump_on_oops" is set on the kernel command line, all you get will be: [ 0.456075] -0 0dN.2. 347519us : Unknown type 6 [ 0.456075] -0 0dN.2. 353141us : Unknown type 6 [ 0.456075] -0 0dN.2. 358684us : Unknown type 6 This is because the trace_printk() event (type 6) hasn't been registered yet. That gets done via an early_initcall(), which may be early, but not early enough. Instead of registering the trace_printk() event (and other ftrace events, which are not trace events) via an early_initcall(), have them registered at the same time that trace_printk() can be used. This way, if there is a crash before early_initcall(), then the trace_printk()s will actually be useful. Link: https://lkml.kernel.org/r/20230104161412.019f6c55@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Fixes: e725c731e3bb1 ("tracing: Split tracing initialization into two for early initialization") Reported-by: "Joel Fernandes (Google)" Tested-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 ++ kernel/trace/trace.h | 1 + kernel/trace/trace_output.c | 3 +-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 59ad83499ba2..98abff046236 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8674,6 +8674,8 @@ void __init early_trace_init(void) static_key_enable(&tracepoint_printk_key.key); } tracer_alloc_buffers(); + + init_events(); } void __init trace_init(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 74185fb040f3..0923d1b18d1f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1530,6 +1530,7 @@ extern void trace_event_enable_cmd_record(bool enable); extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); +extern int init_events(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 6e6cc64faa38..62015d62dd6f 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1395,7 +1395,7 @@ static struct trace_event *events[] __initdata = { NULL }; -__init static int init_events(void) +__init int init_events(void) { struct trace_event *event; int i, ret; @@ -1413,4 +1413,3 @@ __init static int init_events(void) return 0; } -early_initcall(init_events); From d2d1ada58e7cc100b8d7d6b082d19321ba4a700a Mon Sep 17 00:00:00 2001 From: Natalia Petrova Date: Wed, 11 Jan 2023 15:04:09 +0300 Subject: [PATCH 39/80] trace_events_hist: add check for return value of 'create_hist_field' commit 8b152e9150d07a885f95e1fd401fc81af202d9a4 upstream. Function 'create_hist_field' is called recursively at trace_events_hist.c:1954 and can return NULL-value that's why we have to check it to avoid null pointer dereference. Found by Linux Verification Center (linuxtesting.org) with SVACE. Link: https://lkml.kernel.org/r/20230111120409.4111-1-n.petrova@fintech.ru Cc: stable@vger.kernel.org Fixes: 30350d65ac56 ("tracing: Add variable support to hist triggers") Signed-off-by: Natalia Petrova Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 17b15bd97857..a56ee4ba2afb 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2314,6 +2314,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; hist_field->fn = hist_field_log2; hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); + if (!hist_field->operands[0]) + goto free; hist_field->size = hist_field->operands[0]->size; hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL); if (!hist_field->type) From e1da82d0a33c8d88f1af5e348b5be1332d60136b Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 4 Apr 2019 11:35:42 -0500 Subject: [PATCH 40/80] smbd: Make upper layer decide when to destroy the transport [ Upstream commit 050b8c374019c10e4bcc3fbc9023884f76a85d9c ] On transport recoonect, upper layer CIFS code destroys the current transport and then recoonect. This code path is not used by SMBD, in that SMBD destroys its transport on RDMA disconnect notification independent of CIFS upper layer behavior. This approach adds some costs to SMBD layer to handle transport shutdown and restart, and to deal with several racing conditions on reconnecting transport. Re-work this code path by introducing a new smbd_destroy. This function is called form upper layer to ask SMBD to destroy the transport. SMBD will no longer need to destroy the transport by itself while worrying about data transfer is in progress. The upper layer guarantees the transport is locked. change log: v2: fix build errors when CONFIG_CIFS_SMB_DIRECT is not configured Signed-off-by: Long Li Signed-off-by: Steve French Stable-dep-of: b7ab9161cf5d ("cifs: Fix oops due to uncleared server->smbd_conn in reconnect") Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 9 ++-- fs/cifs/smbdirect.c | 114 ++++++++++++++++++++++++++++++++++++-------- fs/cifs/smbdirect.h | 5 +- 3 files changed, 101 insertions(+), 27 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a7ef847f285d..37e91f27f49b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -429,7 +429,8 @@ cifs_reconnect(struct TCP_Server_Info *server) server->ssocket->state, server->ssocket->flags); sock_release(server->ssocket); server->ssocket = NULL; - } + } else if (cifs_rdma_enabled(server)) + smbd_destroy(server); server->sequence_number = 0; server->session_estab = false; kfree(server->session_key.response); @@ -799,10 +800,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) wake_up_all(&server->request_q); /* give those requests time to exit */ msleep(125); - if (cifs_rdma_enabled(server) && server->smbd_conn) { - smbd_destroy(server->smbd_conn); - server->smbd_conn = NULL; - } + if (cifs_rdma_enabled(server)) + smbd_destroy(server); if (server->ssocket) { sock_release(server->ssocket); server->ssocket = NULL; diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 784628ec4bc4..c839ff9d4965 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -320,6 +320,9 @@ static int smbd_conn_upcall( info->transport_status = SMBD_DISCONNECTED; smbd_process_disconnected(info); + wake_up(&info->disconn_wait); + wake_up_interruptible(&info->wait_reassembly_queue); + wake_up_interruptible_all(&info->wait_send_queue); break; default: @@ -1478,17 +1481,97 @@ static void idle_connection_timer(struct work_struct *work) info->keep_alive_interval*HZ); } -/* Destroy this SMBD connection, called from upper layer */ -void smbd_destroy(struct smbd_connection *info) +/* + * Destroy the transport and related RDMA and memory resources + * Need to go through all the pending counters and make sure on one is using + * the transport while it is destroyed + */ +void smbd_destroy(struct TCP_Server_Info *server) { + struct smbd_connection *info = server->smbd_conn; + struct smbd_response *response; + unsigned long flags; + + if (!info) { + log_rdma_event(INFO, "rdma session already destroyed\n"); + return; + } + log_rdma_event(INFO, "destroying rdma session\n"); + if (info->transport_status != SMBD_DISCONNECTED) { + rdma_disconnect(server->smbd_conn->id); + log_rdma_event(INFO, "wait for transport being disconnected\n"); + wait_event( + info->disconn_wait, + info->transport_status == SMBD_DISCONNECTED); + } - /* Kick off the disconnection process */ - smbd_disconnect_rdma_connection(info); + log_rdma_event(INFO, "destroying qp\n"); + ib_drain_qp(info->id->qp); + rdma_destroy_qp(info->id); - log_rdma_event(INFO, "wait for transport being destroyed\n"); - wait_event(info->wait_destroy, - info->transport_status == SMBD_DESTROYED); + log_rdma_event(INFO, "cancelling idle timer\n"); + cancel_delayed_work_sync(&info->idle_timer_work); + log_rdma_event(INFO, "cancelling send immediate work\n"); + cancel_delayed_work_sync(&info->send_immediate_work); + + log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); + wait_event(info->wait_send_pending, + atomic_read(&info->send_pending) == 0); + wait_event(info->wait_send_payload_pending, + atomic_read(&info->send_payload_pending) == 0); + + /* It's not posssible for upper layer to get to reassembly */ + log_rdma_event(INFO, "drain the reassembly queue\n"); + do { + spin_lock_irqsave(&info->reassembly_queue_lock, flags); + response = _get_first_reassembly(info); + if (response) { + list_del(&response->list); + spin_unlock_irqrestore( + &info->reassembly_queue_lock, flags); + put_receive_buffer(info, response); + } else + spin_unlock_irqrestore( + &info->reassembly_queue_lock, flags); + } while (response); + info->reassembly_data_length = 0; + + log_rdma_event(INFO, "free receive buffers\n"); + wait_event(info->wait_receive_queues, + info->count_receive_queue + info->count_empty_packet_queue + == info->receive_credit_max); + destroy_receive_buffers(info); + + /* + * For performance reasons, memory registration and deregistration + * are not locked by srv_mutex. It is possible some processes are + * blocked on transport srv_mutex while holding memory registration. + * Release the transport srv_mutex to allow them to hit the failure + * path when sending data, and then release memory registartions. + */ + log_rdma_event(INFO, "freeing mr list\n"); + wake_up_interruptible_all(&info->wait_mr); + while (atomic_read(&info->mr_used_count)) { + mutex_unlock(&server->srv_mutex); + msleep(1000); + mutex_lock(&server->srv_mutex); + } + destroy_mr_list(info); + + ib_free_cq(info->send_cq); + ib_free_cq(info->recv_cq); + ib_dealloc_pd(info->pd); + rdma_destroy_id(info->id); + + /* free mempools */ + mempool_destroy(info->request_mempool); + kmem_cache_destroy(info->request_cache); + + mempool_destroy(info->response_mempool); + kmem_cache_destroy(info->response_cache); + + info->transport_status = SMBD_DESTROYED; destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); @@ -1514,17 +1597,9 @@ int smbd_reconnect(struct TCP_Server_Info *server) */ if (server->smbd_conn->transport_status == SMBD_CONNECTED) { log_rdma_event(INFO, "disconnecting transport\n"); - smbd_disconnect_rdma_connection(server->smbd_conn); + smbd_destroy(server); } - /* wait until the transport is destroyed */ - if (!wait_event_timeout(server->smbd_conn->wait_destroy, - server->smbd_conn->transport_status == SMBD_DESTROYED, 5*HZ)) - return -EAGAIN; - - destroy_workqueue(server->smbd_conn->workqueue); - kfree(server->smbd_conn); - create_conn: log_rdma_event(INFO, "creating rdma session\n"); server->smbd_conn = smbd_get_connection( @@ -1741,12 +1816,13 @@ static struct smbd_connection *_smbd_get_connection( conn_param.retry_count = SMBD_CM_RETRY; conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; conn_param.flow_control = 0; - init_waitqueue_head(&info->wait_destroy); log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", &addr_in->sin_addr, port); init_waitqueue_head(&info->conn_wait); + init_waitqueue_head(&info->disconn_wait); + init_waitqueue_head(&info->wait_reassembly_queue); rc = rdma_connect(info->id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); @@ -1770,8 +1846,6 @@ static struct smbd_connection *_smbd_get_connection( } init_waitqueue_head(&info->wait_send_queue); - init_waitqueue_head(&info->wait_reassembly_queue); - INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work); queue_delayed_work(info->workqueue, &info->idle_timer_work, @@ -1812,7 +1886,7 @@ static struct smbd_connection *_smbd_get_connection( allocate_mr_failed: /* At this point, need to a full transport shutdown */ - smbd_destroy(info); + smbd_destroy(server); return NULL; negotiation_failed: diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index b5c240ff2191..b0ca7df41454 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -71,6 +71,7 @@ struct smbd_connection { struct completion ri_done; wait_queue_head_t conn_wait; wait_queue_head_t wait_destroy; + wait_queue_head_t disconn_wait; struct completion negotiate_completion; bool negotiate_done; @@ -288,7 +289,7 @@ struct smbd_connection *smbd_get_connection( /* Reconnect SMBDirect session */ int smbd_reconnect(struct TCP_Server_Info *server); /* Destroy SMBDirect session */ -void smbd_destroy(struct smbd_connection *info); +void smbd_destroy(struct TCP_Server_Info *server); /* Interface for carrying upper layer I/O through send/recv */ int smbd_recv(struct smbd_connection *info, struct msghdr *msg); @@ -331,7 +332,7 @@ struct smbd_connection {}; static inline void *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;} static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } -static inline void smbd_destroy(struct smbd_connection *info) {} +static inline void smbd_destroy(struct TCP_Server_Info *server) {} static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; } static inline int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst) {return -1; } #endif From a9640c0b268405f2540e8203a545e930ea88bb7d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Jan 2023 14:02:13 +0000 Subject: [PATCH 41/80] cifs: Fix oops due to uncleared server->smbd_conn in reconnect [ Upstream commit b7ab9161cf5ddc42a288edf9d1a61f3bdffe17c7 ] In smbd_destroy(), clear the server->smbd_conn pointer after freeing the smbd_connection struct that it points to so that reconnection doesn't get confused. Fixes: 8ef130f9ec27 ("CIFS: SMBD: Implement function to destroy a SMB Direct connection") Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Acked-by: Tom Talpey Signed-off-by: David Howells Cc: Long Li Cc: Pavel Shilovsky Cc: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smbdirect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index c839ff9d4965..591cd5c70432 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1576,6 +1576,7 @@ void smbd_destroy(struct TCP_Server_Info *server) destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); kfree(info); + server->smbd_conn = NULL; } /* From 7983cf22df5d86042883594ec19572b406e5f043 Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Tue, 13 Dec 2022 20:24:03 +0100 Subject: [PATCH 42/80] ARM: 9280/1: mm: fix warning on phys_addr_t to void pointer assignment commit a4e03921c1bb118e6718e0a3b0322a2c13ed172b upstream. zero_page is a void* pointer but memblock_alloc() returns phys_addr_t type so this generates a warning while using clang and with -Wint-error enabled that becomes and error. So let's cast the return of memblock_alloc() to (void *). Cc: # 4.14.x + Fixes: 340a982825f7 ("ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation") Signed-off-by: Giulio Benetti Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/nommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index e803fd16248b..46d2142ebd14 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -160,7 +160,7 @@ void __init paging_init(const struct machine_desc *mdesc) mpu_setup(); /* allocate the zero page. */ - zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + zero_page = (void *)memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!zero_page) panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, PAGE_SIZE, PAGE_SIZE); From 7115981fdba7393db893aaf9e43c1738071f7368 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 18 Jan 2023 20:38:48 +0530 Subject: [PATCH 43/80] EDAC/device: Respect any driver-supplied workqueue polling value commit cec669ff716cc83505c77b242aecf6f7baad869d upstream. The EDAC drivers may optionally pass the poll_msec value. Use that value if available, else fall back to 1000ms. [ bp: Touchups. ] Fixes: e27e3dac6517 ("drivers/edac: add edac_device class") Reported-by: Luca Weiss Signed-off-by: Manivannan Sadhasivam Signed-off-by: Borislav Petkov (AMD) Tested-by: Steev Klimaszewski # Thinkpad X13s Tested-by: Andrew Halaney # sa8540p-ride Cc: # 4.9 Link: https://lore.kernel.org/r/COZYL8MWN97H.MROQ391BGA09@otso Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_device.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 93d6e6319b3c..0ac6c49ecdbf 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -34,6 +34,9 @@ static DEFINE_MUTEX(device_ctls_mutex); static LIST_HEAD(edac_device_list); +/* Default workqueue processing interval on this instance, in msecs */ +#define DEFAULT_POLL_INTERVAL 1000 + #ifdef CONFIG_EDAC_DEBUG static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) { @@ -366,7 +369,7 @@ static void edac_device_workq_function(struct work_struct *work_req) * whole one second to save timers firing all over the period * between integral seconds */ - if (edac_dev->poll_msec == 1000) + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else edac_queue_work(&edac_dev->work, edac_dev->delay); @@ -396,7 +399,7 @@ static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, * timers firing on sub-second basis, while they are happy * to fire together on the 1 second exactly */ - if (edac_dev->poll_msec == 1000) + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else edac_queue_work(&edac_dev->work, edac_dev->delay); @@ -430,7 +433,7 @@ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, edac_dev->delay = msecs_to_jiffies(msec); /* See comment in edac_device_workq_setup() above */ - if (edac_dev->poll_msec == 1000) + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else edac_mod_work(&edac_dev->work, edac_dev->delay); @@ -472,11 +475,7 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev) /* This instance is NOW RUNNING */ edac_dev->op_state = OP_RUNNING_POLL; - /* - * enable workq processing on this instance, - * default = 1000 msec - */ - edac_device_workq_setup(edac_dev, 1000); + edac_device_workq_setup(edac_dev, edac_dev->poll_msec ?: DEFAULT_POLL_INTERVAL); } else { edac_dev->op_state = OP_RUNNING_INTERRUPT; } From ad0dfe9bcf0d78e699c7efb64c90ed062dc48bea Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 19 Jan 2023 19:55:45 +0100 Subject: [PATCH 44/80] net: fix UaF in netns ops registration error path [ Upstream commit 71ab9c3e2253619136c31c89dbb2c69305cc89b1 ] If net_assign_generic() fails, the current error path in ops_init() tries to clear the gen pointer slot. Anyway, in such error path, the gen pointer itself has not been modified yet, and the existing and accessed one is smaller than the accessed index, causing an out-of-bounds error: BUG: KASAN: slab-out-of-bounds in ops_init+0x2de/0x320 Write of size 8 at addr ffff888109124978 by task modprobe/1018 CPU: 2 PID: 1018 Comm: modprobe Not tainted 6.2.0-rc2.mptcp_ae5ac65fbed5+ #1641 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.1-2.fc37 04/01/2014 Call Trace: dump_stack_lvl+0x6a/0x9f print_address_description.constprop.0+0x86/0x2b5 print_report+0x11b/0x1fb kasan_report+0x87/0xc0 ops_init+0x2de/0x320 register_pernet_operations+0x2e4/0x750 register_pernet_subsys+0x24/0x40 tcf_register_action+0x9f/0x560 do_one_initcall+0xf9/0x570 do_init_module+0x190/0x650 load_module+0x1fa5/0x23c0 __do_sys_finit_module+0x10d/0x1b0 do_syscall_64+0x58/0x80 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f42518f778d Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d cb 56 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007fff96869688 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 00005568ef7f7c90 RCX: 00007f42518f778d RDX: 0000000000000000 RSI: 00005568ef41d796 RDI: 0000000000000003 RBP: 00005568ef41d796 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000000 R13: 00005568ef7f7d30 R14: 0000000000040000 R15: 0000000000000000 This change addresses the issue by skipping the gen pointer de-reference in the mentioned error-path. Found by code inspection and verified with explicit error injection on a kasan-enabled kernel. Fixes: d266935ac43d ("net: fix UAF issue in nfqnl_nf_hook_drop() when ops_init() failed") Signed-off-by: Paolo Abeni Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/cec4e0f3bb2c77ac03a6154a8508d3930beb5f0f.1674154348.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 56c240c98a56..a87774424829 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -132,12 +132,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net) return 0; if (ops->id && ops->size) { -cleanup: ng = rcu_dereference_protected(net->gen, lockdep_is_held(&pernet_ops_rwsem)); ng->ptr[*ops->id] = NULL; } +cleanup: kfree(data); out: From 25f42a59ee559a0f0c80fb126f5e5a4249a8abaa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 14 Jan 2023 23:49:46 +0100 Subject: [PATCH 45/80] netfilter: nft_set_rbtree: skip elements in transaction from garbage collection [ Upstream commit 5d235d6ce75c12a7fdee375eb211e4116f7ab01b ] Skip interference with an ongoing transaction, do not perform garbage collection on inactive elements. Reset annotated previous end interval if the expired element is marked as busy (control plane removed the element right before expiration). Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_rbtree.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 84d317418d18..78a0f4283787 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -375,23 +375,37 @@ static void nft_rbtree_gc(struct work_struct *work) struct nft_rbtree *priv; struct rb_node *node; struct nft_set *set; + struct net *net; + u8 genmask; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + genmask = nft_genmask_cur(net); write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is + * always visited before the start element. + */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) continue; - if (nft_set_elem_mark_busy(&rbe->ext)) + + if (nft_set_elem_mark_busy(&rbe->ext)) { + rbe_end = NULL; continue; + } if (rbe_prev) { rb_erase(&rbe_prev->node, &priv->root); From bc6199018f7b1b1fca9d6d3a746101d72859c332 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 11 Sep 2018 09:05:01 +0800 Subject: [PATCH 46/80] netlink: remove hash::nelems check in netlink_insert [ Upstream commit 0041195d55bc38df6b574cc8c36dcf2266fbee39 ] The type of hash::nelems has been changed from size_t to atom_t which in fact is int, so not need to check if BITS_PER_LONG, that is bit number of size_t, is bigger than 32 and rht_grow_above_max() will be called to check if hashtable is too big, ensure it can not bigger than 1<<31 Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Signed-off-by: David S. Miller Stable-dep-of: c1bb9484e3b0 ("netlink: annotate data races around nlk->portid") Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6ffa83319d08..966c709c3831 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -578,11 +578,6 @@ static int netlink_insert(struct sock *sk, u32 portid) if (nlk_sk(sk)->bound) goto err; - err = -ENOMEM; - if (BITS_PER_LONG > 32 && - unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) - goto err; - nlk_sk(sk)->portid = portid; sock_hold(sk); From a737d39273e636ebcbcce2b04d088fe0b1effe87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 12:59:53 +0000 Subject: [PATCH 47/80] netlink: annotate data races around nlk->portid [ Upstream commit c1bb9484e3b05166880da8574504156ccbd0549e ] syzbot reminds us netlink_getname() runs locklessly [1] This first patch annotates the race against nlk->portid. Following patches take care of the remaining races. [1] BUG: KCSAN: data-race in netlink_getname / netlink_insert write to 0xffff88814176d310 of 4 bytes by task 2315 on cpu 1: netlink_insert+0xf1/0x9a0 net/netlink/af_netlink.c:583 netlink_autobind+0xae/0x180 net/netlink/af_netlink.c:856 netlink_sendmsg+0x444/0x760 net/netlink/af_netlink.c:1895 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x38f/0x500 net/socket.c:2476 ___sys_sendmsg net/socket.c:2530 [inline] __sys_sendmsg+0x19a/0x230 net/socket.c:2559 __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffff88814176d310 of 4 bytes by task 2316 on cpu 0: netlink_getname+0xcd/0x1a0 net/netlink/af_netlink.c:1144 __sys_getsockname+0x11d/0x1b0 net/socket.c:2026 __do_sys_getsockname net/socket.c:2041 [inline] __se_sys_getsockname net/socket.c:2038 [inline] __x64_sys_getsockname+0x3e/0x50 net/socket.c:2038 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00000000 -> 0xc9a49780 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 2316 Comm: syz-executor.2 Not tainted 6.2.0-rc3-syzkaller-00030-ge8f60cd7db24-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 966c709c3831..52bf99ed7093 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -578,7 +578,9 @@ static int netlink_insert(struct sock *sk, u32 portid) if (nlk_sk(sk)->bound) goto err; - nlk_sk(sk)->portid = portid; + /* portid can be read locklessly from netlink_getname(). */ + WRITE_ONCE(nlk_sk(sk)->portid, portid); + sock_hold(sk); err = __netlink_insert(table, sk); @@ -1133,7 +1135,8 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_pid = nlk->dst_portid; nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { - nladdr->nl_pid = nlk->portid; + /* Paired with WRITE_ONCE() in netlink_insert() */ + nladdr->nl_pid = READ_ONCE(nlk->portid); netlink_lock_table(); nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; netlink_unlock_table(); From 71bd90357ee93482922ea1ec73cd72271a825fec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 12:59:54 +0000 Subject: [PATCH 48/80] netlink: annotate data races around dst_portid and dst_group [ Upstream commit 004db64d185a5f23dfb891d7701e23713b2420ee ] netlink_getname(), netlink_sendmsg() and netlink_getsockbyportid() can read nlk->dst_portid and nlk->dst_group while another thread is changing them. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 52bf99ed7093..5c630506b7cd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1091,8 +1091,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_portid = 0; - nlk->dst_group = 0; + /* dst_portid and dst_group can be read locklessly */ + WRITE_ONCE(nlk->dst_portid, 0); + WRITE_ONCE(nlk->dst_group, 0); return 0; } if (addr->sa_family != AF_NETLINK) @@ -1114,8 +1115,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (err == 0) { sk->sk_state = NETLINK_CONNECTED; - nlk->dst_portid = nladdr->nl_pid; - nlk->dst_group = ffs(nladdr->nl_groups); + /* dst_portid and dst_group can be read locklessly */ + WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); + WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); } return err; @@ -1132,8 +1134,9 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_pad = 0; if (peer) { - nladdr->nl_pid = nlk->dst_portid; - nladdr->nl_groups = netlink_group_mask(nlk->dst_group); + /* Paired with WRITE_ONCE() in netlink_connect() */ + nladdr->nl_pid = READ_ONCE(nlk->dst_portid); + nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group)); } else { /* Paired with WRITE_ONCE() in netlink_insert() */ nladdr->nl_pid = READ_ONCE(nlk->portid); @@ -1163,8 +1166,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); + /* dst_portid can be changed in netlink_connect() */ if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_portid != nlk_sk(ssk)->portid) { + READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -1876,8 +1880,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; netlink_skb_flags |= NETLINK_SKB_DST; } else { - dst_portid = nlk->dst_portid; - dst_group = nlk->dst_group; + /* Paired with WRITE_ONCE() in netlink_connect() */ + dst_portid = READ_ONCE(nlk->dst_portid); + dst_group = READ_ONCE(nlk->dst_group); } /* Paired with WRITE_ONCE() in netlink_insert() */ From 4988b4ad0f7614d68ff5882b3e89fe37daa2b25b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 12:59:55 +0000 Subject: [PATCH 49/80] netlink: annotate data races around sk_state [ Upstream commit 9b663b5cbb15b494ef132a3c937641c90646eb73 ] netlink_getsockbyportid() reads sk_state while a concurrent netlink_connect() can change its value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5c630506b7cd..6a49c0aa55bd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1090,7 +1090,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if (addr->sa_family == AF_UNSPEC) { - sk->sk_state = NETLINK_UNCONNECTED; + /* paired with READ_ONCE() in netlink_getsockbyportid() */ + WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED); /* dst_portid and dst_group can be read locklessly */ WRITE_ONCE(nlk->dst_portid, 0); WRITE_ONCE(nlk->dst_group, 0); @@ -1114,7 +1115,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, err = netlink_autobind(sock); if (err == 0) { - sk->sk_state = NETLINK_CONNECTED; + /* paired with READ_ONCE() in netlink_getsockbyportid() */ + WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED); /* dst_portid and dst_group can be read locklessly */ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); @@ -1166,8 +1168,8 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); - /* dst_portid can be changed in netlink_connect() */ - if (sock->sk_state == NETLINK_CONNECTED && + /* dst_portid and sk_state can be changed in netlink_connect() */ + if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED && READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); From ef050cf5fb70d995a0d03244e25179b7c66a924a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 13:30:40 +0000 Subject: [PATCH 50/80] ipv4: prevent potential spectre v1 gadget in ip_metrics_convert() [ Upstream commit 1d1d63b612801b3f0a39b7d4467cad0abd60e5c8 ] if (!type) continue; if (type > RTAX_MAX) return -EINVAL; ... metrics[type - 1] = val; @type being used as an array index, we need to prevent cpu speculation or risk leaking kernel memory content. Fixes: 6cf9dfd3bd62 ("net: fib: move metrics parsing to a helper") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230120133040.3623463-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/metrics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 04311f7067e2..9a6b01d85cd0 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -24,6 +25,7 @@ int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, if (type > RTAX_MAX) return -EINVAL; + type = array_index_nospec(type, RTAX_MAX + 1); if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; From 2bcbc46e0f6c5f847fc9cde8bf8d014ba029cc53 Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Tue, 24 Jan 2023 02:47:18 +0100 Subject: [PATCH 51/80] netfilter: conntrack: fix vtag checks for ABORT/SHUTDOWN_COMPLETE [ Upstream commit a9993591fa94246b16b444eea55d84c54608282a ] RFC 9260, Sec 8.5.1 states that for ABORT/SHUTDOWN_COMPLETE, the chunk MUST be accepted if the vtag of the packet matches its own tag and the T bit is not set OR if it is set to its peer's vtag and the T bit is set in chunk flags. Otherwise the packet MUST be silently dropped. Update vtag verification for ABORT/SHUTDOWN_COMPLETE based on the above description. Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Sriram Yagnaraman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto_sctp.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 8cb62805fd68..cadeb22a48f2 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -317,22 +317,29 @@ static int sctp_packet(struct nf_conn *ct, for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { - /* Sec 8.5.1 (A) */ + /* (A) vtag MUST be zero */ if (sh->vtag != 0) goto out_unlock; } else if (sch->type == SCTP_CID_ABORT) { - /* Sec 8.5.1 (B) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir]) + /* (B) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { - /* Sec 8.5.1 (C) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir] && - sch->flags & SCTP_CHUNK_FLAG_T) + /* (C) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_COOKIE_ECHO) { - /* Sec 8.5.1 (D) */ + /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; } else if (sch->type == SCTP_CID_HEARTBEAT) { From a31caf5779ace8fa98b0d454133808e082ee7a1b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 20 Jan 2023 15:19:27 -0800 Subject: [PATCH 52/80] netrom: Fix use-after-free of a listening socket. [ Upstream commit 409db27e3a2eb5e8ef7226ca33be33361b3ed1c9 ] syzbot reported a use-after-free in do_accept(), precisely nr_accept() as sk_prot_alloc() allocated the memory and sock_put() frees it. [0] The issue could happen if the heartbeat timer is fired and nr_heartbeat_expiry() calls nr_destroy_socket(), where a socket has SOCK_DESTROY or a listening socket has SOCK_DEAD. In this case, the first condition cannot be true. SOCK_DESTROY is flagged in nr_release() only when the file descriptor is close()d, but accept() is being called for the listening socket, so the second condition must be true. Usually, the AF_NETROM listener neither starts timers nor sets SOCK_DEAD. However, the condition is met if connect() fails before listen(). connect() starts the t1 timer and heartbeat timer, and t1timer calls nr_disconnect() when timeout happens. Then, SOCK_DEAD is set, and if we call listen(), the heartbeat timer calls nr_destroy_socket(). nr_connect nr_establish_data_link(sk) nr_start_t1timer(sk) nr_start_heartbeat(sk) nr_t1timer_expiry nr_disconnect(sk, ETIMEDOUT) nr_sk(sk)->state = NR_STATE_0 sk->sk_state = TCP_CLOSE sock_set_flag(sk, SOCK_DEAD) nr_listen if (sk->sk_state != TCP_LISTEN) sk->sk_state = TCP_LISTEN nr_heartbeat_expiry switch (nr->state) case NR_STATE_0 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD)) nr_destroy_socket(sk) This path seems expected, and nr_destroy_socket() is called to clean up resources. Initially, there was sock_hold() before nr_destroy_socket() so that the socket would not be freed, but the commit 517a16b1a88b ("netrom: Decrease sock refcount when sock timers expire") accidentally removed it. To fix use-after-free, let's add sock_hold(). [0]: BUG: KASAN: use-after-free in do_accept+0x483/0x510 net/socket.c:1848 Read of size 8 at addr ffff88807978d398 by task syz-executor.3/5315 CPU: 0 PID: 5315 Comm: syz-executor.3 Not tainted 6.2.0-rc3-syzkaller-00165-gd9fc1511728c #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x15e/0x461 mm/kasan/report.c:417 kasan_report+0xbf/0x1f0 mm/kasan/report.c:517 do_accept+0x483/0x510 net/socket.c:1848 __sys_accept4_file net/socket.c:1897 [inline] __sys_accept4+0x9a/0x120 net/socket.c:1927 __do_sys_accept net/socket.c:1944 [inline] __se_sys_accept net/socket.c:1941 [inline] __x64_sys_accept+0x75/0xb0 net/socket.c:1941 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fa436a8c0c9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fa437784168 EFLAGS: 00000246 ORIG_RAX: 000000000000002b RAX: ffffffffffffffda RBX: 00007fa436bac050 RCX: 00007fa436a8c0c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000005 RBP: 00007fa436ae7ae9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffebc6700df R14: 00007fa437784300 R15: 0000000000022000 Allocated by task 5294: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:371 [inline] ____kasan_kmalloc mm/kasan/common.c:330 [inline] __kasan_kmalloc+0xa3/0xb0 mm/kasan/common.c:380 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc+0x5a/0xd0 mm/slab_common.c:981 kmalloc include/linux/slab.h:584 [inline] sk_prot_alloc+0x140/0x290 net/core/sock.c:2038 sk_alloc+0x3a/0x7a0 net/core/sock.c:2091 nr_create+0xb6/0x5f0 net/netrom/af_netrom.c:433 __sock_create+0x359/0x790 net/socket.c:1515 sock_create net/socket.c:1566 [inline] __sys_socket_create net/socket.c:1603 [inline] __sys_socket_create net/socket.c:1588 [inline] __sys_socket+0x133/0x250 net/socket.c:1636 __do_sys_socket net/socket.c:1649 [inline] __se_sys_socket net/socket.c:1647 [inline] __x64_sys_socket+0x73/0xb0 net/socket.c:1647 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 14: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:518 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x13b/0x1a0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:177 [inline] __cache_free mm/slab.c:3394 [inline] __do_kmem_cache_free mm/slab.c:3580 [inline] __kmem_cache_free+0xcd/0x3b0 mm/slab.c:3587 sk_prot_free net/core/sock.c:2074 [inline] __sk_destruct+0x5df/0x750 net/core/sock.c:2166 sk_destruct net/core/sock.c:2181 [inline] __sk_free+0x175/0x460 net/core/sock.c:2192 sk_free+0x7c/0xa0 net/core/sock.c:2203 sock_put include/net/sock.h:1991 [inline] nr_heartbeat_expiry+0x1d7/0x460 net/netrom/nr_timer.c:148 call_timer_fn+0x1da/0x7c0 kernel/time/timer.c:1700 expire_timers+0x2c6/0x5c0 kernel/time/timer.c:1751 __run_timers kernel/time/timer.c:2022 [inline] __run_timers kernel/time/timer.c:1995 [inline] run_timer_softirq+0x326/0x910 kernel/time/timer.c:2035 __do_softirq+0x1fb/0xadc kernel/softirq.c:571 Fixes: 517a16b1a88b ("netrom: Decrease sock refcount when sock timers expire") Reported-by: syzbot+5fafd5cfe1fc91f6b352@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230120231927.51711-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 426d49609524..2bf99bd5be58 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -124,6 +124,7 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { + sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; From 26436553aabfd9b40e1daa537a099bf5bb13fb55 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 23 Jan 2023 14:59:33 -0300 Subject: [PATCH 53/80] sctp: fail if no bound addresses can be used for a given scope [ Upstream commit 458e279f861d3f61796894cd158b780765a1569f ] Currently, if you bind the socket to something like: servaddr.sin6_family = AF_INET6; servaddr.sin6_port = htons(0); servaddr.sin6_scope_id = 0; inet_pton(AF_INET6, "::1", &servaddr.sin6_addr); And then request a connect to: connaddr.sin6_family = AF_INET6; connaddr.sin6_port = htons(20000); connaddr.sin6_scope_id = if_nametoindex("lo"); inet_pton(AF_INET6, "fe88::1", &connaddr.sin6_addr); What the stack does is: - bind the socket - create a new asoc - to handle the connect - copy the addresses that can be used for the given scope - try to connect But the copy returns 0 addresses, and the effect is that it ends up trying to connect as if the socket wasn't bound, which is not the desired behavior. This unexpected behavior also allows KASLR leaks through SCTP diag interface. The fix here then is, if when trying to copy the addresses that can be used for the scope used in connect() it returns 0 addresses, bail out. This is what TCP does with a similar reproducer. Reported-by: Pietro Borrello Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Link: https://lore.kernel.org/r/9fcd182f1099f86c6661f3717f63712ddd1c676c.1674496737.git.marcelo.leitner@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/bind_addr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index f8a283245672..d723942e5e65 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -88,6 +88,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, } } + /* If somehow no addresses were found that can be used with this + * scope, it's an error. + */ + if (list_empty(&dest->address_list)) + error = -ENETUNREACH; + out: if (error) sctp_bind_addr_clean(dest); From 16ef54fd0512687282c33d3fd71c061e69f93b6b Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 24 Jan 2023 09:02:11 +0900 Subject: [PATCH 54/80] net: ravb: Fix possible hang if RIS2_QFF1 happen [ Upstream commit f3c07758c9007a6bfff5290d9e19d3c41930c897 ] Since this driver enables the interrupt by RIC2_QFE1, this driver should clear the interrupt flag if it happens. Otherwise, the interrupt causes to hang the system. Note that this also fix a minor coding style (a comment indentation) around the fixed code. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Sergey Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ff374d0d80a7..a1906804c139 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -738,14 +738,14 @@ static void ravb_error_interrupt(struct net_device *ndev) ravb_write(ndev, ~(EIS_QFS | EIS_RESERVED), EIS); if (eis & EIS_QFS) { ris2 = ravb_read(ndev, RIS2); - ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF | RIS2_RESERVED), + ravb_write(ndev, ~(RIS2_QFF0 | RIS2_QFF1 | RIS2_RFFF | RIS2_RESERVED), RIS2); /* Receive Descriptor Empty int */ if (ris2 & RIS2_QFF0) priv->stats[RAVB_BE].rx_over_errors++; - /* Receive Descriptor Empty int */ + /* Receive Descriptor Empty int */ if (ris2 & RIS2_QFF1) priv->stats[RAVB_NC].rx_over_errors++; From b985b684d7adbfaa0da2cb87ebe2436d6e4b1734 Mon Sep 17 00:00:00 2001 From: David Christensen Date: Tue, 24 Jan 2023 13:53:39 -0500 Subject: [PATCH 55/80] net/tg3: resolve deadlock in tg3_reset_task() during EEH [ Upstream commit 6c4ca03bd890566d873e3593b32d034bf2f5a087 ] During EEH error injection testing, a deadlock was encountered in the tg3 driver when tg3_io_error_detected() was attempting to cancel outstanding reset tasks: crash> foreach UN bt ... PID: 159 TASK: c0000000067c6000 CPU: 8 COMMAND: "eehd" ... #5 [c00000000681f990] __cancel_work_timer at c00000000019fd18 #6 [c00000000681fa30] tg3_io_error_detected at c00800000295f098 [tg3] #7 [c00000000681faf0] eeh_report_error at c00000000004e25c ... PID: 290 TASK: c000000036e5f800 CPU: 6 COMMAND: "kworker/6:1" ... #4 [c00000003721fbc0] rtnl_lock at c000000000c940d8 #5 [c00000003721fbe0] tg3_reset_task at c008000002969358 [tg3] #6 [c00000003721fc60] process_one_work at c00000000019e5c4 ... PID: 296 TASK: c000000037a65800 CPU: 21 COMMAND: "kworker/21:1" ... #4 [c000000037247bc0] rtnl_lock at c000000000c940d8 #5 [c000000037247be0] tg3_reset_task at c008000002969358 [tg3] #6 [c000000037247c60] process_one_work at c00000000019e5c4 ... PID: 655 TASK: c000000036f49000 CPU: 16 COMMAND: "kworker/16:2" ...:1 #4 [c0000000373ebbc0] rtnl_lock at c000000000c940d8 #5 [c0000000373ebbe0] tg3_reset_task at c008000002969358 [tg3] #6 [c0000000373ebc60] process_one_work at c00000000019e5c4 ... Code inspection shows that both tg3_io_error_detected() and tg3_reset_task() attempt to acquire the RTNL lock at the beginning of their code blocks. If tg3_reset_task() should happen to execute between the times when tg3_io_error_deteced() acquires the RTNL lock and tg3_reset_task_cancel() is called, a deadlock will occur. Moving tg3_reset_task_cancel() call earlier within the code block, prior to acquiring RTNL, prevents this from happening, but also exposes another deadlock issue where tg3_reset_task() may execute AFTER tg3_io_error_detected() has executed: crash> foreach UN bt PID: 159 TASK: c0000000067d2000 CPU: 9 COMMAND: "eehd" ... #4 [c000000006867a60] rtnl_lock at c000000000c940d8 #5 [c000000006867a80] tg3_io_slot_reset at c0080000026c2ea8 [tg3] #6 [c000000006867b00] eeh_report_reset at c00000000004de88 ... PID: 363 TASK: c000000037564000 CPU: 6 COMMAND: "kworker/6:1" ... #3 [c000000036c1bb70] msleep at c000000000259e6c #4 [c000000036c1bba0] napi_disable at c000000000c6b848 #5 [c000000036c1bbe0] tg3_reset_task at c0080000026d942c [tg3] #6 [c000000036c1bc60] process_one_work at c00000000019e5c4 ... This issue can be avoided by aborting tg3_reset_task() if EEH error recovery is already in progress. Fixes: db84bf43ef23 ("tg3: tg3_reset_task() needs to use rtnl_lock to synchronize") Signed-off-by: David Christensen Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20230124185339.225806-1-drc@linux.vnet.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/tg3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d1ca3d3f51a7..2cf144bbef3e 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -11189,7 +11189,7 @@ static void tg3_reset_task(struct work_struct *work) rtnl_lock(); tg3_full_lock(tp, 0); - if (!netif_running(tp->dev)) { + if (tp->pcierr_recovery || !netif_running(tp->dev)) { tg3_flag_clear(tp, RESET_TASK_PENDING); tg3_full_unlock(tp); rtnl_unlock(); @@ -18240,6 +18240,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, netdev_info(netdev, "PCI I/O error detected\n"); + /* Want to make sure that the reset task doesn't run */ + tg3_reset_task_cancel(tp); + rtnl_lock(); /* Could be second call or maybe we don't have netdev yet */ @@ -18256,9 +18259,6 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, tg3_timer_stop(tp); - /* Want to make sure that the reset task doesn't run */ - tg3_reset_task_cancel(tp); - netif_device_detach(netdev); /* Clean up software state, even if MMIO is blocked */ From c1873a0e154a4396be483a5b8d63dc579889750a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 16 Dec 2022 13:15:34 -0800 Subject: [PATCH 56/80] Revert "Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode" commit 3c44e2b6cde674797b76e76d3a903a63ce8a18bb upstream. This reverts commit ac5408991ea6b06e29129b4d4861097c4c3e0d59 because it causes loss of keyboard on HP 15-da1xxx. Fixes: ac5408991ea6 ("Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode") Reported-by: Jiri Slaby Link: https://lore.kernel.org/r/824effa5-8b9a-c28a-82bb-9b0ab24623e1@kernel.org Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1206358 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 7dc8ca5fd75f..c6d393114502 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -192,7 +192,6 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ - "SYN3286", /* HP Laptop 15-da3001TU */ NULL }; From 496975d1a2937f4baadf3d985991b13fc4fc4f27 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2023 22:57:13 +0100 Subject: [PATCH 57/80] x86/i8259: Mark legacy PIC interrupts with IRQ_LEVEL commit 5fa55950729d0762a787451dc52862c3f850f859 upstream. Baoquan reported that after triggering a crash the subsequent crash-kernel fails to boot about half of the time. It triggers a NULL pointer dereference in the periodic tick code. This happens because the legacy timer interrupt (IRQ0) is resent in software which happens in soft interrupt (tasklet) context. In this context get_irq_regs() returns NULL which leads to the NULL pointer dereference. The reason for the resend is a spurious APIC interrupt on the IRQ0 vector which is captured and leads to a resend when the legacy timer interrupt is enabled. This is wrong because the legacy PIC interrupts are level triggered and therefore should never be resent in software, but nothing ever sets the IRQ_LEVEL flag on those interrupts, so the core code does not know about their trigger type. Ensure that IRQ_LEVEL is set when the legacy PCI interrupts are set up. Fixes: a4633adcdbc1 ("[PATCH] genirq: add genirq sw IRQ-retrigger") Reported-by: Baoquan He Signed-off-by: Thomas Gleixner Tested-by: Baoquan He Link: https://lore.kernel.org/r/87mt6rjrra.ffs@tglx Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/i8259.c | 1 + arch/x86/kernel/irqinit.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index fe522691ac71..8821d0ab0a08 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq) disable_irq_nosync(irq); io_apic_irqs &= ~(1<init(0); - for (i = 0; i < nr_legacy_irqs(); i++) + for (i = 0; i < nr_legacy_irqs(); i++) { irq_set_chip_and_handler(i, chip, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } } void __init init_IRQ(void) From af5e3bc59d1e576e00b9ab73fb9d439f932b37fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 8 May 2021 11:30:22 -0700 Subject: [PATCH 58/80] drm/i915/display: fix compiler warning about array overrun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fec4d42724a1bf3dcba52307e55375fdb967b852 upstream. intel_dp_check_mst_status() uses a 14-byte array to read the DPRX Event Status Indicator data, but then passes that buffer at offset 10 off as an argument to drm_dp_channel_eq_ok(). End result: there are only 4 bytes remaining of the buffer, yet drm_dp_channel_eq_ok() wants a 6-byte buffer. gcc-11 correctly warns about this case: drivers/gpu/drm/i915/display/intel_dp.c: In function ‘intel_dp_check_mst_status’: drivers/gpu/drm/i915/display/intel_dp.c:3491:22: warning: ‘drm_dp_channel_eq_ok’ reading 6 bytes from a region of size 4 [-Wstringop-overread] 3491 | !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/display/intel_dp.c:3491:22: note: referencing argument 1 of type ‘const u8 *’ {aka ‘const unsigned char *’} In file included from drivers/gpu/drm/i915/display/intel_dp.c:38: include/drm/drm_dp_helper.h:1466:6: note: in a call to function ‘drm_dp_channel_eq_ok’ 1466 | bool drm_dp_channel_eq_ok(const u8 link_status[DP_LINK_STATUS_SIZE], | ^~~~~~~~~~~~~~~~~~~~ 6:14 elapsed This commit just extends the original array by 2 zero-initialized bytes, avoiding the warning. There may be some underlying bug in here that caused this confusion, but this is at least no worse than the existing situation that could use random data off the stack. Cc: Jani Nikula Cc: Ville Syrjälä Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Dave Airlie Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 77a2f7fc2b37..dfa6e6c2f808 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4116,7 +4116,18 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp) bool bret; if (intel_dp->is_mst) { - u8 esi[DP_DPRX_ESI_LEN] = { 0 }; + /* + * The +2 is because DP_DPRX_ESI_LEN is 14, but we then + * pass in "esi+10" to drm_dp_channel_eq_ok(), which + * takes a 6-byte array. So we actually need 16 bytes + * here. + * + * Somebody who knows what the limits actually are + * should check this, but for now this is at least + * harmless and avoids a valid compiler warning about + * using more of the array than we have allocated. + */ + u8 esi[DP_DPRX_ESI_LEN+2] = { 0 }; int ret = 0; int retry; bool handled; From 88992ac7734ea383dfe5e080f7f00e939ba02565 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Jan 2023 10:24:11 -0500 Subject: [PATCH 59/80] x86/asm: Fix an assembler warning with current binutils commit 55d235361fccef573990dfa5724ab453866e7816 upstream. Fix a warning: "found `movsd'; assuming `movsl' was meant" Signed-off-by: Mikulas Patocka Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/iomap_copy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index 33147fef3452..f1024b51cfee 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -22,6 +22,6 @@ */ ENTRY(__iowrite32_copy) movl %edx,%ecx - rep movsd + rep movsl ret ENDPROC(__iowrite32_copy) From 8397ce73e06c987c30ebe4115226e4d9f4df0f16 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 16 Dec 2019 11:40:03 +0100 Subject: [PATCH 60/80] x86/entry/64: Add instruction suffix to SYSRET commit b2b1d94cdfd4e906d3936dab2850096a4a0c2017 upstream. ignore_sysret() contains an unsuffixed SYSRET instruction. gas correctly interprets this as SYSRETL, but leaving it up to gas to guess when there is no register operand that implies a size is bad practice, and upstream gas is likely to warn about this in the future. Use SYSRETL explicitly. This does not change the assembled output. Signed-off-by: Jan Beulich Signed-off-by: Borislav Petkov Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/038a7c35-062b-a285-c6d2-653b56585844@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3f418aedef8d..55e053871138 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1759,7 +1759,7 @@ END(nmi) ENTRY(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax - sysret + sysretl END(ignore_sysret) ENTRY(rewind_stack_do_exit) From d86927fd1f84e27e6d4d13bf82fe113a1216b848 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Dec 2022 17:49:18 +0100 Subject: [PATCH 61/80] ARM: dts: imx: Fix pca9547 i2c-mux node name [ Upstream commit f78985f9f58380eec37f82c8a2c765aa7670fc29 ] "make dtbs_check": arch/arm/boot/dts/imx53-ppd.dtb: i2c-switch@70: $nodename:0: 'i2c-switch@70' does not match '^(i2c-?)?mux' From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml arch/arm/boot/dts/imx53-ppd.dtb: i2c-switch@70: Unevaluated properties are not allowed ('#address-cells', '#size-cells', 'i2c@0', 'i2c@1', 'i2c@2', 'i2c@3', 'i2c@4', 'i2c@5', 'i2c@6', 'i2c@7' were unexpected) From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml Fix this by renaming the PCA9547 node to "i2c-mux", to match the I2C bus multiplexer/switch DT bindings and the Generic Names Recommendation in the Devicetree Specification. Signed-off-by: Geert Uytterhoeven Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx53-ppd.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts index f346673d34ea..0cb5f01f02d1 100644 --- a/arch/arm/boot/dts/imx53-ppd.dts +++ b/arch/arm/boot/dts/imx53-ppd.dts @@ -462,7 +462,7 @@ scl-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; status = "okay"; - i2c-switch@70 { + i2c-mux@70 { compatible = "nxp,pca9547"; #address-cells = <1>; #size-cells = <0>; From 80ee99e52936b2c04cc37b17a14b2ae2f9d282ac Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 30 Nov 2022 17:08:00 +0800 Subject: [PATCH 62/80] dmaengine: imx-sdma: Fix a possible memory leak in sdma_transfer_init [ Upstream commit 1417f59ac0b02130ee56c0c50794b9b257be3d17 ] If the function sdma_load_context() fails, the sdma_desc will be freed, but the allocated desc->bd is forgot to be freed. We already met the sdma_load_context() failure case and the log as below: [ 450.699064] imx-sdma 30bd0000.dma-controller: Timeout waiting for CH0 ready ... In this case, the desc->bd will not be freed without this change. Signed-off-by: Hui Wang Reviewed-by: Sascha Hauer Link: https://lore.kernel.org/r/20221130090800.102035-1-hui.wang@canonical.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/imx-sdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 709ead443fc5..5794d3120bb8 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1347,10 +1347,12 @@ static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac, sdma_config_ownership(sdmac, false, true, false); if (sdma_load_context(sdmac)) - goto err_desc_out; + goto err_bd_out; return desc; +err_bd_out: + sdma_free_bd(desc); err_desc_out: kfree(desc); err_out: From 700e1252c227a2bbcbe70001ce7cd2d26d772666 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Thu, 2 Feb 2023 16:27:03 -0800 Subject: [PATCH 63/80] sysctl: add a new register_sysctl_init() interface commit 3ddd9a808cee7284931312f2f3e854c9617f44b2 upstream. Patch series "sysctl: first set of kernel/sysctl cleanups", v2. Finally had time to respin the series of the work we had started last year on cleaning up the kernel/sysct.c kitchen sink. People keeps stuffing their sysctls in that file and this creates a maintenance burden. So this effort is aimed at placing sysctls where they actually belong. I'm going to split patches up into series as there is quite a bit of work. This first set adds register_sysctl_init() for uses of registerting a sysctl on the init path, adds const where missing to a few places, generalizes common values so to be more easy to share, and starts the move of a few kernel/sysctl.c out where they belong. The majority of rework on v2 in this first patch set is 0-day fixes. Eric Biederman's feedback is later addressed in subsequent patch sets. I'll only post the first two patch sets for now. We can address the rest once the first two patch sets get completely reviewed / Acked. This patch (of 9): The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. Today though folks heavily rely on tables on kernel/sysctl.c so they can easily just extend this table with their needed sysctls. In order to help users move their sysctls out we need to provide a helper which can be used during code initialization. We special-case the initialization use of register_sysctl() since it *is* safe to fail, given all that sysctls do is provide a dynamic interface to query or modify at runtime an existing variable. So the use case of register_sysctl() on init should *not* stop if the sysctls don't end up getting registered. It would be counter productive to stop boot if a simple sysctl registration failed. Provide a helper for init then, and document the recommended init levels to use for callers of this routine. We will later use this in subsequent patches to start slimming down kernel/sysctl.c tables and moving sysctl registration to the code which actually needs these sysctls. [mcgrof@kernel.org: major commit log and documentation rephrasing also moved to fs/proc/proc_sysctl.c ] Link: https://lkml.kernel.org/r/20211123202347.818157-1-mcgrof@kernel.org Link: https://lkml.kernel.org/r/20211123202347.818157-2-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Reviewed-by: Kees Cook Cc: Iurii Zaikin Cc: "Eric W. Biederman" Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: Paul Turner Cc: Andy Shevchenko Cc: Sebastian Reichel Cc: Tetsuo Handa Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Qing Wang Cc: Benjamin LaHaise Cc: Al Viro Cc: Jan Kara Cc: Amir Goldstein Cc: Stephen Kitt Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Phillip Potter Cc: Rodrigo Vivi Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_sysctl.c | 33 +++++++++++++++++++++++++++++++++ include/linux/sysctl.h | 3 +++ 2 files changed, 36 insertions(+) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index c95f32b83a94..7c62a526506c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -1376,6 +1377,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab } EXPORT_SYMBOL(register_sysctl); +/** + * __register_sysctl_init() - register sysctl table to path + * @path: path name for sysctl base + * @table: This is the sysctl table that needs to be registered to the path + * @table_name: The name of sysctl table, only used for log printing when + * registration fails + * + * The sysctl interface is used by userspace to query or modify at runtime + * a predefined value set on a variable. These variables however have default + * values pre-set. Code which depends on these variables will always work even + * if register_sysctl() fails. If register_sysctl() fails you'd just loose the + * ability to query or modify the sysctls dynamically at run time. Chances of + * register_sysctl() failing on init are extremely low, and so for both reasons + * this function does not return any error as it is used by initialization code. + * + * Context: Can only be called after your respective sysctl base path has been + * registered. So for instance, most base directories are registered early on + * init before init levels are processed through proc_sys_init() and + * sysctl_init(). + */ +void __init __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name) +{ + struct ctl_table_header *hdr = register_sysctl(path, table); + + if (unlikely(!hdr)) { + pr_err("failed when register_sysctl %s to %s\n", table_name, path); + return; + } + kmemleak_not_leak(hdr); +} + static char *append_path(const char *path, char *pos, const char *name) { int namelen; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index b769ecfcc3bd..0a980aecc8f0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -198,6 +198,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +extern void __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name); +#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) extern struct ctl_table sysctl_mount_point[]; From dd8cccab31e6255cb49a73e5c2e019cac7fb2eac Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 2 Feb 2023 16:27:04 -0800 Subject: [PATCH 64/80] panic: unset panic_on_warn inside panic() commit 1a2383e8b84c0451fd9b1eec3b9aab16f30b597c upstream. In the current code, the following three places need to unset panic_on_warn before calling panic() to avoid recursive panics: kernel/kcsan/report.c: print_report() kernel/sched/core.c: __schedule_bug() mm/kfence/report.c: kfence_report_error() In order to avoid copy-pasting "panic_on_warn = 0" all over the places, it is better to move it inside panic() and then remove it from the other places. Link: https://lkml.kernel.org/r/1644324666-15947-4-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Baoquan He Cc: Jonathan Corbet Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- kernel/panic.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 8138a676fb7d..a078d413042f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -142,6 +142,16 @@ void panic(const char *fmt, ...) int old_cpu, this_cpu; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + if (panic_on_warn) { + /* + * This thread may hit another WARN() in the panic path. + * Resetting this prevents additional WARN() from panicking the + * system on this thread. Other threads are blocked by the + * panic_mutex in panic(). + */ + panic_on_warn = 0; + } + /* * Disable local interrupts. This will prevent panic_smp_self_stop * from deadlocking the first cpu that invokes the panic, since @@ -530,16 +540,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, if (args) vprintk(args->fmt, args->args); - if (panic_on_warn) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from panicking the - * system on this thread. Other threads are blocked by the - * panic_mutex in panic(). - */ - panic_on_warn = 0; + if (panic_on_warn) panic("panic_on_warn set ...\n"); - } print_modules(); From 7d5de91a9ae564c7b5869310192ab6cb72ea4c6b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Feb 2023 16:27:05 -0800 Subject: [PATCH 65/80] exit: Add and use make_task_dead. commit 0e25498f8cd43c1b5aa327f373dd094e9a006da7 upstream. There are two big uses of do_exit. The first is it's design use to be the guts of the exit(2) system call. The second use is to terminate a task after something catastrophic has happened like a NULL pointer in kernel code. Add a function make_task_dead that is initialy exactly the same as do_exit to cover the cases where do_exit is called to handle catastrophic failure. In time this can probably be reduced to just a light wrapper around do_task_dead. For now keep it exactly the same so that there will be no behavioral differences introducing this new concept. Replace all of the uses of do_exit that use it for catastraphic task cleanup with make_task_dead to make it clear what the code is doing. As part of this rename rewind_stack_do_exit rewind_stack_and_make_dead. Signed-off-by: "Eric W. Biederman" Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/traps.c | 6 +++--- arch/alpha/mm/fault.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/h8300/kernel/traps.c | 2 +- arch/h8300/mm/fault.c | 2 +- arch/hexagon/kernel/traps.c | 2 +- arch/ia64/kernel/mca_drv.c | 3 ++- arch/ia64/kernel/traps.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/m68k/kernel/traps.c | 2 +- arch/m68k/mm/fault.c | 2 +- arch/microblaze/kernel/exceptions.c | 4 ++-- arch/mips/kernel/traps.c | 2 +- arch/nds32/kernel/traps.c | 8 ++++---- arch/nios2/kernel/traps.c | 4 ++-- arch/openrisc/kernel/traps.c | 2 +- arch/parisc/kernel/traps.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/riscv/kernel/traps.c | 2 +- arch/riscv/mm/fault.c | 2 +- arch/s390/kernel/dumpstack.c | 2 +- arch/s390/kernel/nmi.c | 2 +- arch/sh/kernel/traps.c | 2 +- arch/sparc/kernel/traps_32.c | 4 +--- arch/sparc/kernel/traps_64.c | 4 +--- arch/x86/entry/entry_32.S | 6 +++--- arch/x86/entry/entry_64.S | 6 +++--- arch/x86/kernel/dumpstack.c | 4 ++-- arch/xtensa/kernel/traps.c | 2 +- include/linux/sched/task.h | 1 + kernel/exit.c | 9 +++++++++ tools/objtool/check.c | 3 ++- 35 files changed, 57 insertions(+), 49 deletions(-) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index bc9627698796..22f5c27b96b7 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -192,7 +192,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) local_irq_enable(); while (1); } - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } #ifndef CONFIG_MATHEMU @@ -577,7 +577,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg, printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", pc, va, opcode, reg); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); got_exception: /* Ok, we caught the exception, but we don't want it. Is there @@ -632,7 +632,7 @@ got_exception: local_irq_enable(); while (1); } - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } /* diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 188fc9256baf..6ce67b05412e 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -206,7 +206,7 @@ retry: printk(KERN_ALERT "Unable to handle kernel paging request at " "virtual address %016lx\n", address); die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16); - do_exit(SIGKILL); + make_task_dead(SIGKILL); /* We ran out of memory, or some other thing happened to us that made us unable to handle the page fault gracefully. */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d49fafd2b865..02a4aea52fb0 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -344,7 +344,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) if (panic_on_oops) panic("Fatal exception"); if (signr) - do_exit(signr); + make_task_dead(signr); } /* diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index a9ee0d9dc740..8457384139cb 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -149,7 +149,7 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, show_pte(mm, addr); die("Oops", regs, fsr); bust_spinlocks(0); - do_exit(SIGKILL); + make_task_dead(SIGKILL); } /* diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 965595fe6804..20f896b27a44 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -224,7 +224,7 @@ void die(const char *str, struct pt_regs *regs, int err) raw_spin_unlock_irqrestore(&die_lock, flags); if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } static bool show_unhandled_signals_ratelimited(void) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b046006a387f..0d2be8eb87ec 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -268,7 +268,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, show_pte(addr); die("Oops", regs, esr); bust_spinlocks(0); - do_exit(SIGKILL); + make_task_dead(SIGKILL); } static void __do_kernel_fault(unsigned long addr, unsigned int esr, diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index e47a9e0dc278..a284c126f07a 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -110,7 +110,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err) dump(fp); spin_unlock_irq(&die_lock); - do_exit(SIGSEGV); + make_dead_task(SIGSEGV); } static int kstack_depth_to_print = 24; diff --git a/arch/h8300/mm/fault.c b/arch/h8300/mm/fault.c index fabffb83930a..a8d8fc63780e 100644 --- a/arch/h8300/mm/fault.c +++ b/arch/h8300/mm/fault.c @@ -52,7 +52,7 @@ asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address, printk(" at virtual address %08lx\n", address); if (!user_mode(regs)) die("Oops", regs, error_code); - do_exit(SIGKILL); + make_dead_task(SIGKILL); return 1; } diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 91ee04842c22..ac8154992f3e 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -234,7 +234,7 @@ int die(const char *str, struct pt_regs *regs, long err) panic("Fatal exception"); oops_exit(); - do_exit(err); + make_dead_task(err); return 0; } diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 06419a95af30..07353029b003 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -176,7 +177,7 @@ mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) spin_unlock(&mca_bh_lock); /* This process is about to be killed itself */ - do_exit(SIGKILL); + make_task_dead(SIGKILL); } /** diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index c6f4932073a1..32c2877b3c0a 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -85,7 +85,7 @@ die (const char *str, struct pt_regs *regs, long err) if (panic_on_oops) panic("Fatal exception"); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); return 0; } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index a9d55ad8d67b..0b072af7e20d 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -302,7 +302,7 @@ retry: regs = NULL; bust_spinlocks(0); if (regs) - do_exit(SIGKILL); + make_task_dead(SIGKILL); return; out_of_memory: diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index b2fd000b9285..9b70a7f5e705 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1139,7 +1139,7 @@ void die_if_kernel (char *str, struct pt_regs *fp, int nr) pr_crit("%s: %08x\n", str, nr); show_registers(fp); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } asmlinkage void set_esp0(unsigned long ssp) diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 9b6163c05a75..fc3aea6dcaa7 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -48,7 +48,7 @@ int send_fault_sig(struct pt_regs *regs) pr_alert("Unable to handle kernel access"); pr_cont(" at virtual address %p\n", addr); die_if_kernel("Oops", regs, 0 /*error_code*/); - do_exit(SIGKILL); + make_task_dead(SIGKILL); } return 1; diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index eafff21fcb0e..182402db6b04 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -44,10 +44,10 @@ void die(const char *str, struct pt_regs *fp, long err) pr_warn("Oops: %s, sig: %ld\n", str, err); show_regs(fp); spin_unlock_irq(&die_lock); - /* do_exit() should take care of panic'ing from an interrupt + /* make_task_dead() should take care of panic'ing from an interrupt * context so we don't handle it here */ - do_exit(err); + make_task_dead(err); } /* for user application debugging */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 0ca4185cc5e3..01c85d37c47a 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -412,7 +412,7 @@ void __noreturn die(const char *str, struct pt_regs *regs) if (regs && kexec_should_crash(current)) crash_kexec(regs); - do_exit(sig); + make_task_dead(sig); } extern struct exception_table_entry __start___dbe_table[]; diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 1496aab48998..a811f286bc85 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -183,7 +183,7 @@ void die(const char *str, struct pt_regs *regs, int err) bust_spinlocks(0); spin_unlock_irq(&die_lock); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } EXPORT_SYMBOL(die); @@ -286,7 +286,7 @@ void unhandled_interruption(struct pt_regs *regs) pr_emerg("unhandled_interruption\n"); show_regs(regs); if (!user_mode(regs)) - do_exit(SIGKILL); + make_task_dead(SIGKILL); force_sig(SIGKILL, current); } @@ -297,7 +297,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr, addr, type); show_regs(regs); if (!user_mode(regs)) - do_exit(SIGKILL); + make_task_dead(SIGKILL); force_sig(SIGKILL, current); } @@ -324,7 +324,7 @@ void do_revinsn(struct pt_regs *regs) pr_emerg("Reserved Instruction\n"); show_regs(regs); if (!user_mode(regs)) - do_exit(SIGILL); + make_task_dead(SIGILL); force_sig(SIGILL, current); } diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 3bc3cd22b750..dc6c270a355b 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -37,10 +37,10 @@ void die(const char *str, struct pt_regs *regs, long err) show_regs(regs); spin_unlock_irq(&die_lock); /* - * do_exit() should take care of panic'ing from an interrupt + * make_task_dead() should take care of panic'ing from an interrupt * context so we don't handle it here */ - do_exit(err); + make_task_dead(err); } void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index d8981cbb852a..dfa3db1bccef 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -224,7 +224,7 @@ void die(const char *str, struct pt_regs *regs, long err) __asm__ __volatile__("l.nop 1"); do {} while (1); #endif - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } /* This is normally the 'Oops' routine */ diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index d7a66d852509..5d9cf726e4ec 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -265,7 +265,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) panic("Fatal exception"); oops_exit(); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } /* gdb uses break 4,8 */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2379c4bf3979..63c751ce130a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -251,7 +251,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + make_task_dead(signr); } NOKPROBE_SYMBOL(oops_end); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 7c65750508f2..9b736e616131 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -64,7 +64,7 @@ void die(struct pt_regs *regs, const char *str) if (panic_on_oops) panic("Fatal exception"); if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } void do_trap(struct pt_regs *regs, int signo, int code, diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 523dbfbac03d..7b5f110f8191 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -200,7 +200,7 @@ no_context: (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr); die(regs, "Oops"); - do_exit(SIGKILL); + make_task_dead(SIGKILL); /* * We ran out of memory, call the OOM killer, and return the userspace diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 5b23c4f6e50c..7b21019096f4 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -187,5 +187,5 @@ void die(struct pt_regs *regs, const char *str) if (panic_on_oops) panic("Fatal exception: panic_on_oops"); oops_exit(); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 8c867b43c8eb..471a965ae75a 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -179,7 +179,7 @@ void s390_handle_mcck(void) "malfunction (code 0x%016lx).\n", mcck.mcck_code); printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", current->comm, current->pid); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } } EXPORT_SYMBOL_GPL(s390_handle_mcck); diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 8b49cced663d..5fafbef7849b 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -57,7 +57,7 @@ void die(const char *str, struct pt_regs *regs, long err) if (panic_on_oops) panic("Fatal exception"); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } void die_if_kernel(const char *str, struct pt_regs *regs, long err) diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index bcdfc6168dd5..ec7fcfbad06c 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -86,9 +86,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs) } printk("Instruction DUMP:"); instruction_dump ((unsigned long *) regs->pc); - if(regs->psr & PSR_PS) - do_exit(SIGKILL); - do_exit(SIGSEGV); + make_task_dead((regs->psr & PSR_PS) ? SIGKILL : SIGSEGV); } void do_hw_interrupt(struct pt_regs *regs, unsigned long type) diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 86879c28910b..1a338784a37c 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2565,9 +2565,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs) } if (panic_on_oops) panic("Fatal exception"); - if (regs->tstate & TSTATE_PRIV) - do_exit(SIGKILL); - do_exit(SIGSEGV); + make_task_dead((regs->tstate & TSTATE_PRIV)? SIGKILL : SIGSEGV); } EXPORT_SYMBOL(die_if_kernel); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 78b308f2f2ea..e6c258bf9511 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1500,13 +1500,13 @@ ENTRY(async_page_fault) END(async_page_fault) #endif -ENTRY(rewind_stack_do_exit) +ENTRY(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp movl PER_CPU_VAR(cpu_current_top_of_stack), %esi leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp - call do_exit + call make_task_dead 1: jmp 1b -END(rewind_stack_do_exit) +END(rewind_stack_and_make_dead) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 55e053871138..ef693d3689aa 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1762,7 +1762,7 @@ ENTRY(ignore_sysret) sysretl END(ignore_sysret) -ENTRY(rewind_stack_do_exit) +ENTRY(rewind_stack_and_make_dead) UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp @@ -1771,5 +1771,5 @@ ENTRY(rewind_stack_do_exit) leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS - call do_exit -END(rewind_stack_do_exit) + call make_task_dead +END(rewind_stack_and_make_dead) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2b5886401e5f..2b17a5cec099 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -326,7 +326,7 @@ unsigned long oops_begin(void) } NOKPROBE_SYMBOL(oops_begin); -void __noreturn rewind_stack_do_exit(int signr); +void __noreturn rewind_stack_and_make_dead(int signr); void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { @@ -361,7 +361,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) * reuse the task stack and that existing poisons are invalid. */ kasan_unpoison_task_stack(current); - rewind_stack_do_exit(signr); + rewind_stack_and_make_dead(signr); } NOKPROBE_SYMBOL(oops_end); diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 86507fa7c2d7..2ae5c505894b 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -542,5 +542,5 @@ void die(const char * str, struct pt_regs * regs, long err) if (panic_on_oops) panic("Fatal exception"); - do_exit(err); + make_task_dead(err); } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 91401309b1aa..0ee9aab3e309 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -36,6 +36,7 @@ extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); +void __noreturn make_task_dead(int signr); extern void proc_caches_init(void); diff --git a/kernel/exit.c b/kernel/exit.c index 908e7a33e1fc..2147bbc2c7c3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -922,6 +922,15 @@ void __noreturn do_exit(long code) } EXPORT_SYMBOL_GPL(do_exit); +void __noreturn make_task_dead(int signr) +{ + /* + * Take the task off the cpu after something catastrophic has + * happened. + */ + do_exit(signr); +} + void complete_and_exit(struct completion *comp, long code) { if (comp) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c0ab27368a34..047719055710 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -159,6 +159,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "panic", "do_exit", "do_task_dead", + "make_task_dead", "__module_put_and_exit", "complete_and_exit", "kvm_spurious_fault", @@ -167,7 +168,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "fortify_panic", "usercopy_abort", "machine_real_restart", - "rewind_stack_do_exit", + "rewind_stack_and_make_dead" }; if (func->bind == STB_WEAK) From c52a9d3a713eef03fcf22f9bc177b553c220de07 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Feb 2023 16:27:06 -0800 Subject: [PATCH 66/80] objtool: Add a missing comma to avoid string concatenation commit 1fb466dff904e4a72282af336f2c355f011eec61 upstream. Recently the kbuild robot reported two new errors: >> lib/kunit/kunit-example-test.o: warning: objtool: .text.unlikely: unexpected end of section >> arch/x86/kernel/dumpstack.o: warning: objtool: oops_end() falls through to next function show_opcodes() I don't know why they did not occur in my test setup but after digging it I realized I had accidentally dropped a comma in tools/objtool/check.c when I renamed rewind_stack_do_exit to rewind_stack_and_make_dead. Add that comma back to fix objtool errors. Link: https://lkml.kernel.org/r/202112140949.Uq5sFKR1-lkp@intel.com Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Reported-by: kernel test robot Signed-off-by: "Eric W. Biederman" Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 047719055710..55f3e7fa1c5d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -168,7 +168,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "fortify_panic", "usercopy_abort", "machine_real_restart", - "rewind_stack_and_make_dead" + "rewind_stack_and_make_dead", }; if (func->bind == STB_WEAK) From 8103729bc8eccbfba1aca19a9a6de5dcaf8d7512 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 2 Feb 2023 16:27:07 -0800 Subject: [PATCH 67/80] hexagon: Fix function name in die() commit 4f0712ccec09c071e221242a2db9a6779a55a949 upstream. When building ARCH=hexagon defconfig: arch/hexagon/kernel/traps.c:217:2: error: implicit declaration of function 'make_dead_task' [-Werror,-Wimplicit-function-declaration] make_dead_task(err); ^ The function's name is make_task_dead(), change it so there is no more build error. Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Nathan Chancellor Link: https://lkml.kernel.org/r/20211227184851.2297759-2-nathan@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- arch/hexagon/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index ac8154992f3e..34a74f73f169 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -234,7 +234,7 @@ int die(const char *str, struct pt_regs *regs, long err) panic("Fatal exception"); oops_exit(); - make_dead_task(err); + make_task_dead(err); return 0; } From 3f1da3f88b0bf675a5e4f2e2f9a6054ed110c178 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 2 Feb 2023 16:27:08 -0800 Subject: [PATCH 68/80] h8300: Fix build errors from do_exit() to make_task_dead() transition commit ab4ababdf77ccc56c7301c751dff49c79709c51c upstream. When building ARCH=h8300 defconfig: arch/h8300/kernel/traps.c: In function 'die': arch/h8300/kernel/traps.c:109:2: error: implicit declaration of function 'make_dead_task' [-Werror=implicit-function-declaration] 109 | make_dead_task(SIGSEGV); | ^~~~~~~~~~~~~~ arch/h8300/mm/fault.c: In function 'do_page_fault': arch/h8300/mm/fault.c:54:2: error: implicit declaration of function 'make_dead_task' [-Werror=implicit-function-declaration] 54 | make_dead_task(SIGKILL); | ^~~~~~~~~~~~~~ The function's name is make_task_dead(), change it so there is no more build error. Additionally, include linux/sched/task.h in arch/h8300/kernel/traps.c to avoid the same error because do_exit()'s declaration is in kernel.h but make_task_dead()'s is in task.h, which is not included in traps.c. Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Nathan Chancellor Link: https://lkml.kernel.org/r/20211227184851.2297759-3-nathan@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- arch/h8300/kernel/traps.c | 3 ++- arch/h8300/mm/fault.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index a284c126f07a..090adaee4b84 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -110,7 +111,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err) dump(fp); spin_unlock_irq(&die_lock); - make_dead_task(SIGSEGV); + make_task_dead(SIGSEGV); } static int kstack_depth_to_print = 24; diff --git a/arch/h8300/mm/fault.c b/arch/h8300/mm/fault.c index a8d8fc63780e..573825c3cb70 100644 --- a/arch/h8300/mm/fault.c +++ b/arch/h8300/mm/fault.c @@ -52,7 +52,7 @@ asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address, printk(" at virtual address %08lx\n", address); if (!user_mode(regs)) die("Oops", regs, error_code); - make_dead_task(SIGKILL); + make_task_dead(SIGKILL); return 1; } From 1862c78df12e491c0775e3c76dafbdadbb16d054 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 2 Feb 2023 16:27:09 -0800 Subject: [PATCH 69/80] ia64: make IA64_MCA_RECOVERY bool instead of tristate commit dbecf9b8b8ce580f4e11afed9d61e8aa294cddd2 upstream. In linux-next, IA64_MCA_RECOVERY uses the (new) function make_task_dead(), which is not exported for use by modules. Instead of exporting it for one user, convert IA64_MCA_RECOVERY to be a bool Kconfig symbol. In a config file from "kernel test robot " for a different problem, this linker error was exposed when CONFIG_IA64_MCA_RECOVERY=m. Fixes this build error: ERROR: modpost: "make_task_dead" [arch/ia64/kernel/mca_recovery.ko] undefined! Link: https://lkml.kernel.org/r/20220124213129.29306-1-rdunlap@infradead.org Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Randy Dunlap Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: "Eric W. Biederman" Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- arch/ia64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 8b4a0c1748c0..0d56b19b7511 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -445,7 +445,7 @@ config ARCH_PROC_KCORE_TEXT depends on PROC_KCORE config IA64_MCA_RECOVERY - tristate "MCA recovery from errors other than TLB." + bool "MCA recovery from errors other than TLB." config PERFMON bool "Performance monitor support" From ebf39c590a1e8b4d536b123e604d58d285e564ba Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 2 Feb 2023 16:27:10 -0800 Subject: [PATCH 70/80] exit: Put an upper limit on how often we can oops commit d4ccd54d28d3c8598e2354acc13e28c060961dbb upstream. Many Linux systems are configured to not panic on oops; but allowing an attacker to oops the system **really** often can make even bugs that look completely unexploitable exploitable (like NULL dereferences and such) if each crash elevates a refcount by one or a lock is taken in read mode, and this causes a counter to eventually overflow. The most interesting counters for this are 32 bits wide (like open-coded refcounts that don't use refcount_t). (The ldsem reader count on 32-bit platforms is just 16 bits, but probably nobody cares about 32-bit platforms that much nowadays.) So let's panic the system if the kernel is constantly oopsing. The speed of oopsing 2^32 times probably depends on several factors, like how long the stack trace is and which unwinder you're using; an empirically important one is whether your console is showing a graphical environment or a text console that oopses will be printed to. In a quick single-threaded benchmark, it looks like oopsing in a vfork() child with a very short stack trace only takes ~510 microseconds per run when a graphical console is active; but switching to a text console that oopses are printed to slows it down around 87x, to ~45 milliseconds per run. (Adding more threads makes this faster, but the actual oops printing happens under &die_lock on x86, so you can maybe speed this up by a factor of around 2 and then any further improvement gets eaten up by lock contention.) It looks like it would take around 8-12 days to overflow a 32-bit counter with repeated oopsing on a multi-core X86 system running a graphical environment; both me (in an X86 VM) and Seth (with a distro kernel on normal hardware in a standard configuration) got numbers in that ballpark. 12 days aren't *that* short on a desktop system, and you'd likely need much longer on a typical server system (assuming that people don't run graphical desktop environments on their servers), and this is a *very* noisy and violent approach to exploiting the kernel; and it also seems to take orders of magnitude longer on some machines, probably because stuff like EFI pstore will slow it down a ton if that's active. Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20221107201317.324457-1-jannh@google.com Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-2-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/kernel.txt | 9 +++++++ kernel/exit.c | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index db1676525ca3..fd65f4e651d5 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -51,6 +51,7 @@ show up in /proc/sys/kernel: - msgmnb - msgmni - nmi_watchdog +- oops_limit - osrelease - ostype - overflowgid @@ -555,6 +556,14 @@ scanned for a given scan. ============================================================== +oops_limit: + +Number of kernel oopses after which the kernel should panic when +``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect +as setting ``panic_on_oops=1``. + +============================================================== + osrelease, ostype & version: # cat osrelease diff --git a/kernel/exit.c b/kernel/exit.c index 2147bbc2c7c3..fa3004ff3336 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -68,6 +68,33 @@ #include #include +/* + * The default value should be high enough to not crash a system that randomly + * crashes its kernel from time to time, but low enough to at least not permit + * overflowing 32-bit refcounts or the ldsem writer count. + */ +static unsigned int oops_limit = 10000; + +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_exit_table[] = { + { + .procname = "oops_limit", + .data = &oops_limit, + .maxlen = sizeof(oops_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { } +}; + +static __init int kernel_exit_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_exit_table); + return 0; +} +late_initcall(kernel_exit_sysctls_init); +#endif + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -924,10 +951,26 @@ EXPORT_SYMBOL_GPL(do_exit); void __noreturn make_task_dead(int signr) { + static atomic_t oops_count = ATOMIC_INIT(0); + /* * Take the task off the cpu after something catastrophic has * happened. */ + + /* + * Every time the system oopses, if the oops happens while a reference + * to an object was held, the reference leaks. + * If the oops doesn't also leak memory, repeated oopsing can cause + * reference counters to wrap around (if they're not using refcount_t). + * This means that repeated oopsing can make unexploitable-looking bugs + * exploitable through repeated oopsing. + * To make sure this can't happen, place an upper bound on how often the + * kernel may oops without panic(). + */ + if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) + panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); + do_exit(signr); } From bad855964ec4805e2c9adbe85277bb0ac7e24af6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Feb 2023 16:27:11 -0800 Subject: [PATCH 71/80] exit: Expose "oops_count" to sysfs commit 9db89b41117024f80b38b15954017fb293133364 upstream. Since Oops count is now tracked and is a fairly interesting signal, add the entry /sys/kernel/oops_count to expose it to userspace. Cc: "Eric W. Biederman" Cc: Jann Horn Cc: Arnd Bergmann Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-3-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-kernel-oops_count | 6 +++++ kernel/exit.c | 22 +++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-oops_count diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count new file mode 100644 index 000000000000..156cca9dbc96 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-oops_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Oopsed since last boot. diff --git a/kernel/exit.c b/kernel/exit.c index fa3004ff3336..5cd8a3425765 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -95,6 +96,25 @@ static __init int kernel_exit_sysctls_init(void) late_initcall(kernel_exit_sysctls_init); #endif +static atomic_t oops_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&oops_count)); +} + +static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count); + +static __init int kernel_exit_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_exit_sysfs_init); +#endif + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -951,8 +971,6 @@ EXPORT_SYMBOL_GPL(do_exit); void __noreturn make_task_dead(int signr) { - static atomic_t oops_count = ATOMIC_INIT(0); - /* * Take the task off the cpu after something catastrophic has * happened. From 326716f32f3baa395cc6b0bbaeaba0cee3f3120b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Feb 2023 16:27:12 -0800 Subject: [PATCH 72/80] exit: Allow oops_limit to be disabled commit de92f65719cd672f4b48397540b9f9eff67eca40 upstream. In preparation for keeping oops_limit logic in sync with warn_limit, have oops_limit == 0 disable checking the Oops counter. Cc: Jann Horn Cc: Jonathan Corbet Cc: Andrew Morton Cc: Baolin Wang Cc: "Jason A. Donenfeld" Cc: Eric Biggers Cc: Huang Ying Cc: "Eric W. Biederman" Cc: Arnd Bergmann Cc: linux-doc@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/kernel.txt | 5 +++-- kernel/exit.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index fd65f4e651d5..e1d375df4f28 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -559,8 +559,9 @@ scanned for a given scan. oops_limit: Number of kernel oopses after which the kernel should panic when -``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect -as setting ``panic_on_oops=1``. +``panic_on_oops`` is not set. Setting this to 0 disables checking +the count. Setting this to 1 has the same effect as setting +``panic_on_oops=1``. The default value is 10000. ============================================================== diff --git a/kernel/exit.c b/kernel/exit.c index 5cd8a3425765..b2f0aaf6bee7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -986,7 +986,7 @@ void __noreturn make_task_dead(int signr) * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). */ - if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) + if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); do_exit(signr); From dcdce952196cf6f6103b3e5e0ea044498e9e0865 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Feb 2023 16:27:13 -0800 Subject: [PATCH 73/80] panic: Consolidate open-coded panic_on_warn checks commit 79cc1ba7badf9e7a12af99695a557e9ce27ee967 upstream. Several run-time checkers (KASAN, UBSAN, KFENCE, KCSAN, sched) roll their own warnings, and each check "panic_on_warn". Consolidate this into a single function so that future instrumentation can be added in a single location. Cc: Marco Elver Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Daniel Bristot de Oliveira Cc: Valentin Schneider Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Vincenzo Frascino Cc: Andrew Morton Cc: David Gow Cc: tangmeng Cc: Jann Horn Cc: Shuah Khan Cc: Petr Mladek Cc: "Paul E. McKenney" Cc: Sebastian Andrzej Siewior Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Reviewed-by: Marco Elver Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20221117234328.594699-4-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel.h | 1 + kernel/panic.c | 9 +++++++-- kernel/sched/core.c | 3 +-- mm/kasan/report.c | 3 +-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 50733abbe548..a28ec4c2f3f5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -327,6 +327,7 @@ extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) __noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); +void check_panic_on_warn(const char *origin); extern void oops_enter(void); extern void oops_exit(void); void print_oops_end_marker(void); diff --git a/kernel/panic.c b/kernel/panic.c index a078d413042f..08b8adc55b2b 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -125,6 +125,12 @@ void nmi_panic(struct pt_regs *regs, const char *msg) } EXPORT_SYMBOL(nmi_panic); +void check_panic_on_warn(const char *origin) +{ + if (panic_on_warn) + panic("%s: panic_on_warn set ...\n", origin); +} + /** * panic - halt the system * @fmt: The text string to print @@ -540,8 +546,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, if (args) vprintk(args->fmt, args->args); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("kernel"); print_modules(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a03464249771..46227cc48124 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3316,8 +3316,7 @@ static noinline void __schedule_bug(struct task_struct *prev) print_ip_sym(preempt_disable_ip); pr_cont("\n"); } - if (panic_on_warn) - panic("scheduling while atomic\n"); + check_panic_on_warn("scheduling while atomic"); dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 5c169aa688fd..3ae996824a04 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -176,8 +176,7 @@ static void kasan_end_report(unsigned long *flags) pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irqrestore(&report_lock, *flags); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("KASAN"); kasan_enable_current(); } From a40af7cd60635230aec5c29c1b2a714bf53fbdc0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Feb 2023 16:27:14 -0800 Subject: [PATCH 74/80] panic: Introduce warn_limit commit 9fc9e278a5c0b708eeffaf47d6eb0c82aa74ed78 upstream. Like oops_limit, add warn_limit for limiting the number of warnings when panic_on_warn is not set. Cc: Jonathan Corbet Cc: Andrew Morton Cc: Baolin Wang Cc: "Jason A. Donenfeld" Cc: Eric Biggers Cc: Huang Ying Cc: Petr Mladek Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: Sebastian Andrzej Siewior Cc: linux-doc@vger.kernel.org Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-5-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/kernel.txt | 10 ++++++++++ kernel/panic.c | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index e1d375df4f28..c8d3dbda3c1e 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -97,6 +97,7 @@ show up in /proc/sys/kernel: - threads-max - unprivileged_bpf_disabled - unknown_nmi_panic +- warn_limit - watchdog - watchdog_thresh - version @@ -1114,6 +1115,15 @@ example. If a system hangs up, try pressing the NMI switch. ============================================================== +warn_limit: + +Number of kernel warnings after which the kernel should panic when +``panic_on_warn`` is not set. Setting this to 0 disables checking +the warning count. Setting this to 1 has the same effect as setting +``panic_on_warn=1``. The default value is 0. + +============================================================== + watchdog: This parameter can be used to disable or enable the soft lockup detector diff --git a/kernel/panic.c b/kernel/panic.c index 08b8adc55b2b..d1a5360262f0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -42,6 +42,7 @@ static int pause_on_oops_flag; static DEFINE_SPINLOCK(pause_on_oops_lock); bool crash_kexec_post_notifiers; int panic_on_warn __read_mostly; +static unsigned int warn_limit __read_mostly; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -50,6 +51,26 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_panic_table[] = { + { + .procname = "warn_limit", + .data = &warn_limit, + .maxlen = sizeof(warn_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { } +}; + +static __init int kernel_panic_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_panic_table); + return 0; +} +late_initcall(kernel_panic_sysctls_init); +#endif + static long no_blink(int state) { return 0; @@ -127,8 +148,14 @@ EXPORT_SYMBOL(nmi_panic); void check_panic_on_warn(const char *origin) { + static atomic_t warn_count = ATOMIC_INIT(0); + if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); + + if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) + panic("%s: system warned too often (kernel.warn_limit is %d)", + origin, warn_limit); } /** From 4d00e68cfcfd91d3a8c794d47617429a96d623ed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Feb 2023 16:27:15 -0800 Subject: [PATCH 75/80] panic: Expose "warn_count" to sysfs commit 8b05aa26336113c4cea25f1c333ee8cd4fc212a6 upstream. Since Warn count is now tracked and is a fairly interesting signal, add the entry /sys/kernel/warn_count to expose it to userspace. Cc: Petr Mladek Cc: Andrew Morton Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Sebastian Andrzej Siewior Cc: Tiezhu Yang Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-6-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-kernel-warn_count | 6 +++++ kernel/panic.c | 22 +++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-warn_count diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count new file mode 100644 index 000000000000..08f083d2fd51 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-warn_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Warned since last boot. diff --git a/kernel/panic.c b/kernel/panic.c index d1a5360262f0..dbb6e27d33e1 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #define PANIC_TIMER_STEP 100 @@ -71,6 +72,25 @@ static __init int kernel_panic_sysctls_init(void) late_initcall(kernel_panic_sysctls_init); #endif +static atomic_t warn_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&warn_count)); +} + +static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count); + +static __init int kernel_panic_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_panic_sysfs_init); +#endif + static long no_blink(int state) { return 0; @@ -148,8 +168,6 @@ EXPORT_SYMBOL(nmi_panic); void check_panic_on_warn(const char *origin) { - static atomic_t warn_count = ATOMIC_INIT(0); - if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); From 6c960e335bb83a7a19a906a1066d9f5da1b17870 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Feb 2023 16:27:16 -0800 Subject: [PATCH 76/80] docs: Fix path paste-o for /sys/kernel/warn_count commit 00dd027f721e0458418f7750d8a5a664ed3e5994 upstream. Running "make htmldocs" shows that "/sys/kernel/oops_count" was duplicated. This should have been "warn_count": Warning: /sys/kernel/oops_count is defined 2 times: ./Documentation/ABI/testing/sysfs-kernel-warn_count:0 ./Documentation/ABI/testing/sysfs-kernel-oops_count:0 Fix the typo. Reported-by: kernel test robot Link: https://lore.kernel.org/linux-doc/202212110529.A3Qav8aR-lkp@intel.com Fixes: 8b05aa263361 ("panic: Expose "warn_count" to sysfs") Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-kernel-warn_count | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count index 08f083d2fd51..90a029813717 100644 --- a/Documentation/ABI/testing/sysfs-kernel-warn_count +++ b/Documentation/ABI/testing/sysfs-kernel-warn_count @@ -1,4 +1,4 @@ -What: /sys/kernel/oops_count +What: /sys/kernel/warn_count Date: November 2022 KernelVersion: 6.2.0 Contact: Linux Kernel Hardening List From a25982f544a9c4a730d62c4fa56e79260996b928 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Feb 2023 16:27:17 -0800 Subject: [PATCH 77/80] exit: Use READ_ONCE() for all oops/warn limit reads commit 7535b832c6399b5ebfc5b53af5c51dd915ee2538 upstream. Use a temporary variable to take full advantage of READ_ONCE() behavior. Without this, the report (and even the test) might be out of sync with the initial test. Reported-by: Peter Zijlstra Link: https://lore.kernel.org/lkml/Y5x7GXeluFmZ8E0E@hirez.programming.kicks-ass.net Fixes: 9fc9e278a5c0 ("panic: Introduce warn_limit") Fixes: d4ccd54d28d3 ("exit: Put an upper limit on how often we can oops") Cc: "Eric W. Biederman" Cc: Jann Horn Cc: Arnd Bergmann Cc: Petr Mladek Cc: Andrew Morton Cc: Luis Chamberlain Cc: Marco Elver Cc: tangmeng Cc: Sebastian Andrzej Siewior Cc: Tiezhu Yang Signed-off-by: Kees Cook Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 6 ++++-- kernel/panic.c | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index b2f0aaf6bee7..02360ec3b122 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -975,6 +975,7 @@ void __noreturn make_task_dead(int signr) * Take the task off the cpu after something catastrophic has * happened. */ + unsigned int limit; /* * Every time the system oopses, if the oops happens while a reference @@ -986,8 +987,9 @@ void __noreturn make_task_dead(int signr) * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). */ - if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) - panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); + limit = READ_ONCE(oops_limit); + if (atomic_inc_return(&oops_count) >= limit && limit) + panic("Oopsed too often (kernel.oops_limit is %d)", limit); do_exit(signr); } diff --git a/kernel/panic.c b/kernel/panic.c index dbb6e27d33e1..982ecba286c0 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -168,12 +168,15 @@ EXPORT_SYMBOL(nmi_panic); void check_panic_on_warn(const char *origin) { + unsigned int limit; + if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); - if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) + limit = READ_ONCE(warn_limit); + if (atomic_inc_return(&warn_count) >= limit && limit) panic("%s: system warned too often (kernel.warn_limit is %d)", - origin, warn_limit); + origin, limit); } /** From 78297d513157a31fd629626fe4cbb85a7dcbb94a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Oct 2022 19:01:24 -0700 Subject: [PATCH 78/80] ipv6: ensure sane device mtu in tunnels commit d89d7ff01235f218dad37de84457717f699dee79 upstream. Another syzbot report [1] with no reproducer hints at a bug in ip6_gre tunnel (dev:ip6gretap0) Since ipv6 mcast code makes sure to read dev->mtu once and applies a sanity check on it (see commit b9b312a7a451 "ipv6: mcast: better catch silly mtu values"), a remaining possibility is that a layer is able to set dev->mtu to an underflowed value (high order bit set). This could happen indeed in ip6gre_tnl_link_config_route(), ip6_tnl_link_config() and ipip6_tunnel_bind_dev() Make sure to sanitize mtu value in a local variable before it is written once on dev->mtu, as lockless readers could catch wrong temporary value. [1] skbuff: skb_over_panic: text:ffff80000b7a2f38 len:40 put:40 head:ffff000149dcf200 data:ffff000149dcf2b0 tail:0xd8 end:0xc0 dev:ip6gretap0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:120 Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 10241 Comm: kworker/1:1 Not tainted 6.0.0-rc7-syzkaller-18095-gbbed346d5a96 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/30/2022 Workqueue: mld mld_ifc_work pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic+0x4c/0x50 net/core/skbuff.c:116 lr : skb_panic+0x4c/0x50 net/core/skbuff.c:116 sp : ffff800020dd3b60 x29: ffff800020dd3b70 x28: 0000000000000000 x27: ffff00010df2a800 x26: 00000000000000c0 x25: 00000000000000b0 x24: ffff000149dcf200 x23: 00000000000000c0 x22: 00000000000000d8 x21: ffff80000b7a2f38 x20: ffff00014c2f7800 x19: 0000000000000028 x18: 00000000000001a9 x17: 0000000000000000 x16: ffff80000db49158 x15: ffff000113bf1a80 x14: 0000000000000000 x13: 00000000ffffffff x12: ffff000113bf1a80 x11: ff808000081c0d5c x10: 0000000000000000 x9 : 73f125dc5c63ba00 x8 : 73f125dc5c63ba00 x7 : ffff800008161d1c x6 : 0000000000000000 x5 : 0000000000000080 x4 : 0000000000000001 x3 : 0000000000000000 x2 : ffff0001fefddcd0 x1 : 0000000100000000 x0 : 0000000000000089 Call trace: skb_panic+0x4c/0x50 net/core/skbuff.c:116 skb_over_panic net/core/skbuff.c:125 [inline] skb_put+0xd4/0xdc net/core/skbuff.c:2049 ip6_mc_hdr net/ipv6/mcast.c:1714 [inline] mld_newpack+0x14c/0x270 net/ipv6/mcast.c:1765 add_grhead net/ipv6/mcast.c:1851 [inline] add_grec+0xa20/0xae0 net/ipv6/mcast.c:1989 mld_send_cr+0x438/0x5a8 net/ipv6/mcast.c:2115 mld_ifc_work+0x38/0x290 net/ipv6/mcast.c:2653 process_one_work+0x2d8/0x504 kernel/workqueue.c:2289 worker_thread+0x340/0x610 kernel/workqueue.c:2436 kthread+0x12c/0x158 kernel/kthread.c:376 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 Code: 91011400 aa0803e1 a90027ea 94373093 (d4210000) Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20221024020124.3756833-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski [ta: Backport patch for stable kernels < 5.10.y. Fix conflict in net/ipv6/ip6_tunnel.c, mtu initialized with: mtu = rt->dst.dev->mtu - t_hlen;] Cc: # 4.14.y, 4.19.y, 5.4.y Signed-off-by: Tudor Ambarus Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 16 +++++++++------- net/ipv6/ip6_tunnel.c | 10 ++++++---- net/ipv6/sit.c | 8 +++++--- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e617a98f4df6..00601bc4fdfa 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1154,14 +1154,16 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, dev->needed_headroom = dst_len; if (set_mtu) { - dev->mtu = rt->dst.dev->mtu - t_hlen; - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; - if (dev->type == ARPHRD_ETHER) - dev->mtu -= ETH_HLEN; + int mtu = rt->dst.dev->mtu - t_hlen; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + mtu -= 8; + if (dev->type == ARPHRD_ETHER) + mtu -= ETH_HLEN; + + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } ip6_rt_put(rt); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b647a4037679..75a1ec2605fc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1435,6 +1435,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; int t_hlen; + int mtu; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); @@ -1477,12 +1478,13 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) dev->hard_header_len = rt->dst.dev->hard_header_len + t_hlen; - dev->mtu = rt->dst.dev->mtu - t_hlen; + mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; + mtu -= 8; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } ip6_rt_put(rt); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 420b9fcc10d7..df734fe64d10 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1082,10 +1082,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int mtu; - dev->mtu = tdev->mtu - t_hlen; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + mtu = tdev->mtu - t_hlen; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } From d8459a9e57910566902a51ec91263856d5d1993a Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 18 Sep 2020 16:17:47 +0300 Subject: [PATCH 79/80] usb: host: xhci-plat: add wakeup entry at sysfs commit 4bb4fc0dbfa23acab9b762949b91ffd52106fe4b upstream. With this change, there will be a wakeup entry at /sys/../power/wakeup, and the user could use this entry to choose whether enable xhci wakeup features (wake up system from suspend) or not. Tested-by: Matthias Kaehlcke Reviewed-by: Matthias Kaehlcke Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200918131752.16488-6-mathias.nyman@linux.intel.com Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index adc437ca83b8..cb3ba2adae64 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -261,7 +261,7 @@ static int xhci_plat_probe(struct platform_device *pdev) *priv = *priv_match; } - device_wakeup_enable(hcd->self.controller); + device_set_wakeup_capable(&pdev->dev, true); xhci->clk = clk; xhci->reg_clk = reg_clk; From 53b696f0584acce2e90db69272a2a11aab138370 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 6 Feb 2023 07:49:46 +0100 Subject: [PATCH 80/80] Linux 4.19.272 Link: https://lore.kernel.org/r/20230203101015.263854890@linuxfoundation.org Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20230203170324.096985239@linuxfoundation.org Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20230204143559.734584366@linuxfoundation.org Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 560507d1f7a1..e3822e492543 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 271 +SUBLEVEL = 272 EXTRAVERSION = NAME = "People's Front"