From 8185962cbe6f5b89b992409cc104f9075dffe84a Mon Sep 17 00:00:00 2001 From: Lucien Buchmann Date: Sat, 25 Jun 2022 02:17:44 +0200 Subject: [PATCH 001/299] USB: serial: ftdi_sio: add Belimo device ids commit 7c239a071d1f04b7137789810807b4108d475c72 upstream. Those two product ids are known. Signed-off-by: Lucien Buchmann Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 3 +++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 49c08f07c9690..10d120589cbd6 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1023,6 +1023,9 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, + /* Belimo Automation devices */ + { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) }, + { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) }, /* ICP DAS I-756xU devices */ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index d1a9564697a4b..4e92c165c86bf 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1568,6 +1568,12 @@ #define CHETCO_SEASMART_LITE_PID 0xA5AE /* SeaSmart Lite USB Adapter */ #define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */ +/* + * Belimo Automation + */ +#define BELIMO_ZTH_PID 0x8050 +#define BELIMO_ZIP_PID 0xC811 + /* * Unjo AB */ From fd54a40491ca5b874aa6a720d9257063929fb1ab Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Fri, 1 Jul 2022 16:08:54 +0800 Subject: [PATCH 002/299] usb: typec: add missing uevent when partner support PD commit 6fb9e1d94789e8ee5a258a23bc588693f743fd6c upstream. System like Android allow user control power role from UI, it is possible to implement application base on typec uevent to refresh UI, but found there is chance that UI show different state from typec attribute file. In typec_set_pwr_opmode(), when partner support PD, there is no uevent send to user space which cause the problem. Fix it by sending uevent notification when change power mode to PD. Fixes: bdecb33af34f ("usb: typec: API for controlling USB Type-C Multiplexers") Cc: stable@vger.kernel.org Signed-off-by: Linyu Yuan Link: https://lore.kernel.org/r/1656662934-10226-1-git-send-email-quic_linyyuan@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index ee0e520707dd7..c4724750c81a5 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1718,6 +1718,7 @@ void typec_set_pwr_opmode(struct typec_port *port, partner->usb_pd = 1; sysfs_notify(&partner_dev->kobj, NULL, "supports_usb_power_delivery"); + kobject_uevent(&partner_dev->kobj, KOBJ_CHANGE); } put_device(partner_dev); } From 1bbc49745687b0b1edf47856f7e5bc8cb37ddd4e Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 27 Jun 2022 18:41:19 -0700 Subject: [PATCH 003/299] usb: dwc3: gadget: Fix event pending check commit 7441b273388b9a59d8387a03ffbbca9d5af6348c upstream. The DWC3_EVENT_PENDING flag is used to protect against invalid call to top-half interrupt handler, which can occur when there's a delay in software detection of the interrupt line deassertion. However, the clearing of this flag was done prior to unmasking the interrupt line, creating opportunity where the top-half handler can come. This breaks the serialization and creates a race between the top-half and bottom-half handler, resulting in losing synchronization between the controller and the driver when processing events. To fix this, make sure the clearing of the DWC3_EVENT_PENDING is done at the end of the bottom-half handler. Fixes: d325a1de49d6 ("usb: dwc3: gadget: Prevent losing events in event cache") Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/8670aaf1cf52e7d1e6df2a827af2d77263b93b75.1656380429.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 0a6633e7edce2..f46fc02669a8c 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4224,7 +4224,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) } evt->count = 0; - evt->flags &= ~DWC3_EVENT_PENDING; ret = IRQ_HANDLED; /* Unmask interrupt */ @@ -4236,6 +4235,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } + /* Keep the clearing of DWC3_EVENT_PENDING at the end */ + evt->flags &= ~DWC3_EVENT_PENDING; + return ret; } From a25d361f141196ad6fd2956bc1861e72bc275879 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 12 Jul 2022 09:40:55 +0200 Subject: [PATCH 004/299] gpio: sim: fix the chip_name configfs item commit 7329b071729645e243b6207e76bca2f4951c991b upstream. The chip_name configs attribute always displays the device name of the first GPIO bank because the logic of the relevant function is simply wrong. Fix it by correctly comparing the bank's swnode against the GPIO device's children. Fixes: cb8c474e79be ("gpio: sim: new testing module") Cc: stable@vger.kernel.org Reported-by: Kent Gibson Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Kent Gibson Tested-by: Kent Gibson Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-sim.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 98109839102fb..1020c2feb2496 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -991,28 +991,22 @@ static struct configfs_attribute *gpio_sim_device_config_attrs[] = { }; struct gpio_sim_chip_name_ctx { - struct gpio_sim_device *dev; + struct fwnode_handle *swnode; char *page; }; static int gpio_sim_emit_chip_name(struct device *dev, void *data) { struct gpio_sim_chip_name_ctx *ctx = data; - struct fwnode_handle *swnode; - struct gpio_sim_bank *bank; /* This would be the sysfs device exported in /sys/class/gpio. */ if (dev->class) return 0; - swnode = dev_fwnode(dev); + if (device_match_fwnode(dev, ctx->swnode)) + return sprintf(ctx->page, "%s\n", dev_name(dev)); - list_for_each_entry(bank, &ctx->dev->bank_list, siblings) { - if (bank->swnode == swnode) - return sprintf(ctx->page, "%s\n", dev_name(dev)); - } - - return -ENODATA; + return 0; } static ssize_t gpio_sim_bank_config_chip_name_show(struct config_item *item, @@ -1020,7 +1014,7 @@ static ssize_t gpio_sim_bank_config_chip_name_show(struct config_item *item, { struct gpio_sim_bank *bank = to_gpio_sim_bank(item); struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank); - struct gpio_sim_chip_name_ctx ctx = { dev, page }; + struct gpio_sim_chip_name_ctx ctx = { bank->swnode, page }; int ret; mutex_lock(&dev->lock); From b24c8369355afb1e7a3caa6eed1209748db00d20 Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Mon, 27 Jun 2022 15:51:13 +0900 Subject: [PATCH 005/299] tty: serial: samsung_tty: set dma burst_size to 1 commit f7e35e4bf1e8dc2c8cbd5e0955dc1bd58558dae0 upstream. The src_maxburst and dst_maxburst have been changed to 1 but the settings of the UCON register aren't changed yet. They should be changed as well according to the dmaengine slave config. Fixes: aa2f80e752c7 ("serial: samsung: fix maxburst parameter for DMA transactions") Cc: stable Cc: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20220627065113.139520-1-chanho61.park@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index e1585fbae909d..817ea46af2915 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -377,8 +377,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport) /* Enable tx dma mode */ ucon = rd_regl(port, S3C2410_UCON); ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK); - ucon |= (dma_get_cache_alignment() >= 16) ? - S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1; + ucon |= S3C64XX_UCON_TXBURST_1; ucon |= S3C64XX_UCON_TXMODE_DMA; wr_regl(port, S3C2410_UCON, ucon); @@ -674,7 +673,7 @@ static void enable_rx_dma(struct s3c24xx_uart_port *ourport) S3C64XX_UCON_DMASUS_EN | S3C64XX_UCON_TIMEOUT_EN | S3C64XX_UCON_RXMODE_MASK); - ucon |= S3C64XX_UCON_RXBURST_16 | + ucon |= S3C64XX_UCON_RXBURST_1 | 0xf << S3C64XX_UCON_TIMEOUT_SHIFT | S3C64XX_UCON_EMPTYINT_EN | S3C64XX_UCON_TIMEOUT_EN | From 9f0f7faf210c9c993604b839816124a8aaadd4b2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 30 Jun 2022 09:14:39 +0200 Subject: [PATCH 006/299] x86/xen: Use clear_bss() for Xen PV guests commit 96e8fc5818686d4a1591bb6907e7fdb64ef29884 upstream. Instead of clearing the bss area in assembly code, use the clear_bss() function. This requires to pass the start_info address as parameter to xen_start_kernel() in order to avoid the xen_start_info being zeroed again. Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Reviewed-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20220630071441.28576-2-jgross@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/setup.h | 3 +++ arch/x86/kernel/head64.c | 2 +- arch/x86/xen/enlighten_pv.c | 8 ++++++-- arch/x86/xen/xen-head.S | 10 +--------- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 896e48d45828c..bccc84de7ff26 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -132,6 +132,9 @@ void *extend_brk(size_t size, size_t align); } extern void probe_roms(void); + +void clear_bss(void); + #ifdef __i386__ asmlinkage void __init i386_start_kernel(void); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 4f5ecbbaae77c..2e10a33778cf2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -421,7 +421,7 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr) /* Don't add a printk in there. printk relies on the PDA which is not initialized yet. */ -static void __init clear_bss(void) +void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5038edb79ad51..b55de4ad685ce 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1183,15 +1183,19 @@ static void __init xen_domu_set_legacy_features(void) extern void early_xen_iret_patch(void); /* First C function to be called on Xen boot */ -asmlinkage __visible void __init xen_start_kernel(void) +asmlinkage __visible void __init xen_start_kernel(struct start_info *si) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; int rc; - if (!xen_start_info) + if (!si) return; + clear_bss(); + + xen_start_info = si; + __text_gen_insn(&early_xen_iret_patch, JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, JMP32_INSN_SIZE); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 3a2cd93bf0590..13af6fe453e3f 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -48,15 +48,6 @@ SYM_CODE_START(startup_xen) ANNOTATE_NOENDBR cld - /* Clear .bss */ - xor %eax,%eax - mov $__bss_start, %rdi - mov $__bss_stop, %rcx - sub %rdi, %rcx - shr $3, %rcx - rep stosq - - mov %rsi, xen_start_info mov initial_stack(%rip), %rsp /* Set up %gs. @@ -71,6 +62,7 @@ SYM_CODE_START(startup_xen) cdq wrmsr + mov %rsi, %rdi call xen_start_kernel SYM_CODE_END(startup_xen) __FINIT From cd52154b924f2ea05069d4296045d9fd56a8da23 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Tue, 12 Jul 2022 14:00:05 +0800 Subject: [PATCH 007/299] ALSA: hda - Add fixup for Dell Latitidue E5430 commit 841bdf85c226803a78a9319af9b2caa9bf3e2eda upstream. Another Dell model, another fixup entry: Latitude E5430 needs the same fixup as other Latitude E series as workaround for noise problems. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220712060005.20176-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 45f5db25a77ed..2b438bbcc4183 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8947,6 +8947,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), + SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X), From a9ff4345d5ad35276a51f6b6dec7af8207e20599 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Mon, 11 Jul 2022 18:17:44 +0800 Subject: [PATCH 008/299] ALSA: hda/conexant: Apply quirk for another HP ProDesk 600 G3 model commit d16d69bf5a25d91c6d8f3e29711be12551bf56cd upstream. There is another HP ProDesk 600 G3 model with the PCI SSID 103c:82b4 that requires the quirk HP_MIC_NO_PRESENCE. Add the corresponding entry to the quirk table. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220711101744.25189-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 0b7d500249f6e..8c68e6e1387ef 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -944,6 +944,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), From c6cce94bdcc269f14f0800fa530cafaa7b96fe87 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Mon, 11 Jul 2022 16:15:27 +0800 Subject: [PATCH 009/299] ALSA: hda/realtek: Fix headset mic for Acer SF313-51 commit 5f3fe25e70559fa3b096ab17e13316c93ddb7020 upstream. The issue on Acer SWIFT SF313-51 is that headset microphone doesn't work. The following quirk fixed headset microphone issue. Note that the fixup of SF314-54/55 (ALC256_FIXUP_ACER_HEADSET_MIC) was not successful on my SF313-51. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220711081527.6254-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2b438bbcc4183..5e1d692142b6d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8938,6 +8938,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x129d, "Acer SWIFT SF313-51", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), From 3b65a9d237bd61a5cdfdd334ccceed2f86cdb61f Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Tue, 12 Jul 2022 17:22:22 +0800 Subject: [PATCH 010/299] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc671 commit dbe75d314748e08fc6e4576d153d8a69621ee5ca upstream. On a HP 288 Pro G6, the front mic could not be detected.In order to get it working, the pin configuration needs to be set correctly, and the ALC671_FIXUP_HP_HEADSET_MIC2 fixup needs to be applied. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220712092222.21738-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5e1d692142b6d..cd901ab4994a9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11271,6 +11271,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), From 44978dd619c27b1191f3b5f38700a8103aceeed0 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Wed, 13 Jul 2022 10:27:04 +0800 Subject: [PATCH 011/299] ALSA: hda/realtek: fix mute/micmute LEDs for HP machines commit 61d307855eb1a2ae849da445edd5389db8a58a5c upstream. The HP ProBook 440/450 G9 and EliteBook 640/650 G9 have multiple motherboard design and they are using different subsystem ID of audio codec. Add the same quirk for other MBs. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20220713022706.22892-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cd901ab4994a9..05b308e1e9ae5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9150,6 +9150,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 8c639c0221efa7548dac3f8573c805456b3faacb Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Wed, 13 Jul 2022 14:33:32 +0800 Subject: [PATCH 012/299] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc221 commit 4ba5c853d7945b3855c3dcb293f7f9f019db641e upstream. On a HP 288 Pro G2 MT (X9W02AV), the front mic could not be detected. In order to get it working, the pin configuration needs to be set correctly, and the ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE fixup needs to be applied. Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220713063332.30095-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 05b308e1e9ae5..e2eea2dae3f47 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6953,6 +6953,7 @@ enum { ALC298_FIXUP_LENOVO_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, + ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE, ALC221_FIXUP_HP_MIC_NO_PRESENCE, ALC256_FIXUP_ASUS_HEADSET_MODE, ALC256_FIXUP_ASUS_MIC, @@ -7889,6 +7890,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_NO_SHUTUP }, + [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC221_FIXUP_HP_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -9064,6 +9075,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC), From c70092121f3c748a1f8a365401aa687e990ea471 Mon Sep 17 00:00:00 2001 From: Meng Tang Date: Wed, 13 Jul 2022 17:41:33 +0800 Subject: [PATCH 013/299] ALSA: hda/realtek - Enable the headset-mic on a Xiaomi's laptop commit 9b043a8f386485c74c0f8eea2c287d5bdbdf3279 upstream. The headset on this machine is not defined, after applying the quirk ALC256_FIXUP_ASUS_HEADSET_MIC, the headset-mic works well Signed-off-by: Meng Tang Cc: Link: https://lore.kernel.org/r/20220713094133.9894-1-tangmeng@uniontech.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e2eea2dae3f47..b7f1f2fb60cbc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9425,6 +9425,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), From f0b5c819b062df8bf5f2acf4697e3871cb3722da Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 13 Jul 2022 15:53:22 +0200 Subject: [PATCH 014/299] xen/netback: avoid entering xenvif_rx_next_skb() with an empty rx queue commit 94e8100678889ab428e68acadf042de723f094b9 upstream. xenvif_rx_next_skb() is expecting the rx queue not being empty, but in case the loop in xenvif_rx_action() is doing multiple iterations, the availability of another skb in the rx queue is not being checked. This can lead to crashes: [40072.537261] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 [40072.537407] IP: xenvif_rx_skb+0x23/0x590 [xen_netback] [40072.537534] PGD 0 P4D 0 [40072.537644] Oops: 0000 [#1] SMP NOPTI [40072.537749] CPU: 0 PID: 12505 Comm: v1-c40247-q2-gu Not tainted 4.12.14-122.121-default #1 SLE12-SP5 [40072.537867] Hardware name: HP ProLiant DL580 Gen9/ProLiant DL580 Gen9, BIOS U17 11/23/2021 [40072.537999] task: ffff880433b38100 task.stack: ffffc90043d40000 [40072.538112] RIP: e030:xenvif_rx_skb+0x23/0x590 [xen_netback] [40072.538217] RSP: e02b:ffffc90043d43de0 EFLAGS: 00010246 [40072.538319] RAX: 0000000000000000 RBX: ffffc90043cd7cd0 RCX: 00000000000000f7 [40072.538430] RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffffc90043d43df8 [40072.538531] RBP: 000000000000003f R08: 000077ff80000000 R09: 0000000000000008 [40072.538644] R10: 0000000000007ff0 R11: 00000000000008f6 R12: ffffc90043ce2708 [40072.538745] R13: 0000000000000000 R14: ffffc90043d43ed0 R15: ffff88043ea748c0 [40072.538861] FS: 0000000000000000(0000) GS:ffff880484600000(0000) knlGS:0000000000000000 [40072.538988] CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 [40072.539088] CR2: 0000000000000080 CR3: 0000000407ac8000 CR4: 0000000000040660 [40072.539211] Call Trace: [40072.539319] xenvif_rx_action+0x71/0x90 [xen_netback] [40072.539429] xenvif_kthread_guest_rx+0x14a/0x29c [xen_netback] Fix that by stopping the loop in case the rx queue becomes empty. Cc: stable@vger.kernel.org Fixes: 98f6d57ced73 ("xen-netback: process guest rx packets in batches") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/20220713135322.19616-1-jgross@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a1..a0335407be423 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -495,6 +495,7 @@ void xenvif_rx_action(struct xenvif_queue *queue) queue->rx_copy.completed = &completed_skbs; while (xenvif_rx_ring_slots_available(queue) && + !skb_queue_empty(&queue->rx_queue) && work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; From f45f4f82cb48c6ce1d838cf3bace9993c90aaee6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 11 Jul 2022 18:16:25 +0200 Subject: [PATCH 015/299] fix race between exit_itimers() and /proc/pid/timers commit d5b36a4dbd06c5e8e36ca8ccc552f679069e2946 upstream. As Chris explains, the comment above exit_itimers() is not correct, we can race with proc_timers_seq_ops. Change exit_itimers() to clear signal->posix_timers with ->siglock held. Cc: Reported-by: chris@accessvector.net Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 2 +- include/linux/sched/task.h | 2 +- kernel/exit.c | 2 +- kernel/time/posix-timers.c | 19 ++++++++++++++----- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 75eb6e0ee7b2f..5a75e92b1a0a3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1297,7 +1297,7 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; #ifdef CONFIG_POSIX_TIMERS - exit_itimers(me->signal); + exit_itimers(me); flush_itimer_signals(); #endif diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4492266935dd7..d8d9ce5b3bf74 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -83,7 +83,7 @@ static inline void exit_thread(struct task_struct *tsk) extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); -extern void exit_itimers(struct signal_struct *); +extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); diff --git a/kernel/exit.c b/kernel/exit.c index f072959fcab7f..64c938ce36fe8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -766,7 +766,7 @@ void __noreturn do_exit(long code) #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); - exit_itimers(tsk->signal); + exit_itimers(tsk); #endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1cd10b102c51c..5dead89308b74 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1051,15 +1051,24 @@ static void itimer_delete(struct k_itimer *timer) } /* - * This is called by do_exit or de_thread, only when there are no more - * references to the shared signal_struct. + * This is called by do_exit or de_thread, only when nobody else can + * modify the signal->posix_timers list. Yet we need sighand->siglock + * to prevent the race with /proc/pid/timers. */ -void exit_itimers(struct signal_struct *sig) +void exit_itimers(struct task_struct *tsk) { + struct list_head timers; struct k_itimer *tmr; - while (!list_empty(&sig->posix_timers)) { - tmr = list_entry(sig->posix_timers.next, struct k_itimer, list); + if (list_empty(&tsk->signal->posix_timers)) + return; + + spin_lock_irq(&tsk->sighand->siglock); + list_replace_init(&tsk->signal->posix_timers, &timers); + spin_unlock_irq(&tsk->sighand->siglock); + + while (!list_empty(&timers)) { + tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); } } From 43c5ac008f03ba2b8c957a0b7b7936acbe481e7a Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Fri, 10 Jun 2022 10:38:12 -0700 Subject: [PATCH 016/299] mm: userfaultfd: fix UFFDIO_CONTINUE on fallocated shmem pages commit 73f37dbcfe1763ee2294c7717a1f571e27d17fd8 upstream. When fallocate() is used on a shmem file, the pages we allocate can end up with !PageUptodate. Since UFFDIO_CONTINUE tries to find the existing page the user wants to map with SGP_READ, we would fail to find such a page, since shmem_getpage_gfp returns with a "NULL" pagep for SGP_READ if it discovers !PageUptodate. As a result, UFFDIO_CONTINUE returns -EFAULT, as it would do if the page wasn't found in the page cache at all. This isn't the intended behavior. UFFDIO_CONTINUE is just trying to find if a page exists, and doesn't care whether it still needs to be cleared or not. So, instead of SGP_READ, pass in SGP_NOALLOC. This is the same, except for one critical difference: in the !PageUptodate case, SGP_NOALLOC will clear the page and then return it. With this change, UFFDIO_CONTINUE works properly (succeeds) on a shmem file which has been fallocated, but otherwise not modified. Link: https://lkml.kernel.org/r/20220610173812.1768919-1-axelrasmussen@google.com Fixes: 153132571f02 ("userfaultfd/shmem: support UFFDIO_CONTINUE for shmem") Signed-off-by: Axel Rasmussen Acked-by: Peter Xu Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/userfaultfd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index e9bb6db002aa0..128b17fe98123 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -231,7 +231,10 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, struct page *page; int ret; - ret = shmem_getpage(inode, pgoff, &page, SGP_READ); + ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC); + /* Our caller expects us to return -EFAULT if we failed to find page. */ + if (ret == -ENOENT) + ret = -EFAULT; if (ret) goto out; if (!page) { From 6ebbd46d2ef5f6ab815907e1137bdea6e1a3ec3f Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 20 Jun 2022 10:30:19 +0800 Subject: [PATCH 017/299] mm: sparsemem: fix missing higher order allocation splitting commit 39d35edee4537487e5178f258e23518272a66413 upstream. Higher order allocations for vmemmap pages from buddy allocator must be able to be treated as indepdenent small pages as they can be freed individually by the caller. There is no problem for higher order vmemmap pages allocated at boot time since each individual small page will be initialized at boot time. However, it will be an issue for memory hotplug case since those higher order vmemmap pages are allocated from buddy allocator without initializing each individual small page's refcount. The system will panic in put_page_testzero() when CONFIG_DEBUG_VM is enabled if the vmemmap page is freed. Link: https://lkml.kernel.org/r/20220620023019.94257-1-songmuchun@bytedance.com Fixes: d8d55f5616cf ("mm: sparsemem: use page table lock to protect kernel pmd operations") Signed-off-by: Muchun Song Reviewed-by: Mike Kravetz Cc: Xiongchun Duan Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/sparse-vmemmap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 8aecd6b3896c7..365e47465ca0b 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -78,6 +78,14 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) spin_lock(&init_mm.page_table_lock); if (likely(pmd_leaf(*pmd))) { + /* + * Higher order allocations from buddy allocator must be able to + * be treated as indepdenent small pages (as they can be freed + * individually). + */ + if (!PageReserved(page)) + split_page(page, get_order(PMD_SIZE)); + /* Make pte visible before pmd. See comment in pmd_install(). */ smp_wmb(); pmd_populate_kernel(&init_mm, pmd, pgtable); From 3d637c7df81ef8cbed8e80cf57c9c619ea6f9fb8 Mon Sep 17 00:00:00 2001 From: "Gowans, James" Date: Thu, 23 Jun 2022 05:24:03 +0000 Subject: [PATCH 018/299] mm: split huge PUD on wp_huge_pud fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 14c99d65941538aa33edd8dc7b1bbbb593c324a2 upstream. Currently the implementation will split the PUD when a fallback is taken inside the create_huge_pud function. This isn't where it should be done: the splitting should be done in wp_huge_pud, just like it's done for PMDs. Reason being that if a callback is taken during create, there is no PUD yet so nothing to split, whereas if a fallback is taken when encountering a write protection fault there is something to split. It looks like this was the original intention with the commit where the splitting was introduced, but somehow it got moved to the wrong place between v1 and v2 of the patch series. Rebase mistake perhaps. Link: https://lkml.kernel.org/r/6f48d622eb8bce1ae5dd75327b0b73894a2ec407.camel@amazon.com Fixes: 327e9fd48972 ("mm: Split huge pages on write-notify or COW") Signed-off-by: James Gowans Reviewed-by: Thomas Hellström Cc: Christian König Cc: Jan H. Schönherr Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 76e3af9639d93..e176ee386238a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4524,6 +4524,19 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) static vm_fault_t create_huge_pud(struct vm_fault *vmf) { +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + /* No support for anonymous transparent PUD pages yet */ + if (vma_is_anonymous(vmf->vma)) + return VM_FAULT_FALLBACK; + if (vmf->vma->vm_ops->huge_fault) + return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + return VM_FAULT_FALLBACK; +} + +static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) +{ #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ @@ -4538,19 +4551,7 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) split: /* COW or write-notify not handled on PUD level: split pud.*/ __split_huge_pud(vmf->vma, vmf->pud, vmf->address); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - return VM_FAULT_FALLBACK; -} - -static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* No support for anonymous transparent PUD pages yet */ - if (vma_is_anonymous(vmf->vma)) - return VM_FAULT_FALLBACK; - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ return VM_FAULT_FALLBACK; } From 3920c5843e4ef47e5b0f6c8d78829fe4e07421ec Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 20 Jun 2022 10:34:42 +0800 Subject: [PATCH 019/299] mm/damon: use set_huge_pte_at() to make huge pte old commit ed1523a895ffdabcab6e067af18685ed00f5ce15 upstream. The huge_ptep_set_access_flags() can not make the huge pte old according to the discussion [1], that means we will always mornitor the young state of the hugetlb though we stopped accessing the hugetlb, as a result DAMON will get inaccurate accessing statistics. So changing to use set_huge_pte_at() to make the huge pte old to fix this issue. [1] https://lore.kernel.org/all/Yqy97gXI4Nqb7dYo@arm.com/ Link: https://lkml.kernel.org/r/1655692482-28797-1-git-send-email-baolin.wang@linux.alibaba.com Fixes: 49f4203aae06 ("mm/damon: add access checking for hugetlb pages") Signed-off-by: Baolin Wang Reviewed-by: SeongJae Park Acked-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/vaddr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index b2ec0aa1ff451..64456e775e8f5 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -407,8 +407,7 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - huge_ptep_set_access_flags(vma, addr, pte, entry, - vma->vm_flags & VM_WRITE); + set_huge_pte_at(mm, addr, pte, entry); } #ifdef CONFIG_MMU_NOTIFIER From 4d453eb5e1eec89971aa5b3262857ee26cfdffd3 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Mon, 11 Jul 2022 09:47:31 +0800 Subject: [PATCH 020/299] tracing/histograms: Fix memory leak problem commit 7edc3945bdce9c39198a10d6129377a5c53559c2 upstream. This reverts commit 46bbe5c671e06f070428b9be142cc4ee5cedebac. As commit 46bbe5c671e0 ("tracing: fix double free") said, the "double free" problem reported by clang static analyzer is: > In parse_var_defs() if there is a problem allocating > var_defs.expr, the earlier var_defs.name is freed. > This free is duplicated by free_var_defs() which frees > the rest of the list. However, if there is a problem allocating N-th var_defs.expr: + in parse_var_defs(), the freed 'earlier var_defs.name' is actually the N-th var_defs.name; + then in free_var_defs(), the names from 0th to (N-1)-th are freed; IF ALLOCATING PROBLEM HAPPENED HERE!!! -+ \ | 0th 1th (N-1)-th N-th V +-------------+-------------+-----+-------------+----------- var_defs: | name | expr | name | expr | ... | name | expr | name | /// +-------------+-------------+-----+-------------+----------- These two frees don't act on same name, so there was no "double free" problem before. Conversely, after that commit, we get a "memory leak" problem because the above "N-th var_defs.name" is not freed. If enable CONFIG_DEBUG_KMEMLEAK and inject a fault at where the N-th var_defs.expr allocated, then execute on shell like: $ echo 'hist:key=call_site:val=$v1,$v2:v1=bytes_req,v2=bytes_alloc' > \ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger Then kmemleak reports: unreferenced object 0xffff8fb100ef3518 (size 8): comm "bash", pid 196, jiffies 4295681690 (age 28.538s) hex dump (first 8 bytes): 76 31 00 00 b1 8f ff ff v1...... backtrace: [<0000000038fe4895>] kstrdup+0x2d/0x60 [<00000000c99c049a>] event_hist_trigger_parse+0x206f/0x20e0 [<00000000ae70d2cc>] trigger_process_regex+0xc0/0x110 [<0000000066737a4c>] event_trigger_write+0x75/0xd0 [<000000007341e40c>] vfs_write+0xbb/0x2a0 [<0000000087fde4c2>] ksys_write+0x59/0xd0 [<00000000581e9cdf>] do_syscall_64+0x3a/0x80 [<00000000cf3b065c>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Link: https://lkml.kernel.org/r/20220711014731.69520-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: 46bbe5c671e0 ("tracing: fix double free") Reported-by: Hulk Robot Suggested-by: Steven Rostedt Reviewed-by: Tom Zanussi Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index a0e41906d9ce1..217e03e477f62 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4429,6 +4429,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { + kfree(hist_data->attrs->var_defs.name[n_vars]); + hist_data->attrs->var_defs.name[n_vars] = NULL; ret = -ENOMEM; goto free; } From 157269e332865dec1ed64f243d122b9f066f6639 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 6 Jul 2022 10:50:40 -0400 Subject: [PATCH 021/299] net: sock: tracing: Fix sock_exceed_buf_limit not to dereference stale pointer commit 820b8963adaea34a87abbecb906d1f54c0aabfb7 upstream. The trace event sock_exceed_buf_limit saves the prot->sysctl_mem pointer and then dereferences it in the TP_printk() portion. This is unsafe as the TP_printk() portion is executed at the time the buffer is read. That is, it can be seconds, minutes, days, months, even years later. If the proto is freed, then this dereference will can also lead to a kernel crash. Instead, save the sysctl_mem array into the ring buffer and have the TP_printk() reference that instead. This is the proper and safe way to read pointers in trace events. Link: https://lore.kernel.org/all/20220706052130.16368-12-kuniyu@amazon.com/ Cc: stable@vger.kernel.org Fixes: 3847ce32aea9f ("core: add tracepoints for queueing skb to rcvbuf") Signed-off-by: Steven Rostedt (Google) Acked-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/trace/events/sock.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 12c315782766a..777ee6cbe9330 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_STRUCT__entry( __array(char, name, 32) - __field(long *, sysctl_mem) + __array(long, sysctl_mem, 3) __field(long, allocated) __field(int, sysctl_rmem) __field(int, rmem_alloc) @@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_fast_assign( strncpy(__entry->name, prot->name, 32); - __entry->sysctl_mem = prot->sysctl_mem; + __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); + __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); + __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); From 681da5bff05fbbbba7bf15c0004a371cb57f2de9 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 13 Jul 2022 13:48:52 +0200 Subject: [PATCH 022/299] ip: fix dflt addr selection for connected nexthop commit 747c14307214b55dbd8250e1ab44cad8305756f1 upstream. When a nexthop is added, without a gw address, the default scope was set to 'host'. Thus, when a source address is selected, 127.0.0.1 may be chosen but rejected when the route is used. When using a route without a nexthop id, the scope can be configured in the route, thus the problem doesn't exist. To explain more deeply: when a user creates a nexthop, it cannot specify the scope. To create it, the function nh_create_ipv4() calls fib_check_nh() with scope set to 0. fib_check_nh() calls fib_check_nh_nongw() wich was setting scope to 'host'. Then, nh_create_ipv4() calls fib_info_update_nhc_saddr() with scope set to 'host'. The src addr is chosen before the route is inserted. When a 'standard' route (ie without a reference to a nexthop) is added, fib_create_info() calls fib_info_update_nhc_saddr() with the scope set by the user. iproute2 set the scope to 'link' by default. Here is a way to reproduce the problem: ip netns add foo ip -n foo link set lo up ip netns add bar ip -n bar link set lo up sleep 1 ip -n foo link add name eth0 type dummy ip -n foo link set eth0 up ip -n foo address add 192.168.0.1/24 dev eth0 ip -n foo link add name veth0 type veth peer name veth1 netns bar ip -n foo link set veth0 up ip -n bar link set veth1 up ip -n bar address add 192.168.1.1/32 dev veth1 ip -n bar route add default dev veth1 ip -n foo nexthop add id 1 dev veth0 ip -n foo route add 192.168.1.1 nhid 1 Try to get/use the route: > $ ip -n foo route get 192.168.1.1 > RTNETLINK answers: Invalid argument > $ ip netns exec foo ping -c1 192.168.1.1 > ping: connect: Invalid argument Try without nexthop group (iproute2 sets scope to 'link' by dflt): ip -n foo route del 192.168.1.1 ip -n foo route add 192.168.1.1 dev veth0 Try to get/use the route: > $ ip -n foo route get 192.168.1.1 > 192.168.1.1 dev veth0 src 192.168.0.1 uid 0 > cache > $ ip netns exec foo ping -c1 192.168.1.1 > PING 192.168.1.1 (192.168.1.1) 56(84) bytes of data. > 64 bytes from 192.168.1.1: icmp_seq=1 ttl=64 time=0.039 ms > > --- 192.168.1.1 ping statistics --- > 1 packets transmitted, 1 received, 0% packet loss, time 0ms > rtt min/avg/max/mdev = 0.039/0.039/0.039/0.000 ms CC: stable@vger.kernel.org Fixes: 597cfe4fc339 ("nexthop: Add support for IPv4 nexthops") Reported-by: Edwin Brossette Signed-off-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220713114853.29406-1-nicolas.dichtel@6wind.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ccb62038f6a4a..1c4fef385ef61 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1230,7 +1230,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); - nh->fib_nh_scope = RT_SCOPE_HOST; + nh->fib_nh_scope = RT_SCOPE_LINK; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; From 9bac88b10f0f8e442fc1b8b4a72b29d708208805 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 28 Jun 2022 08:55:45 +0100 Subject: [PATCH 023/299] ARM: 9213/1: Print message about disabled Spectre workarounds only once commit e4ced82deb5fb17222fb82e092c3f8311955b585 upstream. Print the message about disabled Spectre workarounds only once. The message is printed each time CPU goes out from idling state on NVIDIA Tegra boards, causing storm in KMSG that makes system unusable. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7-bugs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index fb9f3eb6bf483..f9730eba06328 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -108,8 +108,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method) #else static unsigned int spectre_v2_install_workaround(unsigned int method) { - pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", - smp_processor_id()); + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); return SPECTRE_VULNERABLE; } From 05c214703f286006db285078e70cec4416e5bf7b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 30 Jun 2022 16:46:54 +0100 Subject: [PATCH 024/299] ARM: 9214/1: alignment: advance IT state after emulating Thumb instruction commit e5c46fde75e43c15a29b40e5fc5641727f97ae47 upstream. After emulating a misaligned load or store issued in Thumb mode, we have to advance the IT state by hand, or it will get out of sync with the actual instruction stream, which means we'll end up applying the wrong condition code to subsequent instructions. This might corrupt the program state rather catastrophically. So borrow the it_advance() helper from the probing code, and use it on CPSR if the emulated instruction is Thumb. Cc: Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/ptrace.h | 26 ++++++++++++++++++++++++++ arch/arm/mm/alignment.c | 3 +++ arch/arm/probes/decode.h | 26 +------------------------- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 93051e2f402c8..1408a6a15d0e0 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -163,5 +163,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) +{ + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f499559d1936..f8dd0b3cc8e04 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + return 0; bad_or_fault: diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 9731735989921..facc889d05eee 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -14,6 +14,7 @@ #include #include #include +#include #include void __init arm_probes_decode_init(void); @@ -35,31 +36,6 @@ void __init find_str_pc_offset(void); #endif -/* - * Update ITSTATE after normal execution of an IT block instruction. - * - * The 8 IT state bits are split into two parts in CPSR: - * ITSTATE<1:0> are in CPSR<26:25> - * ITSTATE<7:2> are in CPSR<15:10> - */ -static inline unsigned long it_advance(unsigned long cpsr) - { - if ((cpsr & 0x06000400) == 0) { - /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ - cpsr &= ~PSR_IT_MASK; - } else { - /* We need to shift left ITSTATE<4:0> */ - const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ - unsigned long it = cpsr & mask; - it <<= 1; - it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ - it &= mask; - cpsr &= ~mask; - cpsr |= it; - } - return cpsr; -} - static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) { long cpsr = regs->ARM_cpsr; From 41ecab279a70dced9005f4d55fa0fcde8938603f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 2 Jul 2022 16:52:27 +0200 Subject: [PATCH 025/299] wifi: mac80211: fix queue selection for mesh/OCB interfaces commit 50e2ab39291947b6c6c7025cf01707c270fcde59 upstream. When using iTXQ, the code assumes that there is only one vif queue for broadcast packets, using the BE queue. Allowing non-BE queue marking violates that assumption and txq->ac == skb_queue_mapping is no longer guaranteed. This can cause issues with queue handling in the driver and also causes issues with the recent ATF change, resulting in an AQL underflow warning. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20220702145227.39356-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/wme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 62c6733e07923..d50480b317505 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -147,8 +147,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, bool qos; /* all mesh/ocb stations are required to support WME */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB) + if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_OCB)) qos = true; else if (sta) qos = sta->sta.wme; From 0e41774b564befa6d271e8d5086bf870d617a4e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 13 Jun 2022 12:19:50 -1000 Subject: [PATCH 026/299] cgroup: Use separate src/dst nodes when preloading css_sets for migration commit 07fd5b6cdf3cc30bfde8fe0f644771688be04447 upstream. Each cset (css_set) is pinned by its tasks. When we're moving tasks around across csets for a migration, we need to hold the source and destination csets to ensure that they don't go away while we're moving tasks about. This is done by linking cset->mg_preload_node on either the mgctx->preloaded_src_csets or mgctx->preloaded_dst_csets list. Using the same cset->mg_preload_node for both the src and dst lists was deemed okay as a cset can't be both the source and destination at the same time. Unfortunately, this overloading becomes problematic when multiple tasks are involved in a migration and some of them are identity noop migrations while others are actually moving across cgroups. For example, this can happen with the following sequence on cgroup1: #1> mkdir -p /sys/fs/cgroup/misc/a/b #2> echo $$ > /sys/fs/cgroup/misc/a/cgroup.procs #3> RUN_A_COMMAND_WHICH_CREATES_MULTIPLE_THREADS & #4> PID=$! #5> echo $PID > /sys/fs/cgroup/misc/a/b/tasks #6> echo $PID > /sys/fs/cgroup/misc/a/cgroup.procs the process including the group leader back into a. In this final migration, non-leader threads would be doing identity migration while the group leader is doing an actual one. After #3, let's say the whole process was in cset A, and that after #4, the leader moves to cset B. Then, during #6, the following happens: 1. cgroup_migrate_add_src() is called on B for the leader. 2. cgroup_migrate_add_src() is called on A for the other threads. 3. cgroup_migrate_prepare_dst() is called. It scans the src list. 4. It notices that B wants to migrate to A, so it tries to A to the dst list but realizes that its ->mg_preload_node is already busy. 5. and then it notices A wants to migrate to A as it's an identity migration, it culls it by list_del_init()'ing its ->mg_preload_node and putting references accordingly. 6. The rest of migration takes place with B on the src list but nothing on the dst list. This means that A isn't held while migration is in progress. If all tasks leave A before the migration finishes and the incoming task pins it, the cset will be destroyed leading to use-after-free. This is caused by overloading cset->mg_preload_node for both src and dst preload lists. We wanted to exclude the cset from the src list but ended up inadvertently excluding it from the dst list too. This patch fixes the issue by separating out cset->mg_preload_node into ->mg_src_preload_node and ->mg_dst_preload_node, so that the src and dst preloadings don't interfere with each other. Signed-off-by: Tejun Heo Reported-by: Mukesh Ojha Reported-by: shisiyuan Link: http://lkml.kernel.org/r/1654187688-27411-1-git-send-email-shisiyuan@xiaomi.com Link: https://www.spinics.net/lists/cgroups/msg33313.html Fixes: f817de98513d ("cgroup: prepare migration path for unified hierarchy") Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup-defs.h | 3 ++- kernel/cgroup/cgroup.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1bfcfb1af3524..d4427d0a0e187 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -264,7 +264,8 @@ struct css_set { * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ - struct list_head mg_preload_node; + struct list_head mg_src_preload_node; + struct list_head mg_dst_preload_node; struct list_head mg_node; /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index adb820e98f243..e6cd7e17bd259 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -765,7 +765,8 @@ struct css_set init_css_set = { .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), - .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), + .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node), + .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node), .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), /* @@ -1240,7 +1241,8 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_LIST_HEAD(&cset->threaded_csets); INIT_HLIST_NODE(&cset->hlist); INIT_LIST_HEAD(&cset->cgrp_links); - INIT_LIST_HEAD(&cset->mg_preload_node); + INIT_LIST_HEAD(&cset->mg_src_preload_node); + INIT_LIST_HEAD(&cset->mg_dst_preload_node); INIT_LIST_HEAD(&cset->mg_node); /* Copy the set of subsystem state objects generated in @@ -2597,21 +2599,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) */ void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) { - LIST_HEAD(preloaded); struct css_set *cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); - list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, + mg_src_preload_node) { + cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; + cset->mg_dst_cset = NULL; + list_del_init(&cset->mg_src_preload_node); + put_css_set_locked(cset); + } - list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, + mg_dst_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; - list_del_init(&cset->mg_preload_node); + list_del_init(&cset->mg_dst_preload_node); put_css_set_locked(cset); } @@ -2651,7 +2659,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, if (src_cset->dead) return; - if (!list_empty(&src_cset->mg_preload_node)) + if (!list_empty(&src_cset->mg_src_preload_node)) return; src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); @@ -2664,7 +2672,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cset->mg_src_cgrp = src_cgrp; src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); - list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); + list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); } /** @@ -2689,7 +2697,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) /* look up the dst cset for each src cset and link it to src */ list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, - mg_preload_node) { + mg_src_preload_node) { struct css_set *dst_cset; struct cgroup_subsys *ss; int ssid; @@ -2708,7 +2716,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; src_cset->mg_dst_cgrp = NULL; - list_del_init(&src_cset->mg_preload_node); + list_del_init(&src_cset->mg_src_preload_node); put_css_set(src_cset); put_css_set(dst_cset); continue; @@ -2716,8 +2724,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) src_cset->mg_dst_cset = dst_cset; - if (list_empty(&dst_cset->mg_preload_node)) - list_add_tail(&dst_cset->mg_preload_node, + if (list_empty(&dst_cset->mg_dst_preload_node)) + list_add_tail(&dst_cset->mg_dst_preload_node, &mgctx->preloaded_dst_csets); else put_css_set(dst_cset); @@ -2963,7 +2971,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; spin_lock_irq(&css_set_lock); - list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { + list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, + mg_src_preload_node) { struct task_struct *task, *ntask; /* all tasks in src_csets need to be migrated */ From 722ab9a4632bbf6dcc93aee03d7a121ac85d7f4c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Jul 2022 12:42:03 +0100 Subject: [PATCH 027/299] btrfs: return -EAGAIN for NOWAIT dio reads/writes on compressed and inline extents commit a4527e1853f8ff6e0b7c2dadad6268bd38427a31 upstream. When doing a direct IO read or write, we always return -ENOTBLK when we find a compressed extent (or an inline extent) so that we fallback to buffered IO. This however is not ideal in case we are in a NOWAIT context (io_uring for example), because buffered IO can block and we currently have no support for NOWAIT semantics for buffered IO, so if we need to fallback to buffered IO we should first signal the caller that we may need to block by returning -EAGAIN instead. This behaviour can also result in short reads being returned to user space, which although it's not incorrect and user space should be able to deal with partial reads, it's somewhat surprising and even some popular applications like QEMU (Link tag #1) and MariaDB (Link tag #2) don't deal with short reads properly (or at all). The short read case happens when we try to read from a range that has a non-compressed and non-inline extent followed by a compressed extent. After having read the first extent, when we find the compressed extent we return -ENOTBLK from btrfs_dio_iomap_begin(), which results in iomap to treat the request as a short read, returning 0 (success) and waiting for previously submitted bios to complete (this happens at fs/iomap/direct-io.c:__iomap_dio_rw()). After that, and while at btrfs_file_read_iter(), we call filemap_read() to use buffered IO to read the remaining data, and pass it the number of bytes we were able to read with direct IO. Than at filemap_read() if we get a page fault error when accessing the read buffer, we return a partial read instead of an -EFAULT error, because the number of bytes previously read is greater than zero. So fix this by returning -EAGAIN for NOWAIT direct IO when we find a compressed or an inline extent. Reported-by: Dominique MARTINET Link: https://lore.kernel.org/linux-btrfs/YrrFGO4A1jS0GI0G@atmark-techno.com/ Link: https://jira.mariadb.org/browse/MDEV-27900?focusedCommentId=216582&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-216582 Tested-by: Dominique MARTINET CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Christoph Hellwig Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 861b9748a0d9f..9ae79342631a8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7639,7 +7639,19 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || em->block_start == EXTENT_MAP_INLINE) { free_extent_map(em); - ret = -ENOTBLK; + /* + * If we are in a NOWAIT context, return -EAGAIN in order to + * fallback to buffered IO. This is not only because we can + * block with buffered IO (no support for NOWAIT semantics at + * the moment) but also to avoid returning short reads to user + * space - this happens if we were able to read some data from + * previous non-compressed extents and then when we fallback to + * buffered IO, at btrfs_file_read_iter() by calling + * filemap_read(), we fail to fault in pages for the read buffer, + * in which case filemap_read() returns a short read (the number + * of bytes previously read is > 0, so it does not return -EFAULT). + */ + ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK; goto unlock_err; } From 02fc3a6ed7873e85c037ea9b67614780713966d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jun 2022 18:03:19 +0200 Subject: [PATCH 028/299] btrfs: zoned: fix a leaked bioc in read_zone_info commit 2963457829decf0c824a443238d251151ed18ff5 upstream. The bioc would leak on the normal completion path and also on the RAID56 check (but that one won't happen in practice due to the invalid combination with zoned mode). Fixes: 7db1c5d14dcd ("btrfs: zoned: support dev-replace in zoned filesystems") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Anand Jain Signed-off-by: Christoph Hellwig [ update changelog ] Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/zoned.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b5236f5afca7c..5091d679a602c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1727,12 +1727,14 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, &mapped_length, &bioc); if (ret || !bioc || mapped_length < PAGE_SIZE) { - btrfs_put_bioc(bioc); - return -EIO; + ret = -EIO; + goto out_put_bioc; } - if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) - return -EINVAL; + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { + ret = -EINVAL; + goto out_put_bioc; + } nofs_flag = memalloc_nofs_save(); nmirrors = (int)bioc->num_stripes; @@ -1751,7 +1753,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, break; } memalloc_nofs_restore(nofs_flag); - +out_put_bioc: + btrfs_put_bioc(bioc); return ret; } From 3d1ca4114fe644c3a9ba229d02b593ae7b6cc77f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 30 Jun 2022 23:06:00 +0300 Subject: [PATCH 029/299] drm/panfrost: Put mapping instead of shmem obj on panfrost_mmu_map_fault_addr() error commit fb6e0637ab7ebd8e61fe24f4d663c4bae99cfa62 upstream. When panfrost_mmu_map_fault_addr() fails, the BO's mapping should be unreferenced and not the shmem object which backs the mapping. Cc: stable@vger.kernel.org Fixes: bdefca2d8dc0 ("drm/panfrost: Add the panfrost_gem_mapping concept") Reviewed-by: Steven Price Signed-off-by: Dmitry Osipenko Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20220630200601.1884120-2-dmitry.osipenko@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index d3f82b26a631d..b285a8001b1d2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -518,7 +518,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, err_pages: drm_gem_shmem_put_pages(&bo->base); err_bo: - drm_gem_object_put(&bo->base.base); + panfrost_gem_mapping_put(bomapping); return ret; } From f036392edd9c49090781d8cca26ad6557a63bae4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 30 Jun 2022 23:06:01 +0300 Subject: [PATCH 030/299] drm/panfrost: Fix shrinker list corruption by madvise IOCTL commit 9fc33eaaa979d112d10fea729edcd2a2e21aa912 upstream. Calling madvise IOCTL twice on BO causes memory shrinker list corruption and crashes kernel because BO is already on the list and it's added to the list again, while BO should be removed from the list before it's re-added. Fix it. Cc: stable@vger.kernel.org Fixes: 013b65101315 ("drm/panfrost: Add madvise and shrinker support") Acked-by: Alyssa Rosenzweig Reviewed-by: Steven Price Signed-off-by: Dmitry Osipenko Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20220630200601.1884120-3-dmitry.osipenko@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panfrost/panfrost_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 47780fe597f23..601e124842e17 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -432,8 +432,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, if (args->retained) { if (args->madv == PANFROST_MADV_DONTNEED) - list_add_tail(&bo->base.madv_list, - &pfdev->shrinker_list); + list_move_tail(&bo->base.madv_list, + &pfdev->shrinker_list); else if (args->madv == PANFROST_MADV_WILLNEED) list_del_init(&bo->base.madv_list); } From e711a68a0e66ad64d2234d80ecbf10ec49c0fa00 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 13 Jul 2022 17:49:15 +1000 Subject: [PATCH 031/299] fs/remap: constrain dedupe of EOF blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5750676b64a561f7ec920d7c6ba130fc9c7378f3 upstream. If dedupe of an EOF block is not constrainted to match against only other EOF blocks with the same EOF offset into the block, it can match against any other block that has the same matching initial bytes in it, even if the bytes beyond EOF in the source file do not match. Fix this by constraining the EOF block matching to only match against other EOF blocks that have identical EOF offsets and data. This allows "whole file dedupe" to continue to work without allowing eof blocks to randomly match against partial full blocks with the same data. Reported-by: Ansgar Lößer Fixes: 1383a7ed6749 ("vfs: check file ranges before cloning files") Link: https://lore.kernel.org/linux-fsdevel/a7c93559-4ba1-df2f-7a85-55a143696405@tu-darmstadt.de/ Signed-off-by: Dave Chinner Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/remap_range.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/remap_range.c b/fs/remap_range.c index e112b5424cdb9..881a306ee2473 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, * Otherwise, make sure the count is also block-aligned, having * already confirmed the starting offsets' block alignment. */ - if (pos_in + count == size_in) { + if (pos_in + count == size_in && + (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { bcount = ALIGN(size_in, bs) - pos_in; } else { if (!IS_ALIGNED(count, bs)) From 4eb6ad83932e841ec6a93daff363bf00ba78c75b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 23 Jun 2022 17:54:01 +0900 Subject: [PATCH 032/299] nilfs2: fix incorrect masking of permission flags for symlinks commit 5924e6ec1585445f251ea92713eb15beb732622a upstream. The permission flags of newly created symlinks are wrongly dropped on nilfs2 with the current umask value even though symlinks should have 777 (rwxrwxrwx) permissions: $ umask 0022 $ touch file && ln -s file symlink; ls -l file symlink -rw-r--r--. 1 root root 0 Jun 23 16:29 file lrwxr-xr-x. 1 root root 4 Jun 23 16:29 symlink -> file This fixes the bug by inserting a missing check that excludes symlinks. Link: https://lkml.kernel.org/r/1655974441-5612-1-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: Tommy Pettersson Reported-by: Ciprian Craciun Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/nilfs.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 1344f7d475d3c..aecda4fc95f5f 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -198,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode) static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) { + if (S_ISLNK(inode->i_mode)) + return 0; + inode->i_mode &= ~current_umask(); return 0; } From 8c41ab0ecda85f984a889626184ef4487e36cde5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 20 Jun 2022 09:01:43 +0200 Subject: [PATCH 033/299] sh: convert nommu io{re,un}map() to static inline functions commit d684e0a52d36f8939eda30a0f31ee235ee4ee741 upstream. Recently, nommu iounmap() was converted from a static inline function to a macro again, basically reverting commit 4580ba4ad2e6b8dd ("sh: Convert iounmap() macros to inline functions"). With -Werror, this leads to build failures like: drivers/iio/adc/xilinx-ams.c: In function `ams_iounmap_ps': drivers/iio/adc/xilinx-ams.c:1195:14: error: unused variable `ams' [-Werror=unused-variable] 1195 | struct ams *ams = data; | ^~~ Fix this by replacing the macros for ioremap() and iounmap() by static inline functions, based on . Link: https://lkml.kernel.org/r/8d1b1766260961799b04035e7bc39a7f59729f72.1655708312.git.geert+renesas@glider.be Fixes: 13f1fc870dd74713 ("sh: move the ioremap implementation out of line") Signed-off-by: Geert Uytterhoeven Reported-by: kernel test robot Reported-by: Jonathan Cameron Acked-by: Jonathan Cameron Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/sh/include/asm/io.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index cf9a3ec32406f..fba90e670ed41 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, #endif /* CONFIG_HAVE_IOREMAP_PROT */ #else /* CONFIG_MMU */ -#define iounmap(addr) do { } while (0) -#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset)) +static inline void __iomem *ioremap(phys_addr_t offset, size_t size) +{ + return (void __iomem *)(unsigned long)offset; +} + +static inline void iounmap(volatile void __iomem *addr) { } #endif /* CONFIG_MMU */ #define ioremap_uc ioremap From a92d44b412e75dd66543843165e46637457f22cc Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 27 May 2022 19:17:26 +0800 Subject: [PATCH 034/299] Revert "evm: Fix memleak in init_desc" commit 51dd64bb99e4478fc5280171acd8e1b529eadaf7 upstream. This reverts commit ccf11dbaa07b328fa469415c362d33459c140a37. Commit ccf11dbaa07b ("evm: Fix memleak in init_desc") said there is memleak in init_desc. That may be incorrect, as we can see, tmp_tfm is saved in one of the two global variables hmac_tfm or evm_tfm[hash_algo], then if init_desc is called next time, there is no need to alloc tfm again, so in the error path of kmalloc desc or crypto_shash_init(desc), It is not a problem without freeing tmp_tfm. And also that commit did not reset the global variable to NULL after freeing tmp_tfm and this makes *tfm a dangling pointer which may cause a UAF issue. Reported-by: Guozihua (Scott) Signed-off-by: Xiu Jianfeng Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/evm/evm_crypto.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 0450d79afdc8f..b862f0f919bfc 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -75,7 +75,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) { long rc; const char *algo; - struct crypto_shash **tfm, *tmp_tfm = NULL; + struct crypto_shash **tfm, *tmp_tfm; struct shash_desc *desc; if (type == EVM_XATTR_HMAC) { @@ -120,16 +120,13 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) alloc: desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm), GFP_KERNEL); - if (!desc) { - crypto_free_shash(tmp_tfm); + if (!desc) return ERR_PTR(-ENOMEM); - } desc->tfm = *tfm; rc = crypto_shash_init(desc); if (rc) { - crypto_free_shash(tmp_tfm); kfree(desc); return ERR_PTR(rc); } From ab5e910c989ad93b1eeb7f07aa8ed8dbe9b94bfd Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 24 Jun 2022 17:18:45 +0300 Subject: [PATCH 035/299] reset: Fix devm bulk optional exclusive control getter [ Upstream commit a57f68ddc8865d59a19783080cc52fb4a11dc209 ] Most likely due to copy-paste mistake the device managed version of the denoted reset control getter has been implemented with invalid semantic, which can be immediately spotted by having "WARN_ON(shared && acquired)" warning in the system log as soon as the method is called. Anyway let's fix it by altering the boolean arguments passed to the __devm_reset_control_bulk_get() method from - shared = true, optional = false, acquired = true to + shared = false, optional = true, acquired = true That's what they were supposed to be in the first place (see the non-devm version of the same method: reset_control_bulk_get_optional_exclusive()). Fixes: 48d71395896d ("reset: Add reset_control_bulk API") Signed-off-by: Serge Semin Reviewed-by: Dmitry Osipenko Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220624141853.7417-2-Sergey.Semin@baikalelectronics.ru Signed-off-by: Sasha Levin --- include/linux/reset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index 8a21b5756c3ef..514ddf003efc7 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -731,7 +731,7 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); } /** From 51904922902f04619635b1024ce3300955ccc599 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 28 Apr 2022 14:16:59 -0400 Subject: [PATCH 036/299] arm64: dts: ls1028a: Update SFP node to include clock [ Upstream commit 3c12e9da3098a30fc82dea01768d355c28e3692d ] The clocks property is now mandatory. Add it to avoid warning message. Signed-off-by: Sean Anderson Reviewed-by: Michael Walle Fixes: eba5bea8f37f ("arm64: dts: ls1028a: add efuse node") Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 088271d49139c..59b289b52a280 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -224,9 +224,12 @@ little-endian; }; - efuse@1e80000 { + sfp: efuse@1e80000 { compatible = "fsl,ls1028a-sfp"; reg = <0x0 0x1e80000 0x0 0x10000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(4)>; + clock-names = "sfp"; #address-cells = <1>; #size-cells = <1>; From 0125374851845eae4cb7932be2c1650ced2eda2b Mon Sep 17 00:00:00 2001 From: Kris Bahnsen Date: Thu, 30 Jun 2022 14:03:27 -0700 Subject: [PATCH 037/299] ARM: dts: imx6qdl-ts7970: Fix ngpio typo and count [ Upstream commit e95ea0f687e679fcb0a3a67d0755b81ee7d60db0 ] Device-tree incorrectly used "ngpio" which caused the driver to fallback to 32 ngpios. This platform has 62 GPIO registers. Fixes: 9ff8e9fccef9 ("ARM: dts: TS-7970: add basic device tree") Signed-off-by: Kris Bahnsen Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl-ts7970.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index fded07f370b39..d6ba4b2a60f6f 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -226,7 +226,7 @@ reg = <0x28>; #gpio-cells = <2>; gpio-controller; - ngpio = <32>; + ngpios = <62>; }; sgtl5000: codec@a { From 0903e579544ad1e608e3b2c905d2d0cd1e501fb5 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 29 Jun 2022 21:07:33 +0100 Subject: [PATCH 038/299] riscv: dts: microchip: hook up the mpfs' l2cache [ Upstream commit efa310ba00716d7a872bdc5fa1f5545edc9efd69 ] The initial PolarFire SoC devicetree must have been forked off from the fu540 one prior to the addition of l2cache controller support being added there. When the controller node was added to mpfs.dtsi, it was not hooked up to the CPUs & thus sysfs reports an incorrect cache configuration. Hook it up. Fixes: 0fa6107eca41 ("RISC-V: Initial DTS for Microchip ICICLE board") Reviewed-by: Sudeep Holla Reviewed-by: Daire McNamara Signed-off-by: Conor Dooley Signed-off-by: Sasha Levin --- arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index f44fce1fe080c..2f75e39d2fdd1 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -51,6 +51,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu1_intc: interrupt-controller { @@ -78,6 +79,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu2_intc: interrupt-controller { @@ -105,6 +107,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu3_intc: interrupt-controller { @@ -132,6 +135,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu4_intc: interrupt-controller { #interrupt-cells = <1>; From f65501ce53045740a86756471c48350e16f39add Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Wed, 6 Jul 2022 13:06:22 +0300 Subject: [PATCH 039/299] spi: amd: Limit max transfer and message size [ Upstream commit 6ece49c56965544262523dae4a071ace3db63507 ] Enabling the SPI CS35L41 audio codec driver for Steam Deck [1] revealed a problem with the current AMD SPI controller driver implementation, consisting of an unrecoverable system hang. The issue can be prevented if we ensure the max transfer size and the max message size do not exceed the FIFO buffer size. According to the implementation of the downstream driver, the AMD SPI controller is not able to handle more than 70 bytes per transfer, which corresponds to the size of the FIFO buffer. Hence, let's fix this by setting the SPI limits mentioned above. [1] https://lore.kernel.org/r/20220621213819.262537-1-cristian.ciocaltea@collabora.com Reported-by: Anastasios Vacharakis Fixes: bbb336f39efc ("spi: spi-amd: Add AMD SPI controller driver support") Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20220706100626.1234731-2-cristian.ciocaltea@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-amd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index cba6a4486c24c..efdcbe6c4c266 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -33,6 +33,7 @@ #define AMD_SPI_RX_COUNT_REG 0x4B #define AMD_SPI_STATUS_REG 0x4C +#define AMD_SPI_FIFO_SIZE 70 #define AMD_SPI_MEM_SIZE 200 /* M_CMD OP codes for SPI */ @@ -270,6 +271,11 @@ static int amd_spi_master_transfer(struct spi_master *master, return 0; } +static size_t amd_spi_max_transfer_size(struct spi_device *spi) +{ + return AMD_SPI_FIFO_SIZE; +} + static int amd_spi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -302,6 +308,8 @@ static int amd_spi_probe(struct platform_device *pdev) master->flags = SPI_MASTER_HALF_DUPLEX; master->setup = amd_spi_master_setup; master->transfer_one_message = amd_spi_master_transfer; + master->max_transfer_size = amd_spi_max_transfer_size; + master->max_message_size = amd_spi_max_transfer_size; /* Register the controller with SPI framework */ err = devm_spi_register_master(dev, master); From c10f57d82df1566af2ad3b5e6a81c35622889d08 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 31 May 2022 09:53:42 +0100 Subject: [PATCH 040/299] ARM: 9209/1: Spectre-BHB: avoid pr_info() every time a CPU comes out of idle [ Upstream commit 0609e200246bfd3b7516091c491bec4308349055 ] Jon reports that the Spectre-BHB init code is filling up the kernel log with spurious notifications about which mitigation has been enabled, every time any CPU comes out of a low power state. Given that Spectre-BHB mitigations are system wide, only a single mitigation can be enabled, and we already print an error if two types of CPUs coexist in a single system that require different Spectre-BHB mitigations. This means that the pr_info() that describes the selected mitigation does not need to be emitted for each CPU anyway, and so we can simply emit it only once. In order to clarify the above in the log message, update it to describe that the selected mitigation will be enabled on all CPUs, including ones that are unaffected. If another CPU comes up later that is affected and requires a different mitigation, we report an error as before. Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Tested-by: Jon Hunter Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/mm/proc-v7-bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index f9730eba06328..8bc7a2d6d6c7f 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -208,10 +208,10 @@ static int spectre_bhb_install_workaround(int method) return SPECTRE_VULNERABLE; spectre_bhb_method = method; - } - pr_info("CPU%u: Spectre BHB: using %s workaround\n", - smp_processor_id(), spectre_bhb_method_name(method)); + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } return SPECTRE_MITIGATED; } From 079fe72a31c404ce66515dc90ebdf774aa59386e Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 13 Jun 2022 15:05:41 +0100 Subject: [PATCH 041/299] ARM: 9210/1: Mark the FDT_FIXED sections as shareable [ Upstream commit 598f0a99fa8a35be44b27106b43ddc66417af3b1 ] commit 7a1be318f579 ("ARM: 9012/1: move device tree mapping out of linear region") use FDT_FIXED_BASE to map the whole FDT_FIXED_SIZE memory area which contains fdt. But it only reserves the exact physical memory that fdt occupied. Unfortunately, this mapping is non-shareable. An illegal or speculative read access can bring the RAM content from non-fdt zone into cache, PIPT makes it to be hit by subsequently read access through shareable mapping(such as linear mapping), and the cache consistency between cores is lost due to non-shareable property. |<---------FDT_FIXED_SIZE------>| | | ------------------------------- | | | | ------------------------------- 1. CoreA read through MT_ROM mapping, the old data is loaded into the cache. 2. CoreB write to update data through linear mapping. CoreA received the notification to invalid the corresponding cachelines, but the property non-shareable makes it to be ignored. 3. CoreA read through linear mapping, cache hit, the old data is read. To eliminate this risk, add a new memory type MT_MEMORY_RO. Compared to MT_ROM, it is shareable and non-executable. Here's an example: list_del corruption. prev->next should be c0ecbf74, but was c08410dc kernel BUG at lib/list_debug.c:53! ... ... PC is at __list_del_entry_valid+0x58/0x98 LR is at __list_del_entry_valid+0x58/0x98 psr: 60000093 sp : c0ecbf30 ip : 00000000 fp : 00000001 r10: c08410d0 r9 : 00000001 r8 : c0825e0c r7 : 20000013 r6 : c08410d0 r5 : c0ecbf74 r4 : c0ecbf74 r3 : c0825d08 r2 : 00000000 r1 : df7ce6f4 r0 : 00000044 ... ... Stack: (0xc0ecbf30 to 0xc0ecc000) bf20: c0ecbf74 c0164fd0 c0ecbf70 c0165170 bf40: c0eca000 c0840c00 c0840c00 c0824500 c0825e0c c0189bbc c088f404 60000013 bf60: 60000013 c0e85100 000004ec 00000000 c0ebcdc0 c0ecbf74 c0ecbf74 c0825d08 ... ... < next prev > (__list_del_entry_valid) from (__list_del_entry+0xc/0x20) (__list_del_entry) from (finish_swait+0x60/0x7c) (finish_swait) from (rcu_gp_kthread+0x560/0xa20) (rcu_gp_kthread) from (kthread+0x14c/0x15c) (kthread) from (ret_from_fork+0x14/0x24) The faulty list node to be deleted is a local variable, its address is c0ecbf74. The dumped stack shows that 'prev' = c0ecbf74, but its value before lib/list_debug.c:53 is c08410dc. A large amount of printing results in swapping out the cacheline containing the old data(MT_ROM mapping is read only, so the cacheline cannot be dirty), and the subsequent dump operation obtains new data from the DDR. Fixes: 7a1be318f579 ("ARM: 9012/1: move device tree mapping out of linear region") Suggested-by: Ard Biesheuvel Signed-off-by: Zhen Lei Reviewed-by: Ard Biesheuvel Reviewed-by: Kefeng Wang Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/include/asm/mach/map.h | 1 + arch/arm/mm/mmu.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 92282558caf7c..2b8970d8e5a2f 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -27,6 +27,7 @@ enum { MT_HIGH_VECTORS, MT_MEMORY_RWX, MT_MEMORY_RW, + MT_MEMORY_RO, MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 5e2be37a198e2..cd17e324aa51e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -296,6 +296,13 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -489,6 +496,7 @@ static void __init build_mem_type_table(void) /* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -568,6 +576,7 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif /* @@ -587,6 +596,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -647,6 +658,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -1360,7 +1373,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); map.virtual = FDT_FIXED_BASE; map.length = FDT_FIXED_SIZE; - map.type = MT_ROM; + map.type = MT_MEMORY_RO; create_mapping(&map); } From e3a19a4662126c3e8fb4cb2ae53838027ebd15c7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 6 Jun 2022 21:20:29 +0300 Subject: [PATCH 042/299] net/mlx5e: kTLS, Fix build time constant test in TX [ Upstream commit 6cc2714e85754a621219693ea8aa3077d6fca0cb ] Use the correct constant (TLS_DRIVER_STATE_SIZE_TX) in the comparison against the size of the private TX TLS driver context. Fixes: df8d866770f9 ("net/mlx5e: kTLS, Use kernel API to extract private offload context") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index aaf11c66bf4c8..6f12764d8880e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -68,8 +68,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_tx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); + BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX); *ctx = priv_tx; } From 41ab41f695de241ec37bca9e36a396869f6d8862 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 6 Jun 2022 21:21:10 +0300 Subject: [PATCH 043/299] net/mlx5e: kTLS, Fix build time constant test in RX [ Upstream commit 2ec6cf9b742a5c18982861322fa5de6510f8f57e ] Use the correct constant (TLS_DRIVER_STATE_SIZE_RX) in the comparison against the size of the private RX TLS driver context. Fixes: 1182f3659357 ("net/mlx5e: kTLS, Add kTLS RX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 96064a2033f79..f3f2aeb1bc21d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -231,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_rx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); + BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX); *ctx = priv_rx; } From 33ef9ed40fcc3f7b37033823a9470ccf32206bcc Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 22 Jun 2022 13:11:18 +0300 Subject: [PATCH 044/299] net/mlx5e: Fix enabling sriov while tc nic rules are offloaded [ Upstream commit 0c9d876545a56aebed30fa306d0460a4d28d271a ] There is a total of four 4M entries flow tables. In sriov disabled mode, ct, ct_nat and post_act take three of them. When adding the first tc nic rule in this mode, it will take another 4M table for the tc table. If user then enables sriov, the legacy flow table tries to take another 4M and fails, and so enablement fails. To fix that, have legacy fdb take the next available maximum size from the fs ft pool. Fixes: 4a98544d1827 ("net/mlx5: Move chains ft pool to be used by all firmware steering") Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 9d17206d16251..fabe49a35a5c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -11,6 +11,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "fs_ft_pool.h" #include "esw/qos.h" enum { @@ -95,8 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; + ft_attr.max_fte = POOL_NEXT_SIZE; ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { @@ -105,6 +105,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) goto out; } esw->fdb_table.legacy.fdb = fdb; + table_size = fdb->max_fte; /* Addresses group : Full match unicast/multicast addresses */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, From b8651049bdd77fa652bcf0f3157911a3a6fc4f2f Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 21 Jun 2022 10:43:55 +0300 Subject: [PATCH 045/299] net/mlx5e: CT: Use own workqueue instead of mlx5e priv [ Upstream commit 6c4e8fa03fde7e5b304594294e397a9ba92feaf6 ] Allocate a ct priv workqueue instead of using mlx5e priv one so flushing will only be of related CT entries. Also move flushing of the workqueue before rhashtable destroy otherwise entries won't be valid. Fixes: b069e14fff46 ("net/mlx5e: CT: Fix queued up restore put() executing after relevant ft release") Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 1ff7a07bcd06f..fbcce63e5b80e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -66,6 +66,7 @@ struct mlx5_tc_ct_priv { struct mlx5_ct_fs *fs; struct mlx5_ct_fs_ops *fs_ops; spinlock_t ht_lock; /* protects ft entries */ + struct workqueue_struct *wq; }; struct mlx5_ct_flow { @@ -927,14 +928,11 @@ static void mlx5_tc_ct_entry_del_work(struct work_struct *work) static void __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) { - struct mlx5e_priv *priv; - if (!refcount_dec_and_test(&entry->refcnt)) return; - priv = netdev_priv(entry->ct_priv->netdev); INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); - queue_work(priv->wq, &entry->work); + queue_work(entry->ct_priv->wq, &entry->work); } static struct mlx5_ct_counter * @@ -1744,19 +1742,16 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) static void mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) { - struct mlx5e_priv *priv; - if (!refcount_dec_and_test(&ft->refcount)) return; + flush_workqueue(ct_priv->wq); nf_flow_table_offload_del_cb(ft->nf_ft, mlx5_tc_ct_block_flow_offload, ft); rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); - priv = netdev_priv(ct_priv->netdev); - flush_workqueue(priv->wq); mlx5_tc_ct_free_pre_ct_tables(ft); mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); kfree(ft); @@ -2139,6 +2134,12 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) goto err_ct_tuples_nat_ht; + ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0); + if (!ct_priv->wq) { + err = -ENOMEM; + goto err_wq; + } + err = mlx5_tc_ct_fs_init(ct_priv); if (err) goto err_init_fs; @@ -2146,6 +2147,8 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, return ct_priv; err_init_fs: + destroy_workqueue(ct_priv->wq); +err_wq: rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); err_ct_tuples_nat_ht: rhashtable_destroy(&ct_priv->ct_tuples_ht); @@ -2175,6 +2178,7 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) if (!ct_priv) return; + destroy_workqueue(ct_priv->wq); chains = ct_priv->chains; ct_priv->fs_ops->destroy(ct_priv->fs); From 987df1259671aa40f791141bfe0c7d388df27816 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 27 Jun 2022 15:05:53 +0300 Subject: [PATCH 046/299] net/mlx5e: Fix capability check for updating vnic env counters [ Upstream commit 452133dd580811f184e76b1402983182ee425298 ] The existing capability check for vnic env counters only checks for receive steering discards, although we need the counters update for the exposed internal queue oob counter as well. This could result in the latter counter not being updated correctly when the receive steering discards counter is not supported. Fix that by checking whether any counter is supported instead of only the steering counter capability. Fixes: 0cfafd4b4ddf ("net/mlx5e: Add device out of buffer counter") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index bdc870f9c2f3f..4429c848d4c45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -688,7 +688,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; struct mlx5_core_dev *mdev = priv->mdev; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + if (!mlx5e_stats_grp_vnic_env_num_stats(priv)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); From 16427298f3dc02ec90bdfa31c8ef9b384ea5534a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 30 May 2022 14:01:37 +0300 Subject: [PATCH 047/299] net/mlx5e: Ring the TX doorbell on DMA errors [ Upstream commit 5b759bf2f9d73db05369aef2344502095c4e5e73 ] TX doorbells may be postponed, because sometimes the driver knows that another packet follows (for example, when xmit_more is true, or when a MPWQE session is closed before transmitting a packet). However, the DMA mapping may fail for the next packet, in which case a new WQE is not posted, the doorbell isn't updated either, and the transmission of the previous packet will be delayed indefinitely. This commit fixes the described rare error flow by posting a NOP and ringing the doorbell on errors to flush all the previous packets. The MPWQE session is closed before that. DMA mapping in the MPWQE flow is moved to the beginning of mlx5e_sq_xmit_mpwqe, because empty sessions are not allowed. Stop room always has enough space for a NOP, because the actual TX WQE is not posted. Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 39 ++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 2dc48406cd08d..54a3f866a3457 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -318,6 +318,26 @@ static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) } } +static void mlx5e_tx_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + /* Must not be called when a MPWQE session is active but empty. */ + mlx5e_tx_mpwqe_ensure_complete(sq); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); +} + static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, const struct mlx5e_tx_attr *attr, @@ -410,6 +430,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) @@ -511,6 +532,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_ctrl_seg *cseg; struct mlx5e_xmit_data txd; + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { mlx5e_tx_mpwqe_session_start(sq, eseg); } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { @@ -520,18 +548,9 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->xmit_more += xmit_more; - txd.data = skb->data; - txd.len = skb->len; - - txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) - goto err_unmap; mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); - mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); - mlx5e_tx_mpwqe_add_dseg(sq, &txd); - mlx5e_tx_skb_update_hwts_flags(skb); if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { @@ -553,6 +572,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_dma_unmap_wqe_err(sq, 1); sq->stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) @@ -935,5 +955,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } #endif From 27dccf616a0a82f4d8004b7ee04560e7de419e63 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Jun 2022 10:04:55 -0400 Subject: [PATCH 048/299] drm/amdgpu: keep fbdev buffers pinned during suspend [ Upstream commit f9a89117fbdc63c0d4ab63a8f3596a72c245bcfe ] Was dropped when we converted to the generic helpers. Fixes: 087451f372bf ("drm/amdgpu: use generic fb helpers instead of setting up AMD own's.") Acked-by: Evan Quan Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 25 +++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index fae5c1debfad3..ffb3702745a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1547,6 +1547,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, stime, etime, mode); } +static bool +amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) +{ + struct drm_device *dev = adev_to_drm(adev); + struct drm_fb_helper *fb_helper = dev->fb_helper; + + if (!fb_helper || !fb_helper->buffer) + return false; + + if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj) + return false; + + return true; +} + int amdgpu_display_suspend_helper(struct amdgpu_device *adev) { struct drm_device *dev = adev_to_drm(adev); @@ -1582,10 +1597,12 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) continue; } robj = gem_to_amdgpu_bo(fb->obj[0]); - r = amdgpu_bo_reserve(robj, true); - if (r == 0) { - amdgpu_bo_unpin(robj); - amdgpu_bo_unreserve(robj); + if (!amdgpu_display_robj_is_fb(adev, robj)) { + r = amdgpu_bo_reserve(robj, true); + if (r == 0) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); + } } } return 0; From 4ffcacab7145080187330accafae69e87a481eec Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Jun 2022 10:10:37 -0400 Subject: [PATCH 049/299] drm/amdgpu/display: disable prefer_shadow for generic fb helpers [ Upstream commit 3a4b1cc28fbdc2325b3e3ed7d8024995a75f9216 ] Seems to break hibernation. Disable for now until we can root cause it. Fixes: 087451f372bf ("drm/amdgpu: use generic fb helpers instead of setting up AMD own's.") Bug: https://bugzilla.kernel.org/show_bug.cgi?id=216119 Acked-by: Evan Quan Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 3 ++- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 3 ++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 5224d9a39737f..842670d4a12e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -494,7 +494,8 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = YRES_MAX; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 288fce7dc0ed1..9c964cd3b5d4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2796,7 +2796,8 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index cbe5250b31cb4..e0ad9f27dc3f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2914,7 +2914,8 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 982855e6cf52e..3caf6f386042f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2673,7 +2673,8 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_width = 16384; adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 84440741c60bc..7c75df5bffed3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2693,7 +2693,8 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6dc9808760fc8..b55a433e829e2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3847,7 +3847,8 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; /* indicates support for immediate flip */ adev_to_drm(adev)->mode_config.async_page_flip = true; From a91522b4279bebb098106a19b91f82b9c3213be9 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 24 Jun 2022 06:04:06 -0700 Subject: [PATCH 050/299] drm/i915: fix a possible refcount leak in intel_dp_add_mst_connector() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 85144df9ff4652816448369de76897c57cbb1b93 ] If drm_connector_init fails, intel_connector_free will be called to take care of proper free. So it is necessary to drop the refcount of port before intel_connector_free. Fixes: 091a4f91942a ("drm/i915: Handle drm-layer errors in intel_dp_add_mst_connector") Signed-off-by: Hangyu Hua Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220624130406.17996-1-jose.souza@intel.com Signed-off-by: José Roberto de Souza (cherry picked from commit cea9ed611e85d36a05db52b6457bf584b7d969e2) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index e30e698aa6843..f7d46ea3afb96 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -841,6 +841,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); if (ret) { + drm_dp_mst_put_port_malloc(port); intel_connector_free(intel_connector); return NULL; } From 683feeb567732731238f620d181aede6b176da58 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 21 Jun 2022 16:30:05 -0700 Subject: [PATCH 051/299] drm/i915/guc: ADL-N should use the same GuC FW as ADL-S [ Upstream commit 25c95bf494067f7bd1dfa8064ef964abe88cafc2 ] The only difference between the ADL S and P GuC FWs is the HWConfig support. ADL-N does not support HWConfig, so we should use the same binary as ADL-S, otherwise the GuC might attempt to fetch a config table that does not exist. ADL-N is internally identified as an ADL-P, so we need to special-case it in the FW selection code. Fixes: 7e28d0b26759 ("drm/i915/adl-n: Enable ADL-N platform") Cc: John Harrison Cc: Tejas Upadhyay Cc: Anusha Srivatsa Cc: Jani Nikula Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220621233005.3952293-1-daniele.ceraolospurio@intel.com (cherry picked from commit 971e4a9781742aaad1587e25fd5582b2dd595ef8) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 9b6fbad476465..097b0c8b85313 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -160,6 +160,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) u8 rev = INTEL_REVID(i915); int i; + /* + * The only difference between the ADL GuC FWs is the HWConfig support. + * ADL-N does not support HWConfig, so we should use the same binary as + * ADL-S, otherwise the GuC might attempt to fetch a config table that + * does not exist. + */ + if (IS_ADLP_N(i915)) + p = INTEL_ALDERLAKE_S; + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); fw_blobs = blobs_all[uc_fw->type].blobs; fw_count = blobs_all[uc_fw->type].count; From 640cea4c2839a821adfbb703b590a5928abe9286 Mon Sep 17 00:00:00 2001 From: Huaxin Lu Date: Tue, 5 Jul 2022 13:14:17 +0800 Subject: [PATCH 052/299] ima: Fix a potential integer overflow in ima_appraise_measurement [ Upstream commit d2ee2cfc4aa85ff6a2a3b198a3a524ec54e3d999 ] When the ima-modsig is enabled, the rc passed to evm_verifyxattr() may be negative, which may cause the integer overflow problem. Fixes: 39b07096364a ("ima: Implement support for module-style appended signatures") Signed-off-by: Huaxin Lu Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- security/integrity/ima/ima_appraise.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 17232bbfb9f96..ee6a0f8879e43 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -408,7 +408,8 @@ int ima_appraise_measurement(enum ima_hooks func, goto out; } - status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint); + status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, + rc < 0 ? 0 : rc, iint); switch (status) { case INTEGRITY_PASS: case INTEGRITY_PASS_IMMUTABLE: From f4f11e5efcf56ee8bdd9c92df6ec7021ea13082d Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Fri, 24 Jun 2022 12:13:01 +0200 Subject: [PATCH 053/299] ASoC: sgtl5000: Fix noise on shutdown/remove [ Upstream commit 040e3360af3736348112d29425bf5d0be5b93115 ] Put the SGTL5000 in a silent/safe state on shutdown/remove, this is required since the SGTL5000 produces a constant noise on its output after it is configured and its clock is removed. Without this change this is happening every time the module is unbound/removed or from reboot till the clock is enabled again. The issue was experienced on both a Toradex Colibri/Apalis iMX6, but can be easily reproduced everywhere just playing something on the codec and after that removing/unbinding the driver. Fixes: 9b34e6cc3bc2 ("ASoC: Add Freescale SGTL5000 codec support") Signed-off-by: Francesco Dolcini Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20220624101301.441314-1-francesco.dolcini@toradex.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/sgtl5000.c | 9 +++++++++ sound/soc/codecs/sgtl5000.h | 1 + 2 files changed, 10 insertions(+) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 8eebf27d0ea24..281785a9301bc 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1796,6 +1796,9 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); + clk_disable_unprepare(sgtl5000->mclk); regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies); regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies); @@ -1803,6 +1806,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) return 0; } +static void sgtl5000_i2c_shutdown(struct i2c_client *client) +{ + sgtl5000_i2c_remove(client); +} + static const struct i2c_device_id sgtl5000_id[] = { {"sgtl5000", 0}, {}, @@ -1823,6 +1831,7 @@ static struct i2c_driver sgtl5000_i2c_driver = { }, .probe = sgtl5000_i2c_probe, .remove = sgtl5000_i2c_remove, + .shutdown = sgtl5000_i2c_shutdown, .id_table = sgtl5000_id, }; diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 56ec5863f2507..3a808c762299e 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -80,6 +80,7 @@ /* * SGTL5000_CHIP_DIG_POWER */ +#define SGTL5000_DIG_POWER_DEFAULT 0x0000 #define SGTL5000_ADC_EN 0x0040 #define SGTL5000_DAC_EN 0x0020 #define SGTL5000_DAP_POWERUP 0x0010 From cd486308d773d6d062a0140062458b48f8a0eb6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 30 Jun 2022 09:51:32 +0200 Subject: [PATCH 054/299] ASoC: tas2764: Add post reset delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cd10bb89b0d57bca98eb75e0444854a1c129a14e ] Make sure there is at least 1 ms delay from reset to first command as is specified in the datasheet. This is a fix similar to commit 307f31452078 ("ASoC: tas2770: Insert post reset delay"). Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220630075135.2221-1-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 9265af41c235d..edc66ff6dc49f 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -42,10 +42,12 @@ static void tas2764_reset(struct tas2764_priv *tas2764) gpiod_set_value_cansleep(tas2764->reset_gpio, 0); msleep(20); gpiod_set_value_cansleep(tas2764->reset_gpio, 1); + usleep_range(1000, 2000); } snd_soc_component_write(tas2764->component, TAS2764_SW_RST, TAS2764_RST); + usleep_range(1000, 2000); } static int tas2764_set_bias_level(struct snd_soc_component *component, @@ -107,8 +109,10 @@ static int tas2764_codec_resume(struct snd_soc_component *component) struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); int ret; - if (tas2764->sdz_gpio) + if (tas2764->sdz_gpio) { gpiod_set_value_cansleep(tas2764->sdz_gpio, 1); + usleep_range(1000, 2000); + } ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, TAS2764_PWR_CTRL_MASK, @@ -501,8 +505,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component) tas2764->component = component; - if (tas2764->sdz_gpio) + if (tas2764->sdz_gpio) { gpiod_set_value_cansleep(tas2764->sdz_gpio, 1); + usleep_range(1000, 2000); + } tas2764_reset(tas2764); From 9a851ee8091ab383e5d0cd495410f6495959544d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 30 Jun 2022 09:51:33 +0200 Subject: [PATCH 055/299] ASoC: tas2764: Fix and extend FSYNC polarity handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d1a10f1b48202e2d183cce144c218a211e98d906 ] Fix setting of FSYNC polarity in case of LEFT_J and DSP_A/B formats. Do NOT set the SCFG field as was previously done, because that is not correct and is also in conflict with the "ASI1 Source" control which sets the same SCFG field! Also add support for explicit polarity inversion. Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220630075135.2221-2-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 30 +++++++++++++++++------------- sound/soc/codecs/tas2764.h | 6 ++---- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index edc66ff6dc49f..46c815650b2c0 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -135,7 +135,8 @@ static const char * const tas2764_ASI1_src[] = { }; static SOC_ENUM_SINGLE_DECL( - tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, 4, tas2764_ASI1_src); + tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, TAS2764_TDM_CFG2_SCFG_SHIFT, + tas2764_ASI1_src); static const struct snd_kcontrol_new tas2764_asi1_mux = SOC_DAPM_ENUM("ASI1 Source", tas2764_ASI1_src_enum); @@ -333,20 +334,22 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); - u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0; - int iface; + u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0; int ret; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_IF: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_NB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING; break; + case SND_SOC_DAIFMT_IB_IF: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_IB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING; break; - default: - dev_err(tas2764->dev, "ASI format Inverse is not found\n"); - return -EINVAL; } ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, @@ -357,13 +360,13 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_DSP_A: - iface = TAS2764_TDM_CFG2_SCFG_I2S; tdm_rx_start_slot = 1; break; case SND_SOC_DAIFMT_DSP_B: case SND_SOC_DAIFMT_LEFT_J: - iface = TAS2764_TDM_CFG2_SCFG_LEFT_J; tdm_rx_start_slot = 0; break; default: @@ -372,14 +375,15 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, - TAS2764_TDM_CFG1_MASK, - (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT)); + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG0, + TAS2764_TDM_CFG0_FRAME_START, + asi_cfg_0); if (ret < 0) return ret; - ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG2, - TAS2764_TDM_CFG2_SCFG_MASK, iface); + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, + TAS2764_TDM_CFG1_MASK, + (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT)); if (ret < 0) return ret; diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index 67d6fd903c42c..f015f22a083b5 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -47,6 +47,7 @@ #define TAS2764_TDM_CFG0_MASK GENMASK(3, 1) #define TAS2764_TDM_CFG0_44_1_48KHZ BIT(3) #define TAS2764_TDM_CFG0_88_2_96KHZ (BIT(3) | BIT(1)) +#define TAS2764_TDM_CFG0_FRAME_START BIT(0) /* TDM Configuration Reg1 */ #define TAS2764_TDM_CFG1 TAS2764_REG(0X0, 0x09) @@ -66,10 +67,7 @@ #define TAS2764_TDM_CFG2_RXS_16BITS 0x0 #define TAS2764_TDM_CFG2_RXS_24BITS BIT(0) #define TAS2764_TDM_CFG2_RXS_32BITS BIT(1) -#define TAS2764_TDM_CFG2_SCFG_MASK GENMASK(5, 4) -#define TAS2764_TDM_CFG2_SCFG_I2S 0x0 -#define TAS2764_TDM_CFG2_SCFG_LEFT_J BIT(4) -#define TAS2764_TDM_CFG2_SCFG_RIGHT_J BIT(5) +#define TAS2764_TDM_CFG2_SCFG_SHIFT 4 /* TDM Configuration Reg3 */ #define TAS2764_TDM_CFG3 TAS2764_REG(0X0, 0x0c) From cc4a817fdcda4b70304357373a5acdd29ca9e92d Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 30 Jun 2022 09:51:34 +0200 Subject: [PATCH 056/299] ASoC: tas2764: Correct playback volume range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3e99e5697e1f7120b5abc755e8a560b22612d6ed ] DVC value 0xc8 is -100dB and 0xc9 is mute; this needs to map to -100.5dB as far as the dB scale is concerned. Fix that and enable the mute flag, so alsamixer correctly shows the control as <0 dB .. -100 dB, mute>. Signed-off-by: Hector Martin Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220630075135.2221-3-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 46c815650b2c0..bd79bc7ecf6bb 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -536,7 +536,7 @@ static int tas2764_codec_probe(struct snd_soc_component *component) } static DECLARE_TLV_DB_SCALE(tas2764_digital_tlv, 1100, 50, 0); -static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10000, 50, 0); +static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1); static const struct snd_kcontrol_new tas2764_snd_controls[] = { SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0, From b174cca5647f14faca87bc52c85a68a93511682a Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 30 Jun 2022 09:51:35 +0200 Subject: [PATCH 057/299] ASoC: tas2764: Fix amp gain register offset & default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1c4f29ec878bbf1cc0a1eb54ae7da5ff98e19641 ] The register default is 0x28 per the datasheet, and the amp gain field is supposed to be shifted left by one. With the wrong default, the ALSA controls lie about the power-up state. With the wrong shift, we get only half the gain we expect. Signed-off-by: Hector Martin Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220630075135.2221-4-povik+lin@cutebit.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index bd79bc7ecf6bb..ec13ba01e5223 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -541,7 +541,7 @@ static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1); static const struct snd_kcontrol_new tas2764_snd_controls[] = { SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0, TAS2764_DVC_MAX, 1, tas2764_playback_volume), - SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 0, 0x14, 0, + SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 1, 0x14, 0, tas2764_digital_tlv), }; @@ -566,7 +566,7 @@ static const struct reg_default tas2764_reg_defaults[] = { { TAS2764_SW_RST, 0x00 }, { TAS2764_PWR_CTRL, 0x1a }, { TAS2764_DVC, 0x00 }, - { TAS2764_CHNL_0, 0x00 }, + { TAS2764_CHNL_0, 0x28 }, { TAS2764_TDM_CFG0, 0x09 }, { TAS2764_TDM_CFG1, 0x02 }, { TAS2764_TDM_CFG2, 0x0a }, From 6777a8e60e8c7ce7103be41224839d75cd0151c9 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 30 Jun 2022 09:56:37 +0300 Subject: [PATCH 058/299] ASoC: Intel: Skylake: Correct the ssp rate discovery in skl_get_ssp_clks() [ Upstream commit 219af251bd1694bce1f627d238347d2eaf13de61 ] The present flag is only set once when one rate has been found to be saved. This will effectively going to ignore any rate discovered at later time and based on the code, this is not the intention. Fixes: bc2bd45b1f7f3 ("ASoC: Intel: Skylake: Parse nhlt and register clock device") Signed-off-by: Peter Ujfalusi Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220630065638.11183-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/skylake/skl-nhlt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 2439a574ac2fa..366f7bd9bc02a 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -99,7 +99,6 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, struct nhlt_fmt_cfg *fmt_cfg; struct wav_fmt_ext *wav_fmt; unsigned long rate; - bool present = false; int rate_index = 0; u16 channels, bps; u8 clk_src; @@ -113,6 +112,8 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, return; for (i = 0; i < fmt->fmt_count; i++) { + bool present = false; + fmt_cfg = &fmt->fmt_config[i]; wav_fmt = &fmt_cfg->fmt_ext; From 7ba8d419b95a70d32823bf5755c06519efc6373f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 30 Jun 2022 09:56:38 +0300 Subject: [PATCH 059/299] ASoC: Intel: Skylake: Correct the handling of fmt_config flexible array [ Upstream commit fc976f5629afb4160ee77798b14a693eac903ffd ] The struct nhlt_format's fmt_config is a flexible array, it must not be used as normal array. When moving to the next nhlt_fmt_cfg we need to take into account the data behind the ->config.caps (indicated by ->config.size). The logic of the code also changed: it is no longer saves the _last_ fmt_cfg for all found rates. Fixes: bc2bd45b1f7f3 ("ASoC: Intel: Skylake: Parse nhlt and register clock device") Signed-off-by: Peter Ujfalusi Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/20220630065638.11183-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/skylake/skl-nhlt.c | 37 ++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 366f7bd9bc02a..deb7b820325e7 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -111,11 +111,12 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, if (fmt->fmt_count == 0) return; + fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; for (i = 0; i < fmt->fmt_count; i++) { + struct nhlt_fmt_cfg *saved_fmt_cfg = fmt_cfg; bool present = false; - fmt_cfg = &fmt->fmt_config[i]; - wav_fmt = &fmt_cfg->fmt_ext; + wav_fmt = &saved_fmt_cfg->fmt_ext; channels = wav_fmt->fmt.channels; bps = wav_fmt->fmt.bits_per_sample; @@ -133,12 +134,18 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, * derive the rate. */ for (j = i; j < fmt->fmt_count; j++) { - fmt_cfg = &fmt->fmt_config[j]; - wav_fmt = &fmt_cfg->fmt_ext; + struct nhlt_fmt_cfg *tmp_fmt_cfg = fmt_cfg; + + wav_fmt = &tmp_fmt_cfg->fmt_ext; if ((fs == wav_fmt->fmt.samples_per_sec) && - (bps == wav_fmt->fmt.bits_per_sample)) + (bps == wav_fmt->fmt.bits_per_sample)) { channels = max_t(u16, channels, wav_fmt->fmt.channels); + saved_fmt_cfg = tmp_fmt_cfg; + } + /* Move to the next nhlt_fmt_cfg */ + tmp_fmt_cfg = (struct nhlt_fmt_cfg *)(tmp_fmt_cfg->config.caps + + tmp_fmt_cfg->config.size); } rate = channels * bps * fs; @@ -154,8 +161,11 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, /* Fill rate and parent for sclk/sclkfs */ if (!present) { + struct nhlt_fmt_cfg *first_fmt_cfg; + + first_fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; i2s_config_ext = (struct skl_i2s_config_blob_ext *) - fmt->fmt_config[0].config.caps; + first_fmt_cfg->config.caps; /* MCLK Divider Source Select */ if (is_legacy_blob(i2s_config_ext->hdr.sig)) { @@ -169,6 +179,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, parent = skl_get_parent_clk(clk_src); + /* Move to the next nhlt_fmt_cfg */ + fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps + + fmt_cfg->config.size); /* * Do not copy the config data if there is no parent * clock available for this clock source select @@ -177,9 +190,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, continue; sclk[id].rate_cfg[rate_index].rate = rate; - sclk[id].rate_cfg[rate_index].config = fmt_cfg; + sclk[id].rate_cfg[rate_index].config = saved_fmt_cfg; sclkfs[id].rate_cfg[rate_index].rate = rate; - sclkfs[id].rate_cfg[rate_index].config = fmt_cfg; + sclkfs[id].rate_cfg[rate_index].config = saved_fmt_cfg; sclk[id].parent_name = parent->name; sclkfs[id].parent_name = parent->name; @@ -193,13 +206,13 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk, { struct skl_i2s_config_blob_ext *i2s_config_ext; struct skl_i2s_config_blob_legacy *i2s_config; - struct nhlt_specific_cfg *fmt_cfg; + struct nhlt_fmt_cfg *fmt_cfg; struct skl_clk_parent_src *parent; u32 clkdiv, div_ratio; u8 clk_src; - fmt_cfg = &fmt->fmt_config[0].config; - i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->caps; + fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; + i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->config.caps; /* MCLK Divider Source Select and divider */ if (is_legacy_blob(i2s_config_ext->hdr.sig)) { @@ -228,7 +241,7 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk, return; mclk[id].rate_cfg[0].rate = parent->rate/div_ratio; - mclk[id].rate_cfg[0].config = &fmt->fmt_config[0]; + mclk[id].rate_cfg[0].config = fmt_cfg; mclk[id].parent_name = parent->name; } From 60d85bb77afd7b108e4b60b95886c34f6d3d39af Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 23 Mar 2022 14:22:01 +0100 Subject: [PATCH 060/299] netfilter: ecache: move to separate structure [ Upstream commit 9027ce0b071a1bbd046682907fc2e23ca3592883 ] This makes it easier for a followup patch to only expose ecache related parts of nf_conntrack_net structure. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_conntrack.h | 8 ++++++-- net/netfilter/nf_conntrack_ecache.c | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b08b70989d2cf..69e6c6a218be8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -43,6 +43,11 @@ union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; +struct nf_conntrack_net_ecache { + struct delayed_work dwork; + struct netns_ct *ct_net; +}; + struct nf_conntrack_net { /* only used when new connection is allocated: */ atomic_t count; @@ -58,8 +63,7 @@ struct nf_conntrack_net { struct ctl_table_header *sysctl_header; #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS - struct delayed_work ecache_dwork; - struct netns_ct *ct_net; + struct nf_conntrack_net_ecache ecache; #endif }; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 07e65b4e92f86..0cb2da0a759a6 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -96,8 +96,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) static void ecache_work(struct work_struct *work) { - struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work); - struct netns_ct *ctnet = cnet->ct_net; + struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work); + struct netns_ct *ctnet = cnet->ecache.ct_net; int cpu, delay = -1; struct ct_pcpu *pcpu; @@ -127,7 +127,7 @@ static void ecache_work(struct work_struct *work) ctnet->ecache_dwork_pending = delay > 0; if (delay >= 0) - schedule_delayed_work(&cnet->ecache_dwork, delay); + schedule_delayed_work(&cnet->ecache.dwork, delay); } static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, @@ -293,12 +293,12 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) struct nf_conntrack_net *cnet = nf_ct_pernet(net); if (state == NFCT_ECACHE_DESTROY_FAIL && - !delayed_work_pending(&cnet->ecache_dwork)) { - schedule_delayed_work(&cnet->ecache_dwork, HZ); + !delayed_work_pending(&cnet->ecache.dwork)) { + schedule_delayed_work(&cnet->ecache.dwork, HZ); net->ct.ecache_dwork_pending = true; } else if (state == NFCT_ECACHE_DESTROY_SENT) { net->ct.ecache_dwork_pending = false; - mod_delayed_work(system_wq, &cnet->ecache_dwork, 0); + mod_delayed_work(system_wq, &cnet->ecache.dwork, 0); } } @@ -310,8 +310,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net) struct nf_conntrack_net *cnet = nf_ct_pernet(net); net->ct.sysctl_events = nf_ct_events; - cnet->ct_net = &net->ct; - INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work); + + cnet->ecache.ct_net = &net->ct; + INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work); BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */ } @@ -320,5 +321,5 @@ void nf_conntrack_ecache_pernet_fini(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); - cancel_delayed_work_sync(&cnet->ecache_dwork); + cancel_delayed_work_sync(&cnet->ecache.dwork); } From b900c5eaeef158deef762ea69f90675e844fa44f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 23 Mar 2022 14:22:03 +0100 Subject: [PATCH 061/299] netfilter: conntrack: split inner loop of list dumping to own function [ Upstream commit 49001a2e83a80f6d9c4287c46ffa41a03667bbd1 ] This allows code re-use in the followup patch. No functional changes intended. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_netlink.c | 68 ++++++++++++++++++---------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1ea2ad732d578..924d766e6c53e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1708,6 +1708,47 @@ static int ctnetlink_done_list(struct netlink_callback *cb) return 0; } +static int ctnetlink_dump_one_entry(struct sk_buff *skb, + struct netlink_callback *cb, + struct nf_conn *ct, + bool dying) +{ + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + u8 l3proto = nfmsg->nfgen_family; + int res; + + if (l3proto && nf_ct_l3num(ct) != l3proto) + return 0; + + if (ctx->last) { + if (ct != ctx->last) + return 0; + + ctx->last = NULL; + } + + /* We can't dump extension info for the unconfirmed + * list because unconfirmed conntracks can have + * ct->ext reallocated (and thus freed). + * + * In the dying list case ct->ext can't be free'd + * until after we drop pcpu->lock. + */ + res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct, dying, 0); + if (res < 0) { + if (!refcount_inc_not_zero(&ct->ct_general.use)) + return 0; + + ctx->last = ct; + } + + return res; +} + static int ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) { @@ -1715,12 +1756,9 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - u_int8_t l3proto = nfmsg->nfgen_family; - int res; - int cpu; struct hlist_nulls_head *list; struct net *net = sock_net(skb->sk); + int res, cpu; if (ctx->done) return 0; @@ -1739,30 +1777,10 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying restart: hlist_nulls_for_each_entry(h, n, list, hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); - if (l3proto && nf_ct_l3num(ct) != l3proto) - continue; - if (ctx->last) { - if (ct != last) - continue; - ctx->last = NULL; - } - /* We can't dump extension info for the unconfirmed - * list because unconfirmed conntracks can have - * ct->ext reallocated (and thus freed). - * - * In the dying list case ct->ext can't be free'd - * until after we drop pcpu->lock. - */ - res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFNL_MSG_TYPE(cb->nlh->nlmsg_type), - ct, dying, 0); + res = ctnetlink_dump_one_entry(skb, cb, ct, dying); if (res < 0) { - if (!refcount_inc_not_zero(&ct->ct_general.use)) - continue; ctx->cpu = cpu; - ctx->last = ct; spin_unlock_bh(&pcpu->lock); goto out; } From 87be95810c083816e7fd928009a319ae9fb10c2a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Apr 2022 13:01:16 +0200 Subject: [PATCH 062/299] netfilter: ecache: use dedicated list for event redelivery [ Upstream commit 2ed3bf188b33630cf9d93b996ebf001847a00b5a ] This disentangles event redelivery and the percpu dying list. Because entries are now stored on a dedicated list, all entries are in NFCT_ECACHE_DESTROY_FAIL state and all entries still have confirmed bit set -- the reference count is at least 1. The 'struct net' back-pointer can be removed as well. The pcpu dying list will be removed eventually, it has no functionality. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_conntrack.h | 3 +- include/net/netfilter/nf_conntrack_ecache.h | 2 - net/netfilter/nf_conntrack_core.c | 33 +++++- net/netfilter/nf_conntrack_ecache.c | 117 +++++++++----------- 4 files changed, 82 insertions(+), 73 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 69e6c6a218be8..28672a9444997 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -45,7 +45,8 @@ union nf_conntrack_expect_proto { struct nf_conntrack_net_ecache { struct delayed_work dwork; - struct netns_ct *ct_net; + spinlock_t dying_lock; + struct hlist_nulls_head dying_list; }; struct nf_conntrack_net { diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 6c4c490a3e342..a6135b5030ddc 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -14,7 +14,6 @@ #include enum nf_ct_ecache_state { - NFCT_ECACHE_UNKNOWN, /* destroy event not sent */ NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ }; @@ -23,7 +22,6 @@ struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ - enum nf_ct_ecache_state state:8;/* ecache state */ u32 missed; /* missed events */ u32 portid; /* netlink portid of destroyer */ }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0164e5f522e8e..ca1d1d1051631 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -660,15 +660,12 @@ void nf_ct_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_ct_destroy); -static void nf_ct_delete_from_lists(struct nf_conn *ct) +static void __nf_ct_delete_from_lists(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; unsigned int sequence; - nf_ct_helper_destroy(ct); - - local_bh_disable(); do { sequence = read_seqcount_begin(&nf_conntrack_generation); hash = hash_conntrack(net, @@ -681,12 +678,33 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) clean_from_lists(ct); nf_conntrack_double_unlock(hash, reply_hash); +} +static void nf_ct_delete_from_lists(struct nf_conn *ct) +{ + nf_ct_helper_destroy(ct); + local_bh_disable(); + + __nf_ct_delete_from_lists(ct); nf_ct_add_to_dying_list(ct); local_bh_enable(); } +static void nf_ct_add_to_ecache_list(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + struct nf_conntrack_net *cnet = nf_ct_pernet(nf_ct_net(ct)); + + spin_lock(&cnet->ecache.dying_lock); + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &cnet->ecache.dying_list); + spin_unlock(&cnet->ecache.dying_lock); +#else + nf_ct_add_to_dying_list(ct); +#endif +} + bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; @@ -709,7 +727,12 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) /* destroy event was not delivered. nf_ct_put will * be done by event cache worker on redelivery. */ - nf_ct_delete_from_lists(ct); + nf_ct_helper_destroy(ct); + local_bh_disable(); + __nf_ct_delete_from_lists(ct); + nf_ct_add_to_ecache_list(ct); + local_bh_enable(); + nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL); return false; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 0cb2da0a759a6..2752859479b2a 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -29,8 +28,9 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -#define ECACHE_RETRY_WAIT (HZ/10) -#define ECACHE_STACK_ALLOC (256 / sizeof(void *)) +#define DYING_NULLS_VAL ((1 << 30) + 1) +#define ECACHE_MAX_JIFFIES msecs_to_jiffies(10) +#define ECACHE_RETRY_JIFFIES msecs_to_jiffies(10) enum retry_state { STATE_CONGESTED, @@ -38,58 +38,58 @@ enum retry_state { STATE_DONE, }; -static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) +static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet) { - struct nf_conn *refs[ECACHE_STACK_ALLOC]; + unsigned long stop = jiffies + ECACHE_MAX_JIFFIES; + struct hlist_nulls_head evicted_list; enum retry_state ret = STATE_DONE; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int evicted = 0; + unsigned int sent; - spin_lock(&pcpu->lock); + INIT_HLIST_NULLS_HEAD(&evicted_list, DYING_NULLS_VAL); - hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { +next: + sent = 0; + spin_lock_bh(&cnet->ecache.dying_lock); + + hlist_nulls_for_each_entry_safe(h, n, &cnet->ecache.dying_list, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - struct nf_conntrack_ecache *e; - - if (!nf_ct_is_confirmed(ct)) - continue; - - /* This ecache access is safe because the ct is on the - * pcpu dying list and we hold the spinlock -- the entry - * cannot be free'd until after the lock is released. - * - * This is true even if ct has a refcount of 0: the - * cpu that is about to free the entry must remove it - * from the dying list and needs the lock to do so. - */ - e = nf_ct_ecache_find(ct); - if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL) - continue; - /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means - * the worker owns this entry: the ct will remain valid - * until the worker puts its ct reference. + /* The worker owns all entries, ct remains valid until nf_ct_put + * in the loop below. */ if (nf_conntrack_event(IPCT_DESTROY, ct)) { ret = STATE_CONGESTED; break; } - e->state = NFCT_ECACHE_DESTROY_SENT; - refs[evicted] = ct; + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &evicted_list); - if (++evicted >= ARRAY_SIZE(refs)) { + if (time_after(stop, jiffies)) { ret = STATE_RESTART; break; } + + if (sent++ > 16) { + spin_unlock_bh(&cnet->ecache.dying_lock); + cond_resched(); + goto next; + } } - spin_unlock(&pcpu->lock); + spin_unlock_bh(&cnet->ecache.dying_lock); - /* can't _put while holding lock */ - while (evicted) - nf_ct_put(refs[--evicted]); + hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + hlist_nulls_add_fake(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); + nf_ct_put(ct); + + cond_resched(); + } return ret; } @@ -97,35 +97,20 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) static void ecache_work(struct work_struct *work) { struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work); - struct netns_ct *ctnet = cnet->ecache.ct_net; - int cpu, delay = -1; - struct ct_pcpu *pcpu; - - local_bh_disable(); - - for_each_possible_cpu(cpu) { - enum retry_state ret; - - pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); - - ret = ecache_work_evict_list(pcpu); - - switch (ret) { - case STATE_CONGESTED: - delay = ECACHE_RETRY_WAIT; - goto out; - case STATE_RESTART: - delay = 0; - break; - case STATE_DONE: - break; - } + int ret, delay = -1; + + ret = ecache_work_evict_list(cnet); + switch (ret) { + case STATE_CONGESTED: + delay = ECACHE_RETRY_JIFFIES; + break; + case STATE_RESTART: + delay = 0; + break; + case STATE_DONE: + break; } - out: - local_bh_enable(); - - ctnet->ecache_dwork_pending = delay > 0; if (delay >= 0) schedule_delayed_work(&cnet->ecache.dwork, delay); } @@ -199,7 +184,6 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, */ if (e->portid == 0 && portid != 0) e->portid = portid; - e->state = NFCT_ECACHE_DESTROY_FAIL; } return ret; @@ -297,8 +281,10 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) schedule_delayed_work(&cnet->ecache.dwork, HZ); net->ct.ecache_dwork_pending = true; } else if (state == NFCT_ECACHE_DESTROY_SENT) { - net->ct.ecache_dwork_pending = false; - mod_delayed_work(system_wq, &cnet->ecache.dwork, 0); + if (!hlist_nulls_empty(&cnet->ecache.dying_list)) + mod_delayed_work(system_wq, &cnet->ecache.dwork, 0); + else + net->ct.ecache_dwork_pending = false; } } @@ -311,8 +297,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net) net->ct.sysctl_events = nf_ct_events; - cnet->ecache.ct_net = &net->ct; INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work); + INIT_HLIST_NULLS_HEAD(&cnet->ecache.dying_list, DYING_NULLS_VAL); + spin_lock_init(&cnet->ecache.dying_lock); BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */ } From be4dd63974cf215fd10b3a411a7de4013f78293b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Apr 2022 13:01:17 +0200 Subject: [PATCH 063/299] netfilter: conntrack: include ecache dying list in dumps [ Upstream commit 0d3cc504ba9cdcff76346306c37eb1ea01e60a86 ] The new pernet dying list includes conntrack entries that await delivery of the 'destroy' event via ctnetlink. The old percpu dying list will be removed soon. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_conntrack_ecache.h | 2 + net/netfilter/nf_conntrack_ecache.c | 10 +++++ net/netfilter/nf_conntrack_netlink.c | 43 +++++++++++++++++++++ 3 files changed, 55 insertions(+) diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index a6135b5030ddc..b57d73785e4d3 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -164,6 +164,8 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state); void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); +struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net); + static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return net->ct.ecache_dwork_pending; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 2752859479b2a..334b2b4e5e8b6 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -38,6 +38,16 @@ enum retry_state { STATE_DONE, }; +struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net) +{ + struct nf_conntrack_net *cnet = nf_ct_pernet(net); + + return &cnet->ecache; +} +#if IS_MODULE(CONFIG_NF_CT_NETLINK) +EXPORT_SYMBOL_GPL(nf_conn_pernet_ecache); +#endif + static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet) { unsigned long stop = jiffies + ECACHE_MAX_JIFFIES; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 924d766e6c53e..a4ec2aad2187b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -62,6 +62,7 @@ struct ctnetlink_list_dump_ctx { struct nf_conn *last; unsigned int cpu; bool done; + bool retrans_done; }; static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, @@ -1802,6 +1803,48 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying static int ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) { + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; + struct nf_conn *last = ctx->last; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + const struct net *net = sock_net(skb->sk); + struct nf_conntrack_net_ecache *ecache_net; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; +#endif + + if (ctx->retrans_done) + return ctnetlink_dump_list(skb, cb, true); + + ctx->last = NULL; + +#ifdef CONFIG_NF_CONNTRACK_EVENTS + ecache_net = nf_conn_pernet_ecache(net); + spin_lock_bh(&ecache_net->dying_lock); + + hlist_nulls_for_each_entry(h, n, &ecache_net->dying_list, hnnode) { + struct nf_conn *ct; + int res; + + ct = nf_ct_tuplehash_to_ctrack(h); + if (last && last != ct) + continue; + + res = ctnetlink_dump_one_entry(skb, cb, ct, true); + if (res < 0) { + spin_unlock_bh(&ecache_net->dying_lock); + nf_ct_put(last); + return skb->len; + } + + nf_ct_put(last); + last = NULL; + } + + spin_unlock_bh(&ecache_net->dying_lock); +#endif + nf_ct_put(last); + ctx->retrans_done = true; + return ctnetlink_dump_list(skb, cb, true); } From 594cea2c09f7cd440d1ee1c4547d5bc6a646b0e4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Apr 2022 13:01:18 +0200 Subject: [PATCH 064/299] netfilter: conntrack: remove the percpu dying list [ Upstream commit 1397af5bfd7d32b0cf2adb70a78c9a9e8f11d912 ] Its no longer needed. Entries that need event redelivery are placed on the new pernet dying list. The advantage is that there is no need to take additional spinlock on conntrack removal unless event redelivery failed or the conntrack entry was never added to the table in the first place (confirmed bit not set). The IPS_CONFIRMED bit now needs to be set as soon as the entry has been unlinked from the unconfirmed list, else the destroy function may attempt to unlink it a second time. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netns/conntrack.h | 1 - net/netfilter/nf_conntrack_core.c | 35 +++++----------------------- net/netfilter/nf_conntrack_ecache.c | 1 - net/netfilter/nf_conntrack_netlink.c | 23 ++++++------------ 4 files changed, 13 insertions(+), 47 deletions(-) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0294f3d473af8..e985a3010b89c 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -96,7 +96,6 @@ struct nf_ip_net { struct ct_pcpu { spinlock_t lock; struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; }; struct netns_ct { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ca1d1d1051631..9010b6e5a072e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -525,21 +525,6 @@ clean_from_lists(struct nf_conn *ct) nf_ct_remove_expectations(ct); } -/* must be called with local_bh_disable */ -static void nf_ct_add_to_dying_list(struct nf_conn *ct) -{ - struct ct_pcpu *pcpu; - - /* add this conntrack to the (per cpu) dying list */ - ct->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); - - spin_lock(&pcpu->lock); - hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->dying); - spin_unlock(&pcpu->lock); -} - /* must be called with local_bh_disable */ static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct) { @@ -556,11 +541,11 @@ static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct) } /* must be called with local_bh_disable */ -static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) +static void nf_ct_del_from_unconfirmed_list(struct nf_conn *ct) { struct ct_pcpu *pcpu; - /* We overload first tuple to link into unconfirmed or dying list.*/ + /* We overload first tuple to link into unconfirmed list.*/ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); spin_lock(&pcpu->lock); @@ -648,7 +633,8 @@ void nf_ct_destroy(struct nf_conntrack *nfct) */ nf_ct_remove_expectations(ct); - nf_ct_del_from_dying_or_unconfirmed_list(ct); + if (unlikely(!nf_ct_is_confirmed(ct))) + nf_ct_del_from_unconfirmed_list(ct); local_bh_enable(); @@ -686,7 +672,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) local_bh_disable(); __nf_ct_delete_from_lists(ct); - nf_ct_add_to_dying_list(ct); local_bh_enable(); } @@ -700,8 +685,6 @@ static void nf_ct_add_to_ecache_list(struct nf_conn *ct) hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, &cnet->ecache.dying_list); spin_unlock(&cnet->ecache.dying_lock); -#else - nf_ct_add_to_dying_list(ct); #endif } @@ -995,7 +978,6 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct) struct nf_conn_tstamp *tstamp; refcount_inc(&ct->ct_general.use); - ct->status |= IPS_CONFIRMED; /* set conntrack timestamp, if enabled. */ tstamp = nf_conn_tstamp_find(ct); @@ -1024,7 +1006,6 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, nf_conntrack_get(&ct->ct_general); nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_ct_add_to_dying_list(loser_ct); nf_ct_put(loser_ct); nf_ct_set(skb, ct, ctinfo); @@ -1157,7 +1138,6 @@ nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h, return ret; drop: - nf_ct_add_to_dying_list(loser_ct); NF_CT_STAT_INC(net, drop); NF_CT_STAT_INC(net, insert_failed); return NF_DROP; @@ -1224,10 +1204,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) * user context, else we insert an already 'dead' hash, blocking * further use of that particular connection -JM. */ - nf_ct_del_from_dying_or_unconfirmed_list(ct); + nf_ct_del_from_unconfirmed_list(ct); + ct->status |= IPS_CONFIRMED; if (unlikely(nf_ct_is_dying(ct))) { - nf_ct_add_to_dying_list(ct); NF_CT_STAT_INC(net, insert_failed); goto dying; } @@ -1251,7 +1231,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) goto out; if (chainlen++ > max_chainlen) { chaintoolong: - nf_ct_add_to_dying_list(ct); NF_CT_STAT_INC(net, chaintoolong); NF_CT_STAT_INC(net, insert_failed); ret = NF_DROP; @@ -2800,7 +2779,6 @@ void nf_conntrack_init_end(void) * We need to use special "null" values, not used in hash table */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) -#define DYING_NULLS_VAL ((1<<30)+1) int nf_conntrack_init_net(struct net *net) { @@ -2821,7 +2799,6 @@ int nf_conntrack_init_net(struct net *net) spin_lock_init(&pcpu->lock); INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); } net->ct.stat = alloc_percpu(struct ip_conntrack_stat); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 334b2b4e5e8b6..7472c544642fb 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -94,7 +94,6 @@ static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet) hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - hlist_nulls_add_fake(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); nf_ct_put(ct); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a4ec2aad2187b..2e9c8183e4a26 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -62,7 +62,6 @@ struct ctnetlink_list_dump_ctx { struct nf_conn *last; unsigned int cpu; bool done; - bool retrans_done; }; static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, @@ -1751,13 +1750,12 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb, } static int -ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) +ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) { struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - struct hlist_nulls_head *list; struct net *net = sock_net(skb->sk); int res, cpu; @@ -1774,12 +1772,11 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); spin_lock_bh(&pcpu->lock); - list = dying ? &pcpu->dying : &pcpu->unconfirmed; restart: - hlist_nulls_for_each_entry(h, n, list, hnnode) { + hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); - res = ctnetlink_dump_one_entry(skb, cb, ct, dying); + res = ctnetlink_dump_one_entry(skb, cb, ct, false); if (res < 0) { ctx->cpu = cpu; spin_unlock_bh(&pcpu->lock); @@ -1812,8 +1809,8 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_nulls_node *n; #endif - if (ctx->retrans_done) - return ctnetlink_dump_list(skb, cb, true); + if (ctx->done) + return 0; ctx->last = NULL; @@ -1842,10 +1839,10 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) spin_unlock_bh(&ecache_net->dying_lock); #endif + ctx->done = true; nf_ct_put(last); - ctx->retrans_done = true; - return ctnetlink_dump_list(skb, cb, true); + return skb->len; } static int ctnetlink_get_ct_dying(struct sk_buff *skb, @@ -1863,12 +1860,6 @@ static int ctnetlink_get_ct_dying(struct sk_buff *skb, return -EOPNOTSUPP; } -static int -ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) -{ - return ctnetlink_dump_list(skb, cb, false); -} - static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) From ac5e76476d3c29704fbbc9783178a7751091cd86 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jul 2022 16:50:04 +0200 Subject: [PATCH 065/299] netfilter: conntrack: fix crash due to confirmed bit load reordering [ Upstream commit 0ed8f619b412b52360ccdfaf997223ccd9319569 ] Kajetan Puchalski reports crash on ARM, with backtrace of: __nf_ct_delete_from_lists nf_ct_delete early_drop __nf_conntrack_alloc Unlike atomic_inc_not_zero, refcount_inc_not_zero is not a full barrier. conntrack uses SLAB_TYPESAFE_BY_RCU, i.e. it is possible that a 'newly' allocated object is still in use on another CPU: CPU1 CPU2 encounter 'ct' during hlist walk delete_from_lists refcount drops to 0 kmem_cache_free(ct); __nf_conntrack_alloc() // returns same object refcount_inc_not_zero(ct); /* might fail */ /* If set, ct is public/in the hash table */ test_bit(IPS_CONFIRMED_BIT, &ct->status); In case CPU1 already set refcount back to 1, refcount_inc_not_zero() will succeed. The expected possibilities for a CPU that obtained the object 'ct' (but no reference so far) are: 1. refcount_inc_not_zero() fails. CPU2 ignores the object and moves to the next entry in the list. This happens for objects that are about to be free'd, that have been free'd, or that have been reallocated by __nf_conntrack_alloc(), but where the refcount has not been increased back to 1 yet. 2. refcount_inc_not_zero() succeeds. CPU2 checks the CONFIRMED bit in ct->status. If set, the object is public/in the table. If not, the object must be skipped; CPU2 calls nf_ct_put() to un-do the refcount increment and moves to the next object. Parallel deletion from the hlists is prevented by a 'test_and_set_bit(IPS_DYING_BIT, &ct->status);' check, i.e. only one cpu will do the unlink, the other one will only drop its reference count. Because refcount_inc_not_zero is not a full barrier, CPU2 may try to delete an object that is not on any list: 1. refcount_inc_not_zero() successful (refcount inited to 1 on other CPU) 2. CONFIRMED test also successful (load was reordered or zeroing of ct->status not yet visible) 3. delete_from_lists unlinks entry not on the hlist, because IPS_DYING_BIT is 0 (already cleared). 2) is already wrong: CPU2 will handle a partially initited object that is supposed to be private to CPU1. Add needed barriers when refcount_inc_not_zero() is successful. It also inserts a smp_wmb() before the refcount is set to 1 during allocation. Because other CPU might still see the object, refcount_set(1) "resurrects" it, so we need to make sure that other CPUs will also observe the right content. In particular, the CONFIRMED bit test must only pass once the object is fully initialised and either in the hash or about to be inserted (with locks held to delay possible unlink from early_drop or gc worker). I did not change flow_offload_alloc(), as far as I can see it should call refcount_inc(), not refcount_inc_not_zero(): the ct object is attached to the skb so its refcount should be >= 1 in all cases. v2: prefer smp_acquire__after_ctrl_dep to smp_rmb (Will Deacon). v3: keep smp_acquire__after_ctrl_dep close to refcount_inc_not_zero call add comment in nf_conntrack_netlink, no control dependency there due to locks. Cc: Peter Zijlstra Link: https://lore.kernel.org/all/Yr7WTfd6AVTQkLjI@e126311.manchester.arm.com/ Reported-by: Kajetan Puchalski Diagnosed-by: Will Deacon Fixes: 719774377622 ("netfilter: conntrack: convert to refcount_t api") Signed-off-by: Florian Westphal Acked-by: Will Deacon Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 22 ++++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 1 + net/netfilter/nf_conntrack_standalone.c | 3 +++ 3 files changed, 26 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9010b6e5a072e..5a85735512ce3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -764,6 +764,9 @@ static void nf_ct_gc_expired(struct nf_conn *ct) if (!refcount_inc_not_zero(&ct->ct_general.use)) return; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (nf_ct_should_gc(ct)) nf_ct_kill(ct); @@ -830,6 +833,9 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, */ ct = nf_ct_tuplehash_to_ctrack(h); if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { + /* re-check key after refcount */ + smp_acquire__after_ctrl_dep(); + if (likely(nf_ct_key_equal(h, tuple, zone, net))) goto found; @@ -1369,6 +1375,9 @@ static unsigned int early_drop_list(struct net *net, if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; + /* load ->ct_net and ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + /* kill only if still in same netns -- might have moved due to * SLAB_TYPESAFE_BY_RCU rules. * @@ -1518,6 +1527,9 @@ static void gc_worker(struct work_struct *work) if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (gc_worker_skip_ct(tmp)) { nf_ct_put(tmp); continue; @@ -1749,6 +1761,16 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); + /* Other CPU might have obtained a pointer to this object before it was + * released. Because refcount is 0, refcount_inc_not_zero() will fail. + * + * After refcount_set(1) it will succeed; ensure that zeroing of + * ct->status and the correct ct->net pointer are visible; else other + * core might observe CONFIRMED bit which means the entry is valid and + * in the hash table, but its not (anymore). + */ + smp_wmb(); + /* Now it is inserted into the unconfirmed list, set refcount to 1. */ refcount_set(&ct->ct_general.use, 1); nf_ct_add_to_unconfirmed_list(ct); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2e9c8183e4a26..431e005ff14d3 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1203,6 +1203,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { + /* need to defer nf_ct_kill() until lock is released */ if (i < ARRAY_SIZE(nf_ct_evict) && refcount_inc_not_zero(&ct->ct_general.use)) nf_ct_evict[i++] = ct; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 55aa55b252b20..48812dda273b2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -306,6 +306,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use))) return 0; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (nf_ct_should_gc(ct)) { nf_ct_kill(ct); goto release; From cfa4caf3e881ad6dd366c903c34f1c7f21b857ab Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 6 Jul 2022 09:39:13 +0100 Subject: [PATCH 066/299] net: stmmac: dwc-qos: Disable split header for Tegra194 [ Upstream commit 029c1c2059e9c4b38f97a06204cdecd10cfbeb8a ] There is a long-standing issue with the Synopsys DWC Ethernet driver for Tegra194 where random system crashes have been observed [0]. The problem occurs when the split header feature is enabled in the stmmac driver. In the bad case, a larger than expected buffer length is received and causes the calculation of the total buffer length to overflow. This results in a very large buffer length that causes the kernel to crash. Why this larger buffer length is received is not clear, however, the feedback from the NVIDIA design team is that the split header feature is not supported for Tegra194. Therefore, disable split header support for Tegra194 to prevent these random crashes from occurring. [0] https://lore.kernel.org/linux-tegra/b0b17697-f23e-8fa5-3757-604a86f3a095@nvidia.com/ Fixes: 67afd6d1cfdf ("net: stmmac: Add Split Header support and enable it in XGMAC cores") Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20220706083913.13750-1-jonathanh@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index bc91fd867dcd4..358fc26f8d1fc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -361,6 +361,7 @@ static int tegra_eqos_probe(struct platform_device *pdev, data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; + data->sph_disable = 1; err = tegra_eqos_init(pdev, eqos); if (err < 0) From 79d60284bcd2d082a56f45ed716b93d78c24e12e Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 6 Jul 2022 12:32:08 +0530 Subject: [PATCH 067/299] net: ethernet: ti: am65-cpsw: Fix devlink port register sequence [ Upstream commit 0680e20af5fbf41df8a11b11bd9a7c25b2ca0746 ] Renaming interfaces using udevd depends on the interface being registered before its netdev is registered. Otherwise, udevd reads an empty phys_port_name value, resulting in the interface not being renamed. Fix this by registering the interface before registering its netdev by invoking am65_cpsw_nuss_register_devlink() before invoking register_netdev() for the interface. Move the function call to devlink_port_type_eth_set(), invoking it after register_netdev() is invoked, to ensure that netlink notification for the port state change is generated after the netdev is completely initialized. Fixes: 58356eb31d60 ("net: ti: am65-cpsw-nuss: Add devlink support") Signed-off-by: Siddharth Vadapalli Link: https://lore.kernel.org/r/20220706070208.12207-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 6d978dbf708f2..2989530534074 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2475,7 +2475,6 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) port->port_id, ret); goto dl_port_unreg; } - devlink_port_type_eth_set(dl_port, port->ndev); } devlink_register(common->devlink); return ret; @@ -2519,6 +2518,7 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { struct device *dev = common->dev; + struct devlink_port *dl_port; struct am65_cpsw_port *port; int ret = 0, i; @@ -2535,6 +2535,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) return ret; } + ret = am65_cpsw_nuss_register_devlink(common); + if (ret) + return ret; + for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; @@ -2547,25 +2551,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) i, ret); goto err_cleanup_ndev; } + + dl_port = &port->devlink_port; + devlink_port_type_eth_set(dl_port, port->ndev); } ret = am65_cpsw_register_notifiers(common); if (ret) goto err_cleanup_ndev; - ret = am65_cpsw_nuss_register_devlink(common); - if (ret) - goto clean_unregister_notifiers; - /* can't auto unregister ndev using devm_add_action() due to * devres release sequence in DD core for DMA */ return 0; -clean_unregister_notifiers: - am65_cpsw_unregister_notifiers(common); + err_cleanup_ndev: am65_cpsw_nuss_cleanup_ndev(common); + am65_cpsw_unregister_devlink(common); return ret; } From 62acd1e2fe22e7ac4dccd17855fe65ca6c90a4b8 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 6 Jul 2022 16:28:45 +0300 Subject: [PATCH 068/299] net: ocelot: fix wrong time_after usage [ Upstream commit f46fd3d7c3bd5d7bd5bb664135cf32ca9e97190b ] Accidentally noticed, that this driver is the only user of while (time_after(jiffies...)). It looks like typo, because likely this while loop will finish after 1st iteration, because time_after() returns true when 1st argument _is after_ 2nd one. There is one possible problem with this poll loop: the scheduler could put the thread to sleep, and it does not get woken up for OCELOT_FDMA_CH_SAFE_TIMEOUT_US. During that time, the hardware has done its thing, but you exit the while loop and return -ETIMEDOUT. Fix it by using sane poll API that avoids all problems described above Fixes: 753a026cfec1 ("net: ocelot: add FDMA support") Suggested-by: Andrew Lunn Signed-off-by: Pavel Skripkin Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220706132845.27968-1-paskripkin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot_fdma.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index dffa597bffe6b..6a8f84f325a39 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -94,19 +94,18 @@ static void ocelot_fdma_activate_chan(struct ocelot *ocelot, dma_addr_t dma, ocelot_fdma_writel(ocelot, MSCC_FDMA_CH_ACTIVATE, BIT(chan)); } +static u32 ocelot_fdma_read_ch_safe(struct ocelot *ocelot) +{ + return ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE); +} + static int ocelot_fdma_wait_chan_safe(struct ocelot *ocelot, int chan) { - unsigned long timeout; u32 safe; - timeout = jiffies + usecs_to_jiffies(OCELOT_FDMA_CH_SAFE_TIMEOUT_US); - do { - safe = ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE); - if (safe & BIT(chan)) - return 0; - } while (time_after(jiffies, timeout)); - - return -ETIMEDOUT; + return readx_poll_timeout_atomic(ocelot_fdma_read_ch_safe, ocelot, safe, + safe & BIT(chan), 0, + OCELOT_FDMA_CH_SAFE_TIMEOUT_US); } static void ocelot_fdma_dcb_set_data(struct ocelot_fdma_dcb *dcb, From bfbb0be2c8a40a1a7421942fa3d567b2e956aa56 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:52 -0700 Subject: [PATCH 069/299] sysctl: Fix data races in proc_dointvec(). [ Upstream commit 1f1be04b4d48a2475ea1aab46a99221bfc5c0968 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 830aaf8ca08ee..27b3a55dc4bd5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -518,14 +518,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, if (*negp) { if (*lvalp > (unsigned long) INT_MAX + 1) return -EINVAL; - *valp = -*lvalp; + WRITE_ONCE(*valp, -*lvalp); } else { if (*lvalp > (unsigned long) INT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } } else { - int val = *valp; + int val = READ_ONCE(*valp); if (val < 0) { *negp = true; *lvalp = -(unsigned long)val; From 630c76850d554d7140232e71b5d1663e88cffb54 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:53 -0700 Subject: [PATCH 070/299] sysctl: Fix data races in proc_douintvec(). [ Upstream commit 4762b532ec9539755aab61445d5da6e1926ccb99 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_douintvec() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_douintvec() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 27b3a55dc4bd5..6c61e2992fed1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -544,9 +544,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, if (write) { if (*lvalp > UINT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } else { - unsigned int val = *valp; + unsigned int val = READ_ONCE(*valp); *lvalp = (unsigned long)val; } return 0; From 2ebc99cab28f52c3ed27f67c0c27e21a4964aebb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:54 -0700 Subject: [PATCH 071/299] sysctl: Fix data races in proc_dointvec_minmax(). [ Upstream commit f613d86d014b6375a4085901de39406598121e35 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6c61e2992fed1..22ebf3f5eefe3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -929,7 +929,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, if ((param->min && *param->min > tmp) || (param->max && *param->max < tmp)) return -EINVAL; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; From b60eddf98b9716651069dfda296c91311a7a6293 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:55 -0700 Subject: [PATCH 072/299] sysctl: Fix data races in proc_douintvec_minmax(). [ Upstream commit 2d3b559df3ed39258737789aae2ae7973d205bc1 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_douintvec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_douintvec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 61d9b56a8920 ("sysctl: add unsigned int range support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 22ebf3f5eefe3..a769f64a78ed2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -995,7 +995,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, (param->max && *param->max < tmp)) return -ERANGE; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; From 8309d12bc3098ca8e56ab6c27a97f134c33a3dec Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:56 -0700 Subject: [PATCH 073/299] sysctl: Fix data races in proc_doulongvec_minmax(). [ Upstream commit c31bcc8fb89fc2812663900589c6325ba35d9a65 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_doulongvec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_doulongvec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a769f64a78ed2..7a8899f237a28 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1162,9 +1162,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, err = -EINVAL; break; } - *i = val; + WRITE_ONCE(*i, val); } else { - val = convdiv * (*i) / convmul; + val = convdiv * READ_ONCE(*i) / convmul; if (!first) proc_put_char(&buffer, &left, '\t'); proc_put_long(&buffer, &left, val, false); From 1908c3d91b4927a03713287bcbe75bf8fa5c7ead Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:57 -0700 Subject: [PATCH 074/299] sysctl: Fix data races in proc_dointvec_jiffies(). [ Upstream commit e877820877663fbae8cb9582ea597a7230b94df3 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec_jiffies() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec_jiffies() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7a8899f237a28..878b1122cb89c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1245,9 +1245,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, if (write) { if (*lvalp > INT_MAX / HZ) return 1; - *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); + if (*negp) + WRITE_ONCE(*valp, -*lvalp * HZ); + else + WRITE_ONCE(*valp, *lvalp * HZ); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; From e293ebc8b3bfb4cc45614bcf789a766f9b13e158 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:58 -0700 Subject: [PATCH 075/299] tcp: Fix a data-race around sysctl_tcp_max_orphans. [ Upstream commit 47e6ab24e8c6e3ca10ceb5835413f401f90de4bf ] While reading sysctl_tcp_max_orphans, it can be changed concurrently. So, we need to add READ_ONCE() to avoid a data-race. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e31cf137c6140..f2fd1779d9251 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2735,7 +2735,8 @@ static void tcp_orphan_update(struct timer_list *unused) static bool tcp_too_many_orphans(int shift) { - return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; + return READ_ONCE(tcp_orphan_cache) << shift > + READ_ONCE(sysctl_tcp_max_orphans); } bool tcp_check_oom(struct sock *sk, int shift) From c9b8ef81eb73a592e407e38c80ff0ddd042c514d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:39:59 -0700 Subject: [PATCH 076/299] inetpeer: Fix data-races around sysctl. [ Upstream commit 3d32edf1f3c38d3301f6434e56316f293466d7fb ] While reading inetpeer sysctl variables, they can be changed concurrently. So, we need to add READ_ONCE() to avoid data-races. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inetpeer.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da21dfce24d73..e9fed83e9b3cc 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], unsigned int gc_cnt) { + int peer_threshold, peer_maxttl, peer_minttl; struct inet_peer *p; __u32 delta, ttl; int i; - if (base->total >= inet_peer_threshold) + peer_threshold = READ_ONCE(inet_peer_threshold); + peer_maxttl = READ_ONCE(inet_peer_maxttl); + peer_minttl = READ_ONCE(inet_peer_minttl); + + if (base->total >= peer_threshold) ttl = 0; /* be aggressive */ else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - base->total / inet_peer_threshold * HZ; + ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ * + base->total / peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; From 28a912218350ba3de991ed23dbdb51006ffece8b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:40:00 -0700 Subject: [PATCH 077/299] net: Fix data-races around sysctl_mem. [ Upstream commit 310731e2f1611d1d13aae237abcf8e66d33345d5 ] While reading .sysctl_mem, it can be changed concurrently. So, we need to add READ_ONCE() to avoid data-races. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 3c4fb8f03fd99..6bef0ffb1e7b7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1534,7 +1534,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; + long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); #if PAGE_SIZE > SK_MEM_QUANTUM val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; From 59e26906b89cc35bb54476498772b45cbc32323f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:40:01 -0700 Subject: [PATCH 078/299] cipso: Fix data-races around sysctl. [ Upstream commit dd44f04b9214adb68ef5684ae87a81ba03632250 ] While reading cipso sysctl variables, they can be changed concurrently. So, we need to add READ_ONCE() to avoid data-races. Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine") Signed-off-by: Kuniyuki Iwashima Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv4/cipso_ipv4.c | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 66828293d9cb7..8ffed7135fc1a 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1085,7 +1085,7 @@ cipso_cache_enable - BOOLEAN cipso_cache_bucket_size - INTEGER The CIPSO label cache consists of a fixed size hash table with each hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the + the number of entries in each hash bucket; the larger the value is, the more CIPSO label mappings that can be cached. When the number of entries in a given hash bucket reaches this limit adding new entries causes the oldest entry in the bucket to be removed to make room. diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 62d5f99760aac..6cd3b6c559f05 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key, struct cipso_v4_map_cache_entry *prev_entry = NULL; u32 hash; - if (!cipso_v4_cache_enabled) + if (!READ_ONCE(cipso_v4_cache_enabled)) return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); @@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key, int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { + int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize); int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; u32 cipso_ptr_len; - if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0) return 0; cipso_ptr_len = cipso_ptr[1]; @@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); - if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + if (cipso_v4_cache[bkt].size < bkt_size) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; } else { @@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, /* This will send packets using the "optimized" format when * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ - if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) + if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 && + ret_val <= 10) tag_len = 14; else tag_len = 4 + ret_val; @@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * all the CIPSO validations here but it doesn't * really specify _exactly_ what we need to validate * ... so, just make it a sysctl tunable. */ - if (cipso_v4_rbm_strictvalid) { + if (READ_ONCE(cipso_v4_rbm_strictvalid)) { if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; From 798c2cf57c63ab39c8aac24d6a3d50f4fa5eeb06 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:40:02 -0700 Subject: [PATCH 079/299] icmp: Fix data-races around sysctl. [ Upstream commit 48d7ee321ea5182c6a70782aa186422a70e67e22 ] While reading icmp sysctl variables, they can be changed concurrently. So, we need to add READ_ONCE() to avoid data-races. Fixes: 4cdf507d5452 ("icmp: add a global rate limitation") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 72a375c7f4172..97350a38a75d0 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -253,11 +253,12 @@ bool icmp_global_allow(void) spin_lock(&icmp_global.lock); delta = min_t(u32, now - icmp_global.stamp, HZ); if (delta >= HZ / 50) { - incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; if (incr) WRITE_ONCE(icmp_global.stamp, now); } - credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + credit = min_t(u32, icmp_global.credit + incr, + READ_ONCE(sysctl_icmp_msgs_burst)); if (credit) { /* We want to use a credit of one in average, but need to randomize * it for security reasons. From 190cd4ff128373271e065afb20f1d2247b3f10c3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 Jul 2022 16:40:03 -0700 Subject: [PATCH 080/299] ipv4: Fix a data-race around sysctl_fib_sync_mem. [ Upstream commit 73318c4b7dbd0e781aaababff17376b2894745c0 ] While reading sysctl_fib_sync_mem, it can be changed concurrently. So, we need to add READ_ONCE() to avoid a data-race. Fixes: 9ab948a91b2c ("ipv4: Allow amount of dirty memory from fib resizing to be controllable") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index fb0e49c36c2e6..43a4962722279 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -498,7 +498,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= sysctl_fib_sync_mem) { + if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; synchronize_rcu(); } From c0328380d0efeae8a100c39273e4860bf213bfea Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Thu, 7 Jul 2022 14:58:12 -0700 Subject: [PATCH 081/299] ARM: dts: at91: sama5d2: Fix typo in i2s1 node [ Upstream commit 2fdf15b50a46e366740df4cccbe2343269b4ff55 ] Fix typo in i2s1 causing errors in dt binding validation. Change assigned-parrents to assigned-clock-parents to match i2s0 node formatting. Fixes: 1ca81883c557 ("ARM: dts: at91: sama5d2: add nodes for I2S controllers") Signed-off-by: Ryan Wanner [claudiu.beznea: use imperative addressing in commit description, remove blank line after fixes tag, fix typo in commit message] Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20220707215812.193008-1-Ryan.Wanner@microchip.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 89c71d419f82a..659a17fc755cf 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1124,7 +1124,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>; clock-names = "pclk", "gclk"; assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>; - assigned-parrents = <&pmc PMC_TYPE_GCK 55>; + assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>; status = "disabled"; }; From bee4dc387f64009011953191ea0e7bbab1642632 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 8 Jul 2022 19:45:29 +0200 Subject: [PATCH 082/299] ARM: dts: sunxi: Fix SPI NOR campatible on Orange Pi Zero [ Upstream commit 884b66976a7279ee889ba885fe364244d50b79e7 ] The device tree should include generic "jedec,spi-nor" compatible, and a manufacturer-specific one. The macronix part is what is shipped on the boards that come with a flash chip. Fixes: 45857ae95478 ("ARM: dts: orange-pi-zero: add node for SPI NOR") Signed-off-by: Michal Suchanek Acked-by: Jernej Skrabec Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20220708174529.3360-1-msuchanek@suse.de Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index f19ed981da9d9..3706216ffb40b 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -169,7 +169,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mxicy,mx25l1606e", "winbond,w25q128"; + compatible = "mxicy,mx25l1606e", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; }; From 658a67f68bca44d922bbd786080535a69e1ac000 Mon Sep 17 00:00:00 2001 From: William Zhang Date: Fri, 8 Jul 2022 11:25:06 -0700 Subject: [PATCH 083/299] arm64: dts: broadcom: bcm4908: Fix timer node for BCM4906 SoC [ Upstream commit b4a544e415e9be33b37d9bfa9d9f9f4d13f553d6 ] The cpu mask value in interrupt property inherits from bcm4908.dtsi which sets to four cpus. Correct the value to two cpus for dual core BCM4906 SoC. Fixes: c8b404fb05dc ("arm64: dts: broadcom: bcm4908: add BCM4906 Netgear R8000P DTS files") Signed-off-by: William Zhang Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi index 66023d5535247..d084c33d5ca82 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi @@ -9,6 +9,14 @@ /delete-node/ cpu@3; }; + timer { + compatible = "arm,armv8-timer"; + interrupts = , + , + , + ; + }; + pmu { compatible = "arm,cortex-a53-pmu"; interrupts = , From 7fc7c6d053cfca70bb81892f3f00937e5c459d5a Mon Sep 17 00:00:00 2001 From: William Zhang Date: Fri, 8 Jul 2022 11:25:07 -0700 Subject: [PATCH 084/299] arm64: dts: broadcom: bcm4908: Fix cpu node for smp boot [ Upstream commit 8bd582ae9a71d7f14c4e0c735b2eacaf7516d626 ] Add spin-table enable-method and cpu-release-addr properties for cpu0 node. This is required by all ARMv8 SoC. Otherwise some bootloader like u-boot can not update cpu-release-addr and linux fails to start up secondary cpus. Fixes: 2961f69f151c ("arm64: dts: broadcom: add BCM4908 and Asus GT-AC5300 early DTS files") Signed-off-by: William Zhang Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi index a4be040a00c07..967d2cd3c3cee 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi @@ -29,6 +29,8 @@ device_type = "cpu"; compatible = "brcm,brahma-b53"; reg = <0x0>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0xfff8>; next-level-cache = <&l2>; }; From 9ace115965b7d04bab14b90082d457a3ae63f783 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Jul 2022 10:26:15 +0200 Subject: [PATCH 085/299] netfilter: nf_log: incorrect offset to network header [ Upstream commit 7a847c00eeba9744353ecdfad253143b9115678a ] NFPROTO_ARP is expecting to find the ARP header at the network offset. In the particular case of ARP, HTYPE= field shows the initial bytes of the ethernet header destination MAC address. netdev out: IN= OUT=bridge0 MACSRC=c2:76:e5:71:e1:de MACDST=36:b0:4a:e2:72:ea MACPROTO=0806 ARP HTYPE=14000 PTYPE=0x4ae2 OPCODE=49782 NFPROTO_NETDEV egress hook is also expecting to find the IP headers at the network offset. Fixes: 35b9395104d5 ("netfilter: add generic ARP packet logger") Reported-by: Tom Yan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_log_syslog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 13234641cdb34..7000e069bc076 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -61,7 +61,7 @@ dump_arp_packet(struct nf_log_buf *m, unsigned int logflags; struct arphdr _arph; - ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph); if (!ah) { nf_log_buf_add(m, "TRUNCATED"); return; @@ -90,7 +90,7 @@ dump_arp_packet(struct nf_log_buf *m, ah->ar_pln != sizeof(__be32)) return; - ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); + ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp); if (!ap) { nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", skb->len - sizeof(_arph)); @@ -144,7 +144,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - dump_arp_packet(m, loginfo, skb, 0); + dump_arp_packet(m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } @@ -829,7 +829,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, if (in) dump_ipv4_mac_header(m, loginfo, skb); - dump_ipv4_packet(net, m, loginfo, skb, 0); + dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } From e75554f74bce38b9e8e0cd8147c8a677e97d410a Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Fri, 8 Jul 2022 11:07:18 +0100 Subject: [PATCH 086/299] nfp: fix issue of skb segments exceeds descriptor limitation [ Upstream commit 9c840d5f9aaef87e65db900bae21c70b059aba5f ] TCP packets will be dropped if the segments number in the tx skb exceeds limitation when sending iperf3 traffic with --zerocopy option. we make the following changes: Get nr_frags in nfp_nfdk_tx_maybe_close_block instead of passing from outside because it will be changed after skb_linearize operation. Fill maximum dma_len in first tx descriptor to make sure the whole head is included in the first descriptor. Fixes: c10d12e3dce8 ("nfp: add support for NFDK data path") Signed-off-by: Baowen Zheng Reviewed-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/netronome/nfp/nfdk/dp.c | 33 +++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index e509d6dcba5cb..805071d64a203 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -125,17 +125,18 @@ nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, static int nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring, - unsigned int nr_frags, struct sk_buff *skb) + struct sk_buff *skb) { unsigned int n_descs, wr_p, nop_slots; const skb_frag_t *frag, *fend; struct nfp_nfdk_tx_desc *txd; + unsigned int nr_frags; unsigned int wr_idx; int err; recount_descs: n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb)); - + nr_frags = skb_shinfo(skb)->nr_frags; frag = skb_shinfo(skb)->frags; fend = frag + nr_frags; for (; frag < fend; frag++) @@ -281,10 +282,13 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) if (unlikely((int)metadata < 0)) goto err_flush; - nr_frags = skb_shinfo(skb)->nr_frags; - if (nfp_nfdk_tx_maybe_close_block(tx_ring, nr_frags, skb)) + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) goto err_flush; + /* nr_frags will change after skb_linearize so we get nr_frags after + * nfp_nfdk_tx_maybe_close_block function + */ + nr_frags = skb_shinfo(skb)->nr_frags; /* DMA map all */ wr_idx = D_IDX(tx_ring, tx_ring->wr_p); txd = &tx_ring->ktxds[wr_idx]; @@ -310,7 +314,16 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) /* FIELD_PREP() implicitly truncates to chunk */ dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + + /* We will do our best to pass as much data as we can in descriptor + * and we need to make sure the first descriptor includes whole head + * since there is limitation in firmware side. Sometimes the value of + * dma_len bitwise and NFDK_DESC_TX_DMA_LEN_HEAD will less than + * headlen. + */ + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); @@ -925,7 +938,9 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, /* FIELD_PREP() implicitly truncates to chunk */ dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); @@ -1303,7 +1318,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, skb_push(skb, 4)); } - if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb)) + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) goto err_free; /* DMA map all */ @@ -1328,7 +1343,9 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, txbuf++; dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); From 4c43069bb1097dd6cc1cf0f7c43a36d1f7b3910b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jul 2022 15:11:53 +0000 Subject: [PATCH 087/299] vlan: fix memory leak in vlan_newlink() [ Upstream commit 72a0b329114b1caa8e69dfa7cdad1dd3c69b8602 ] Blamed commit added back a bug I fixed in commit 9bbd917e0bec ("vlan: fix memory leak in vlan_dev_set_egress_priority") If a memory allocation fails in vlan_changelink() after other allocations succeeded, we need to call vlan_dev_free_egress_priority() to free all allocated memory because after a failed ->newlink() we do not call any methods like ndo_uninit() or dev->priv_destructor(). In following example, if the allocation for last element 2000:2001 fails, we need to free eight prior allocations: ip link add link dummy0 dummy0.100 type vlan id 100 \ egress-qos-map 1:2 2:3 3:4 4:5 5:6 6:7 7:8 8:9 2000:2001 syzbot report was: BUG: memory leak unreferenced object 0xffff888117bd1060 (size 32): comm "syz-executor408", pid 3759, jiffies 4294956555 (age 34.090s) hex dump (first 32 bytes): 09 00 00 00 00 a0 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc include/linux/slab.h:600 [inline] [] vlan_dev_set_egress_priority+0xed/0x170 net/8021q/vlan_dev.c:193 [] vlan_changelink+0x178/0x1d0 net/8021q/vlan_netlink.c:128 [] vlan_newlink+0x148/0x260 net/8021q/vlan_netlink.c:185 [] rtnl_newlink_create net/core/rtnetlink.c:3363 [inline] [] __rtnl_newlink+0xa58/0xdc0 net/core/rtnetlink.c:3580 [] rtnl_newlink+0x49/0x70 net/core/rtnetlink.c:3593 [] rtnetlink_rcv_msg+0x21c/0x5c0 net/core/rtnetlink.c:6089 [] netlink_rcv_skb+0x87/0x1d0 net/netlink/af_netlink.c:2501 [] netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] [] netlink_unicast+0x397/0x4c0 net/netlink/af_netlink.c:1345 [] netlink_sendmsg+0x396/0x710 net/netlink/af_netlink.c:1921 [] sock_sendmsg_nosec net/socket.c:714 [inline] [] sock_sendmsg+0x56/0x80 net/socket.c:734 [] ____sys_sendmsg+0x36c/0x390 net/socket.c:2488 [] ___sys_sendmsg+0x8b/0xd0 net/socket.c:2542 [] __sys_sendmsg net/socket.c:2571 [inline] [] __do_sys_sendmsg net/socket.c:2580 [inline] [] __se_sys_sendmsg net/socket.c:2578 [inline] [] __x64_sys_sendmsg+0x78/0xf0 net/socket.c:2578 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 37aa50c539bc ("vlan: introduce vlan_dev_free_egress_priority") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Xin Long Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/8021q/vlan_netlink.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 53b1955b027f8..214532173536b 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -182,10 +182,14 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, else if (dev->mtu > max_mtu) return -EINVAL; + /* Note: If this initial vlan_changelink() fails, we need + * to call vlan_dev_free_egress_priority() to free memory. + */ err = vlan_changelink(dev, tb, data, extack); - if (err) - return err; - err = register_vlan_dev(dev, extack); + + if (!err) + err = register_vlan_dev(dev, extack); + if (err) vlan_dev_free_egress_priority(dev); return err; From dbef6a21e49942736e5ce1c205802274410c1211 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Jul 2022 11:41:59 +0200 Subject: [PATCH 088/299] netfilter: nf_tables: replace BUG_ON by element length check [ Upstream commit c39ba4de6b0a843bec5d46c2b6f2064428dada5e ] BUG_ON can be triggered from userspace with an element with a large userdata area. Replace it by length check and return EINVAL instead. Over time extensions have been growing in size. Pick a sufficiently old Fixes: tag to propagate this fix. Fixes: 7d7402642eaf ("netfilter: nf_tables: variable sized set element keys / data") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 14 +++--- net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 279ae0fff7adb..f0c3a1ee197c0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -657,18 +657,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) tmpl->len = sizeof(struct nft_set_ext); } -static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, - unsigned int len) +static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) { tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); - BUG_ON(tmpl->len > U8_MAX); + if (tmpl->len > U8_MAX) + return -EINVAL; + tmpl->offset[id] = tmpl->len; tmpl->len += nft_set_ext_types[id].len + len; + + return 0; } -static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) { - nft_set_ext_add_length(tmpl, id, 0); + return nft_set_ext_add_length(tmpl, id, 0); } static inline void nft_set_ext_init(struct nft_set_ext *ext, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a136148627e70..de3dc35ce6095 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5831,8 +5831,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && @@ -5941,7 +5944,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_set_elem_expr; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto err_parse_key; } if (nla[NFTA_SET_ELEM_KEY_END]) { @@ -5950,22 +5955,31 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto err_parse_key_end; } if (timeout > 0) { - nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); - if (timeout != set->timeout) - nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (err < 0) + goto err_parse_key_end; + + if (timeout != set->timeout) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + if (err < 0) + goto err_parse_key_end; + } } if (num_exprs) { for (i = 0; i < num_exprs; i++) size += expr_array[i]->ops->size; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, - sizeof(struct nft_set_elem_expr) + - size); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, + sizeof(struct nft_set_elem_expr) + size); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { @@ -5980,7 +5994,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = PTR_ERR(obj); goto err_parse_key_end; } - nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_DATA] != NULL) { @@ -6014,7 +6030,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, NFT_VALIDATE_NEED); } - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + if (err < 0) + goto err_parse_data; } /* The full maximum length of userdata can exceed the maximum @@ -6024,9 +6042,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ulen = 0; if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); - if (ulen > 0) - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, - ulen); + if (ulen > 0) { + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, + ulen); + if (err < 0) + goto err_parse_data; + } } err = -ENOMEM; @@ -6252,8 +6273,11 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_prepare(&tmpl); - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, @@ -6261,16 +6285,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto fail_elem; } if (nla[NFTA_SET_ELEM_KEY_END]) { err = nft_setelem_parse_key(ctx, set, &elem.key_end.val, nla[NFTA_SET_ELEM_KEY_END]); if (err < 0) - return err; + goto fail_elem; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto fail_elem_key_end; } err = -ENOMEM; @@ -6278,7 +6306,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, elem.key_end.val.data, NULL, 0, 0, GFP_KERNEL_ACCOUNT); if (elem.priv == NULL) - goto fail_elem; + goto fail_elem_key_end; ext = nft_set_elem_ext(set, elem.priv); if (flags) @@ -6302,6 +6330,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(trans); fail_trans: kfree(elem.priv); +fail_elem_key_end: + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); fail_elem: nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; From a541767afc81d739613557fd16a535236652cfad Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 11 Jul 2022 09:36:32 +0530 Subject: [PATCH 089/299] RISC-V: KVM: Fix SRCU deadlock caused by kvm_riscv_check_vcpu_requests() [ Upstream commit be82abe6a76ba8e76f25312566182b0f13c4fbf9 ] The kvm_riscv_check_vcpu_requests() is called with SRCU read lock held and for KVM_REQ_SLEEP request it will block the VCPU without releasing SRCU read lock. This causes KVM ioctls (such as KVM_IOEVENTFD) from other VCPUs of the same Guest/VM to hang/deadlock if there is any synchronize_srcu() or synchronize_srcu_expedited() in the path. To fix the above in kvm_riscv_check_vcpu_requests(), we should do SRCU read unlock before blocking the VCPU and do SRCU read lock after VCPU wakeup. Fixes: cce69aff689e ("RISC-V: KVM: Implement VCPU interrupts and requests handling") Reported-by: Bin Meng Signed-off-by: Anup Patel Reviewed-by: Atish Patra Tested-by: Heinrich Schuchardt Tested-by: Bin Meng Signed-off-by: Anup Patel Signed-off-by: Sasha Levin --- arch/riscv/kvm/vcpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 7461f964d20a9..3894777bfa87d 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -673,9 +673,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, (!vcpu->arch.power_off) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); + kvm_vcpu_srcu_read_lock(vcpu); if (vcpu->arch.power_off || vcpu->arch.pause) { /* From 778e85514b617b3a6b14d7143d88244ed853a089 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Jul 2022 11:41:06 +0300 Subject: [PATCH 090/299] drm/i915/gvt: IS_ERR() vs NULL bug in intel_gvt_update_reg_whitelist() [ Upstream commit e87197fbd137c888fd6c871c72fe7e89445dd015 ] The shmem_pin_map() function returns NULL, it doesn't return error pointers. Fixes: 97ea656521c8 ("drm/i915/gvt: Parse default state to update reg whitelist") Reviewed-by: Andrzej Hajda Signed-off-by: Dan Carpenter Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/Ysftoia2BPUyqVcD@kili Acked-by: Zhenyu Wang Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 2459213b6c87f..f49c1e8b8df74 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) continue; vaddr = shmem_pin_map(engine->default_state); - if (IS_ERR(vaddr)) { - gvt_err("failed to map %s->default state, err:%zd\n", - engine->name, PTR_ERR(vaddr)); + if (!vaddr) { + gvt_err("failed to map %s->default state\n", + engine->name); return; } From 7fb77b292abec73345852c179a444001730f6e7e Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Sun, 10 Jul 2022 19:05:22 -0400 Subject: [PATCH 091/299] xen/gntdev: Ignore failure to unmap INVALID_GRANT_HANDLE [ Upstream commit 166d3863231667c4f64dee72b77d1102cdfad11f ] The error paths of gntdev_mmap() can call unmap_grant_pages() even though not all of the pages have been successfully mapped. This will trigger the WARN_ON()s in __unmap_grant_pages_done(). The number of warnings can be very large; I have observed thousands of lines of warnings in the systemd journal. Avoid this problem by only warning on unmapping failure if the handle being unmapped is not INVALID_GRANT_HANDLE. The handle field of any page that was not successfully mapped will be INVALID_GRANT_HANDLE, so this catches all cases where unmapping can legitimately fail. Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()") Cc: stable@vger.kernel.org Suggested-by: Juergen Gross Signed-off-by: Demi Marie Obenour Reviewed-by: Oleksandr Tyshchenko Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220710230522.1563-1-demi@invisiblethingslab.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/gntdev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 4b56c39f766d4..84b143eef395b 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -396,13 +396,15 @@ static void __unmap_grant_pages_done(int result, unsigned int offset = data->unmap_ops - map->unmap_ops; for (i = 0; i < data->count; i++) { - WARN_ON(map->unmap_ops[offset+i].status); + WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay && + map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { - WARN_ON(map->kunmap_ops[offset+i].status); + WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay && + map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); pr_debug("kunmap handle=%u st=%d\n", map->kunmap_ops[offset+i].handle, map->kunmap_ops[offset+i].status); From ff151c477f6a529b0e4643d90f4f0b8eca85de04 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 8 Jul 2022 16:36:09 -0700 Subject: [PATCH 092/299] mptcp: fix subflow traversal at disconnect time [ Upstream commit 5c835bb142d4013c2ab24bff5ae9f6709a39cbcf ] At disconnect time the MPTCP protocol traverse the subflows list closing each of them. In some circumstances - MPJ subflow, passive MPTCP socket, the latter operation can remove the subflow from the list, invalidating the current iterator. Address the issue using the safe list traversing helper variant. Reported-by: van fantasy Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") Tested-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/mptcp/protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b0fb1fc0bd4a9..b52fd250cb3a8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2840,12 +2840,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_for_each_subflow(msk, subflow) { + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE); From 2bcf184c16f1a1f1c27ae75c9e997543428bd667 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 10 Jul 2022 14:46:04 -0400 Subject: [PATCH 093/299] NFSD: Decode NFSv4 birth time attribute [ Upstream commit 5b2f3e0777da2a5dd62824bbe2fdab1d12caaf8f ] NFSD has advertised support for the NFSv4 time_create attribute since commit e377a3e698fb ("nfsd: Add support for the birth time attribute"). Igor Mammedov reports that Mac OS clients attempt to set the NFSv4 birth time attribute via OPEN(CREATE) and SETATTR if the server indicates that it supports it, but since the above commit was merged, those attempts now fail. Table 5 in RFC 8881 lists the time_create attribute as one that can be both set and retrieved, but the above commit did not add server support for clients to provide a time_create attribute. IMO that's a bug in our implementation of the NFSv4 protocol, which this commit addresses. Whether NFSD silently ignores the new birth time or actually sets it is another matter. I haven't found another filesystem service in the Linux kernel that enables users or clients to modify a file's birth time attribute. This commit reflects my (perhaps incorrect) understanding of whether Linux users can set a file's birth time. NFSD will now recognize a time_create attribute but it ignores its value. It clears the time_create bit in the returned attribute bitmask to indicate that the value was not used. Reported-by: Igor Mammedov Fixes: e377a3e698fb ("nfsd: Add support for the birth time attribute") Tested-by: Igor Mammedov Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfs4xdr.c | 9 +++++++++ fs/nfsd/nfsd.h | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index da92e7d2ab6a1..264c3a4629c9a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -470,6 +470,15 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, return nfserr_bad_xdr; } } + if (bmval[1] & FATTR4_WORD1_TIME_CREATE) { + struct timespec64 ts; + + /* No Linux filesystem supports setting this attribute. */ + bmval[1] &= ~FATTR4_WORD1_TIME_CREATE; + status = nfsd4_decode_nfstime4(argp, &ts); + if (status) + return status; + } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { u32 set_it; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 4fc1fd639527a..727754d56243d 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -460,7 +460,8 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \ + | FATTR4_WORD1_TIME_MODIFY_SET) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #define MAYBE_FATTR4_WORD2_SECURITY_LABEL \ FATTR4_WORD2_SECURITY_LABEL From 58c3f48a540b821b3253e2dc31c8fa237293ffc9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Jul 2022 14:30:13 -0400 Subject: [PATCH 094/299] lockd: set fl_owner when unlocking files [ Upstream commit aec158242b87a43d83322e99bc71ab4428e5ab79 ] Unlocking a POSIX lock on an inode with vfs_lock_file only works if the owner matches. Ensure we set it in the request. Cc: J. Bruce Fields Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/lockd/svcsubs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 0a22a2faf5522..b2f277727469c 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -176,7 +176,7 @@ nlm_delete_file(struct nlm_file *file) } } -static int nlm_unlock_files(struct nlm_file *file) +static int nlm_unlock_files(struct nlm_file *file, fl_owner_t owner) { struct file_lock lock; @@ -184,6 +184,7 @@ static int nlm_unlock_files(struct nlm_file *file) lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; + lock.fl_owner = owner; if (file->f_file[O_RDONLY] && vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL)) goto out_err; @@ -225,7 +226,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, if (match(lockhost, host)) { spin_unlock(&flctx->flc_lock); - if (nlm_unlock_files(file)) + if (nlm_unlock_files(file, fl->fl_owner)) return 1; goto again; } From 4e556ecd16413050afb87495708a0166b2915cfc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Jul 2022 14:30:14 -0400 Subject: [PATCH 095/299] lockd: fix nlm_close_files [ Upstream commit 1197eb5906a5464dbaea24cac296dfc38499cc00 ] This loop condition tries a bit too hard to be clever. Just test for the two indices we care about explicitly. Cc: J. Bruce Fields Fixes: 7f024fcd5c97 ("Keep read and write fds with each nlm_file") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/lockd/svcsubs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index b2f277727469c..e1c4617de7714 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -283,11 +283,10 @@ nlm_file_inuse(struct nlm_file *file) static void nlm_close_files(struct nlm_file *file) { - struct file *f; - - for (f = file->f_file[0]; f <= file->f_file[1]; f++) - if (f) - nlmsvc_ops->fclose(f); + if (file->f_file[O_RDONLY]) + nlmsvc_ops->fclose(file->f_file[O_RDONLY]); + if (file->f_file[O_WRONLY]) + nlmsvc_ops->fclose(file->f_file[O_WRONLY]); } /* From 0820e797c63812e355c23754ee5319cff875feba Mon Sep 17 00:00:00 2001 From: Yevhen Orlov Date: Sun, 10 Jul 2022 15:20:21 +0300 Subject: [PATCH 096/299] net: marvell: prestera: fix missed deinit sequence [ Upstream commit f946964a9f79f8dcb5a6329265281eebfc23aee5 ] Add unregister_fib_notifier as rollback of register_fib_notifier. Fixes: 4394fbcb78cf ("net: marvell: prestera: handle fib notifications") Signed-off-by: Yevhen Orlov Link: https://lore.kernel.org/r/20220710122021.7642-1-yevhen.orlov@plvision.eu Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/prestera/prestera_router.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 6c5618cf4f08f..97d9012db189f 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -587,6 +587,7 @@ int prestera_router_init(struct prestera_switch *sw) void prestera_router_fini(struct prestera_switch *sw) { + unregister_fib_notifier(&init_net, &sw->router->fib_nb); unregister_inetaddr_notifier(&sw->router->inetaddr_nb); unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb); rhashtable_destroy(&sw->router->kern_fib_cache_ht); From b3a5ec835f091a40d3250cc73422c640e13016a1 Mon Sep 17 00:00:00 2001 From: Paul M Stillwell Jr Date: Wed, 8 Jun 2022 14:09:52 -0700 Subject: [PATCH 097/299] ice: handle E822 generic device ID in PLDM header [ Upstream commit f52d166819a4d8e0d5cca07d8a8dd6397c96dcf1 ] The driver currently presumes that the record data in the PLDM header of the firmware image will match the device ID of the running device. This is true for E810 devices. It appears that for E822 devices that this is not guaranteed to be true. Fix this by adding a check for the generic E822 device. Fixes: d69ea414c9b4 ("ice: implement device flash update via devlink") Signed-off-by: Paul M Stillwell Jr Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_devids.h | 1 + .../net/ethernet/intel/ice/ice_fw_update.c | 96 ++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_main.c | 1 + 3 files changed, 96 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index 61dd2f18dee8e..b41bc3dc17454 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -5,6 +5,7 @@ #define _ICE_DEVIDS_H_ /* Device IDs */ +#define ICE_DEV_ID_E822_SI_DFLT 0x1888 /* Intel(R) Ethernet Connection E823-L for backplane */ #define ICE_DEV_ID_E823L_BACKPLANE 0x124C /* Intel(R) Ethernet Connection E823-L for SFP */ diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index 665a344fb9c06..3dc5662d62a6a 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -736,7 +736,87 @@ static int ice_finalize_update(struct pldmfw *context) return 0; } -static const struct pldmfw_ops ice_fwu_ops = { +struct ice_pldm_pci_record_id { + u32 vendor; + u32 device; + u32 subsystem_vendor; + u32 subsystem_device; +}; + +/** + * ice_op_pci_match_record - Check if a PCI device matches the record + * @context: PLDM fw update structure + * @record: list of records extracted from the PLDM image + * + * Determine if the PCI device associated with this device matches the record + * data provided. + * + * Searches the descriptor TLVs and extracts the relevant descriptor data into + * a pldm_pci_record_id. This is then compared against the PCI device ID + * information. + * + * Returns: true if the device matches the record, false otherwise. + */ +static bool +ice_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record) +{ + struct pci_dev *pdev = to_pci_dev(context->dev); + struct ice_pldm_pci_record_id id = { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .subsystem_vendor = PCI_ANY_ID, + .subsystem_device = PCI_ANY_ID, + }; + struct pldmfw_desc_tlv *desc; + + list_for_each_entry(desc, &record->descs, entry) { + u16 value; + int *ptr; + + switch (desc->type) { + case PLDM_DESC_ID_PCI_VENDOR_ID: + ptr = &id.vendor; + break; + case PLDM_DESC_ID_PCI_DEVICE_ID: + ptr = &id.device; + break; + case PLDM_DESC_ID_PCI_SUBVENDOR_ID: + ptr = &id.subsystem_vendor; + break; + case PLDM_DESC_ID_PCI_SUBDEV_ID: + ptr = &id.subsystem_device; + break; + default: + /* Skip unrelated TLVs */ + continue; + } + + value = get_unaligned_le16(desc->data); + /* A value of zero for one of the descriptors is sometimes + * used when the record should ignore this field when matching + * device. For example if the record applies to any subsystem + * device or vendor. + */ + if (value) + *ptr = value; + else + *ptr = PCI_ANY_ID; + } + + /* the E822 device can have a generic device ID so check for that */ + if ((id.vendor == PCI_ANY_ID || id.vendor == pdev->vendor) && + (id.device == PCI_ANY_ID || id.device == pdev->device || + id.device == ICE_DEV_ID_E822_SI_DFLT) && + (id.subsystem_vendor == PCI_ANY_ID || + id.subsystem_vendor == pdev->subsystem_vendor) && + (id.subsystem_device == PCI_ANY_ID || + id.subsystem_device == pdev->subsystem_device)) + return true; + + return false; +} + +static const struct pldmfw_ops ice_fwu_ops_e810 = { .match_record = &pldmfw_op_pci_match_record, .send_package_data = &ice_send_package_data, .send_component_table = &ice_send_component_table, @@ -744,6 +824,14 @@ static const struct pldmfw_ops ice_fwu_ops = { .finalize_update = &ice_finalize_update, }; +static const struct pldmfw_ops ice_fwu_ops_e822 = { + .match_record = &ice_op_pci_match_record, + .send_package_data = &ice_send_package_data, + .send_component_table = &ice_send_component_table, + .flash_component = &ice_flash_component, + .finalize_update = &ice_finalize_update, +}; + /** * ice_get_pending_updates - Check if the component has a pending update * @pf: the PF driver structure @@ -921,7 +1009,11 @@ int ice_devlink_flash_update(struct devlink *devlink, memset(&priv, 0, sizeof(priv)); - priv.context.ops = &ice_fwu_ops; + /* the E822 device needs a slightly different ops */ + if (hw->mac_type == ICE_MAC_GENERIC) + priv.context.ops = &ice_fwu_ops_e822; + else + priv.context.ops = &ice_fwu_ops_e810; priv.context.dev = dev; priv.extack = extack; priv.pf = pf; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d069b19f9bf7f..efb076f71e381 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5397,6 +5397,7 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 }, /* required last entry */ { 0, } }; From 6bd9f8470735eaee2ca83339e4ef8c3bd0d261ac Mon Sep 17 00:00:00 2001 From: Paul M Stillwell Jr Date: Wed, 8 Jun 2022 14:48:32 -0700 Subject: [PATCH 098/299] ice: change devlink code to read NVM in blocks [ Upstream commit 7b6f9462a3234c35cf808453d39a074a04e71de1 ] When creating a snapshot of the NVM the driver needs to read the entire contents from the NVM and store it. The NVM reads are protected by a lock that is shared between the driver and the firmware. If the driver takes too long to read the entire NVM (which can happen on some systems) then the firmware could reclaim the lock and cause subsequent reads from the driver to fail. We could fix this by increasing the timeout that we pass to the firmware, but we could end up in the same situation again if the system is slow. Instead have the driver break the reading of the NVM into blocks that are small enough that we have confidence that the read will complete within the timeout time, but large enough not to cause significant AQ overhead. Fixes: dce730f17825 ("ice: add a devlink region for dumping NVM contents") Signed-off-by: Paul M Stillwell Jr Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_devlink.c | 59 +++++++++++++------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 4a9de59121d85..31836bbdf8133 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -792,6 +792,8 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf) devlink_port_unregister(devlink_port); } +#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) + /** * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents * @devlink: the devlink instance @@ -818,8 +820,9 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - void *nvm_data; - u32 nvm_size; + u8 *nvm_data, *tmp, i; + u32 nvm_size, left; + s8 num_blks; int status; nvm_size = hw->flash.flash_size; @@ -827,26 +830,44 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, if (!nvm_data) return -ENOMEM; - status = ice_acquire_nvm(hw, ICE_RES_READ); - if (status) { - dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - vfree(nvm_data); - return status; - } - status = ice_read_flat_nvm(hw, 0, &nvm_size, nvm_data, false); - if (status) { - dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - nvm_size, status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); + tmp = nvm_data; + left = nvm_size; + + /* Some systems take longer to read the NVM than others which causes the + * FW to reclaim the NVM lock before the entire NVM has been read. Fix + * this by breaking the reads of the NVM into smaller chunks that will + * probably not take as long. This has some overhead since we are + * increasing the number of AQ commands, but it should always work + */ + for (i = 0; i < num_blks; i++) { + u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left); + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + vfree(nvm_data); + return -EIO; + } + + status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, + &read_sz, tmp, false); + if (status) { + dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", + read_sz, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + ice_release_nvm(hw); + vfree(nvm_data); + return -EIO; + } ice_release_nvm(hw); - vfree(nvm_data); - return status; - } - ice_release_nvm(hw); + tmp += read_sz; + left -= read_sz; + } *data = nvm_data; From 51eae602908cac76d2aad238f283722d5df8eaa0 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 8 Jul 2022 17:09:52 -0700 Subject: [PATCH 099/299] tracing: Fix sleeping while atomic in kdb ftdump [ Upstream commit 495fcec8648cdfb483b5b9ab310f3839f07cb3b8 ] If you drop into kdb and type "ftdump" you'll get a sleeping while atomic warning from memory allocation in trace_find_next_entry(). This appears to have been caused by commit ff895103a84a ("tracing: Save off entry when peeking at next entry"), which added the allocation in that path. The problematic commit was already fixed by commit 8e99cf91b99b ("tracing: Do not allocate buffer in trace_find_next_entry() in atomic") but that fix missed the kdb case. The fix here is easy: just move the assignment of the static buffer to the place where it should have been to begin with: trace_init_global_iter(). That function is called in two places, once is right before the assignment of the static buffer added by the previous fix and once is in kdb. Note that it appears that there's a second static buffer that we need to assign that was added in commit efbbdaa22bb7 ("tracing: Show real address for trace event arguments"), so we'll move that too. Link: https://lkml.kernel.org/r/20220708170919.1.I75844e5038d9425add2ad853a608cb44bb39df40@changeid Fixes: ff895103a84a ("tracing: Save off entry when peeking at next entry") Fixes: efbbdaa22bb7 ("tracing: Show real address for trace event arguments") Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 114c31bdf8f97..c0c98b0c86e79 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9863,6 +9863,12 @@ void trace_init_global_iter(struct trace_iterator *iter) /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + + /* Can not use kmalloc for iter.temp and iter.fmt */ + iter->temp = static_temp_buf; + iter->temp_size = STATIC_TEMP_BUF_SIZE; + iter->fmt = static_fmt_buf; + iter->fmt_size = STATIC_FMT_BUF_SIZE; } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) @@ -9895,11 +9901,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp and iter.fmt */ - iter.temp = static_temp_buf; - iter.temp_size = STATIC_TEMP_BUF_SIZE; - iter.fmt = static_fmt_buf; - iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); From f7eec9e63bd86dde7b2c5515f5fe24b9f73a5234 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Jul 2022 12:41:04 +0300 Subject: [PATCH 100/299] drm/i915/selftests: fix a couple IS_ERR() vs NULL tests [ Upstream commit 896dcabd1f8f613c533d948df17408c41f8929f5 ] The shmem_pin_map() function doesn't return error pointers, it returns NULL. Fixes: be1cb55a07bf ("drm/i915/gt: Keep a no-frills swappable copy of the default context state") Signed-off-by: Dan Carpenter Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220708094104.GL2316@kadam (cherry picked from commit d50f5a109cf4ed50c5b575c1bb5fc3bd17b23308) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 21c29d315cc0b..9d42a7c67a8c9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -155,8 +155,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -331,8 +331,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); From 46c5f0d16e1bcfb9cb9732b0bdb9f27a4d9e1e36 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Jul 2022 09:58:59 +0100 Subject: [PATCH 101/299] drm/i915/ttm: fix sg_table construction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aff1e0b09b54b64944b7fe32997229552737b9e9 ] If we encounter some monster sized local-memory page that exceeds the maximum sg length (UINT32_MAX), ensure that don't end up with some misaligned address in the entry that follows, leading to fireworks later. Also ensure we have some coverage of this in the selftests. v2(Chris): - Use round_down consistently to avoid udiv errors v3(Nirmoy): - Also update the max_segment in the selftest Fixes: f701b16d4cc5 ("drm/i915/ttm: add i915_sg_from_buddy_resource") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6379 Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220711085859.24198-1-matthew.auld@intel.com (cherry picked from commit bc99f1209f19fefa3ee11e77464ccfae541f4291) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 11 ++++++++-- drivers/gpu/drm/i915/i915_scatterlist.c | 19 +++++++++++++---- drivers/gpu/drm/i915/i915_scatterlist.h | 6 ++++-- drivers/gpu/drm/i915/intel_region_ttm.c | 10 ++++++--- drivers/gpu/drm/i915/intel_region_ttm.h | 3 ++- .../drm/i915/selftests/intel_memory_region.c | 21 +++++++++++++++++-- drivers/gpu/drm/i915/selftests/mock_region.c | 3 ++- 7 files changed, 58 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 45cc5837ce001..342ca303eae4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -583,10 +583,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + u64 page_alignment; if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); + page_alignment = bo->page_alignment << PAGE_SHIFT; + if (!page_alignment) + page_alignment = obj->mm.region->min_page_size; + /* * If CPU mapping differs, we need to add the ttm_tt pages to * the resulting st. Might make sense for GGTT. @@ -597,7 +602,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct i915_refct_sgt *rsgt; rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - res); + res, + page_alignment); if (IS_ERR(rsgt)) return rsgt; @@ -606,7 +612,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); } - return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res, + page_alignment); } static int i915_ttm_truncate(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 159571b9bd247..f63b50b71e10b 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -68,6 +68,7 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * drm_mm_node * @node: The drm_mm_node. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from a struct drm_mm_node, * taking a maximum segment length into account, splitting into segments @@ -77,15 +78,18 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start) + u64 region_start, + u64 page_alignment) { - const u64 max_segment = SZ_1G; /* Do we have a limit on this? */ + const u64 max_segment = round_down(UINT_MAX, page_alignment); u64 segment_pages = max_segment >> PAGE_SHIFT; u64 block_size, offset, prev_end; struct i915_refct_sgt *rsgt; struct sg_table *st; struct scatterlist *sg; + GEM_BUG_ON(!max_segment); + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) return ERR_PTR(-ENOMEM); @@ -112,6 +116,8 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; @@ -138,6 +144,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * i915_buddy_block list * @res: The struct i915_ttm_buddy_resource. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from struct i915_buddy_block list, * taking a maximum segment length into account, splitting into segments @@ -147,11 +154,12 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start) + u64 region_start, + u64 page_alignment) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->num_pages << PAGE_SHIFT; - const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); + const u64 max_segment = round_down(UINT_MAX, page_alignment); struct drm_buddy *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; struct drm_buddy_block *block; @@ -161,6 +169,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, resource_size_t prev_end; GEM_BUG_ON(list_empty(blocks)); + GEM_BUG_ON(!max_segment); rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) @@ -191,6 +200,8 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 12c6a16840814..b13e4cdea9238 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -213,9 +213,11 @@ static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt, void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start); + u64 region_start, + u64 page_alignment); struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start); + u64 region_start, + u64 page_alignment); #endif diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 737ef3f4ab54e..d896558cf4588 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -151,6 +151,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) * Convert an opaque TTM resource manager resource to a refcounted sg_table. * @mem: The memory region. * @res: The resource manager resource obtained from the TTM resource manager. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. So provide a way for the backends to translate the @@ -160,16 +161,19 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) */ struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res) + struct ttm_resource *res, + u64 page_alignment) { if (mem->is_range_manager) { struct ttm_range_mgr_node *range_node = to_ttm_range_mgr_node(res); return i915_rsgt_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + mem->region.start, + page_alignment); } else { - return i915_rsgt_from_buddy_resource(res, mem->region.start); + return i915_rsgt_from_buddy_resource(res, mem->region.start, + page_alignment); } } diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index fdee5e7bd46ca..b17e494ef79cd 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -24,7 +24,8 @@ int intel_region_ttm_fini(struct intel_memory_region *mem); struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res); + struct ttm_resource *res, + u64 page_alignment); void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res); diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index ba32893e0873c..0250a114fe0ac 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -451,7 +451,6 @@ static int igt_mock_splintered_region(void *arg) static int igt_mock_max_segment(void *arg) { - const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; @@ -460,7 +459,10 @@ static int igt_mock_max_segment(void *arg) struct drm_buddy *mm; struct list_head *blocks; struct scatterlist *sg; + I915_RND_STATE(prng); LIST_HEAD(objects); + unsigned int max_segment; + unsigned int ps; u64 size; int err = 0; @@ -472,7 +474,13 @@ static int igt_mock_max_segment(void *arg) */ size = SZ_8G; - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0); + ps = PAGE_SIZE; + if (i915_prandom_u64_state(&prng) & 1) + ps = SZ_64K; /* For something like DG2 */ + + max_segment = round_down(UINT_MAX, ps); + + mem = mock_region_create(i915, 0, size, ps, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); @@ -498,12 +506,21 @@ static int igt_mock_max_segment(void *arg) } for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + dma_addr_t daddr = sg_dma_address(sg); + if (sg->length > max_segment) { pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", __func__, sg->length, max_segment); err = -EINVAL; goto out_close; } + + if (!IS_ALIGNED(daddr, ps)) { + pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", + __func__, &daddr, ps); + err = -EINVAL; + goto out_close; + } } out_close: diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index f64325491f352..6f7c9820d3e9d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -32,7 +32,8 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) return PTR_ERR(obj->mm.res); obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - obj->mm.res); + obj->mm.res, + obj->mm.region->min_page_size); if (IS_ERR(obj->mm.rsgt)) { err = PTR_ERR(obj->mm.rsgt); goto err_free_resource; From 8acf1575188989cf4b9553da0926c42b128b8c5f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2022 16:21:32 +0100 Subject: [PATCH 102/299] drm/i915/gt: Serialize GRDOM access between multiple engine resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b24dcf1dc507f69ed3b5c66c2b6a0209ae80d4d4 ] Don't allow two engines to be reset in parallel, as they would both try to select a reset bit (and send requests to common registers) and wait on that register, at the same time. Serialize control of the reset requests/acks using the uncore->lock, which will also ensure that no other GT state changes at the same time as the actual reset. Cc: stable@vger.kernel.org # v4.4 and upper Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Acked-by: Thomas Hellström Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7dbac3011729.1657639152.git.mchehab@kernel.org (cherry picked from commit 336561a914fc0c6f1218228718f633b31b7af1c3) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b7c6d4462ec55..d57db66ac7ea0 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -299,9 +299,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) return err; } -static int gen6_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; u32 hw_mask; @@ -320,6 +320,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } +static int gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(>->uncore->lock, flags); + ret = __gen6_reset_engines(gt, engine_mask, retry); + spin_unlock_irqrestore(>->uncore->lock, flags); + + return ret; +} + static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; @@ -486,9 +500,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } -static int gen11_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen11_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -582,8 +596,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; + unsigned long flags; int ret; + spin_lock_irqsave(>->uncore->lock, flags); + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) @@ -611,17 +628,19 @@ static int gen8_reset_engines(struct intel_gt *gt, * This is best effort, so ignore any error from the initial reset. */ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) - gen11_reset_engines(gt, gt->info.engine_mask, 0); + __gen11_reset_engines(gt, gt->info.engine_mask, 0); if (GRAPHICS_VER(gt->i915) >= 11) - ret = gen11_reset_engines(gt, engine_mask, retry); + ret = __gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(gt, engine_mask, retry); + ret = __gen6_reset_engines(gt, engine_mask, retry); skip_reset: for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); + spin_unlock_irqrestore(>->uncore->lock, flags); + return ret; } From 6e204eff3888ae75fffc1f7cd6f6d976d4a0a4a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2022 16:21:33 +0100 Subject: [PATCH 103/299] drm/i915/gt: Serialize TLB invalidates with GT resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a1c5a7bf79c1faa5633b918b5c0666545e84c4d1 ] Avoid trying to invalidate the TLB in the middle of performing an engine reset, as this may result in the reset timing out. Currently, the TLB invalidate is only serialised by its own mutex, forgoing the uncore lock, but we can take the uncore->lock as well to serialise the mmio access, thereby serialising with the GDRST. Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with i915 selftest/hangcheck. Cc: stable@vger.kernel.org # v4.4 and upper Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") Reported-by: Mauro Carvalho Chehab Tested-by: Mauro Carvalho Chehab Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Acked-by: Thomas Hellström Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721ac6ea86c0677.1657639152.git.mchehab@kernel.org (cherry picked from commit 33da97894758737895e90c909f16786052680ef4) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_gt.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8a2483ccbfb91..f4375479e6f0a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1012,6 +1012,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -1026,7 +1040,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, From e8997d2d6b8d764e12489f1af2a1ce1d7384ca2a Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 24 Jun 2022 13:35:28 +0200 Subject: [PATCH 104/299] drm/i915/selftests: fix subtraction overflow bug [ Upstream commit 333991c4e66b3d4b5613315f18016da80344f659 ] On some machines hole_end can be small enough to cause subtraction overflow. On the other side (addr + 2 * min_alignment) can overflow in case of mock tests. This patch should handle both cases. Fixes: e1c5f754067b59 ("drm/i915: Avoid overflow in computing pot_hole loop termination") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3674 Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20220624113528.2159210-1-andrzej.hajda@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit ab3edc679c552a466e4bf0b11af3666008bd65a2) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index ab751192eb3bb..34d1ef0152335 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -742,7 +742,7 @@ static int pot_hole(struct i915_address_space *vm, u64 addr; for (addr = round_up(hole_start + min_alignment, step) - min_alignment; - addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment; + hole_end > addr && hole_end - addr >= 2 * min_alignment; addr += step) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { From 97928c06973922ef230c90c9ef330aab8e01122a Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 11 Jul 2022 22:26:14 -0400 Subject: [PATCH 105/299] bnxt_en: reclaim max resources if sriov enable fails [ Upstream commit c5b744d38c36a407a41e918602eec4d89730787b ] If bnxt_sriov_enable() fails after some resources have been reserved for the VFs, the current code is not unwinding properly and the reserved resources become unavailable afterwards. Fix it by properly unwinding with a call to bnxt_hwrm_func_qcaps() to reset all maximum resources. Also, add the missing bnxt_ulp_sriov_cfg() call to let the RDMA driver know to abort. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Kashyap Desai Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 7 ++++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d5149478a3510..ee6686a111bde 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7641,7 +7641,7 @@ static void bnxt_hwrm_dbg_qcaps(struct bnxt *bp) static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp); -static int bnxt_hwrm_func_qcaps(struct bnxt *bp) +int bnxt_hwrm_func_qcaps(struct bnxt *bp) { int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 98453a78cbd04..4c6ce2b2b3b78 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2310,6 +2310,7 @@ int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); +int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index ddf2f3963abee..a1a2c7a64fd58 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -823,8 +823,10 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) goto err_out2; rc = pci_enable_sriov(bp->pdev, *num_vfs); - if (rc) + if (rc) { + bnxt_ulp_sriov_cfg(bp, 0); goto err_out2; + } return 0; @@ -832,6 +834,9 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) /* Free the resources reserved for various VF's */ bnxt_hwrm_func_vf_resource_free(bp, *num_vfs); + /* Restore the max resources */ + bnxt_hwrm_func_qcaps(bp); + err_out1: bnxt_free_vf_resources(bp); From 5769db185f180263eba24cc1dc16f666ab8100c1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 11 Jul 2022 22:26:15 -0400 Subject: [PATCH 106/299] bnxt_en: Fix bnxt_reinit_after_abort() code path [ Upstream commit 4279414bff8af9898e8c53ae6c5bc17f68ad67b7 ] bnxt_reinit_after_abort() is called during ifup when a previous FW reset sequence has aborted or a previous ifup has failed after detecting FW reset. In all cases, it is safe to assume that a previous FW reset has completed and the driver may not have fully reinitialized. Prior to this patch, it is assumed that the FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE flag will always be set by the firmware in bnxt_hwrm_if_change(). This may not be true if the driver has already attempted to register with the firmware. The firmware may not set the RESET_DONE flag again after the driver has registered, assuming that the driver has seen the flag already. Fix it to always go through the FW reset initialization path if the BNXT_STATE_FW_RESET_DET flag is set. This flag is always set by the driver after successfully going through bnxt_reinit_after_abort(). Fixes: 6882c36cf82e ("bnxt_en: attempt to reinitialize after aborted reset") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ee6686a111bde..1ceccaed2da0b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9916,7 +9916,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE) resc_reinit = true; - if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE) + if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE || + test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) fw_reset = true; else bnxt_remap_fw_health_regs(bp); From f0c89d59e3847932106cefec029d454960a6224b Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Mon, 11 Jul 2022 22:26:16 -0400 Subject: [PATCH 107/299] bnxt_en: fix livepatch query [ Upstream commit 619b9b1622c283cc5ca86f4c487db266a8f55dab ] In the livepatch query fw_target BNXT_FW_SRT_PATCH is applicable for P5 chips only. Fixes: 3c4153394e2c ("bnxt_en: implement firmware live patching") Reviewed-by: Saravanan Vajravel Reviewed-by: Somnath Kotur Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 0c17f90d44a25..3a9441fe4fd17 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -979,9 +979,11 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (rc) return rc; - rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); - if (rc) - return rc; + if (BNXT_CHIP_P5(bp)) { + rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); + if (rc) + return rc; + } return bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_CRT_PATCH); } From 1899873812fa7586a0bd0a488a01719c878dd11c Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 11 Jul 2022 22:26:18 -0400 Subject: [PATCH 108/299] bnxt_en: Fix bnxt_refclk_read() [ Upstream commit ddde5412fdaa5048bbca31529d46cb8da882870c ] The upper 32-bit PHC register is not latched when reading the lower 32-bit PHC register. Current code leaves a small window where we may not read correct higher order bits if the lower order bits are just about to wrap around. This patch fixes this by reading higher order bits twice and makes sure that final value is correctly paired with its lower 32 bits. Fixes: 30e96f487f64 ("bnxt_en: Do not read the PTP PHC during chip reset") Cc: Richard Cochran Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f9c94e5fe7187..3221911e25fed 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -76,14 +76,23 @@ static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts, u64 *ns) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u32 high_before, high_now, low; if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return -EIO; + high_before = readl(bp->bar0 + ptp->refclk_mapped_regs[1]); ptp_read_system_prets(sts); - *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); ptp_read_system_postts(sts); - *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32; + high_now = readl(bp->bar0 + ptp->refclk_mapped_regs[1]); + if (high_now != high_before) { + ptp_read_system_prets(sts); + low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + ptp_read_system_postts(sts); + } + *ns = ((u64)high_now << 32) | low; + return 0; } From 5f776daef0b5354615ec4b4234cd9539ca05f273 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:19 -0700 Subject: [PATCH 109/299] sysctl: Fix data-races in proc_dou8vec_minmax(). [ Upstream commit 7dee5d7747a69aa2be41f04c6a7ecfe3ac8cdf18 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dou8vec_minmax() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dou8vec_minmax() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: cb9444130662 ("sysctl: add proc_dou8vec_minmax()") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 878b1122cb89c..54ec36e69907a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1079,13 +1079,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write, tmp.maxlen = sizeof(val); tmp.data = &val; - val = *data; + val = READ_ONCE(*data); res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, do_proc_douintvec_minmax_conv, ¶m); if (res) return res; if (write) - *data = val; + WRITE_ONCE(*data, val); return 0; } EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); From 93793b9abeb1655c1e133c488759768aeaf21224 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:20 -0700 Subject: [PATCH 110/299] sysctl: Fix data-races in proc_dointvec_ms_jiffies(). [ Upstream commit 7d1025e559782b58824b36cb8ad547a69f2e4b31 ] A sysctl variable is accessed concurrently, and there is always a chance of data-race. So, all readers and writers need some basic protection to avoid load/store-tearing. This patch changes proc_dointvec_ms_jiffies() to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the sysctl side. For now, proc_dointvec_ms_jiffies() itself is tolerant to a data-race, but we still need to add annotations on the other subsystem's side. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/sysctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 54ec36e69907a..f165ea67dd33e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1296,9 +1296,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, if (jif > INT_MAX) return 1; - *valp = (int)jif; + WRITE_ONCE(*valp, (int)jif); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1366,8 +1366,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, * @ppos: the current position in the file * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * The values read are assumed to be in 1/1000 seconds, and + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in 1/1000 seconds, and * are converted into jiffies. * * Returns 0 on success. From 12d40638f02f75c565b7906045db811c56eb2325 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:21 -0700 Subject: [PATCH 111/299] tcp: Fix a data-race around sysctl_max_tw_buckets. [ Upstream commit 6f605b57f3782114e330e108ce1903ede22ec675 ] While reading sysctl_max_tw_buckets, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inet_timewait_sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 0ec501845cb3b..47ccc343c9fb0 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -156,7 +156,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, { struct inet_timewait_sock *tw; - if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets) + if (refcount_read(&dr->tw_refcount) - 1 >= + READ_ONCE(dr->sysctl_max_tw_buckets)) return NULL; tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, From dde0d75e461ea61b4c31b99cfbcb45514ebf0b6a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:22 -0700 Subject: [PATCH 112/299] icmp: Fix a data-race around sysctl_icmp_echo_ignore_all. [ Upstream commit bb7bb35a63b4812da8e3aff587773678e31d23e3 ] While reading sysctl_icmp_echo_ignore_all, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 97350a38a75d0..92eaa96a9ff1c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -990,7 +990,7 @@ static bool icmp_echo(struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ - if (net->ipv4.sysctl_icmp_echo_ignore_all) + if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return true; icmp_param.data.icmph = *icmp_hdr(skb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ad80d180b60b5..8987864c44790 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -603,6 +603,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_echo_enable_probe", From 05c615033174f1d19374f42285ccd8e9af13e427 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:23 -0700 Subject: [PATCH 113/299] icmp: Fix data-races around sysctl_icmp_echo_enable_probe. [ Upstream commit 4a2f7083cc6cb72dade9a63699ca352fad26d1cd ] While reading sysctl_icmp_echo_enable_probe, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Fixes: 1fd07f33c3ea ("ipv6: ICMPV6: add response to ICMPV6 RFC 8335 PROBE messages") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- net/ipv6/icmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 92eaa96a9ff1c..7edc8a3b16468 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1025,7 +1025,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) u16 ident_len; u8 status; - if (!net->ipv4.sysctl_icmp_echo_enable_probe) + if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) return false; /* We currently only support probing interfaces on the proxy node diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e6b978ea0e87f..26554aa6fc1b0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -919,7 +919,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_EXT_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && - net->ipv4.sysctl_icmp_echo_enable_probe) + READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) icmpv6_echo_reply(skb); break; From 48fda9af1df971076416bace9bfbb5d6d89ba6c1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:24 -0700 Subject: [PATCH 114/299] icmp: Fix a data-race around sysctl_icmp_echo_ignore_broadcasts. [ Upstream commit 66484bb98ed2dfa1dda37a32411483d8311ac269 ] While reading sysctl_icmp_echo_ignore_broadcasts, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7edc8a3b16468..2c402b4671a1d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1239,7 +1239,7 @@ int icmp_rcv(struct sk_buff *skb) */ if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && - net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { + READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) { goto error; } if (icmph->type != ICMP_ECHO && diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 8987864c44790..6613351094cef 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -621,6 +621,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ignore_bogus_error_responses", From d9a9433f818e6a34fcd3b3cf1d657a3bde83bd24 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:25 -0700 Subject: [PATCH 115/299] icmp: Fix a data-race around sysctl_icmp_ignore_bogus_error_responses. [ Upstream commit b04f9b7e85c7d7aecbada620e8759a662af068d3 ] While reading sysctl_icmp_ignore_bogus_error_responses, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2c402b4671a1d..1a061d10949f9 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -930,7 +930,7 @@ static bool icmp_unreach(struct sk_buff *skb) * get the other vendor to fix their kit. */ - if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && + if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) && inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) { net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", &ip_hdr(skb)->saddr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6613351094cef..4cf2a6f560d44 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -630,6 +630,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_errors_use_inbound_ifaddr", From f9617844e4d5d6331dbce3fb19a24e5bda201e58 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:26 -0700 Subject: [PATCH 116/299] icmp: Fix a data-race around sysctl_icmp_errors_use_inbound_ifaddr. [ Upstream commit d2efabce81db7eed1c98fa1a3f203f0edd738ac3 ] While reading sysctl_icmp_errors_use_inbound_ifaddr, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1c2fb7f93cb2 ("[IPV4]: Sysctl configurable icmp error source address.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1a061d10949f9..37ba5f042908d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -693,7 +693,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4cf2a6f560d44..33e65e79e46e6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -639,6 +639,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ratelimit", From 83c68347b41af8e11a720fb826b9d824527ad902 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:27 -0700 Subject: [PATCH 117/299] icmp: Fix a data-race around sysctl_icmp_ratelimit. [ Upstream commit 2a4eb714841f288cf51c7d942d98af6a8c6e4b01 ] While reading sysctl_icmp_ratelimit, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 37ba5f042908d..41efb7381859f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -320,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, vif = l3mdev_master_ifindex(dst->dev); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); - rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) inet_putpeer(peer); out: From 84492c1edafa7cb493767fd5479f5f984eb9ac10 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:28 -0700 Subject: [PATCH 118/299] icmp: Fix a data-race around sysctl_icmp_ratemask. [ Upstream commit 1ebcb25ad6fc3d50fca87350acf451b9a66dd31e ] While reading sysctl_icmp_ratemask, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 41efb7381859f..c13ceda9ce5d8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -282,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return true; /* Limit if icmp type is enabled in ratemask. */ - if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask))) return true; return false; From ab5adca2e17d6595f3fc0e25ccb6bcbe2e01ca4f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:29 -0700 Subject: [PATCH 119/299] raw: Fix a data-race around sysctl_raw_l3mdev_accept. [ Upstream commit 1dace014928e6e385363032d359a04dee9158af0 ] While reading sysctl_raw_l3mdev_accept, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 6897445fb194 ("net: provide a sysctl raw_l3mdev_accept for raw socket lookup with VRFs") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/raw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df5948536..c51a635671a73 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); From 7af0cc1f99010a6e4fe54f31a4804146a77260be Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:30 -0700 Subject: [PATCH 120/299] tcp: Fix data-races around sysctl_tcp_ecn. [ Upstream commit 4785a66702f086cf2ea84bdbe6ec921f274bd9f2 ] While reading sysctl_tcp_ecn, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 4af5561cbfc54..7c760aa655404 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk, th_ecn = tcph->ece && tcph->cwr; if (th_ecn) { ect = !INET_ECN_is_not_ect(ip_dsfield); - ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn; + ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk)) inet_rsk(oreq)->ecn_ok = 1; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f33c31dd7366c..b387c48351559 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, if (!ecn_ok) return false; - if (net->ipv4.sysctl_tcp_ecn) + if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) return true; return dst_feature(dst, RTAX_FEATURE_ECN); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 33e65e79e46e6..11add52147136 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -680,6 +680,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, }, { .procname = "tcp_ecn_fallback", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6b8fcf79688b5..2d71bcfcc7592 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6712,7 +6712,7 @@ static void tcp_ecn_create_request(struct request_sock *req, ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); - ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; + ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst; if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6b00c17c72aa8..9eefe7f6370ff 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -324,7 +324,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || + bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; if (!use_ecn) { From 1ec3d6c2626ee6e1b36b7bd006873a271406ba61 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:31 -0700 Subject: [PATCH 121/299] tcp: Fix a data-race around sysctl_tcp_ecn_fallback. [ Upstream commit 12b8d9ca7e678abc48195294494f1815b555d658 ] While reading sysctl_tcp_ecn_fallback, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 492135557dc0 ("tcp: add rfc3168, section 6.1.1.1. fallback") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/sysctl_net_ipv4.c | 2 ++ net/ipv4/tcp_output.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 11add52147136..ffe0264a51b8c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -689,6 +689,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_dynaddr", diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9eefe7f6370ff..34249469e361f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -346,7 +346,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ From f5042c5357ce931f1666faf6b26b27d12cbcc633 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:32 -0700 Subject: [PATCH 122/299] ipv4: Fix data-races around sysctl_ip_dynaddr. [ Upstream commit e49e4aff7ec19b2d0d0957ee30e93dade57dab9e ] While reading sysctl_ip_dynaddr, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv4/af_inet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 8ffed7135fc1a..8899b474edbfd 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1179,7 +1179,7 @@ ip_autobind_reuse - BOOLEAN option should only be set by experts. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 72fde2888ad2f..98bc180563d16 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1247,7 +1247,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1302,7 +1302,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) From ae3054f6fbccc90f14ecd6cf9b2c09a2401c64fd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:33 -0700 Subject: [PATCH 123/299] nexthop: Fix data-races around nexthop_compat_mode. [ Upstream commit bdf00bf24bef9be1ca641a6390fd5487873e0d2e ] While reading nexthop_compat_mode, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 4f80116d3df3 ("net: ipv4: add sysctl for nexthop api compatibility mode") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fib_semantics.c | 2 +- net/ipv4/nexthop.c | 5 +++-- net/ipv6/route.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1c4fef385ef61..720f65f7bd0b0 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1811,7 +1811,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode) + if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload; } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e459a391e607d..853a75a8fbafc 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); ipv6_stub->ip6_del_rt(net, f6i, - !net->ipv4.sysctl_nexthop_compat_mode); + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); } } @@ -2361,7 +2361,8 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); - if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) + if (replace_notify && + READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 83786de847abf..a87b96a256afc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5737,7 +5737,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (net->ipv4.sysctl_nexthop_compat_mode && + if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure; From 5cfdd61c807df5b2905a4de78514c2022db6954a Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 12 Jul 2022 14:14:17 +0800 Subject: [PATCH 124/299] net: ftgmac100: Hold reference returned by of_get_child_by_name() [ Upstream commit 49b9f431ff0d845a36be0b3ede35ec324f2e5fee ] In ftgmac100_probe(), we should hold the refernece returned by of_get_child_by_name() and use it to call of_node_put() for reference balance. Fixes: 39bfab8844a0 ("net: ftgmac100: Add support for DT phy-handle property") Signed-off-by: Liang He Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/faraday/ftgmac100.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 5231818943c6e..c03663785a8d4 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1764,6 +1764,19 @@ static int ftgmac100_setup_clk(struct ftgmac100 *priv) return rc; } +static bool ftgmac100_has_child_node(struct device_node *np, const char *name) +{ + struct device_node *child_np = of_get_child_by_name(np, name); + bool ret = false; + + if (child_np) { + ret = true; + of_node_put(child_np); + } + + return ret; +} + static int ftgmac100_probe(struct platform_device *pdev) { struct resource *res; @@ -1883,7 +1896,7 @@ static int ftgmac100_probe(struct platform_device *pdev) /* Display what we found */ phy_attached_info(phy); - } else if (np && !of_get_child_by_name(np, "mdio")) { + } else if (np && !ftgmac100_has_child_node(np, "mdio")) { /* Support legacy ASPEED devicetree descriptions that decribe a * MAC with an embedded MDIO controller but have no "mdio" * child node. Automatically scan the MDIO bus for available From dd91bc60f305610401b2196bedb573693d6c8e46 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jul 2022 17:42:25 +0300 Subject: [PATCH 125/299] net: stmmac: fix leaks in probe [ Upstream commit 23aa6d5088e3bd65de77c5c307237b9937f8b48a ] These two error paths should clean up before returning. Fixes: 2bb4b98b60d7 ("net: stmmac: Add Ingenic SoCs MAC support.") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 9a6d819b84aea..378b4dd826bb5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -273,7 +273,8 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->tx_delay = tx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps); - return -EINVAL; + ret = -EINVAL; + goto err_remove_config_dt; } } @@ -283,7 +284,8 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->rx_delay = rx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps); - return -EINVAL; + ret = -EINVAL; + goto err_remove_config_dt; } } From 05f68241638e67457f7fab5066b08f6df181a3d4 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Wed, 13 Jul 2022 15:21:11 +0800 Subject: [PATCH 126/299] ima: force signature verification when CONFIG_KEXEC_SIG is configured [ Upstream commit af16df54b89dee72df253abc5e7b5e8a6d16c11c ] Currently, an unsigned kernel could be kexec'ed when IMA arch specific policy is configured unless lockdown is enabled. Enforce kernel signature verification check in the kexec_file_load syscall when IMA arch specific policy is configured. Fixes: 99d5cadfde2b ("kexec_file: split KEXEC_VERIFY_SIG into KEXEC_SIG and KEXEC_SIG_FORCE") Reported-and-suggested-by: Mimi Zohar Signed-off-by: Coiby Xu Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- include/linux/kexec.h | 6 ++++++ kernel/kexec_file.c | 11 ++++++++++- security/integrity/ima/ima_efi.c | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index fcd5035209f19..8d573baaab29e 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -452,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_KEXEC_SIG +void set_kexec_sig_enforced(void); +#else +static inline void set_kexec_sig_enforced(void) {} +#endif + #endif /* !defined(__ASSEBMLY__) */ #endif /* LINUX_KEXEC_H */ diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index c108a2a887546..bb0fb63f563cf 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -29,6 +29,15 @@ #include #include "kexec_internal.h" +#ifdef CONFIG_KEXEC_SIG +static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE); + +void set_kexec_sig_enforced(void) +{ + sig_enforce = true; +} +#endif + static int kexec_calculate_store_digests(struct kimage *image); /* @@ -159,7 +168,7 @@ kimage_validate_signature(struct kimage *image) image->kernel_buf_len); if (ret) { - if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) { + if (sig_enforce) { pr_notice("Enforced kernel signature verification failed (%d).\n", ret); return ret; } diff --git a/security/integrity/ima/ima_efi.c b/security/integrity/ima/ima_efi.c index 71786d01946f4..9db66fe310d42 100644 --- a/security/integrity/ima/ima_efi.c +++ b/security/integrity/ima/ima_efi.c @@ -67,6 +67,8 @@ const char * const *arch_get_ima_policy(void) if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) { if (IS_ENABLED(CONFIG_MODULE_SIG)) set_module_sig_enforced(); + if (IS_ENABLED(CONFIG_KEXEC_SIG)) + set_kexec_sig_enforced(); return sb_arch_rules; } return NULL; From 830de9667b3ada0a75a3f098dfc7159709fe397b Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Tue, 12 Jul 2022 09:10:37 +0800 Subject: [PATCH 127/299] ima: Fix potential memory leak in ima_init_crypto() [ Upstream commit 067d2521874135267e681c19d42761c601d503d6 ] On failure to allocate the SHA1 tfm, IMA fails to initialize and exits without freeing the ima_algo_array. Add the missing kfree() for ima_algo_array to avoid the potential memory leak. Signed-off-by: Jianglei Nie Fixes: 6d94809af6b0 ("ima: Allocate and initialize tfm for each PCR bank") Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- security/integrity/ima/ima_crypto.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index a7206cc1d7d19..64499056648ad 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -205,6 +205,7 @@ int __init ima_init_crypto(void) crypto_free_shash(ima_algo_array[i].tfm); } + kfree(ima_algo_array); out: crypto_free_shash(ima_shash_tfm); return rc; From 5a0d38c6a1d6d6e7eb1c8f02e0077d117167749c Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 6 Jul 2022 15:52:46 -0400 Subject: [PATCH 128/299] drm/amd/display: Ignore First MST Sideband Message Return Error [ Upstream commit acea108fa067d140bd155161a79b1fcd967f4137 ] [why] First MST sideband message returns AUX_RET_ERROR_HPD_DISCON on certain intel platform. Aux transaction considered failure if HPD unexpected pulled low. The actual aux transaction success in such case, hence do not return error. [how] Not returning error when AUX_RET_ERROR_HPD_DISCON detected on the first sideband message. v2: squash in additional DMI entries v3: squash in static fix Signed-off-by: Fangzhi Zuo Acked-by: Solomon Chiu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 39 +++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 8 ++++ .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 17 ++++++++ 3 files changed, 64 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b55a433e829e2..bfbd701a4c9ad 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include @@ -1391,6 +1392,41 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } +static const struct dmi_system_id hpd_disconnect_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), + }, + }, + {} +}; + +static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +{ + const struct dmi_system_id *dmi_id; + + dm->aux_hpd_discon_quirk = false; + + dmi_id = dmi_first_match(hpd_disconnect_quirk_table); + if (dmi_id) { + dm->aux_hpd_discon_quirk = true; + DRM_INFO("aux_hpd_discon_quirk attached\n"); + } +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1521,6 +1557,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } INIT_LIST_HEAD(&adev->dm.da_list); + + retrieve_dmi_info(&adev->dm); + /* Display Core create. */ adev->dm.dc = dc_create(&init_data); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 7e44b04294488..4844601a5f47a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -546,6 +546,14 @@ struct amdgpu_display_manager { * last successfully applied backlight values. */ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; + + /** + * @aux_hpd_discon_quirk: + * + * quirk for hpd discon while aux is on-going. + * occurred on certain intel platform + */ + bool aux_hpd_discon_quirk; }; enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 31ac1fce36f84..d864cae1af67d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -58,6 +58,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, ssize_t result = 0; struct aux_payload payload; enum aux_return_code_type operation_result; + struct amdgpu_device *adev; + struct ddc_service *ddc; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -76,6 +78,21 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); + /* + * w/a on certain intel platform where hpd is unexpected to pull low during + * 1st sideband message transaction by return AUX_RET_ERROR_HPD_DISCON + * aux transaction is succuess in such case, therefore bypass the error + */ + ddc = TO_DM_AUX(aux)->ddc_service; + adev = ddc->ctx->driver_context; + if (adev->dm.aux_hpd_discon_quirk) { + if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && + operation_result == AUX_RET_ERROR_HPD_DISCON) { + result = 0; + operation_result = AUX_RET_SUCCESS; + } + } + if (payload.write && result >= 0) result = msg->size; From 05c7d624b6a17fa89e9acd5ed75e01732dbf8f94 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 11 Jul 2022 16:03:08 +0800 Subject: [PATCH 129/299] drm/amdkfd: correct the MEC atomic support firmware checking for GC 10.3.7 [ Upstream commit c0044865480a162146b9dfe7783e73a08e97b2b9 ] On the GC 10.3.7 platform the initial MEC release version #3 can support atomic operation,so need correct and set its MEC atomic support version to #3. Signed-off-by: Prike Liang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.18.x Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 651498bfecc8d..2059c3138410b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -158,6 +158,8 @@ static void kfd_device_info_init(struct kfd_dev *kfd, /* Navi2x+, Navi1x+ */ if (gc_version == IP_VERSION(10, 3, 6)) kfd->device_info.no_atomic_fw_version = 14; + else if (gc_version == IP_VERSION(10, 3, 7)) + kfd->device_info.no_atomic_fw_version = 3; else if (gc_version >= IP_VERSION(10, 3, 0)) kfd->device_info.no_atomic_fw_version = 92; else if (gc_version >= IP_VERSION(10, 1, 1)) From 19206b2bfccc3e834bcda885a5b84ac286109c2a Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Mon, 11 Jul 2022 19:39:28 +0200 Subject: [PATCH 130/299] drm/amd/display: Only use depth 36 bpp linebuffers on DCN display engines. [ Upstream commit add61d3c31de6a4b5e11a2ab96aaf4c873481568 ] Various DCE versions had trouble with 36 bpp lb depth, requiring fixes, last time in commit 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs") for DCE-8. So far >= DCE-11.2 was considered ok, but now I found out that on DCE-11.2 it causes dithering when there shouldn't be any, so identity pixel passthrough with identity gamma LUTs doesn't work when it should. This breaks various important neuroscience applications, as reported to me by scientific users of Polaris cards under Ubuntu 22.04 with Linux 5.15, and confirmed by testing it myself on DCE-11.2. Lets only use depth 36 for DCN engines, where my testing showed that it is both necessary for high color precision output, e.g., RGBA16 fb's, and not harmful, as far as more than one year in real-world use showed. DCE engines seem to work fine for high precision output at 30 bpp, so this ("famous last words") depth 30 should hopefully fix all known problems without introducing new ones. Successfully retested on DCE-11.2 Polaris and DCN-1.0 Raven Ridge on top of Linux 5.19.0-rc2 + drm-next. Fixes: 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs") Signed-off-by: Mario Kleiner Tested-by: Mario Kleiner Cc: stable@vger.kernel.org # 5.14.0 Cc: Alex Deucher Cc: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d251c3f3a7140..5cdbd2b8aa4d2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1113,12 +1113,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) * on certain displays, such as the Sharp 4k. 36bpp is needed * to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and * SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc - * precision on at least DCN display engines. However, at least - * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, - * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3 - * did not show such problems, so this seems to be the exception. + * precision on DCN display engines, but apparently not for DCE, as + * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have + * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, + * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identify pixel + * passthrough). Therefore only use 36 bpp on DCN where it is actually needed. */ - if (plane_state->ctx->dce_version > DCE_VERSION_11_0) + if (plane_state->ctx->dce_version > DCE_VERSION_MAX) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP; else pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; From 17ece75cdbc2d816c54c3a43152a09db9fd918ae Mon Sep 17 00:00:00 2001 From: Yefim Barashkin Date: Mon, 11 Jul 2022 14:35:11 -0800 Subject: [PATCH 131/299] drm/amd/pm: Prevent divide by zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0638c98c17aa12fe914459c82cd178247e21fb2b ] divide error: 0000 [#1] SMP PTI CPU: 3 PID: 78925 Comm: tee Not tainted 5.15.50-1-lts #1 Hardware name: MSI MS-7A59/Z270 SLI PLUS (MS-7A59), BIOS 1.90 01/30/2018 RIP: 0010:smu_v11_0_set_fan_speed_rpm+0x11/0x110 [amdgpu] Speed is user-configurable through a file. I accidentally set it to zero, and the driver crashed. Reviewed-by: Evan Quan Reviewed-by: André Almeida Signed-off-by: Yefim Barashkin Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 5f8809f6990dd..2fbd2926a5310 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1228,6 +1228,8 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; + if (speed == 0) + return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement * for minimum fan speed: From f094271f6bcf183d29964c1f963d9e896e5a7462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 11 Jul 2022 16:51:31 +0200 Subject: [PATCH 132/299] drm/amd/display: Ensure valid event timestamp for cursor-only commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3283c83eb6fcfbda8ea03d7149d8e42e71c5d45e ] Requires enabling the vblank machinery for them. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2030 Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bfbd701a4c9ad..810965bd06921 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -464,6 +464,26 @@ static void dm_pflip_high_irq(void *interrupt_params) vrr_active, (int) !e); } +static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) +{ + struct drm_crtc *crtc = &acrtc->base; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + drm_crtc_handle_vblank(crtc); + + spin_lock_irqsave(&dev->event_lock, flags); + + /* Send completion event for cursor-only commits */ + if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { + drm_crtc_send_vblank_event(crtc, acrtc->event); + drm_crtc_vblank_put(crtc); + acrtc->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, flags); +} + static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -502,7 +522,7 @@ static void dm_vupdate_high_irq(void *interrupt_params) * if a pageflip happened inside front-porch. */ if (vrr_active) { - drm_crtc_handle_vblank(&acrtc->base); + dm_crtc_handle_vblank(acrtc); /* BTR processing for pre-DCE12 ASICs */ if (acrtc->dm_irq_params.stream && @@ -554,7 +574,7 @@ static void dm_crtc_high_irq(void *interrupt_params) * to dm_vupdate_high_irq after end of front-porch. */ if (!vrr_active) - drm_crtc_handle_vblank(&acrtc->base); + dm_crtc_handle_vblank(acrtc); /** * Following stuff must happen at start of vblank, for crc @@ -9199,6 +9219,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct amdgpu_bo *abo; uint32_t target_vblank, last_flip_vblank; bool vrr_active = amdgpu_dm_vrr_active(acrtc_state); + bool cursor_update = false; bool pflip_present = false; struct { struct dc_surface_update surface_updates[MAX_SURFACES]; @@ -9234,8 +9255,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); /* Cursor plane is handled after stream updates */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) + if (plane->type == DRM_PLANE_TYPE_CURSOR) { + if ((fb && crtc == pcrtc) || + (old_plane_state->fb && old_plane_state->crtc == pcrtc)) + cursor_update = true; + continue; + } if (!fb || !crtc || pcrtc != crtc) continue; @@ -9397,6 +9423,17 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->stream_update.vrr_infopacket = &acrtc_state->stream->vrr_infopacket; } + } else if (cursor_update && acrtc_state->active_planes > 0 && + !acrtc_state->force_dpms_off && + acrtc_attach->base.state->event) { + drm_crtc_vblank_get(pcrtc); + + spin_lock_irqsave(&pcrtc->dev->event_lock, flags); + + acrtc_attach->event = acrtc_attach->base.state->event; + acrtc_attach->base.state->event = NULL; + + spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } /* Update the planes if changed or disable if we don't have any. */ From 68ea71d4eafcf5e6881286f551e9c35a750e0097 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 12 Jul 2022 00:11:42 -0500 Subject: [PATCH 133/299] smb3: workaround negprot bug in some Samba servers [ Upstream commit 32f319183c439b239294cb2d70ada3564c4c7c39 ] Mount can now fail to older Samba servers due to a server bug handling padding at the end of the last negotiate context (negotiate contexts typically are rounded up to 8 bytes by adding padding if needed). This server bug can be avoided by switching the order of negotiate contexts, placing a negotiate context at the end that does not require padding (prior to the recent netname context fix this was the case on the client). Fixes: 73130a7b1ac9 ("smb3: fix empty netname context on secondary channels") Reported-by: Julian Sikorski Tested-by: Julian Sikorski Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2pdu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6a8a00f28b192..2e6c0f4d84499 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -571,10 +571,6 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, *total_len += ctxt_len; pneg_ctxt += ctxt_len; - build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); - *total_len += sizeof(struct smb2_posix_neg_context); - pneg_ctxt += sizeof(struct smb2_posix_neg_context); - /* * secondary channels don't have the hostname field populated * use the hostname field in the primary channel instead @@ -586,9 +582,14 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, hostname); *total_len += ctxt_len; pneg_ctxt += ctxt_len; - neg_context_count = 4; - } else /* second channels do not have a hostname */ neg_context_count = 3; + } else + neg_context_count = 2; + + build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); + *total_len += sizeof(struct smb2_posix_neg_context); + pneg_ctxt += sizeof(struct smb2_posix_neg_context); + neg_context_count++; if (server->compress_algorithm) { build_compression_ctxt((struct smb2_compression_capabilities_context *) From e435c4aeeaa073091f7f3b7735af2ef5c97d63f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Tue, 12 Jul 2022 08:26:42 +0200 Subject: [PATCH 134/299] sfc: fix use after free when disabling sriov MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ebe41da5d47ac0fff877e57bd14c54dccf168827 ] Use after free is detected by kfence when disabling sriov. What was read after being freed was vf->pci_dev: it was freed from pci_disable_sriov and later read in efx_ef10_sriov_free_vf_vports, called from efx_ef10_sriov_free_vf_vswitching. Set the pointer to NULL at release time to not trying to read it later. Reproducer and dmesg log (note that kfence doesn't detect it every time): $ echo 1 > /sys/class/net/enp65s0f0np0/device/sriov_numvfs $ echo 0 > /sys/class/net/enp65s0f0np0/device/sriov_numvfs BUG: KFENCE: use-after-free read in efx_ef10_sriov_free_vf_vswitching+0x82/0x170 [sfc] Use-after-free read at 0x00000000ff3c1ba5 (in kfence-#224): efx_ef10_sriov_free_vf_vswitching+0x82/0x170 [sfc] efx_ef10_pci_sriov_disable+0x38/0x70 [sfc] efx_pci_sriov_configure+0x24/0x40 [sfc] sriov_numvfs_store+0xfe/0x140 kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x11f/0x1b0 vfs_write+0x1eb/0x280 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae kfence-#224: 0x00000000edb8ef95-0x00000000671f5ce1, size=2792, cache=kmalloc-4k allocated by task 6771 on cpu 10 at 3137.860196s: pci_alloc_dev+0x21/0x60 pci_iov_add_virtfn+0x2a2/0x320 sriov_enable+0x212/0x3e0 efx_ef10_sriov_configure+0x67/0x80 [sfc] efx_pci_sriov_configure+0x24/0x40 [sfc] sriov_numvfs_store+0xba/0x140 kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x11f/0x1b0 vfs_write+0x1eb/0x280 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae freed by task 6771 on cpu 12 at 3170.991309s: device_release+0x34/0x90 kobject_cleanup+0x3a/0x130 pci_iov_remove_virtfn+0xd9/0x120 sriov_disable+0x30/0xe0 efx_ef10_pci_sriov_disable+0x57/0x70 [sfc] efx_pci_sriov_configure+0x24/0x40 [sfc] sriov_numvfs_store+0xfe/0x140 kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x11f/0x1b0 vfs_write+0x1eb/0x280 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 3c5eb87605e85 ("sfc: create vports for VFs and assign random MAC addresses") Reported-by: Yanghang Liu Signed-off-by: Íñigo Huguet Acked-by: Martin Habets Link: https://lore.kernel.org/r/20220712062642.6915-1-ihuguet@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10_sriov.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 7f5aa4a8c4512..92550c7e85ce2 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -408,8 +408,9 @@ static int efx_ef10_pci_sriov_enable(struct efx_nic *efx, int num_vfs) static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) { struct pci_dev *dev = efx->pci_dev; + struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int vfs_assigned = pci_vfs_assigned(dev); - int rc = 0; + int i, rc = 0; if (vfs_assigned && !force) { netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; " @@ -417,10 +418,13 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) return -EBUSY; } - if (!vfs_assigned) + if (!vfs_assigned) { + for (i = 0; i < efx->vf_count; i++) + nic_data->vf[i].pci_dev = NULL; pci_disable_sriov(dev); - else + } else { rc = -EBUSY; + } efx_ef10_sriov_free_vf_vswitching(efx); efx->vf_count = 0; From 77be53ba0425ed1d4496f2b45708a863fe918730 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 11 Jul 2022 12:11:21 +0800 Subject: [PATCH 135/299] netfs: do not unlock and put the folio twice [ Upstream commit fac47b43c760ea90e64b895dba60df0327be7775 ] check_write_begin() will unlock and put the folio when return non-zero. So we should avoid unlocking and putting it twice in netfs layer. Change the way ->check_write_begin() works in the following two ways: (1) Pass it a pointer to the folio pointer, allowing it to unlock and put the folio prior to doing the stuff it wants to do, provided it clears the folio pointer. (2) Change the return values such that 0 with folio pointer set means continue, 0 with folio pointer cleared means re-get and all error codes indicating an error (no special treatment for -EAGAIN). [ bagasdotme: use Sphinx code text syntax for *foliop pointer ] Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/56423 Link: https://lore.kernel.org/r/cf169f43-8ee7-8697-25da-0204d1b4343e@redhat.com Co-developed-by: David Howells Signed-off-by: Xiubo Li Signed-off-by: David Howells Signed-off-by: Bagas Sanjaya Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- Documentation/filesystems/netfs_library.rst | 8 +++++--- fs/afs/file.c | 2 +- fs/ceph/addr.c | 11 ++++++----- fs/netfs/buffered_read.c | 17 ++++++++++------- include/linux/netfs.h | 2 +- 5 files changed, 23 insertions(+), 17 deletions(-) diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index 0483abcafcb01..0542358724f19 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -300,7 +300,7 @@ through which it can issue requests and negotiate:: void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; @@ -376,8 +376,10 @@ The operations are as follows: allocated/grabbed the folio to be modified to allow the filesystem to flush conflicting state before allowing it to be modified. - It should return 0 if everything is now fine, -EAGAIN if the folio should be - regrabbed and any other error code to abort the operation. + It may unlock and discard the folio it was given and set the caller's folio + pointer to NULL. It should return 0 if everything is now fine (``*foliop`` + left set) or the op should be retried (``*foliop`` cleared) and any other + error code to abort the operation. * ``done`` diff --git a/fs/afs/file.c b/fs/afs/file.c index fab8324833ba0..a8a5a91dc3750 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -376,7 +376,7 @@ static int afs_begin_cache_operation(struct netfs_io_request *rreq) } static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata) + struct folio **foliop, void **_fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 11dbb1133a21e..ae567fb7f65a0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -63,7 +63,7 @@ (CONGESTION_ON_THRESH(congestion_kb) >> 2)) static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); static inline struct ceph_snap_context *page_snap_context(struct page *page) { @@ -1285,18 +1285,19 @@ ceph_find_incompatible(struct page *page) } static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len, - struct folio *folio, void **_fsdata) + struct folio **foliop, void **_fsdata) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; - snapc = ceph_find_incompatible(folio_page(folio, 0)); + snapc = ceph_find_incompatible(folio_page(*foliop, 0)); if (snapc) { int r; - folio_unlock(folio); - folio_put(folio); + folio_unlock(*foliop); + folio_put(*foliop); + *foliop = NULL; if (IS_ERR(snapc)) return PTR_ERR(snapc); diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index e8e3359a4c547..8d03826c2b152 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -320,8 +320,9 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len, * conflicting writes once the folio is grabbed and locked. It is passed a * pointer to the fsdata cookie that gets returned to the VM to be passed to * write_end. It is permitted to sleep. It should return 0 if the request - * should go ahead; unlock the folio and return -EAGAIN to cause the folio to - * be regot; or return an error. + * should go ahead or it may return an error. It may also unlock and put the + * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0 + * will cause the folio to be re-got and the process to be retried. * * The calling netfs must initialise a netfs context contiguous to the vfs * inode before calling this. @@ -352,13 +353,13 @@ int netfs_write_begin(struct file *file, struct address_space *mapping, if (ctx->ops->check_write_begin) { /* Allow the netfs (eg. ceph) to flush conflicts. */ - ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata); + ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata); if (ret < 0) { trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); - if (ret == -EAGAIN) - goto retry; goto error; } + if (!folio) + goto retry; } if (folio_test_uptodate(folio)) @@ -420,8 +421,10 @@ int netfs_write_begin(struct file *file, struct address_space *mapping, error_put: netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); error: - folio_unlock(folio); - folio_put(folio); + if (folio) { + folio_unlock(folio); + folio_put(folio); + } _leave(" = %d", ret); return ret; } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index a9c6f73877ecb..95dadf0cd4b8e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -211,7 +211,7 @@ struct netfs_request_ops { void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; From 281e3679e62403e74ba7862c98227b96c678fa88 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Tue, 12 Jul 2022 19:58:35 +0200 Subject: [PATCH 136/299] seg6: fix skb checksum evaluation in SRH encapsulation/insertion [ Upstream commit df8386d13ea280d55beee1b95f61a59234a3798b ] Support for SRH encapsulation and insertion was introduced with commit 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels"), through the seg6_do_srh_encap() and seg6_do_srh_inline() functions, respectively. The former encapsulates the packet in an outer IPv6 header along with the SRH, while the latter inserts the SRH between the IPv6 header and the payload. Then, the headers are initialized/updated according to the operating mode (i.e., encap/inline). Finally, the skb checksum is calculated to reflect the changes applied to the headers. The IPv6 payload length ('payload_len') is not initialized within seg6_do_srh_{inline,encap}() but is deferred in seg6_do_srh(), i.e. the caller of seg6_do_srh_{inline,encap}(). However, this operation invalidates the skb checksum, since the 'payload_len' is updated only after the checksum is evaluated. To solve this issue, the initialization of the IPv6 payload length is moved from seg6_do_srh() directly into the seg6_do_srh_{inline,encap}() functions and before the skb checksum update takes place. Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") Reported-by: Paolo Abeni Link: https://lore.kernel.org/all/20220705190727.69d532417be7438b15404ee1@uniroma2.it Signed-off-by: Andrea Mayer Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/seg6_iptunnel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index d64855010948d..e756ba705fd9b 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -189,6 +189,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, tot_len); return 0; @@ -241,6 +243,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); return 0; @@ -302,7 +306,6 @@ static int seg6_do_srh(struct sk_buff *skb) break; } - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); nf_reset_ct(skb); From a8e911a97f579d8e1997cb530f154c1e3b0da724 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Tue, 12 Jul 2022 19:58:36 +0200 Subject: [PATCH 137/299] seg6: fix skb checksum in SRv6 End.B6 and End.B6.Encaps behaviors [ Upstream commit f048880fc77058d864aff5c674af7918b30f312a ] The SRv6 End.B6 and End.B6.Encaps behaviors rely on functions seg6_do_srh_{encap,inline}() to, respectively: i) encapsulate the packet within an outer IPv6 header with the specified Segment Routing Header (SRH); ii) insert the specified SRH directly after the IPv6 header of the packet. This patch removes the initialization of the IPv6 header payload length from the input_action_end_b6{_encap}() functions, as it is now handled properly by seg6_do_srh_{encap,inline}() to avoid corruption of the skb checksum. Fixes: 140f04c33bbc ("ipv6: sr: implement several seg6local actions") Signed-off-by: Andrea Mayer Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/seg6_local.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 98a34287439cc..2cd4a8d3b30ad 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -826,7 +826,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); @@ -858,7 +857,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb, if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); From e980d6a20162396ea1858121892b396181c07b53 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Tue, 12 Jul 2022 19:58:37 +0200 Subject: [PATCH 138/299] seg6: bpf: fix skb checksum in bpf_push_seg6_encap() [ Upstream commit 4889fbd98deaf243c3baadc54e296d71c6af1eb0 ] Both helper functions bpf_lwt_seg6_action() and bpf_lwt_push_encap() use the bpf_push_seg6_encap() to encapsulate the packet in an IPv6 with Segment Routing Header (SRH) or insert an SRH between the IPv6 header and the payload. To achieve this result, such helper functions rely on bpf_push_seg6_encap() which, in turn, leverages seg6_do_srh_{encap,inline}() to perform the required operation (i.e. encap/inline). This patch removes the initialization of the IPv6 header payload length from bpf_push_seg6_encap(), as it is now handled properly by seg6_do_srh_{encap,inline}() to prevent corruption of the skb checksum. Fixes: fe94cc290f53 ("bpf: Add IPv6 Segment Routing helpers") Signed-off-by: Andrea Mayer Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/filter.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index af1e77f2f24a8..6391c1885bca8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6148,7 +6148,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); From 16662524ec5da801fb78a1afcaf6e782f1cf103a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Wed, 13 Jul 2022 11:21:16 +0200 Subject: [PATCH 139/299] sfc: fix kernel panic when creating VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ada74c5539eba06cf8b47d068f92e0b3963a9a6e ] When creating VFs a kernel panic can happen when calling to efx_ef10_try_update_nic_stats_vf. When releasing a DMA coherent buffer, sometimes, I don't know in what specific circumstances, it has to unmap memory with vunmap. It is disallowed to do that in IRQ context or with BH disabled. Otherwise, we hit this line in vunmap, causing the crash: BUG_ON(in_interrupt()); This patch reenables BH to release the buffer. Log messages when the bug is hit: kernel BUG at mm/vmalloc.c:2727! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 6 PID: 1462 Comm: NetworkManager Kdump: loaded Tainted: G I --------- --- 5.14.0-119.el9.x86_64 #1 Hardware name: Dell Inc. PowerEdge R740/06WXJT, BIOS 2.8.2 08/27/2020 RIP: 0010:vunmap+0x2e/0x30 ...skip... Call Trace: __iommu_dma_free+0x96/0x100 efx_nic_free_buffer+0x2b/0x40 [sfc] efx_ef10_try_update_nic_stats_vf+0x14a/0x1c0 [sfc] efx_ef10_update_stats_vf+0x18/0x40 [sfc] efx_start_all+0x15e/0x1d0 [sfc] efx_net_open+0x5a/0xe0 [sfc] __dev_open+0xe7/0x1a0 __dev_change_flags+0x1d7/0x240 dev_change_flags+0x21/0x60 ...skip... Fixes: d778819609a2 ("sfc: DMA the VF stats only when requested") Reported-by: Ma Yuying Signed-off-by: Íñigo Huguet Acked-by: Edward Cree Link: https://lore.kernel.org/r/20220713092116.21238-1-ihuguet@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 186cb28c03bdb..8b62ce21aff3d 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1932,7 +1932,10 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx) efx_update_sw_stats(efx, stats); out: + /* releasing a DMA coherent buffer with BH disabled can panic */ + spin_unlock_bh(&efx->stats_lock); efx_nic_free_buffer(efx, &stats_buf); + spin_lock_bh(&efx->stats_lock); return rc; } From 5e151c507b7a7a77e78f666d9de00a38f3b2192c Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Wed, 13 Jul 2022 19:12:23 +0800 Subject: [PATCH 140/299] net: atlantic: remove deep parameter on suspend/resume functions [ Upstream commit 0f33250760384e05c36466b0a2f92f3c6007ba92 ] Below commit claims that atlantic NIC requires to reset the device on pm op, and had set the deep to true for all suspend/resume functions. commit 1809c30b6e5a ("net: atlantic: always deep reset on pm op, fixing up my null deref regression") So, we could remove deep parameter on suspend/resume functions without any functional change. Fixes: 1809c30b6e5a ("net: atlantic: always deep reset on pm op, fixing up my null deref regression") Signed-off-by: Chia-Lin Kao (AceLan) Link: https://lore.kernel.org/r/20220713111224.1535938-1-acelan.kao@canonical.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/aquantia/atlantic/aq_pci_func.c | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 831833911a525..dbd5263130f9e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -379,7 +379,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev) } } -static int aq_suspend_common(struct device *dev, bool deep) +static int aq_suspend_common(struct device *dev) { struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev)); @@ -392,17 +392,15 @@ static int aq_suspend_common(struct device *dev, bool deep) if (netif_running(nic->ndev)) aq_nic_stop(nic); - if (deep) { - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - aq_nic_set_power(nic); - } + aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); + aq_nic_set_power(nic); rtnl_unlock(); return 0; } -static int atl_resume_common(struct device *dev, bool deep) +static int atl_resume_common(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; @@ -415,10 +413,8 @@ static int atl_resume_common(struct device *dev, bool deep) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (deep) { - /* Reinitialize Nic/Vecs objects */ - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - } + /* Reinitialize Nic/Vecs objects */ + aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); if (netif_running(nic->ndev)) { ret = aq_nic_init(nic); @@ -444,22 +440,22 @@ static int atl_resume_common(struct device *dev, bool deep) static int aq_pm_freeze(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_thaw(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static const struct dev_pm_ops aq_pm_ops = { From 702419db7ded2e3686fbc05040b61d7428d611d5 Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Wed, 13 Jul 2022 19:12:24 +0800 Subject: [PATCH 141/299] net: atlantic: remove aq_nic_deinit() when resume [ Upstream commit 2e15c51fefaffaf9f72255eaef4fada05055e4c5 ] aq_nic_deinit() has been called while suspending, so we don't have to call it again on resume. Actually, call it again leads to another hang issue when resuming from S3. Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992345] Call Trace: Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992346] Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992348] aq_nic_deinit+0xb4/0xd0 [atlantic] Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992356] aq_pm_thaw+0x7f/0x100 [atlantic] Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992362] pci_pm_resume+0x5c/0x90 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992366] ? pci_pm_thaw+0x80/0x80 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992368] dpm_run_callback+0x4e/0x120 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992371] device_resume+0xad/0x200 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992373] async_resume+0x1e/0x40 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992374] async_run_entry_fn+0x33/0x120 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992377] process_one_work+0x220/0x3c0 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992380] worker_thread+0x4d/0x3f0 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992382] ? process_one_work+0x3c0/0x3c0 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992384] kthread+0x12a/0x150 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992386] ? set_kthread_struct+0x40/0x40 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992387] ret_from_fork+0x22/0x30 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992391] Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992392] ---[ end trace 1ec8c79604ed5e0d ]--- Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992394] PM: dpm_run_callback(): pci_pm_resume+0x0/0x90 returns -110 Jul 8 03:09:44 u-Precision-7865-Tower kernel: [ 5910.992397] atlantic 0000:02:00.0: PM: failed to resume async: error -110 Fixes: 1809c30b6e5a ("net: atlantic: always deep reset on pm op, fixing up my null deref regression") Signed-off-by: Chia-Lin Kao (AceLan) Link: https://lore.kernel.org/r/20220713111224.1535938-2-acelan.kao@canonical.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index dbd5263130f9e..8647125d60aef 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -413,9 +413,6 @@ static int atl_resume_common(struct device *dev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - /* Reinitialize Nic/Vecs objects */ - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - if (netif_running(nic->ndev)) { ret = aq_nic_init(nic); if (ret) From 80866cf468d062c8e2226e2f376103e971254707 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 8 Jul 2022 14:51:47 +0200 Subject: [PATCH 142/299] KVM: x86: Fully initialize 'struct kvm_lapic_irq' in kvm_pv_kick_cpu_op() [ Upstream commit 8a414f943f8b5f94bbaafdec863d6f3dbef33f8a ] 'vector' and 'trig_mode' fields of 'struct kvm_lapic_irq' are left uninitialized in kvm_pv_kick_cpu_op(). While these fields are normally not needed for APIC_DM_REMRD, they're still referenced by __apic_accept_irq() for trace_kvm_apic_accept_irq(). Fully initialize the structure to avoid consuming random stack memory. Fixes: a183b638b61c ("KVM: x86: make apic_accept_irq tracepoint more generic") Reported-by: syzbot+d6caa905917d353f0d07@syzkaller.appspotmail.com Signed-off-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Message-Id: <20220708125147.593975-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 558d1f2ab5b49..828f5cf1af459 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9074,15 +9074,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, */ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) { - struct kvm_lapic_irq lapic_irq; - - lapic_irq.shorthand = APIC_DEST_NOSHORT; - lapic_irq.dest_mode = APIC_DEST_PHYSICAL; - lapic_irq.level = 0; - lapic_irq.dest_id = apicid; - lapic_irq.msi_redir_hint = false; + /* + * All other fields are unused for APIC_DM_REMRD, but may be consumed by + * common code, e.g. for tracing. Defer initialization to the compiler. + */ + struct kvm_lapic_irq lapic_irq = { + .delivery_mode = APIC_DM_REMRD, + .dest_mode = APIC_DEST_PHYSICAL, + .shorthand = APIC_DEST_NOSHORT, + .dest_id = apicid, + }; - lapic_irq.delivery_mode = APIC_DM_REMRD; kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } From b00c5375a16413ada7537567d42cc0008f3378ae Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 14 Jul 2022 10:07:54 +0300 Subject: [PATCH 143/299] net/tls: Check for errors in tls_device_init [ Upstream commit 3d8c51b25a235e283e37750943bbf356ef187230 ] Add missing error checks in tls_device_init. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Reported-by: Jakub Kicinski Reviewed-by: Maxim Mikityanskiy Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20220714070754.1428-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tls.h | 4 ++-- net/tls/tls_device.c | 4 ++-- net/tls/tls_main.c | 7 ++++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index b6968a5b55389..e8764d3da41ac 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -708,7 +708,7 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_crypto_info *crypto_info); #ifdef CONFIG_TLS_DEVICE -void tls_device_init(void); +int tls_device_init(void); void tls_device_cleanup(void); void tls_device_sk_destruct(struct sock *sk); int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); @@ -728,7 +728,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) return tls_get_ctx(sk)->rx_conf == TLS_HW; } #else -static inline void tls_device_init(void) {} +static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 3919fe2c58c5c..3a61bb5945441 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1394,9 +1394,9 @@ static struct notifier_block tls_dev_notifier = { .notifier_call = tls_dev_event, }; -void __init tls_device_init(void) +int __init tls_device_init(void) { - register_netdevice_notifier(&tls_dev_notifier); + return register_netdevice_notifier(&tls_dev_notifier); } void __exit tls_device_cleanup(void) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 5c9697840ef70..13058b0ee4cd1 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -993,7 +993,12 @@ static int __init tls_register(void) if (err) return err; - tls_device_init(); + err = tls_device_init(); + if (err) { + unregister_pernet_subsys(&tls_proc_ops); + return err; + } + tcp_register_ulp(&tcp_tls_ulp_ops); return 0; From ed388232ed26096648c451ba0d602af8d849389d Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 9 Jun 2022 18:40:32 +0800 Subject: [PATCH 144/299] mm: sysctl: fix missing numa_stat when !CONFIG_HUGETLB_PAGE [ Upstream commit 43b5240ca6b33108998810593248186b1e3ae34a ] "numa_stat" should not be included in the scope of CONFIG_HUGETLB_PAGE, if CONFIG_HUGETLB_PAGE is not configured even if CONFIG_NUMA is configured, "numa_stat" is missed form /proc. Move it out of CONFIG_HUGETLB_PAGE to fix it. Fixes: 4518085e127d ("mm, sysctl: make NUMA stats configurable") Signed-off-by: Muchun Song Cc: Acked-by: Michal Hocko Acked-by: Mel Gorman Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- kernel/sysctl.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f165ea67dd33e..c42ba2d669dcc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2466,6 +2466,17 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO_HUNDRED, }, +#ifdef CONFIG_NUMA + { + .procname = "numa_stat", + .data = &sysctl_vm_numa_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_vm_numa_stat_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", @@ -2482,15 +2493,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, }, - { - .procname = "numa_stat", - .data = &sysctl_vm_numa_stat, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sysctl_vm_numa_stat_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { .procname = "hugetlb_shm_group", From 82089fcb1051230b5dc7145d07034b407e4c1ce6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 19 Jun 2022 20:39:29 +0100 Subject: [PATCH 145/299] ARM: 9211/1: domain: drop modify_domain() [ Upstream commit cc45b836388f0ccc6831288a08f77a33845f10b0 ] This function/macro isn't used anywhere in the kernel. The only user was set_fs() and was deleted in the set_fs() removal patch set. Fixes: 8ac6f5d7f84b ("ARM: 9113/1: uaccess: remove set_fs() implementation") Acked-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/include/asm/domain.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index f1d0a7807cd0e..41536feb43921 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -112,19 +112,6 @@ static __always_inline void set_domain(unsigned int val) } #endif -#ifdef CONFIG_CPU_USE_DOMAINS -#define modify_domain(dom,type) \ - do { \ - unsigned int domain = get_domain(); \ - domain &= ~domain_mask(dom); \ - domain = domain | domain_val(dom, type); \ - set_domain(domain); \ - } while (0) - -#else -static inline void modify_domain(unsigned dom, unsigned type) { } -#endif - /* * Generate the T (user) versions of the LDR/STR and related * instructions (inline assembly) From 9987bb02667ffe5971881933119c6eaed7c3a9ed Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 19 Jun 2022 20:40:16 +0100 Subject: [PATCH 146/299] ARM: 9212/1: domain: Modify Kconfig help text [ Upstream commit 2bf6204240fddb22cc4940b9e3f40c538390212e ] After the removal of set_fs() the reference to set_fs() is stale. Alter the helptext to reflect what the config option really does. Fixes: 8ac6f5d7f84b ("ARM: 9113/1: uaccess: remove set_fs() implementation") Acked-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/mm/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d30ee26ccc870..6284914f43610 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -631,7 +631,11 @@ config CPU_USE_DOMAINS bool help This option enables or disables the use of domain switching - via the set_fs() function. + using the DACR (domain access control register) to protect memory + domains from each other. In Linux we use three domains: kernel, user + and IO. The domains are used to protect userspace from kernelspace + and to handle IO-space as a special type of memory by assigning + manager or client roles to running code (such as a process). config CPU_V7M_NUM_IRQ int "Number of external interrupts connected to the NVIC" From 847f0c80c2ef9dcda7c47ee129d4253b5654c34c Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 29 Jun 2022 12:40:12 +0100 Subject: [PATCH 147/299] ASoC: dt-bindings: Fix description for msm8916 [ Upstream commit 94c65dffd4c4af052b3ea8934fbcb2fa8da276a8 ] For the existing msm8916 bindings the minimum reg/reg-names is 1 not 2. Similarly the minimum interrupt/interrupt-names is 1 not 2. Fixes: f3fc4fbfa2d2 ("ASoC: dt-bindings: Add SC7280 lpass cpu bindings") Signed-off-by: Bryan O'Donoghue Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220629114012.3282945-1-bryan.odonoghue@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- .../devicetree/bindings/sound/qcom,lpass-cpu.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml index 2c81efb5fa370..47bb67d43ac29 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml @@ -25,12 +25,12 @@ properties: - qcom,sc7280-lpass-cpu reg: - minItems: 2 + minItems: 1 maxItems: 6 description: LPAIF core registers reg-names: - minItems: 2 + minItems: 1 maxItems: 6 clocks: @@ -42,12 +42,12 @@ properties: maxItems: 7 interrupts: - minItems: 2 + minItems: 1 maxItems: 4 description: LPAIF DMA buffer interrupt interrupt-names: - minItems: 2 + minItems: 1 maxItems: 4 qcom,adsp: From e1bbe15676a37e4a67989ddd9cf357c0d1517387 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 6 Jun 2022 13:43:53 +0200 Subject: [PATCH 148/299] tee: tee_get_drvdata(): fix description of return value [ Upstream commit e5ce073c8a1e01b215a5eb32ba48f8d17ded3bd5 ] This patch fixes the description of tee_get_drvdata()'s return value. It actually returns the driver_data pointer supplied to tee_device_alloc() since the TEE subsystem was added to the kernel. Fixes: 967c9cca2cc5 ("tee: generic TEE subsystem") Cc: Jens Wiklander Signed-off-by: Marc Kleine-Budde Signed-off-by: Jens Wiklander Signed-off-by: Sasha Levin --- drivers/tee/tee_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 8aa1a4836b92f..cebe4963ad873 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -1075,7 +1075,7 @@ EXPORT_SYMBOL_GPL(tee_device_unregister); /** * tee_get_drvdata() - Return driver_data pointer * @teedev: Device containing the driver_data pointer - * @returns the driver_data pointer supplied to tee_register(). + * @returns the driver_data pointer supplied to tee_device_alloc(). */ void *tee_get_drvdata(struct tee_device *teedev) { From a1b922311ed063519f395f874a14ddd644a6868d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 7 Jul 2022 10:25:57 +0200 Subject: [PATCH 149/299] tty: extract tty_flip_buffer_commit() from tty_flip_buffer_push() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 716b10580283fda66f2b88140e3964f8a7f9da89 ] We will need this new helper in the next patch. Cc: Hillf Danton Cc: 一只狗 Cc: Dan Carpenter Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220707082558.9250-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/tty_buffer.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index bfa431a8e6902..303a26c1b8211 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -532,6 +532,15 @@ static void flush_to_ldisc(struct work_struct *work) } +static inline void tty_flip_buffer_commit(struct tty_buffer *tail) +{ + /* + * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees + * buffer data. + */ + smp_store_release(&tail->commit, tail->used); +} + /** * tty_flip_buffer_push - push terminal buffers * @port: tty port to push @@ -546,11 +555,7 @@ void tty_flip_buffer_push(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; - /* - * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees - * buffer data. - */ - smp_store_release(&buf->tail->commit, buf->tail->used); + tty_flip_buffer_commit(buf->tail); queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); From fa3302714c03e4e6c9b5aad5dacae33e75f76cf7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 7 Jul 2022 10:25:58 +0200 Subject: [PATCH 150/299] tty: use new tty_insert_flip_string_and_push_buffer() in pty_write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a501ab75e7624d133a5a3c7ec010687c8b961d23 ] There is a race in pty_write(). pty_write() can be called in parallel with e.g. ioctl(TIOCSTI) or ioctl(TCXONC) which also inserts chars to the buffer. Provided, tty_flip_buffer_push() in pty_write() is called outside the lock, it can commit inconsistent tail. This can lead to out of bounds writes and other issues. See the Link below. To fix this, we have to introduce a new helper called tty_insert_flip_string_and_push_buffer(). It does both tty_insert_flip_string() and tty_flip_buffer_commit() under the port lock. It also calls queue_work(), but outside the lock. See 71a174b39f10 (pty: do tty_flip_buffer_push without port->lock in pty_write) for the reasons. Keep the helper internal-only (in drivers' tty.h). It is not intended to be used widely. Link: https://seclists.org/oss-sec/2022/q2/155 Fixes: 71a174b39f10 (pty: do tty_flip_buffer_push without port->lock in pty_write) Cc: 一只狗 Cc: Dan Carpenter Suggested-by: Hillf Danton Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220707082558.9250-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/pty.c | 14 ++------------ drivers/tty/tty.h | 3 +++ drivers/tty/tty_buffer.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 74bfabe5b4538..752dab3356d72 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -111,21 +111,11 @@ static void pty_unthrottle(struct tty_struct *tty) static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) { struct tty_struct *to = tty->link; - unsigned long flags; - if (tty->flow.stopped) + if (tty->flow.stopped || !c) return 0; - if (c > 0) { - spin_lock_irqsave(&to->port->lock, flags); - /* Stuff the data into the input queue of the other end */ - c = tty_insert_flip_string(to->port, buf, c); - spin_unlock_irqrestore(&to->port->lock, flags); - /* And shovel */ - if (c) - tty_flip_buffer_push(to->port); - } - return c; + return tty_insert_flip_string_and_push_buffer(to->port, buf, c); } /** diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index b710c5ef89ab2..f310a8274df15 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -111,4 +111,7 @@ static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t cnt); + #endif diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 303a26c1b8211..595d8b49c745d 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -560,6 +560,37 @@ void tty_flip_buffer_push(struct tty_port *port) } EXPORT_SYMBOL(tty_flip_buffer_push); +/** + * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and + * push + * @port: tty port + * @chars: characters + * @size: size + * + * The function combines tty_insert_flip_string() and tty_flip_buffer_push() + * with the exception of properly holding the @port->lock. + * + * To be used only internally (by pty currently). + * + * Returns: the number added. + */ +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t size) +{ + struct tty_bufhead *buf = &port->buf; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + size = tty_insert_flip_string(port, chars, size); + if (size) + tty_flip_buffer_commit(buf->tail); + spin_unlock_irqrestore(&port->lock, flags); + + queue_work(system_unbound_wq, &buf->work); + + return size; +} + /** * tty_buffer_init - prepare a tty buffer structure * @port: tty port to initialise From e40c724237218671af114d710175abbf9c91361f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 27 Jun 2022 14:50:53 +0200 Subject: [PATCH 151/299] s390/nospec: build expoline.o for modules_prepare target [ Upstream commit c4e789572557aa147b13bf7fe09cc99663ed0cf5 ] When CONFIG_EXPOLINE_EXTERN is used expoline thunks are generated from arch/s390/lib/expoline.S and postlinked into every module. This is also true for external modules. Add expoline.o build to the modules_prepare target. Fixes: 1d2ad084800e ("s390/nospec: add an option to use thunk-extern") Reported-by: Joe Lawrence Tested-by: Sumanth Korikkar Acked-by: Sumanth Korikkar Tested-by: C. Erastus Toe Tested-by: Joe Lawrence Link: https://lore.kernel.org/r/patch-1.thread-d13b6c.git-a2387a74dc49.your-ad-here.call-01656331067-ext-4899@work.hours Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- arch/s390/Makefile | 8 +++++++- arch/s390/lib/Makefile | 3 ++- arch/s390/lib/expoline/Makefile | 3 +++ arch/s390/lib/{ => expoline}/expoline.S | 0 4 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 arch/s390/lib/expoline/Makefile rename arch/s390/lib/{ => expoline}/expoline.S (100%) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index eba70d585cb2c..5b7e761b2d507 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -80,7 +80,7 @@ endif ifdef CONFIG_EXPOLINE ifdef CONFIG_EXPOLINE_EXTERN - KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o + KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern else @@ -162,6 +162,12 @@ vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h) + +ifdef CONFIG_EXPOLINE_EXTERN +modules_prepare: expoline_prepare +expoline_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o +endif endif # Don't use tabs in echo arguments diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 5d415b3db6d14..580d2e3265cb2 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -7,7 +7,6 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o -obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o @@ -22,3 +21,5 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/ diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile new file mode 100644 index 0000000000000..854631d9cb03a --- /dev/null +++ b/arch/s390/lib/expoline/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += expoline.o diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline/expoline.S similarity index 100% rename from arch/s390/lib/expoline.S rename to arch/s390/lib/expoline/expoline.S From afeb95a11a4cc4b45914b10d34dd71417b3f356b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 Jul 2022 20:59:42 +0800 Subject: [PATCH 152/299] scsi: megaraid: Clear READ queue map's nr_queues [ Upstream commit 8312cd3a7b835ae3033a679e5f0014a40e7891c5 ] The megaraid SCSI driver sets set->nr_maps as 3 if poll_queues is > 0, and blk-mq actually initializes each map's nr_queues as nr_hw_queues. Consequently the driver has to clear READ queue map's nr_queues, otherwise the queue map becomes broken if poll_queues is set as non-zero. Link: https://lore.kernel.org/r/20220706125942.528533-1-ming.lei@redhat.com Fixes: 9e4bec5b2a23 ("scsi: megaraid_sas: mq_poll support") Cc: Kashyap Desai Cc: sumit.saxena@broadcom.com Cc: chandrakanth.patil@broadcom.com Cc: linux-block@vger.kernel.org Cc: Hannes Reinecke Reported-by: Guangwu Zhang Tested-by: Guangwu Zhang Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index db6793608447a..f5deb0e561a93 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3195,6 +3195,9 @@ static int megasas_map_queues(struct Scsi_Host *shost) qoff += map->nr_queues; offset += map->nr_queues; + /* we never use READ queue, so can't cheat blk-mq */ + shost->tag_set.map[HCTX_TYPE_READ].nr_queues = 0; + /* Setup Poll hctx */ map = &shost->tag_set.map[HCTX_TYPE_POLL]; map->nr_queues = instance->iopoll_q_count; From 799dcbf4d943401eb91c210e79d1f9d2e535ebfa Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 8 Jul 2022 17:00:27 -0700 Subject: [PATCH 153/299] scsi: ufs: core: Drop loglevel of WriteBoost message [ Upstream commit 2ae57c995003a7840cb6b5ec5f0c06193695321b ] Commit '3b5f3c0d0548 ("scsi: ufs: core: Tidy up WB configuration code")' changed the log level of the write boost enable/disable notification from debug to info. This results in a lot of noise in the kernel log during normal operation. Drop it back to debug level to avoid this. Link: https://lore.kernel.org/r/20220709000027.3929970-1-bjorn.andersson@linaro.org Fixes: 3b5f3c0d0548 ("scsi: ufs: core: Tidy up WB configuration code") Reviewed-by: Alim Akhtar Acked-by: Bean Huo Signed-off-by: Bjorn Andersson Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 4c9eb4be449ca..452ad06120671 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5722,7 +5722,7 @@ int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable) } hba->dev_info.wb_enabled = enable; - dev_info(hba->dev, "%s Write Booster %s\n", + dev_dbg(hba->dev, "%s Write Booster %s\n", __func__, enable ? "enabled" : "disabled"); return ret; From 94c8cc503bec42e3d580d45716843ef9bcfe103a Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 14 Jul 2022 12:42:10 +0000 Subject: [PATCH 154/299] nvme: fix block device naming collision [ Upstream commit 6961b5e02876b3b47f030a1f1ee8fd3e631ac270 ] The issue exists when multipath is enabled and the namespace is shared, but all the other controller checks at nvme_is_unique_nsid() are false. The reason for this issue is that nvme_is_unique_nsid() returns false when is called from nvme_mpath_alloc_disk() due to an uninitialized value of head->shared. The patch fixes it by setting head->shared before nvme_mpath_alloc_disk() is called. Fixes: 5974ea7ce0f9 ("nvme: allow duplicate NSIDs for private namespaces") Signed-off-by: Israel Rukshin Reviewed-by: Keith Busch Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a2862a56fadc4..0fef31c935de1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3726,7 +3726,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns) } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_ns_ids *ids) + unsigned nsid, struct nvme_ns_ids *ids, bool is_shared) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3750,6 +3750,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->subsys = ctrl->subsys; head->ns_id = nsid; head->ids = *ids; + head->shared = is_shared; kref_init(&head->ref); if (head->ids.csi) { @@ -3830,12 +3831,11 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, nsid); goto out_unlock; } - head = nvme_alloc_ns_head(ctrl, nsid, ids); + head = nvme_alloc_ns_head(ctrl, nsid, ids, is_shared); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; } - head->shared = is_shared; } else { ret = -EINVAL; if (!is_shared || !head->shared) { From 8e2b1b8402efdf1a2fd8df71cbe6c6c07551b774 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 2 Jun 2022 10:07:38 +0900 Subject: [PATCH 155/299] ksmbd: use SOCK_NONBLOCK type for kernel_accept() [ Upstream commit fe0fde09e1cb83effcf8fafa372533f438d93a1a ] I found that normally it is O_NONBLOCK but there are different value for some arch. /include/linux/net.h: #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif /arch/alpha/include/asm/socket.h: #define SOCK_NONBLOCK 0x40000000 Use SOCK_NONBLOCK instead of O_NONBLOCK for kernel_accept(). Suggested-by: David Howells Signed-off-by: Namjae Jeon Reviewed-by: Hyunchul Lee Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/ksmbd/transport_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 8fef9de787d34..143bba4e4db81 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p) break; } ret = kernel_accept(iface->ksmbd_socket, &client_sk, - O_NONBLOCK); + SOCK_NONBLOCK); mutex_unlock(&iface->sock_release_lock); if (ret) { if (ret == -EAGAIN) From 99d1c36bddd93919072b5a51a89297bbb5ad6a6f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 23 Jun 2022 13:25:09 -0500 Subject: [PATCH 156/299] powerpc/xive/spapr: correct bitmap allocation size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 19fc5bb93c6bbdce8292b4d7eed04e2fa118d2fe ] kasan detects access beyond the end of the xibm->bitmap allocation: BUG: KASAN: slab-out-of-bounds in _find_first_zero_bit+0x40/0x140 Read of size 8 at addr c00000001d1d0118 by task swapper/0/1 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc2-00001-g90df023b36dd #28 Call Trace: [c00000001d98f770] [c0000000012baab8] dump_stack_lvl+0xac/0x108 (unreliable) [c00000001d98f7b0] [c00000000068faac] print_report+0x37c/0x710 [c00000001d98f880] [c0000000006902c0] kasan_report+0x110/0x354 [c00000001d98f950] [c000000000692324] __asan_load8+0xa4/0xe0 [c00000001d98f970] [c0000000011c6ed0] _find_first_zero_bit+0x40/0x140 [c00000001d98f9b0] [c0000000000dbfbc] xive_spapr_get_ipi+0xcc/0x260 [c00000001d98fa70] [c0000000000d6d28] xive_setup_cpu_ipi+0x1e8/0x450 [c00000001d98fb30] [c000000004032a20] pSeries_smp_probe+0x5c/0x118 [c00000001d98fb60] [c000000004018b44] smp_prepare_cpus+0x944/0x9ac [c00000001d98fc90] [c000000004009f9c] kernel_init_freeable+0x2d4/0x640 [c00000001d98fd90] [c0000000000131e8] kernel_init+0x28/0x1d0 [c00000001d98fe10] [c00000000000cd54] ret_from_kernel_thread+0x5c/0x64 Allocated by task 0: kasan_save_stack+0x34/0x70 __kasan_kmalloc+0xb4/0xf0 __kmalloc+0x268/0x540 xive_spapr_init+0x4d0/0x77c pseries_init_irq+0x40/0x27c init_IRQ+0x44/0x84 start_kernel+0x2a4/0x538 start_here_common+0x1c/0x20 The buggy address belongs to the object at c00000001d1d0118 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 0 bytes inside of 8-byte region [c00000001d1d0118, c00000001d1d0120) The buggy address belongs to the physical page: page:c00c000000074740 refcount:1 mapcount:0 mapping:0000000000000000 index:0xc00000001d1d0558 pfn:0x1d1d flags: 0x7ffff000000200(slab|node=0|zone=0|lastcpupid=0x7ffff) raw: 007ffff000000200 c00000001d0003c8 c00000001d0003c8 c00000001d010480 raw: c00000001d1d0558 0000000001e1000a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: c00000001d1d0000: fc 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc c00000001d1d0080: fc fc 00 fc fc fc fc fc fc fc fc fc fc fc fc fc >c00000001d1d0100: fc fc fc 02 fc fc fc fc fc fc fc fc fc fc fc fc ^ c00000001d1d0180: fc fc fc fc 04 fc fc fc fc fc fc fc fc fc fc fc c00000001d1d0200: fc fc fc fc fc 04 fc fc fc fc fc fc fc fc fc fc This happens because the allocation uses the wrong unit (bits) when it should pass (BITS_TO_LONGS(count) * sizeof(long)) or equivalent. With small numbers of bits, the allocated object can be smaller than sizeof(long), which results in invalid accesses. Use bitmap_zalloc() to allocate and initialize the irq bitmap, paired with bitmap_free() for consistency. Signed-off-by: Nathan Lynch Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220623182509.3985625-1-nathanl@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/sysdev/xive/spapr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 503f544d28e29..b0d36e430dbc4 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +56,7 @@ static int __init xive_irq_bitmap_add(int base, int count) spin_lock_init(&xibm->lock); xibm->base = base; xibm->count = count; - xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL); + xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL); if (!xibm->bitmap) { kfree(xibm); return -ENOMEM; @@ -73,7 +74,7 @@ static void xive_irq_bitmap_remove_all(void) list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { list_del(&xibm->list); - kfree(xibm->bitmap); + bitmap_free(xibm->bitmap); kfree(xibm); } } From 8a53aed793fb799d2c6f519c389ea84963d5c807 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 13 Jun 2022 10:59:58 +0300 Subject: [PATCH 157/299] vdpa/mlx5: Initialize CVQ vringh only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ace9252446ec615cd79a5f77d90edb25c0b9d024 ] Currently, CVQ vringh is initialized inside setup_virtqueues() which is called every time a memory update is done. This is undesirable since it resets all the context of the vring, including the available and used indices. Move the initialization to mlx5_vdpa_set_status() when VIRTIO_CONFIG_S_DRIVER_OK is set. Signed-off-by: Eli Cohen Message-Id: <20220613075958.511064-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index c290386aa2f37..ae4cfbd927fd2 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1965,7 +1965,6 @@ static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_control_vq *cvq = &mvdev->cvq; int err; int i; @@ -1975,16 +1974,6 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) goto err_vq; } - if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { - err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, - (struct vring_desc *)(uintptr_t)cvq->desc_addr, - (struct vring_avail *)(uintptr_t)cvq->driver_addr, - (struct vring_used *)(uintptr_t)cvq->device_addr); - if (err) - goto err_vq; - } - return 0; err_vq: @@ -2257,6 +2246,21 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) ndev->mvdev.cvq.ready = false; } +static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_control_vq *cvq = &mvdev->cvq; + int err = 0; + + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, + MLX5_CVQ_MAX_ENT, false, + (struct vring_desc *)(uintptr_t)cvq->desc_addr, + (struct vring_avail *)(uintptr_t)cvq->driver_addr, + (struct vring_used *)(uintptr_t)cvq->device_addr); + + return err; +} + static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2269,6 +2273,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + err = setup_cvq_vring(mvdev); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); + goto err_setup; + } err = setup_driver(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); From fbfa91978c0be7d92ee39e77a71dbe0b4814ef9f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 13 Jun 2022 22:52:23 +0300 Subject: [PATCH 158/299] vduse: Tie vduse mgmtdev and its device [ Upstream commit 0e0348ac3f0a6e6606f1aa5acb1803ada913aa3d ] vduse devices are not backed by any real devices such as PCI. Hence it doesn't have any parent device linked to it. Kernel driver model in [1] suggests to avoid an empty device release callback. Hence tie the mgmtdevice object's life cycle to an allocate dummy struct device instead of static one. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/core-api/kobject.rst?h=v5.18-rc7#n284 Signed-off-by: Parav Pandit Message-Id: <20220613195223.473966-1-parav@nvidia.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xie Yongji Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_user/vduse_dev.c | 60 ++++++++++++++++++------------ 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 160e40d030847..02709f8a78bd2 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1475,16 +1475,12 @@ static char *vduse_devnode(struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); } -static void vduse_mgmtdev_release(struct device *dev) -{ -} - -static struct device vduse_mgmtdev = { - .init_name = "vduse", - .release = vduse_mgmtdev_release, +struct vduse_mgmt_dev { + struct vdpa_mgmt_dev mgmt_dev; + struct device dev; }; -static struct vdpa_mgmt_dev mgmt_dev; +static struct vduse_mgmt_dev *vduse_mgmt; static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) { @@ -1509,7 +1505,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) } set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); vdev->vdpa.dma_dev = &vdev->vdpa.dev; - vdev->vdpa.mdev = &mgmt_dev; + vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev; return 0; } @@ -1555,34 +1551,52 @@ static struct virtio_device_id id_table[] = { { 0 }, }; -static struct vdpa_mgmt_dev mgmt_dev = { - .device = &vduse_mgmtdev, - .id_table = id_table, - .ops = &vdpa_dev_mgmtdev_ops, -}; +static void vduse_mgmtdev_release(struct device *dev) +{ + struct vduse_mgmt_dev *mgmt_dev; + + mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev); + kfree(mgmt_dev); +} static int vduse_mgmtdev_init(void) { int ret; - ret = device_register(&vduse_mgmtdev); - if (ret) + vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL); + if (!vduse_mgmt) + return -ENOMEM; + + ret = dev_set_name(&vduse_mgmt->dev, "vduse"); + if (ret) { + kfree(vduse_mgmt); return ret; + } - ret = vdpa_mgmtdev_register(&mgmt_dev); + vduse_mgmt->dev.release = vduse_mgmtdev_release; + + ret = device_register(&vduse_mgmt->dev); if (ret) - goto err; + goto dev_reg_err; - return 0; -err: - device_unregister(&vduse_mgmtdev); + vduse_mgmt->mgmt_dev.id_table = id_table; + vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops; + vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev; + ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev); + if (ret) + device_unregister(&vduse_mgmt->dev); + + return ret; + +dev_reg_err: + put_device(&vduse_mgmt->dev); return ret; } static void vduse_mgmtdev_exit(void) { - vdpa_mgmtdev_unregister(&mgmt_dev); - device_unregister(&vduse_mgmtdev); + vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev); + device_unregister(&vduse_mgmt->dev); } static int vduse_init(void) From 03303d4d9f99139ca69b3a49e1302caaf906cd2e Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Tue, 14 Jun 2022 17:27:51 -0700 Subject: [PATCH 159/299] platform/x86: intel/pmc: Add Alder Lake N support to PMC core driver [ Upstream commit d63eae6747eb8b3192e89712f6553c6aa162f872 ] Add Alder Lake N (ADL-N) to the list of the platforms that Intel's PMC core driver supports. Alder Lake N reuses all the TigerLake PCH IPs. Cc: Srinivas Pandruvada Cc: Andy Shevchenko Cc: David E. Box Signed-off-by: Gayatri Kammela Reviewed-by: Rajneesh Bhardwaj Link: https://lore.kernel.org/r/20220615002751.3371730-1-gayatri.kammela@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/pmc/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 8ee15a7252c71..c3ec5dc88bbf7 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1911,6 +1911,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &icl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &tgl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &tgl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &tgl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &tgl_reg_map), {} From 0723688d0c539d97a7a5bb1ebae907907cf0d19b Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 21 Jun 2022 13:06:20 +0200 Subject: [PATCH 160/299] virtio_mmio: Add missing PM calls to freeze/restore [ Upstream commit ed7ac37fde33ccd84e4bd2b9363c191f925364c7 ] Most virtio drivers provide freeze/restore callbacks to finish up device usage before suspend and to reinitialize the virtio device after resume. However, these callbacks are currently only called when using virtio_pci. virtio_mmio does not have any PM ops defined. This causes problems for example after suspend to disk (hibernation), since the virtio devices might lose their state after the VMM is restarted. Calling virtio_device_freeze()/restore() ensures that the virtio devices are re-initialized correctly. Fix this by implementing the dev_pm_ops for virtio_mmio, similar to virtio_pci_common. Signed-off-by: Stephan Gerhold Message-Id: <20220621110621.3638025-2-stephan.gerhold@kernkonzept.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_mmio.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 1dd396d4bebb2..7522832529dd6 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -543,6 +544,25 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { .get_shm_region = vm_get_shm_region, }; +#ifdef CONFIG_PM_SLEEP +static int virtio_mmio_freeze(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_freeze(&vm_dev->vdev); +} + +static int virtio_mmio_restore(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_restore(&vm_dev->vdev); +} + +static const struct dev_pm_ops virtio_mmio_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore) +}; +#endif static void virtio_mmio_release_dev(struct device *_d) { @@ -786,6 +806,9 @@ static struct platform_driver virtio_mmio_driver = { .name = "virtio-mmio", .of_match_table = virtio_mmio_match, .acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match), +#ifdef CONFIG_PM_SLEEP + .pm = &virtio_mmio_pm_ops, +#endif }, }; From 387c23776010b6f6dbdf53ca267e34c092f23c58 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 21 Jun 2022 13:06:21 +0200 Subject: [PATCH 161/299] virtio_mmio: Restore guest page size on resume [ Upstream commit e0c2ce8217955537dd5434baeba061f209797119 ] Virtio devices might lose their state when the VMM is restarted after a suspend to disk (hibernation) cycle. This means that the guest page size register must be restored for the virtio_mmio legacy interface, since otherwise the virtio queues are not functional. This is particularly problematic for QEMU that currently still defaults to using the legacy interface for virtio_mmio. Write the guest page size register again in virtio_mmio_restore() to make legacy virtio_mmio devices work correctly after hibernation. Signed-off-by: Stephan Gerhold Message-Id: <20220621110621.3638025-3-stephan.gerhold@kernkonzept.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_mmio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 7522832529dd6..fe696aafaed86 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -556,6 +556,9 @@ static int virtio_mmio_restore(struct device *dev) { struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + if (vm_dev->version == 1) + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + return virtio_device_restore(&vm_dev->vdev); } From 0016d5d46d7440729a3132f61a8da3bf7f84e2ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2022 16:43:57 +0200 Subject: [PATCH 162/299] netfilter: nf_tables: avoid skb access on nf_stolen [ Upstream commit e34b9ed96ce3b06c79bf884009b16961ca478f87 ] When verdict is NF_STOLEN, the skb might have been freed. When tracing is enabled, this can result in a use-after-free: 1. access to skb->nf_trace 2. access to skb->mark 3. computation of trace id 4. dump of packet payload To avoid 1, keep a cached copy of skb->nf_trace in the trace state struct. Refresh this copy whenever verdict is != STOLEN. Avoid 2 by skipping skb->mark access if verdict is STOLEN. 3 is avoided by precomputing the trace id. Only dump the packet when verdict is not "STOLEN". Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 16 ++++++----- net/netfilter/nf_tables_core.c | 24 ++++++++++++++--- net/netfilter/nf_tables_trace.c | 44 +++++++++++++++++-------------- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f0c3a1ee197c0..64cf655c818cc 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1342,24 +1342,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message * @pkt: pktinfo currently processed * @basechain: base chain currently processed * @chain: chain currently processed * @rule: rule that was evaluated * @verdict: verdict given by rule - * @type: event type (enum nft_trace_types) - * @packet_dumped: packet headers sent in a previous traceinfo message - * @trace: other struct members are initialised */ struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; const struct nft_rule_dp *rule; const struct nft_verdict *verdict; - enum nft_trace_types type; - bool packet_dumped; - bool trace; }; void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 53f40e4738557..3ddce24ac76dd 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, enum nft_trace_types type) { - const struct nft_pktinfo *pkt = info->pkt; - - if (!info->trace || !pkt->skb->nf_trace) + if (!info->trace || !info->nf_trace) return; info->chain = chain; @@ -42,11 +40,24 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + info->nf_trace = pkt->skb->nf_trace; info->rule = rule; __nft_trace_packet(info, chain, type); } } +static inline void nft_trace_copy_nftrace(struct nft_traceinfo *info) +{ + if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + if (info->trace) + info->nf_trace = pkt->skb->nf_trace; + } +} + static void nft_bitwise_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { @@ -85,6 +96,7 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, const struct nft_chain *chain, const struct nft_regs *regs) { + const struct nft_pktinfo *pkt = info->pkt; enum nft_trace_types type; switch (regs->verdict.code) { @@ -92,8 +104,13 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, case NFT_RETURN: type = NFT_TRACETYPE_RETURN; break; + case NF_STOLEN: + type = NFT_TRACETYPE_RULE; + /* can't access skb->nf_trace; use copy */ + break; default: type = NFT_TRACETYPE_RULE; + info->nf_trace = pkt->skb->nf_trace; break; } @@ -254,6 +271,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) switch (regs.verdict.code) { case NFT_BREAK: regs.verdict.code = NFT_CONTINUE; + nft_trace_copy_nftrace(&info); continue; case NFT_CONTINUE: nft_trace_packet(&info, chain, rule, diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 5041725423c2d..1163ba9c14011 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,22 +25,6 @@ DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); EXPORT_SYMBOL_GPL(nft_trace_enabled); -static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) -{ - __be32 id; - - /* using skb address as ID results in a limited number of - * values (and quick reuse). - * - * So we attempt to use as many skb members that will not - * change while skb is with netfilter. - */ - id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), - skb->skb_iif); - - return nla_put_be32(nlskb, NFTA_TRACE_ID, id); -} - static int trace_fill_header(struct sk_buff *nlskb, u16 type, const struct sk_buff *skb, int off, unsigned int len) @@ -186,6 +170,7 @@ void nft_trace_notify(struct nft_traceinfo *info) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; + u32 mark = 0; u16 event; if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) @@ -229,7 +214,7 @@ void nft_trace_notify(struct nft_traceinfo *info) if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) goto nla_put_failure; - if (trace_fill_id(skb, pkt->skb)) + if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid)) goto nla_put_failure; if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) @@ -249,16 +234,24 @@ void nft_trace_notify(struct nft_traceinfo *info) case NFT_TRACETYPE_RULE: if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) goto nla_put_failure; + + /* pkt->skb undefined iff NF_STOLEN, disable dump */ + if (info->verdict->code == NF_STOLEN) + info->packet_dumped = true; + else + mark = pkt->skb->mark; + break; case NFT_TRACETYPE_POLICY: + mark = pkt->skb->mark; + if (nla_put_be32(skb, NFTA_TRACE_POLICY, htonl(info->basechain->policy))) goto nla_put_failure; break; } - if (pkt->skb->mark && - nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark))) goto nla_put_failure; if (!info->packet_dumped) { @@ -283,9 +276,20 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, const struct nft_chain *chain) { + static siphash_key_t trace_key __read_mostly; + struct sk_buff *skb = pkt->skb; + info->basechain = nft_base_chain(chain); info->trace = true; + info->nf_trace = pkt->skb->nf_trace; info->packet_dumped = false; info->pkt = pkt; info->verdict = verdict; + + net_get_random_once(&trace_key, sizeof(trace_key)); + + info->skbid = (u32)siphash_3u32(hash32_ptr(skb), + skb_get_hash(skb), + skb->skb_iif, + &trace_key); } From 05dfa88e73c9fa9c495384534afbc0d2589b373e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 21 Jun 2022 18:26:03 +0200 Subject: [PATCH 163/299] netfilter: br_netfilter: do not skip all hooks with 0 priority [ Upstream commit c2577862eeb0be94f151f2f1fff662b028061b00 ] When br_netfilter module is loaded, skbs may be diverted to the ipv4/ipv6 hooks, just like as if we were routing. Unfortunately, bridge filter hooks with priority 0 may be skipped in this case. Example: 1. an nftables bridge ruleset is loaded, with a prerouting hook that has priority 0. 2. interface is added to the bridge. 3. no tcp packet is ever seen by the bridge prerouting hook. 4. flush the ruleset 5. load the bridge ruleset again. 6. tcp packets are processed as expected. After 1) the only registered hook is the bridge prerouting hook, but its not called yet because the bridge hasn't been brought up yet. After 2), hook order is: 0 br_nf_pre_routing // br_netfilter internal hook 0 chain bridge f prerouting // nftables bridge ruleset The packet is diverted to br_nf_pre_routing. If call-iptables is off, the nftables bridge ruleset is called as expected. But if its enabled, br_nf_hook_thresh() will skip it because it assumes that all 0-priority hooks had been called previously in bridge context. To avoid this, check for the br_nf_pre_routing hook itself, we need to resume directly after it, even if this hook has a priority of 0. Unfortunately, this still results in different packet flow. With this fix, the eval order after in 3) is: 1. br_nf_pre_routing 2. ip(6)tables (if enabled) 3. nftables bridge but after 5 its the much saner: 1. nftables bridge 2. br_nf_pre_routing 3. ip(6)tables (if enabled) Unfortunately I don't see a solution here: It would be possible to move br_nf_pre_routing to a higher priority so that it will be called later in the pipeline, but this also impacts ebtables evaluation order, and would still result in this very ordering problem for all nftables-bridge hooks with the same priority as the br_nf_pre_routing one. Searching back through the git history I don't think this has ever behaved in any other way, hence, no fixes-tag. Reported-by: Radim Hrazdil Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/br_netfilter_hooks.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 4fd882686b04d..ff47790366497 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); - for (i = 0; i < e->num_hook_entries && - ops[i]->priority <= NF_BR_PRI_BRNF; i++) - ; + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); From 811ebff1ed15ff5860ed753b5d5de46a458a974f Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 23 Jun 2022 20:41:59 +0800 Subject: [PATCH 164/299] scsi: hisi_sas: Limit max hw sectors for v3 HW [ Upstream commit fce54ed027577517df1e74b7d54dc2b1bd536887 ] If the controller is behind an IOMMU then the IOMMU IOVA caching range can affect performance, as discussed in [0]. Limit the max HW sectors to not exceed this limit. We need to hardcode the value until a proper DMA mapping API is available. [0] https://lore.kernel.org/linux-iommu/20210129092120.1482-1-thunder.leizhen@huawei.com/ Link: https://lore.kernel.org/r/1655988119-223714-1-git-send-email-john.garry@huawei.com Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 7d819fc0395e4..eb86afb21aabb 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2782,6 +2782,7 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) struct hisi_hba *hisi_hba = shost_priv(shost); struct device *dev = hisi_hba->dev; int ret = sas_slave_configure(sdev); + unsigned int max_sectors; if (ret) return ret; @@ -2799,6 +2800,12 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) } } + /* Set according to IOMMU IOVA caching limit */ + max_sectors = min_t(size_t, queue_max_hw_sectors(sdev->request_queue), + (PAGE_SIZE * 32) >> SECTOR_SHIFT); + + blk_queue_max_hw_sectors(sdev->request_queue, max_sectors); + return 0; } From 4513018d0bd739097570d26a7760551cba3deb56 Mon Sep 17 00:00:00 2001 From: Liang He Date: Sat, 18 Jun 2022 10:25:45 +0800 Subject: [PATCH 165/299] cpufreq: pmac32-cpufreq: Fix refcount leak bug [ Upstream commit ccd7567d4b6cf187fdfa55f003a9e461ee629e36 ] In pmac_cpufreq_init_MacRISC3(), we need to add corresponding of_node_put() for the three node pointers whose refcount have been incremented by of_find_node_by_name(). Signed-off-by: Liang He Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/pmac32-cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 4f20c6a9108df..8e41fe9ee870d 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -470,6 +470,10 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) if (slew_done_gpio_np) slew_done_gpio = read_gpio(slew_done_gpio_np); + of_node_put(volt_gpio_np); + of_node_put(freq_gpio_np); + of_node_put(slew_done_gpio_np); + /* If we use the frequency GPIOs, calculate the min/max speeds based * on the bus frequencies */ From 01453386fb9233ff274c60ff2ba0689e62cffb4a Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Fri, 3 Jun 2022 13:02:09 -0400 Subject: [PATCH 166/299] platform/x86: thinkpad-acpi: profile capabilities as integer [ Upstream commit 42504af775361ca2330a2bfde496a5ebc5655c86 ] Currently the active mode (PSC/MMC) is stored in an enum and queried throughout the driver. Other driver changes will enumerate additional submodes that are relevant to be tracked, so instead track PSC/MMC in a single integer variable. Co-developed-by: Mario Limonciello Signed-off-by: Mario Limonciello Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220603170212.164963-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 45 +++++++++++----------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index aa6ffeaa39329..b4d6a356b7462 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10300,21 +10300,15 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_DISABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 0) #define DYTC_ENABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 1) -enum dytc_profile_funcmode { - DYTC_FUNCMODE_NONE = 0, - DYTC_FUNCMODE_MMC, - DYTC_FUNCMODE_PSC, -}; - -static enum dytc_profile_funcmode dytc_profile_available; static enum platform_profile_option dytc_current_profile; static atomic_t dytc_ignore_event = ATOMIC_INIT(0); static DEFINE_MUTEX(dytc_mutex); +static int dytc_capabilities; static bool dytc_mmc_get_available; static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile) { - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { switch (dytcmode) { case DYTC_MODE_MMC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10331,7 +10325,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p } return 0; } - if (dytc_profile_available == DYTC_FUNCMODE_PSC) { + if (dytc_capabilities & BIT(DYTC_FC_PSC)) { switch (dytcmode) { case DYTC_MODE_PSC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10353,21 +10347,21 @@ static int convert_profile_to_dytc(enum platform_profile_option profile, int *pe { switch (profile) { case PLATFORM_PROFILE_LOW_POWER: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_LOWPOWER; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_LOWPOWER; break; case PLATFORM_PROFILE_BALANCED: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_BALANCE; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_BALANCE; break; case PLATFORM_PROFILE_PERFORMANCE: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_PERFORM; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_PERFORM; break; default: /* Unknown profile */ @@ -10446,7 +10440,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, if (err) goto unlock; - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { if (profile == PLATFORM_PROFILE_BALANCED) { /* * To get back to balanced mode we need to issue a reset command. @@ -10465,7 +10459,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, goto unlock; } } - if (dytc_profile_available == DYTC_FUNCMODE_PSC) { + if (dytc_capabilities & BIT(DYTC_FC_PSC)) { err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output); if (err) goto unlock; @@ -10484,12 +10478,12 @@ static void dytc_profile_refresh(void) int perfmode; mutex_lock(&dytc_mutex); - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { if (dytc_mmc_get_available) err = dytc_command(DYTC_CMD_MMC_GET, &output); else err = dytc_cql_command(DYTC_CMD_GET, &output); - } else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) err = dytc_command(DYTC_CMD_GET, &output); mutex_unlock(&dytc_mutex); @@ -10518,7 +10512,6 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) set_bit(PLATFORM_PROFILE_BALANCED, dytc_profile.choices); set_bit(PLATFORM_PROFILE_PERFORMANCE, dytc_profile.choices); - dytc_profile_available = DYTC_FUNCMODE_NONE; err = dytc_command(DYTC_CMD_QUERY, &output); if (err) return err; @@ -10531,13 +10524,12 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) return -ENODEV; /* Check what capabilities are supported */ - err = dytc_command(DYTC_CMD_FUNC_CAP, &output); + err = dytc_command(DYTC_CMD_FUNC_CAP, &dytc_capabilities); if (err) return err; - if (output & BIT(DYTC_FC_MMC)) { /* MMC MODE */ - dytc_profile_available = DYTC_FUNCMODE_MMC; - + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { /* MMC MODE */ + pr_debug("MMC is supported\n"); /* * Check if MMC_GET functionality available * Version > 6 and return success from MMC_GET command @@ -10548,8 +10540,8 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (!err && ((output & DYTC_ERR_MASK) == DYTC_ERR_SUCCESS)) dytc_mmc_get_available = true; } - } else if (output & BIT(DYTC_FC_PSC)) { /* PSC MODE */ - dytc_profile_available = DYTC_FUNCMODE_PSC; + } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ + pr_debug("PSC is supported\n"); } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); return -ENODEV; @@ -10575,7 +10567,6 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) static void dytc_profile_exit(void) { - dytc_profile_available = DYTC_FUNCMODE_NONE; platform_profile_remove(); } From 2c27eb27e70f869e078d3179b3a12a527d33894f Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 27 Jun 2022 14:14:49 -0400 Subject: [PATCH 167/299] platform/x86: thinkpad_acpi: do not use PSC mode on Intel platforms [ Upstream commit bce6243f767f7da88aa4674d5d678f9f156eaba9 ] PSC platform profile mode is only supported on Linux for AMD platforms. Some older Intel platforms (e.g T490) are advertising it's capability as Windows uses it - but on Linux we should only be using MMC profile for Intel systems. Add a check to prevent it being enabled incorrectly. Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20220627181449.3537-1-markpearson@lenovo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index b4d6a356b7462..a8b3830515285 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10541,6 +10541,11 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) dytc_mmc_get_available = true; } } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ + /* Support for this only works on AMD platforms */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + dbg_printk(TPACPI_DBG_INIT, "PSC not support on Intel platforms\n"); + return -ENODEV; + } pr_debug("PSC is supported\n"); } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); From 05b92733fd6cc44622b7869358aca46e004f0e27 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 28 Jun 2022 20:37:26 +0800 Subject: [PATCH 168/299] platform/x86: hp-wmi: Ignore Sanitization Mode event [ Upstream commit 9ab762a84b8094540c18a170e5ddd6488632c456 ] After system resume the hp-wmi driver may complain: [ 702.620180] hp_wmi: Unknown event_id - 23 - 0x0 According to HP it means 'Sanitization Mode' and it's harmless to just ignore the event. Cc: Jorge Lopez Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20220628123726.250062-1-kai.heng.feng@canonical.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/hp-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 0e6ed75c70f36..c63ec1471b844 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -89,6 +89,7 @@ enum hp_wmi_event_ids { HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D, HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, + HPWMI_SANITIZATION_MODE = 0x17, }; /* @@ -846,6 +847,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_BATTERY_CHARGE_PERIOD: break; + case HPWMI_SANITIZATION_MODE: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; From 6f36471e568f95d5440ba89412e5086f57baa1af Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:34 +0200 Subject: [PATCH 169/299] firmware: sysfb: Make sysfb_create_simplefb() return a pdev pointer [ Upstream commit 9e121040e54abef9ed5542e5fdfa87911cd96204 ] This function just returned 0 on success or an errno code on error, but it could be useful for sysfb_init() callers to have a pointer to the device. Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-2-javierm@redhat.com Signed-off-by: Sasha Levin --- drivers/firmware/sysfb.c | 4 ++-- drivers/firmware/sysfb_simplefb.c | 16 ++++++++-------- include/linux/sysfb.h | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 2bfbb05f7d896..b032f40a92de0 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -46,8 +46,8 @@ static __init int sysfb_init(void) /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { - ret = sysfb_create_simplefb(si, &mode); - if (!ret) + pd = sysfb_create_simplefb(si, &mode); + if (!IS_ERR(pd)) return 0; } diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index bda8712bfd8c5..a353e27f83f54 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -57,8 +57,8 @@ __init bool sysfb_parse_mode(const struct screen_info *si, return false; } -__init int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +__init struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { struct platform_device *pd; struct resource res; @@ -76,7 +76,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si, base |= (u64)si->ext_lfb_base << 32; if (!base || (u64)(resource_size_t)base != base) { printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } /* @@ -93,7 +93,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si, length = mode->height * mode->stride; if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } length = PAGE_ALIGN(length); @@ -104,11 +104,11 @@ __init int sysfb_create_simplefb(const struct screen_info *si, res.start = base; res.end = res.start + length - 1; if (res.end <= res.start) - return -EINVAL; + return ERR_PTR(-EINVAL); pd = platform_device_alloc("simple-framebuffer", 0); if (!pd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); sysfb_apply_efi_quirks(pd); @@ -124,10 +124,10 @@ __init int sysfb_create_simplefb(const struct screen_info *si, if (ret) goto err_put_device; - return 0; + return pd; err_put_device: platform_device_put(pd); - return ret; + return ERR_PTR(ret); } diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index b0dcfa26d07bd..708152e9037bf 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -72,8 +72,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd) bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); -int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); +struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode); #else /* CONFIG_SYSFB_SIMPLE */ @@ -83,10 +83,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, return false; } -static inline int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { - return -EINVAL; + return ERR_PTR(-EINVAL); } #endif /* CONFIG_SYSFB_SIMPLE */ From 38e549484675b6eaf20bbae4bad1eb33452ab95e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:35 +0200 Subject: [PATCH 170/299] firmware: sysfb: Add sysfb_disable() helper function [ Upstream commit bde376e9de3c0bc55eedc8956b0f114c05531595 ] This can be used by subsystems to unregister a platform device registered by sysfb and also to disable future platform device registration in sysfb. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-3-javierm@redhat.com Signed-off-by: Sasha Levin --- .../driver-api/firmware/other_interfaces.rst | 6 +++ drivers/firmware/sysfb.c | 54 ++++++++++++++++--- include/linux/sysfb.h | 12 +++++ 3 files changed, 66 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst index b81794e0cfbb9..06ac89adaafba 100644 --- a/Documentation/driver-api/firmware/other_interfaces.rst +++ b/Documentation/driver-api/firmware/other_interfaces.rst @@ -13,6 +13,12 @@ EDD Interfaces .. kernel-doc:: drivers/firmware/edd.c :internal: +Generic System Framebuffers Interface +------------------------------------- + +.. kernel-doc:: drivers/firmware/sysfb.c + :export: + Intel Stratix10 SoC Service Layer --------------------------------- Some features of the Intel Stratix10 SoC require a level of privilege diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index b032f40a92de0..1f276f108cc93 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -34,21 +34,59 @@ #include #include +static struct platform_device *pd; +static DEFINE_MUTEX(disable_lock); +static bool disabled; + +static bool sysfb_unregister(void) +{ + if (IS_ERR_OR_NULL(pd)) + return false; + + platform_device_unregister(pd); + pd = NULL; + + return true; +} + +/** + * sysfb_disable() - disable the Generic System Framebuffers support + * + * This disables the registration of system framebuffer devices that match the + * generic drivers that make use of the system framebuffer set up by firmware. + * + * It also unregisters a device if this was already registered by sysfb_init(). + * + * Context: The function can sleep. A @disable_lock mutex is acquired to serialize + * against sysfb_init(), that registers a system framebuffer device. + */ +void sysfb_disable(void) +{ + mutex_lock(&disable_lock); + sysfb_unregister(); + disabled = true; + mutex_unlock(&disable_lock); +} +EXPORT_SYMBOL_GPL(sysfb_disable); + static __init int sysfb_init(void) { struct screen_info *si = &screen_info; struct simplefb_platform_data mode; - struct platform_device *pd; const char *name; bool compatible; - int ret; + int ret = 0; + + mutex_lock(&disable_lock); + if (disabled) + goto unlock_mutex; /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { pd = sysfb_create_simplefb(si, &mode); if (!IS_ERR(pd)) - return 0; + goto unlock_mutex; } /* if the FB is incompatible, create a legacy framebuffer device */ @@ -60,8 +98,10 @@ static __init int sysfb_init(void) name = "platform-framebuffer"; pd = platform_device_alloc(name, 0); - if (!pd) - return -ENOMEM; + if (!pd) { + ret = -ENOMEM; + goto unlock_mutex; + } sysfb_apply_efi_quirks(pd); @@ -73,9 +113,11 @@ static __init int sysfb_init(void) if (ret) goto err; - return 0; + goto unlock_mutex; err: platform_device_put(pd); +unlock_mutex: + mutex_unlock(&disable_lock); return ret; } diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 708152e9037bf..8ba8b5be55675 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -55,6 +55,18 @@ struct efifb_dmi_info { int flags; }; +#ifdef CONFIG_SYSFB + +void sysfb_disable(void); + +#else /* CONFIG_SYSFB */ + +static inline void sysfb_disable(void) +{ +} + +#endif /* CONFIG_SYSFB */ + #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; From 07186778cf645cc79e6913a28dadf445cd3e2439 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:36 +0200 Subject: [PATCH 171/299] fbdev: Disable sysfb device registration when removing conflicting FBs [ Upstream commit ee7a69aa38d87a3bbced7b8245c732c05ed0c6ec ] The platform devices registered by sysfb match with firmware-based DRM or fbdev drivers, that are used to have early graphics using a framebuffer provided by the system firmware. DRM or fbdev drivers later are probed and remove conflicting framebuffers, leading to these platform devices for generic drivers to be unregistered. But the current solution has a race, since the sysfb_init() function could be called after a DRM or fbdev driver is probed and request to unregister the devices for drivers with conflicting framebuffes. To prevent this, disable any future sysfb platform device registration by calling sysfb_disable(), if a driver requests to remove the conflicting framebuffers. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-4-javierm@redhat.com Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbmem.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 85de02d0d3aaa..643383d74edc7 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1787,6 +1788,17 @@ int remove_conflicting_framebuffers(struct apertures_struct *a, do_free = true; } + /* + * If a driver asked to unregister a platform device registered by + * sysfb, then can be assumed that this is a driver for a display + * that is set up by the system firmware and has a generic driver. + * + * Drivers for devices that don't have a generic driver will never + * ask for this, so let's assume that a real driver for the display + * was already probed and prevent sysfb to register devices later. + */ + sysfb_disable(); + mutex_lock(®istration_lock); do_remove_conflicting_framebuffers(a, name, primary); mutex_unlock(®istration_lock); From 3b2957fc09fe1ac7f07f40dd50dd5f93e3f3a7a2 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 29 Jun 2022 14:34:18 +0800 Subject: [PATCH 172/299] net: tipc: fix possible refcount leak in tipc_sk_create() [ Upstream commit 00aff3590fc0a73bddd3b743863c14e76fd35c0c ] Free sk in case tipc_sk_insert() fails. Signed-off-by: Hangyu Hua Reviewed-by: Tung Nguyen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 17f8c523e33b0..43509c7e90fc2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -502,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { + sk_free(sk); pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } From e84a77bc836160b6b85bbc002465d5aaea307807 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 27 Jun 2022 19:06:43 +0200 Subject: [PATCH 173/299] NFC: nxp-nci: don't print header length mismatch on i2c error [ Upstream commit 9577fc5fdc8b07b891709af6453545db405e24ad ] Don't print a misleading header length mismatch error if the i2c call returns an error. Instead just return the error code without any error message. Signed-off-by: Michael Walle Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/nxp-nci/i2c.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index e8f3b35afbee4..ae2ba08d8ac3f 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -122,7 +122,9 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN); r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len); - if (r != frame_len) { + if (r < 0) { + goto fw_read_exit_free_skb; + } else if (r != frame_len) { nfc_err(&client->dev, "Invalid frame length: %u (expected %zu)\n", r, frame_len); @@ -166,7 +168,9 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, return 0; r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); - if (r != header.plen) { + if (r < 0) { + goto nci_read_exit_free_skb; + } else if (r != header.plen) { nfc_err(&client->dev, "Invalid frame payload length: %u (expected %u)\n", r, header.plen); From 569d1c493070eca57425c1db1875157a37c452b7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 26 Jun 2022 12:24:51 +0300 Subject: [PATCH 174/299] nvme-tcp: always fail a request when sending it failed [ Upstream commit 41d07df7de841bfbc32725ce21d933ad358f2844 ] queue stoppage and inflight requests cancellation is fully fenced from io_work and thus failing a request from this context. Hence we don't need to try to guess from the socket retcode if this failure is because the queue is about to be torn down or not. We are perfectly safe to just fail it, the request will not be cancelled later on. This solves possible very long shutdown delays when the users issues a 'nvme disconnect-all' Reported-by: Daniel Wagner Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index ad3a2bf2f1e9b..e44d0570e6940 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1180,8 +1180,7 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) } else if (ret < 0) { dev_err(queue->ctrl->ctrl.device, "failed to send request %d\n", ret); - if (ret != -EPIPE && ret != -ECONNRESET) - nvme_tcp_fail_request(queue->request); + nvme_tcp_fail_request(queue->request); nvme_tcp_done_send_req(queue); } return ret; From a8ba4bd0f542a2bdf28751896ebe515e7117d0dc Mon Sep 17 00:00:00 2001 From: Ruozhu Li Date: Thu, 23 Jun 2022 14:45:39 +0800 Subject: [PATCH 175/299] nvme: fix regression when disconnect a recovering ctrl [ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ] We encountered a problem that the disconnect command hangs. After analyzing the log and stack, we found that the triggering process is as follows: CPU0 CPU1 nvme_rdma_error_recovery_work nvme_rdma_teardown_io_queues nvme_do_delete_ctrl nvme_stop_queues nvme_remove_namespaces --clear ctrl->namespaces nvme_start_queues --no ns in ctrl->namespaces nvme_ns_remove return(because ctrl is deleting) blk_freeze_queue blk_mq_freeze_queue_wait --wait for ns to unquiesce to clean infligt IO, hang forever This problem was not found in older kernels because we will flush err work in nvme_stop_ctrl before nvme_remove_namespaces.It does not seem to be modified for functional reasons, the patch can be revert to solve the problem. Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout") Signed-off-by: Ruozhu Li Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/rdma.c | 12 +++++++++--- drivers/nvme/host/tcp.c | 10 +++++++--- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0fef31c935de1..c9831daafbc61 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4519,6 +4519,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); + if (ctrl->ops->stop_ctrl) + ctrl->ops->stop_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a2b53ca633359..337ae1e3ad252 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -501,6 +501,7 @@ struct nvme_ctrl_ops { void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); }; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d9f19d9013139..5aef2b81dbec6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1048,6 +1048,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, } } +static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + + cancel_work_sync(&ctrl->err_work); + cancel_delayed_work_sync(&ctrl->reconnect_work); +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -2255,9 +2263,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&ctrl->err_work); - cancel_delayed_work_sync(&ctrl->reconnect_work); - nvme_rdma_teardown_io_queues(ctrl, shutdown); nvme_stop_admin_queue(&ctrl->ctrl); if (shutdown) @@ -2307,6 +2312,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_rdma_stop_ctrl, }; /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e44d0570e6940..1fb4f9b1621ea 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2193,9 +2193,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); - cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); - nvme_tcp_teardown_io_queues(ctrl, shutdown); nvme_stop_admin_queue(ctrl); if (shutdown) @@ -2235,6 +2232,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work) nvme_tcp_reconnect_or_remove(ctrl); } +static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) +{ + cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); + cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); +} + static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); @@ -2559,6 +2562,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_tcp_stop_ctrl, }; static bool From f22ddc8a5278d7fb6369a0aeb0d8775a0aefaaee Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Wed, 29 Jun 2022 15:55:50 +0800 Subject: [PATCH 176/299] net: sfp: fix memory leak in sfp_probe() [ Upstream commit 0a18d802d65cf662644fd1d369c86d84a5630652 ] sfp_probe() allocates a memory chunk from sfp with sfp_alloc(). When devm_add_action() fails, sfp is not freed, which leads to a memory leak. We should use devm_add_action_or_reset() instead of devm_add_action(). Signed-off-by: Jianglei Nie Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20220629075550.2152003-1-niejianglei2021@163.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/sfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 9a5d5a10560fb..e7b0e12cc75bf 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2516,7 +2516,7 @@ static int sfp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sfp); - err = devm_add_action(sfp->dev, sfp_cleanup, sfp); + err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp); if (err < 0) return err; From 55ad380852bcb7fe9b70935af0a7d2e8c51166af Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 4 Jun 2022 11:52:46 +0100 Subject: [PATCH 177/299] ASoC: ops: Fix off by one in range control validation [ Upstream commit 5871321fb4558c55bf9567052b618ff0be6b975e ] We currently report that range controls accept a range of 0..(max-min) but accept writes in the range 0..(max-min+1). Remove that extra +1. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220604105246.4055214-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index e693070f51fe8..d867f449d82db 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -526,7 +526,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, return -EINVAL; if (mc->platform_max && tmp > mc->platform_max) return -EINVAL; - if (tmp > mc->max - mc->min + 1) + if (tmp > mc->max - mc->min) return -EINVAL; if (invert) @@ -547,7 +547,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, return -EINVAL; if (mc->platform_max && tmp > mc->platform_max) return -EINVAL; - if (tmp > mc->max - mc->min + 1) + if (tmp > mc->max - mc->min) return -EINVAL; if (invert) From e162a24f1dd06c0dcae71f2565c9f3da2827b98e Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 21 Apr 2022 10:26:59 +0800 Subject: [PATCH 178/299] pinctrl: aspeed: Fix potential NULL dereference in aspeed_pinmux_set_mux() [ Upstream commit 84a85d3fef2e75b1fe9fc2af6f5267122555a1ed ] pdesc could be null but still dereference pdesc->name and it will lead to a null pointer access. So we move a null check before dereference. Signed-off-by: Haowen Bai Link: https://lore.kernel.org/r/1650508019-22554-1-git-send-email-baihaowen@meizu.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index c94e24aadf922..83d47ff1cea8f 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -236,11 +236,11 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, const struct aspeed_sig_expr **funcs; const struct aspeed_sig_expr ***prios; - pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name); - if (!pdesc) return -EINVAL; + pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name); + prios = pdesc->prios; if (!prios) From f3a2c0631302613f41fd8f7ce9882c633d66b9a5 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:46 -0500 Subject: [PATCH 179/299] ASoC: Realtek/Maxim SoundWire codecs: disable pm_runtime on remove [ Upstream commit ac63716da3070f8cb6baaba3a058a0c7f22aeb5b ] When binding/unbinding codec drivers, the following warnings are thrown: [ 107.266879] rt715-sdca sdw:3:025d:0714:01: Unbalanced pm_runtime_enable! [ 306.879700] rt711-sdca sdw:0:025d:0711:01: Unbalanced pm_runtime_enable! Add a remove callback for all Realtek/Maxim SoundWire codecs and remove this warning. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/max98373-sdw.c | 12 +++++++++++- sound/soc/codecs/rt1308-sdw.c | 11 +++++++++++ sound/soc/codecs/rt1316-sdw.c | 11 +++++++++++ sound/soc/codecs/rt5682-sdw.c | 5 ++++- sound/soc/codecs/rt700-sdw.c | 6 +++++- sound/soc/codecs/rt711-sdca-sdw.c | 6 +++++- sound/soc/codecs/rt711-sdw.c | 6 +++++- sound/soc/codecs/rt715-sdca-sdw.c | 12 ++++++++++++ sound/soc/codecs/rt715-sdw.c | 12 ++++++++++++ 9 files changed, 76 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/max98373-sdw.c b/sound/soc/codecs/max98373-sdw.c index f47e956d4f55a..97b64477dde67 100644 --- a/sound/soc/codecs/max98373-sdw.c +++ b/sound/soc/codecs/max98373-sdw.c @@ -862,6 +862,16 @@ static int max98373_sdw_probe(struct sdw_slave *slave, return max98373_init(slave, regmap); } +static int max98373_sdw_remove(struct sdw_slave *slave) +{ + struct max98373_priv *max98373 = dev_get_drvdata(&slave->dev); + + if (max98373->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + #if defined(CONFIG_OF) static const struct of_device_id max98373_of_match[] = { { .compatible = "maxim,max98373", }, @@ -893,7 +903,7 @@ static struct sdw_driver max98373_sdw_driver = { .pm = &max98373_pm, }, .probe = max98373_sdw_probe, - .remove = NULL, + .remove = max98373_sdw_remove, .ops = &max98373_slave_ops, .id_table = max98373_id, }; diff --git a/sound/soc/codecs/rt1308-sdw.c b/sound/soc/codecs/rt1308-sdw.c index 1ef836a68a56f..e42a63ee07f4d 100644 --- a/sound/soc/codecs/rt1308-sdw.c +++ b/sound/soc/codecs/rt1308-sdw.c @@ -690,6 +690,16 @@ static int rt1308_sdw_probe(struct sdw_slave *slave, return 0; } +static int rt1308_sdw_remove(struct sdw_slave *slave) +{ + struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(&slave->dev); + + if (rt1308->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + static const struct sdw_device_id rt1308_id[] = { SDW_SLAVE_ENTRY_EXT(0x025d, 0x1308, 0x2, 0, 0), {}, @@ -749,6 +759,7 @@ static struct sdw_driver rt1308_sdw_driver = { .pm = &rt1308_pm, }, .probe = rt1308_sdw_probe, + .remove = rt1308_sdw_remove, .ops = &rt1308_slave_ops, .id_table = rt1308_id, }; diff --git a/sound/soc/codecs/rt1316-sdw.c b/sound/soc/codecs/rt1316-sdw.c index c66d7b20cb4dd..1e04aa8ab1666 100644 --- a/sound/soc/codecs/rt1316-sdw.c +++ b/sound/soc/codecs/rt1316-sdw.c @@ -675,6 +675,16 @@ static int rt1316_sdw_probe(struct sdw_slave *slave, return rt1316_sdw_init(&slave->dev, regmap, slave); } +static int rt1316_sdw_remove(struct sdw_slave *slave) +{ + struct rt1316_sdw_priv *rt1316 = dev_get_drvdata(&slave->dev); + + if (rt1316->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + static const struct sdw_device_id rt1316_id[] = { SDW_SLAVE_ENTRY_EXT(0x025d, 0x1316, 0x3, 0x1, 0), {}, @@ -734,6 +744,7 @@ static struct sdw_driver rt1316_sdw_driver = { .pm = &rt1316_pm, }, .probe = rt1316_sdw_probe, + .remove = rt1316_sdw_remove, .ops = &rt1316_slave_ops, .id_table = rt1316_id, }; diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c index 248257a2e4e0f..f04e18c32489d 100644 --- a/sound/soc/codecs/rt5682-sdw.c +++ b/sound/soc/codecs/rt5682-sdw.c @@ -719,9 +719,12 @@ static int rt5682_sdw_remove(struct sdw_slave *slave) { struct rt5682_priv *rt5682 = dev_get_drvdata(&slave->dev); - if (rt5682 && rt5682->hw_init) + if (rt5682->hw_init) cancel_delayed_work_sync(&rt5682->jack_detect_work); + if (rt5682->first_hw_init) + pm_runtime_disable(&slave->dev); + return 0; } diff --git a/sound/soc/codecs/rt700-sdw.c b/sound/soc/codecs/rt700-sdw.c index bda5948996642..f7439e40ca8b5 100644 --- a/sound/soc/codecs/rt700-sdw.c +++ b/sound/soc/codecs/rt700-sdw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "rt700.h" @@ -463,11 +464,14 @@ static int rt700_sdw_remove(struct sdw_slave *slave) { struct rt700_priv *rt700 = dev_get_drvdata(&slave->dev); - if (rt700 && rt700->hw_init) { + if (rt700->hw_init) { cancel_delayed_work_sync(&rt700->jack_detect_work); cancel_delayed_work_sync(&rt700->jack_btn_check_work); } + if (rt700->first_hw_init) + pm_runtime_disable(&slave->dev); + return 0; } diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index aaf5af153d3fe..c722a2b0041f7 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "rt711-sdca.h" #include "rt711-sdca-sdw.h" @@ -364,11 +365,14 @@ static int rt711_sdca_sdw_remove(struct sdw_slave *slave) { struct rt711_sdca_priv *rt711 = dev_get_drvdata(&slave->dev); - if (rt711 && rt711->hw_init) { + if (rt711->hw_init) { cancel_delayed_work_sync(&rt711->jack_detect_work); cancel_delayed_work_sync(&rt711->jack_btn_check_work); } + if (rt711->first_hw_init) + pm_runtime_disable(&slave->dev); + return 0; } diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index bda2cc9439c98..f49c94baa37c6 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "rt711.h" @@ -464,12 +465,15 @@ static int rt711_sdw_remove(struct sdw_slave *slave) { struct rt711_priv *rt711 = dev_get_drvdata(&slave->dev); - if (rt711 && rt711->hw_init) { + if (rt711->hw_init) { cancel_delayed_work_sync(&rt711->jack_detect_work); cancel_delayed_work_sync(&rt711->jack_btn_check_work); cancel_work_sync(&rt711->calibration_work); } + if (rt711->first_hw_init) + pm_runtime_disable(&slave->dev); + return 0; } diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c index a5c673f43d824..0f4354eafef25 100644 --- a/sound/soc/codecs/rt715-sdca-sdw.c +++ b/sound/soc/codecs/rt715-sdca-sdw.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "rt715-sdca.h" @@ -195,6 +196,16 @@ static int rt715_sdca_sdw_probe(struct sdw_slave *slave, return rt715_sdca_init(&slave->dev, mbq_regmap, regmap, slave); } +static int rt715_sdca_sdw_remove(struct sdw_slave *slave) +{ + struct rt715_sdca_priv *rt715 = dev_get_drvdata(&slave->dev); + + if (rt715->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + static const struct sdw_device_id rt715_sdca_id[] = { SDW_SLAVE_ENTRY_EXT(0x025d, 0x715, 0x3, 0x1, 0), SDW_SLAVE_ENTRY_EXT(0x025d, 0x714, 0x3, 0x1, 0), @@ -269,6 +280,7 @@ static struct sdw_driver rt715_sdw_driver = { .pm = &rt715_pm, }, .probe = rt715_sdca_sdw_probe, + .remove = rt715_sdca_sdw_remove, .ops = &rt715_sdca_slave_ops, .id_table = rt715_sdca_id, }; diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index a7b21b03c08bb..b047bf87a100c 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -514,6 +515,16 @@ static int rt715_sdw_probe(struct sdw_slave *slave, return 0; } +static int rt715_sdw_remove(struct sdw_slave *slave) +{ + struct rt715_priv *rt715 = dev_get_drvdata(&slave->dev); + + if (rt715->first_hw_init) + pm_runtime_disable(&slave->dev); + + return 0; +} + static const struct sdw_device_id rt715_id[] = { SDW_SLAVE_ENTRY_EXT(0x025d, 0x714, 0x2, 0, 0), SDW_SLAVE_ENTRY_EXT(0x025d, 0x715, 0x2, 0, 0), @@ -575,6 +586,7 @@ static struct sdw_driver rt715_sdw_driver = { .pm = &rt715_pm, }, .probe = rt715_sdw_probe, + .remove = rt715_sdw_remove, .ops = &rt715_slave_ops, .id_table = rt715_id, }; From f85a8e86015f2074a79d30b9425eb8b66eaacf60 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:47 -0500 Subject: [PATCH 180/299] ASoC: rt711-sdca-sdw: fix calibrate mutex initialization [ Upstream commit ed0a7fb29c9fd4f53eeb37d1fe2354df7a038047 ] In codec driver bind/unbind test, the following warning is thrown: DEBUG_LOCKS_WARN_ON(lock->magic != lock) ... [ 699.182495] rt711_sdca_jack_init+0x1b/0x1d0 [snd_soc_rt711_sdca] [ 699.182498] rt711_sdca_set_jack_detect+0x3b/0x90 [snd_soc_rt711_sdca] [ 699.182500] snd_soc_component_set_jack+0x24/0x50 [snd_soc_core] A quick check in the code shows that the 'calibrate_mutex' used by this driver are not initialized at probe time. Moving the initialization to the probe removes the issue. BugLink: https://github.com/thesofproject/linux/issues/3644 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdca-sdw.c | 3 +++ sound/soc/codecs/rt711-sdca.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index c722a2b0041f7..a085b2f530aa1 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -373,6 +373,9 @@ static int rt711_sdca_sdw_remove(struct sdw_slave *slave) if (rt711->first_hw_init) pm_runtime_disable(&slave->dev); + mutex_destroy(&rt711->calibrate_mutex); + mutex_destroy(&rt711->disable_irq_lock); + return 0; } diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index 9d59e653b941b..8a0b74d3fa9e2 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -1414,6 +1414,7 @@ int rt711_sdca_init(struct device *dev, struct regmap *regmap, rt711->regmap = regmap; rt711->mbq_regmap = mbq_regmap; + mutex_init(&rt711->calibrate_mutex); mutex_init(&rt711->disable_irq_lock); /* @@ -1552,7 +1553,6 @@ int rt711_sdca_io_init(struct device *dev, struct sdw_slave *slave) rt711_sdca_jack_detect_handler); INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_sdca_btn_check_handler); - mutex_init(&rt711->calibrate_mutex); } /* calibration */ From 09bca0ffc95c50369f1345d80ecfaca51864126f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:48 -0500 Subject: [PATCH 181/299] ASoC: Intel: sof_sdw: handle errors on card registration [ Upstream commit fe154c4ff376bc31041c6441958a08243df09c99 ] If the card registration fails, typically because of deferred probes, the device properties added for headset codecs are not removed, which leads to kernel oopses in driver bind/unbind tests. We already clean-up the device properties when the card is removed, this code can be moved as a helper and called upon card registration errors. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/sof_sdw.c | 51 ++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 1f00679b42409..ad826ad82d51a 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -1398,6 +1398,33 @@ static struct snd_soc_card card_sof_sdw = { .late_probe = sof_sdw_card_late_probe, }; +static void mc_dailink_exit_loop(struct snd_soc_card *card) +{ + struct snd_soc_dai_link *link; + int ret; + int i, j; + + for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) { + if (!codec_info_list[i].exit) + continue; + /* + * We don't need to call .exit function if there is no matched + * dai link found. + */ + for_each_card_prelinks(card, j, link) { + if (!strcmp(link->codecs[0].dai_name, + codec_info_list[i].dai_name)) { + ret = codec_info_list[i].exit(card, link); + if (ret) + dev_warn(card->dev, + "codec exit failed %d\n", + ret); + break; + } + } + } +} + static int mc_probe(struct platform_device *pdev) { struct snd_soc_card *card = &card_sof_sdw; @@ -1462,6 +1489,7 @@ static int mc_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, card); if (ret) { dev_err(card->dev, "snd_soc_register_card failed %d\n", ret); + mc_dailink_exit_loop(card); return ret; } @@ -1473,29 +1501,8 @@ static int mc_probe(struct platform_device *pdev) static int mc_remove(struct platform_device *pdev) { struct snd_soc_card *card = platform_get_drvdata(pdev); - struct snd_soc_dai_link *link; - int ret; - int i, j; - for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) { - if (!codec_info_list[i].exit) - continue; - /* - * We don't need to call .exit function if there is no matched - * dai link found. - */ - for_each_card_prelinks(card, j, link) { - if (!strcmp(link->codecs[0].dai_name, - codec_info_list[i].dai_name)) { - ret = codec_info_list[i].exit(card, link); - if (ret) - dev_warn(&pdev->dev, - "codec exit failed %d\n", - ret); - break; - } - } - } + mc_dailink_exit_loop(card); return 0; } From 517fa7405a48c702591887bdf378963b0e1e1328 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:49 -0500 Subject: [PATCH 182/299] ASoC: rt711: fix calibrate mutex initialization [ Upstream commit 08bb5dc6ce02374169213cea772b1c297eaf32d5 ] Follow the same flow as rt711-sdca and initialize all mutexes at probe time. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-5-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdw.c | 3 +++ sound/soc/codecs/rt711.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index f49c94baa37c6..4fe68bcf2a7c2 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -474,6 +474,9 @@ static int rt711_sdw_remove(struct sdw_slave *slave) if (rt711->first_hw_init) pm_runtime_disable(&slave->dev); + mutex_destroy(&rt711->calibrate_mutex); + mutex_destroy(&rt711->disable_irq_lock); + return 0; } diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 9958067e80f11..db70d8073c0b6 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -1206,6 +1206,7 @@ int rt711_init(struct device *dev, struct regmap *sdw_regmap, rt711->sdw_regmap = sdw_regmap; rt711->regmap = regmap; + mutex_init(&rt711->calibrate_mutex); mutex_init(&rt711->disable_irq_lock); /* @@ -1320,7 +1321,6 @@ int rt711_io_init(struct device *dev, struct sdw_slave *slave) rt711_jack_detect_handler); INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_btn_check_handler); - mutex_init(&rt711->calibrate_mutex); INIT_WORK(&rt711->calibration_work, rt711_calibration_work); schedule_work(&rt711->calibration_work); } From 1d75b73ec6d6b705cca528b36d8315e43e8d7fa5 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:50 -0500 Subject: [PATCH 183/299] ASoC: rt7*-sdw: harden jack_detect_handler [ Upstream commit 0484271ab0ce50649329fa9dc23c50853c5b26a4 ] Realtek headset codec drivers typically check if the card is instantiated before proceeding with the jack detection. The rt700, rt711 and rt711-sdca are however missing a check on the card pointer, which can lead to NULL dereferences encountered in driver bind/unbind tests. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-6-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt700.c | 2 +- sound/soc/codecs/rt711-sdca.c | 2 +- sound/soc/codecs/rt711.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c index 360d61a36c354..b16fbde02986b 100644 --- a/sound/soc/codecs/rt700.c +++ b/sound/soc/codecs/rt700.c @@ -162,7 +162,7 @@ static void rt700_jack_detect_handler(struct work_struct *work) if (!rt700->hs_jack) return; - if (!rt700->component->card->instantiated) + if (!rt700->component->card || !rt700->component->card->instantiated) return; reg = RT700_VERB_GET_PIN_SENSE | RT700_HP_OUT; diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index 8a0b74d3fa9e2..83e4c4e4d1e26 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -294,7 +294,7 @@ static void rt711_sdca_jack_detect_handler(struct work_struct *work) if (!rt711->hs_jack) return; - if (!rt711->component->card->instantiated) + if (!rt711->component->card || !rt711->component->card->instantiated) return; /* SDW_SCP_SDCA_INT_SDCA_0 is used for jack detection */ diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index db70d8073c0b6..18a0de77c4777 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -242,7 +242,7 @@ static void rt711_jack_detect_handler(struct work_struct *work) if (!rt711->hs_jack) return; - if (!rt711->component->card->instantiated) + if (!rt711->component->card || !rt711->component->card->instantiated) return; if (pm_runtime_status_suspended(rt711->slave->dev.parent)) { From 11e35a6c6607216eeaf047ba85251f902f622a72 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 6 Jun 2022 15:37:51 -0500 Subject: [PATCH 184/299] ASoC: codecs: rt700/rt711/rt711-sdca: initialize workqueues in probe [ Upstream commit ba98d7d8b60ba410aa03834f6aa48fd3b2e68478 ] The workqueues are initialized in the io_init functions, which isn't quite right. In some tests, this leads to warnings throw from __queue_delayed_work() WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn); Move all the initializations to the probe functions. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20220606203752.144159-7-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt700.c | 12 +++++------- sound/soc/codecs/rt711-sdca.c | 10 +++------- sound/soc/codecs/rt711.c | 12 +++++------- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c index b16fbde02986b..3de3406d653e4 100644 --- a/sound/soc/codecs/rt700.c +++ b/sound/soc/codecs/rt700.c @@ -1124,6 +1124,11 @@ int rt700_init(struct device *dev, struct regmap *sdw_regmap, mutex_init(&rt700->disable_irq_lock); + INIT_DELAYED_WORK(&rt700->jack_detect_work, + rt700_jack_detect_handler); + INIT_DELAYED_WORK(&rt700->jack_btn_check_work, + rt700_btn_check_handler); + /* * Mark hw_init to false * HW init will be performed when device reports present @@ -1218,13 +1223,6 @@ int rt700_io_init(struct device *dev, struct sdw_slave *slave) /* Finish Initial Settings, set power to D3 */ regmap_write(rt700->regmap, RT700_SET_AUDIO_POWER_STATE, AC_PWRST_D3); - if (!rt700->first_hw_init) { - INIT_DELAYED_WORK(&rt700->jack_detect_work, - rt700_jack_detect_handler); - INIT_DELAYED_WORK(&rt700->jack_btn_check_work, - rt700_btn_check_handler); - } - /* * if set_jack callback occurred early than io_init, * we set up the jack detection function now diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index 83e4c4e4d1e26..dfe3c9299ebde 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -1417,6 +1417,9 @@ int rt711_sdca_init(struct device *dev, struct regmap *regmap, mutex_init(&rt711->calibrate_mutex); mutex_init(&rt711->disable_irq_lock); + INIT_DELAYED_WORK(&rt711->jack_detect_work, rt711_sdca_jack_detect_handler); + INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_sdca_btn_check_handler); + /* * Mark hw_init to false * HW init will be performed when device reports present @@ -1548,13 +1551,6 @@ int rt711_sdca_io_init(struct device *dev, struct sdw_slave *slave) rt711_sdca_index_update_bits(rt711, RT711_VENDOR_HDA_CTL, RT711_PUSH_BTN_INT_CTL0, 0x20, 0x00); - if (!rt711->first_hw_init) { - INIT_DELAYED_WORK(&rt711->jack_detect_work, - rt711_sdca_jack_detect_handler); - INIT_DELAYED_WORK(&rt711->jack_btn_check_work, - rt711_sdca_btn_check_handler); - } - /* calibration */ ret = rt711_sdca_calibration(rt711); if (ret < 0) diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 18a0de77c4777..9df800abfc2d8 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -1209,6 +1209,10 @@ int rt711_init(struct device *dev, struct regmap *sdw_regmap, mutex_init(&rt711->calibrate_mutex); mutex_init(&rt711->disable_irq_lock); + INIT_DELAYED_WORK(&rt711->jack_detect_work, rt711_jack_detect_handler); + INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_btn_check_handler); + INIT_WORK(&rt711->calibration_work, rt711_calibration_work); + /* * Mark hw_init to false * HW init will be performed when device reports present @@ -1316,14 +1320,8 @@ int rt711_io_init(struct device *dev, struct sdw_slave *slave) if (rt711->first_hw_init) rt711_calibration(rt711); - else { - INIT_DELAYED_WORK(&rt711->jack_detect_work, - rt711_jack_detect_handler); - INIT_DELAYED_WORK(&rt711->jack_btn_check_work, - rt711_btn_check_handler); - INIT_WORK(&rt711->calibration_work, rt711_calibration_work); + else schedule_work(&rt711->calibration_work); - } /* * if set_jack callback occurred early than io_init, From d275601a18cf0d98e57acf370221566477f95eb5 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 9 Jun 2022 11:59:47 +0300 Subject: [PATCH 185/299] ASoC: SOF: Intel: hda-dsp: Expose hda_dsp_core_power_up() [ Upstream commit 08f8a93198e300dff9649bbae424cd805d313326 ] The hda_dsp_core_power_up() needs to be exposed so that it can be used in hda-loader.c to correct the boot flow. The first step must not unstall the core, it should only power up the core(s). Add sanity check for the core_mask while exposing it to be safe. Complements: 2a68ff846164 ("ASoC: SOF: Intel: hda: Revisit IMR boot sequence") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20220609085949.29062-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/intel/hda-dsp.c | 10 +++++++++- sound/soc/sof/intel/hda.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 8ddde60c56b3f..68a8074c956a7 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -181,12 +181,20 @@ int hda_dsp_core_run(struct snd_sof_dev *sdev, unsigned int core_mask) * Power Management. */ -static int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask) +int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask) { + struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; + const struct sof_intel_dsp_desc *chip = hda->desc; unsigned int cpa; u32 adspcs; int ret; + /* restrict core_mask to host managed cores mask */ + core_mask &= chip->host_managed_cores_mask; + /* return if core_mask is not valid */ + if (!core_mask) + return 0; + /* update bits */ snd_sof_dsp_update_bits(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPCS, HDA_DSP_ADSPCS_SPA_MASK(core_mask), diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index 196494ba1245b..db066d094afa9 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -490,6 +490,7 @@ struct sof_intel_hda_stream { */ int hda_dsp_probe(struct snd_sof_dev *sdev); int hda_dsp_remove(struct snd_sof_dev *sdev); +int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask); int hda_dsp_core_run(struct snd_sof_dev *sdev, unsigned int core_mask); int hda_dsp_enable_core(struct snd_sof_dev *sdev, unsigned int core_mask); int hda_dsp_core_reset_power_down(struct snd_sof_dev *sdev, From 71d199d622c4c9bdf864a9b1f26218efe85613e1 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 9 Jun 2022 11:59:48 +0300 Subject: [PATCH 186/299] ASoC: SOF: Intel: hda-loader: Make sure that the fw load sequence is followed [ Upstream commit c31691e0d126ec5d60d2b6b03f699c11b613b219 ] The hda_dsp_enable_core() is powering up _and_ unstall the core in one call while the first step of the firmware loading must not unstall the core. The core can be unstalled only after the set cpb_cfp and the configuration of the IPC register for the ROM_CONTROL message. Complements: 2a68ff846164 ("ASoC: SOF: Intel: hda: Revisit IMR boot sequence") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20220609085949.29062-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/intel/hda-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 2ac5d9d0719bc..9f624a84182be 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -112,7 +112,7 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) int ret; /* step 1: power up corex */ - ret = hda_dsp_enable_core(sdev, chip->host_managed_cores_mask); + ret = hda_dsp_core_power_up(sdev, chip->host_managed_cores_mask); if (ret < 0) { if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, "error: dsp core 0/1 power up failed\n"); From 79096060364c41b5627facc431c851f1367eb03e Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 9 Jun 2022 11:59:49 +0300 Subject: [PATCH 187/299] ASoC: SOF: Intel: hda-loader: Clarify the cl_dsp_init() flow [ Upstream commit bbfef046c6613404c01aeb9e9928bebb78dd327a ] Update the comment for the cl_dsp_init() to clarify what is done by the function and use the chip->init_core_mask instead of BIT(0) when unstalling/running the init core. Complements: 2a68ff846164 ("ASoC: SOF: Intel: hda: Revisit IMR boot sequence") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20220609085949.29062-4-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/intel/hda-loader.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 9f624a84182be..88d23924e1bf2 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -97,9 +97,9 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig } /* - * first boot sequence has some extra steps. core 0 waits for power - * status on core 1, so power up core 1 also momentarily, keep it in - * reset/stall and then turn it off + * first boot sequence has some extra steps. + * power on all host managed cores and only unstall/run the boot core to boot the + * DSP then turn off all non boot cores (if any) is powered on. */ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) { @@ -127,7 +127,7 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) ((stream_tag - 1) << 9))); /* step 3: unset core 0 reset state & unstall/run core 0 */ - ret = hda_dsp_core_run(sdev, BIT(0)); + ret = hda_dsp_core_run(sdev, chip->init_core_mask); if (ret < 0) { if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, From e58a32e25ef12810a4684ce31f5d48645fcedec5 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Mon, 6 Jun 2022 19:22:26 +0400 Subject: [PATCH 188/299] ASoC: wcd9335: Remove RX channel from old list before adding it to a new one [ Upstream commit be6dd72edb216f20fc80e426ece9fe9b8aabf033 ] Currently in slim_rx_mux_put, an RX channel gets added to a new list even if it is already in one. This can mess up links and make either it, the new list head, or both, get linked to the wrong entries. This can cause an entry to link to itself which in turn ends up making list_for_each_entry in other functions loop infinitely. To avoid issues, always remove the RX channel from any list it's in before adding it to a new list. Signed-off-by: Yassine Oudjana Link: https://lore.kernel.org/r/20220606152226.149164-1-y.oudjana@protonmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wcd9335.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 1e60db4056ada..12be043ee9a34 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -1289,9 +1289,12 @@ static int slim_rx_mux_put(struct snd_kcontrol *kc, wcd->rx_port_value[port_id] = ucontrol->value.enumerated.item[0]; + /* Remove channel from any list it's in before adding it to a new one */ + list_del_init(&wcd->rx_chs[port_id].list); + switch (wcd->rx_port_value[port_id]) { case 0: - list_del_init(&wcd->rx_chs[port_id].list); + /* Channel already removed from lists. Nothing to do here */ break; case 1: list_add_tail(&wcd->rx_chs[port_id].list, From 4d93ab0d2f6a82661d0bd9b79cc1a557f7a73353 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 3 Jun 2022 14:46:09 +0200 Subject: [PATCH 189/299] ASoC: wcd9335: Fix spurious event generation [ Upstream commit a7786cbae4b2732815da98efa39df96746b5bd0d ] The slimbus mux put operation unconditionally reports a change in value which means that spurious events are generated. Fix this by exiting early in that case. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220603124609.4024666-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wcd9335.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 12be043ee9a34..aa685980a97b6 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -1287,6 +1287,9 @@ static int slim_rx_mux_put(struct snd_kcontrol *kc, struct snd_soc_dapm_update *update = NULL; u32 port_id = w->shift; + if (wcd->rx_port_value[port_id] == ucontrol->value.enumerated.item[0]) + return 0; + wcd->rx_port_value[port_id] = ucontrol->value.enumerated.item[0]; /* Remove channel from any list it's in before adding it to a new one */ From f3071f04cf125c1d4fb0698b524e1c030c1b0f17 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 3 Jun 2022 14:25:26 +0200 Subject: [PATCH 190/299] ASoC: wcd938x: Fix event generation for some controls [ Upstream commit 10e7ff0047921e32b919ecee7be706dd33c107f8 ] Currently wcd938x_*_put() unconditionally report that the value of the control changed, resulting in spurious events being generated. Return 0 in that case instead as we should. There is still an issue in the compander control which is a bit more complex. Signed-off-by: Mark Brown Reported-by: kernel test robot Link: https://lore.kernel.org/r/20220603122526.3914942-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wcd938x.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 898b2887fa637..088cfda767cc8 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -2519,6 +2519,9 @@ static int wcd938x_tx_mode_put(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; int path = e->shift_l; + if (wcd938x->tx_mode[path] == ucontrol->value.enumerated.item[0]) + return 0; + wcd938x->tx_mode[path] = ucontrol->value.enumerated.item[0]; return 1; @@ -2541,6 +2544,9 @@ static int wcd938x_rx_hph_mode_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); + if (wcd938x->hph_mode == ucontrol->value.enumerated.item[0]) + return 0; + wcd938x->hph_mode = ucontrol->value.enumerated.item[0]; return 1; @@ -2632,6 +2638,9 @@ static int wcd938x_ldoh_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); + if (wcd938x->ldoh == ucontrol->value.integer.value[0]) + return 0; + wcd938x->ldoh = ucontrol->value.integer.value[0]; return 1; @@ -2654,6 +2663,9 @@ static int wcd938x_bcs_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); + if (wcd938x->bcs_dis == ucontrol->value.integer.value[0]) + return 0; + wcd938x->bcs_dis = ucontrol->value.integer.value[0]; return 1; From 03f97b638dd9a8eb8c7e63b243bcef12c99264a9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Jun 2022 17:56:52 +0200 Subject: [PATCH 191/299] ASoC: Intel: bytcr_wm5102: Fix GPIO related probe-ordering problem [ Upstream commit 4e07479eab8a044cc9542414ccb4aeb8eb033bde ] The "wlf,spkvdd-ena" GPIO needed by the bytcr_wm5102 driver is made available through a gpio-lookup table. This gpio-lookup table is registered by drivers/mfd/arizona-spi.c, which may get probed after the bytcr_wm5102 driver. If the gpio-lookup table has not registered yet then the gpiod_get() will return -ENOENT. Treat -ENOENT as -EPROBE_DEFER to still keep things working in this case. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20220612155652.107310-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/bytcr_wm5102.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c index 8d8e96e3cd2df..f6d0cef1b28c3 100644 --- a/sound/soc/intel/boards/bytcr_wm5102.c +++ b/sound/soc/intel/boards/bytcr_wm5102.c @@ -421,8 +421,17 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev) priv->spkvdd_en_gpio = gpiod_get(codec_dev, "wlf,spkvdd-ena", GPIOD_OUT_LOW); put_device(codec_dev); - if (IS_ERR(priv->spkvdd_en_gpio)) - return dev_err_probe(dev, PTR_ERR(priv->spkvdd_en_gpio), "getting spkvdd-GPIO\n"); + if (IS_ERR(priv->spkvdd_en_gpio)) { + ret = PTR_ERR(priv->spkvdd_en_gpio); + /* + * The spkvdd gpio-lookup is registered by: drivers/mfd/arizona-spi.c, + * so -ENOENT means that arizona-spi hasn't probed yet. + */ + if (ret == -ENOENT) + ret = -EPROBE_DEFER; + + return dev_err_probe(dev, ret, "getting spkvdd-GPIO\n"); + } /* override platform name, if required */ byt_wm5102_card.dev = dev; From 89c2c0dd28250e1def6fdf37af311a02edb4d702 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 21 Jun 2022 11:20:38 +0100 Subject: [PATCH 192/299] ASoC: wm_adsp: Fix event for preloader [ Upstream commit 9896c029f0df628c6cb108253d09b1d61f1d4a88 ] The preloader controls on ADSP should return a value of 1 if the preloader value was changed, update to correct this. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220621102041.1713504-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 9cfd4f18493fb..d3ecff3bdef29 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -997,7 +997,7 @@ int wm_adsp2_preloader_put(struct snd_kcontrol *kcontrol, snd_soc_dapm_sync(dapm); } - return 0; + return 1; } EXPORT_SYMBOL_GPL(wm_adsp2_preloader_put); From 83431e500b2c5d5c95712053c1cea2f462a4366e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 21 Jun 2022 11:20:39 +0100 Subject: [PATCH 193/299] ASoC: wm5110: Fix DRE control [ Upstream commit 0bc0ae9a5938d512fd5d44f11c9c04892dcf4961 ] The DRE controls on wm5110 should return a value of 1 if the DRE state is actually changed, update to fix this. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220621102041.1713504-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm5110.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 4973ba1ed7791..4ab7a672f8de8 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -413,6 +413,7 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, unsigned int rnew = (!!ucontrol->value.integer.value[1]) << mc->rshift; unsigned int lold, rold; unsigned int lena, rena; + bool change = false; int ret; snd_soc_dapm_mutex_lock(dapm); @@ -440,8 +441,8 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, goto err; } - ret = regmap_update_bits(arizona->regmap, ARIZONA_DRE_ENABLE, - mask, lnew | rnew); + ret = regmap_update_bits_check(arizona->regmap, ARIZONA_DRE_ENABLE, + mask, lnew | rnew, &change); if (ret) { dev_err(arizona->dev, "Failed to set DRE: %d\n", ret); goto err; @@ -454,6 +455,9 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, if (!rnew && rold) wm5110_clear_pga_volume(arizona, mc->rshift); + if (change) + ret = 1; + err: snd_soc_dapm_mutex_unlock(dapm); From f3f5f2a9e2bbccc6f4e3a0d2e588a02703cdfd7f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 21 Jun 2022 11:20:40 +0100 Subject: [PATCH 194/299] ASoC: cs35l41: Correct some control names [ Upstream commit c6a5f22f9b4fd5f21414be690ce34046d9712f05 ] Various boolean controls on cs35l41 are missing the required "Switch" in the name, add these. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220621102041.1713504-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs35l41.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index 6b784a62df0ce..20c76a53a5080 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -392,7 +392,7 @@ static const struct snd_kcontrol_new cs35l41_aud_controls[] = { SOC_SINGLE("HW Noise Gate Enable", CS35L41_NG_CFG, 8, 63, 0), SOC_SINGLE("HW Noise Gate Delay", CS35L41_NG_CFG, 4, 7, 0), SOC_SINGLE("HW Noise Gate Threshold", CS35L41_NG_CFG, 0, 7, 0), - SOC_SINGLE("Aux Noise Gate CH1 Enable", + SOC_SINGLE("Aux Noise Gate CH1 Switch", CS35L41_MIXER_NGATE_CH1_CFG, 16, 1, 0), SOC_SINGLE("Aux Noise Gate CH1 Entry Delay", CS35L41_MIXER_NGATE_CH1_CFG, 8, 15, 0), @@ -400,15 +400,15 @@ static const struct snd_kcontrol_new cs35l41_aud_controls[] = { CS35L41_MIXER_NGATE_CH1_CFG, 0, 7, 0), SOC_SINGLE("Aux Noise Gate CH2 Entry Delay", CS35L41_MIXER_NGATE_CH2_CFG, 8, 15, 0), - SOC_SINGLE("Aux Noise Gate CH2 Enable", + SOC_SINGLE("Aux Noise Gate CH2 Switch", CS35L41_MIXER_NGATE_CH2_CFG, 16, 1, 0), SOC_SINGLE("Aux Noise Gate CH2 Threshold", CS35L41_MIXER_NGATE_CH2_CFG, 0, 7, 0), - SOC_SINGLE("SCLK Force", CS35L41_SP_FORMAT, CS35L41_SCLK_FRC_SHIFT, 1, 0), - SOC_SINGLE("LRCLK Force", CS35L41_SP_FORMAT, CS35L41_LRCLK_FRC_SHIFT, 1, 0), - SOC_SINGLE("Invert Class D", CS35L41_AMP_DIG_VOL_CTRL, + SOC_SINGLE("SCLK Force Switch", CS35L41_SP_FORMAT, CS35L41_SCLK_FRC_SHIFT, 1, 0), + SOC_SINGLE("LRCLK Force Switch", CS35L41_SP_FORMAT, CS35L41_LRCLK_FRC_SHIFT, 1, 0), + SOC_SINGLE("Invert Class D Switch", CS35L41_AMP_DIG_VOL_CTRL, CS35L41_AMP_INV_PCM_SHIFT, 1, 0), - SOC_SINGLE("Amp Gain ZC", CS35L41_AMP_GAIN_CTRL, + SOC_SINGLE("Amp Gain ZC Switch", CS35L41_AMP_GAIN_CTRL, CS35L41_AMP_GAIN_ZC_SHIFT, 1, 0), WM_ADSP2_PRELOAD_SWITCH("DSP1", 1), WM_ADSP_FW_CONTROL("DSP1", 0), From 7bb71133cae88d3003a3490b97864af76533072b Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Tue, 21 Jun 2022 17:07:19 +0800 Subject: [PATCH 195/299] ASoC: rt711-sdca: fix kernel NULL pointer dereference when IO error [ Upstream commit 1df793d479bef546569fc2e409ff8bb3f0fb8e99 ] The initial settings will be written before the codec probe function. But, the rt711->component doesn't be assigned yet. If IO error happened during initial settings operations, it will cause the kernel panic. This patch changed component->dev to slave->dev to fix this issue. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20220621090719.30558-1-shumingf@realtek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt711-sdca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index dfe3c9299ebde..5ad53bbc85284 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -34,7 +34,7 @@ static int rt711_sdca_index_write(struct rt711_sdca_priv *rt711, ret = regmap_write(regmap, addr, value); if (ret < 0) - dev_err(rt711->component->dev, + dev_err(&rt711->slave->dev, "Failed to set private value: %06x <= %04x ret=%d\n", addr, value, ret); @@ -50,7 +50,7 @@ static int rt711_sdca_index_read(struct rt711_sdca_priv *rt711, ret = regmap_read(regmap, addr, value); if (ret < 0) - dev_err(rt711->component->dev, + dev_err(&rt711->slave->dev, "Failed to get private value: %06x => %04x ret=%d\n", addr, *value, ret); From ed5ddd50ffb5dbbb51ec4b525baf069991eebb4b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:15 +0100 Subject: [PATCH 196/299] ASoC: dapm: Initialise kcontrol data for mux/demux controls [ Upstream commit 11d7a12f7f50baa5af9090b131c9b03af59503e7 ] DAPM keeps a copy of the current value of mux/demux controls, however this value is only initialised in the case of autodisable controls. This leads to false notification events when first modifying a DAPM kcontrol that has a non-zero default. Autodisable controls are left as they are, since they already initialise the value, and there would be more work required to support autodisable muxes where the first option isn't disabled and/or that isn't the default. Technically this issue could affect mixer/switch elements as well, although not on any of the devices I am currently running. There is also a little more work to do to address the issue there due to that side supporting stereo controls, so that has not been tackled in this patch. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-dapm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 869c76506b669..a8e842e02cdc2 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -62,6 +62,8 @@ struct snd_soc_dapm_widget * snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_widget *widget); +static unsigned int soc_dapm_read(struct snd_soc_dapm_context *dapm, int reg); + /* dapm power sequences - make this per codec in the future */ static int dapm_up_seq[] = { [snd_soc_dapm_pre] = 1, @@ -442,6 +444,9 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, snd_soc_dapm_add_path(widget->dapm, data->widget, widget, NULL, NULL); + } else if (e->reg != SND_SOC_NOPM) { + data->value = soc_dapm_read(widget->dapm, e->reg) & + (e->mask << e->shift_l); } break; default: From 14937a06ab710601906fcc71fe73fcd5f20e376c Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:16 +0100 Subject: [PATCH 197/299] ASoC: cs35l41: Add ASP TX3/4 source to register patch [ Upstream commit 46b0d050c8c7df6dfb2c376aaa149bf2cfc5ca3e ] The mixer controls for ASP TX3/4 are set to values that are not included in their enumeration control. This will cause spurious event notifications when the controls are first changed, as the register value changes whilst the actual visible enumeration value does not. Use the register patch to set them to a known value, zero, which equates to zero fill, thereby avoiding the spurious notifications. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs35l41-lib.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs35l41-lib.c b/sound/soc/codecs/cs35l41-lib.c index 17cf782f39af6..538b5c4d3abf8 100644 --- a/sound/soc/codecs/cs35l41-lib.c +++ b/sound/soc/codecs/cs35l41-lib.c @@ -36,8 +36,8 @@ static const struct reg_default cs35l41_reg[] = { { CS35L41_DAC_PCM1_SRC, 0x00000008 }, { CS35L41_ASP_TX1_SRC, 0x00000018 }, { CS35L41_ASP_TX2_SRC, 0x00000019 }, - { CS35L41_ASP_TX3_SRC, 0x00000020 }, - { CS35L41_ASP_TX4_SRC, 0x00000021 }, + { CS35L41_ASP_TX3_SRC, 0x00000000 }, + { CS35L41_ASP_TX4_SRC, 0x00000000 }, { CS35L41_DSP1_RX1_SRC, 0x00000008 }, { CS35L41_DSP1_RX2_SRC, 0x00000009 }, { CS35L41_DSP1_RX3_SRC, 0x00000018 }, @@ -643,6 +643,8 @@ static const struct reg_sequence cs35l41_reva0_errata_patch[] = { { CS35L41_DSP1_XM_ACCEL_PL0_PRI, 0x00000000 }, { CS35L41_PWR_CTRL2, 0x00000000 }, { CS35L41_AMP_GAIN_CTRL, 0x00000000 }, + { CS35L41_ASP_TX3_SRC, 0x00000000 }, + { CS35L41_ASP_TX4_SRC, 0x00000000 }, }; static const struct reg_sequence cs35l41_revb0_errata_patch[] = { @@ -654,6 +656,8 @@ static const struct reg_sequence cs35l41_revb0_errata_patch[] = { { CS35L41_DSP1_XM_ACCEL_PL0_PRI, 0x00000000 }, { CS35L41_PWR_CTRL2, 0x00000000 }, { CS35L41_AMP_GAIN_CTRL, 0x00000000 }, + { CS35L41_ASP_TX3_SRC, 0x00000000 }, + { CS35L41_ASP_TX4_SRC, 0x00000000 }, }; static const struct reg_sequence cs35l41_revb2_errata_patch[] = { @@ -665,6 +669,8 @@ static const struct reg_sequence cs35l41_revb2_errata_patch[] = { { CS35L41_DSP1_XM_ACCEL_PL0_PRI, 0x00000000 }, { CS35L41_PWR_CTRL2, 0x00000000 }, { CS35L41_AMP_GAIN_CTRL, 0x00000000 }, + { CS35L41_ASP_TX3_SRC, 0x00000000 }, + { CS35L41_ASP_TX4_SRC, 0x00000000 }, }; static const struct cs35l41_otp_map_element_t cs35l41_otp_map_map[] = { From 2308fdb5191ad292d626502a9c99ca7da8bf3b90 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:17 +0100 Subject: [PATCH 198/299] ASoC: cs47l15: Fix event generation for low power mux control [ Upstream commit 7f103af4a10f375b9b346b4d0b730f6a66b8c451 ] cs47l15_in1_adc_put always returns zero regardless of if the control value was updated. This results in missing notifications to user-space of the control change. Update the handling to return 1 when the value is changed. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs47l15.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs47l15.c b/sound/soc/codecs/cs47l15.c index 391fd7da331fd..1c7d52bef8935 100644 --- a/sound/soc/codecs/cs47l15.c +++ b/sound/soc/codecs/cs47l15.c @@ -122,6 +122,9 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol, snd_soc_kcontrol_component(kcontrol); struct cs47l15 *cs47l15 = snd_soc_component_get_drvdata(component); + if (!!ucontrol->value.integer.value[0] == cs47l15->in1_lp_mode) + return 0; + switch (ucontrol->value.integer.value[0]) { case 0: /* Set IN1 to normal mode */ @@ -150,7 +153,7 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol, break; } - return 0; + return 1; } static const struct snd_kcontrol_new cs47l15_snd_controls[] = { From 0303f460b85680f8d49ddfd9b7dbc5876e24f5c7 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:18 +0100 Subject: [PATCH 199/299] ASoC: madera: Fix event generation for OUT1 demux [ Upstream commit e3cabbef3db8269207a6b8808f510137669f8deb ] madera_out1_demux_put returns the value of snd_soc_dapm_mux_update_power, which returns a 1 if a path was found for the kcontrol. This is obviously different to the expected return a 1 if the control was updated value. This results in spurious notifications to user-space. Update the handling to only return a 1 when the value is changed. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-4-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/madera.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index 272041c6236a9..8095a87117cf8 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -618,7 +618,13 @@ int madera_out1_demux_put(struct snd_kcontrol *kcontrol, end: snd_soc_dapm_mutex_unlock(dapm); - return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + if (ret < 0) { + dev_err(madera->dev, "Failed to update demux power state: %d\n", ret); + return ret; + } + + return change; } EXPORT_SYMBOL_GPL(madera_out1_demux_put); From 7f97af49f6013c684192f63720d6fa81c974874f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jun 2022 11:51:19 +0100 Subject: [PATCH 200/299] ASoC: madera: Fix event generation for rate controls [ Upstream commit 980555e95f7cabdc9c80a07107622b097ba23703 ] madera_adsp_rate_put always returns zero regardless of if the control value was updated. This results in missing notifications to user-space of the control change. Update the handling to return 1 when the value is changed. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220623105120.1981154-5-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/madera.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index 8095a87117cf8..b9f19fbd29114 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -899,7 +899,7 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; const int adsp_num = e->shift_l; const unsigned int item = ucontrol->value.enumerated.item[0]; - int ret; + int ret = 0; if (item >= e->items) return -EINVAL; @@ -916,10 +916,10 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, "Cannot change '%s' while in use by active audio paths\n", kcontrol->id.name); ret = -EBUSY; - } else { + } else if (priv->adsp_rate_cache[adsp_num] != e->values[item]) { /* Volatile register so defer until the codec is powered up */ priv->adsp_rate_cache[adsp_num] = e->values[item]; - ret = 0; + ret = 1; } mutex_unlock(&priv->rate_lock); From d6ef5aca6fe46c48f4d3b3c53d1fd9b5b86c07b4 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Wed, 15 Jun 2022 08:54:26 +0900 Subject: [PATCH 201/299] irqchip: or1k-pic: Undefine mask_ack for level triggered hardware [ Upstream commit 8520501346ed8d1c4a6dfa751cb57328a9c843f1 ] The mask_ack operation clears the interrupt by writing to the PICSR register. This we don't want for level triggered interrupt because it does not actually clear the interrupt on the source hardware. This was causing issues in qemu with multi core setups where interrupts would continue to fire even though they had been cleared in PICSR. Just remove the mask_ack operation. Acked-by: Marc Zyngier Signed-off-by: Stafford Horne Signed-off-by: Sasha Levin --- drivers/irqchip/irq-or1k-pic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/irqchip/irq-or1k-pic.c b/drivers/irqchip/irq-or1k-pic.c index 49b47e7876448..f289ccd952914 100644 --- a/drivers/irqchip/irq-or1k-pic.c +++ b/drivers/irqchip/irq-or1k-pic.c @@ -66,7 +66,6 @@ static struct or1k_pic_dev or1k_pic_level = { .name = "or1k-PIC-level", .irq_unmask = or1k_pic_unmask, .irq_mask = or1k_pic_mask, - .irq_mask_ack = or1k_pic_mask_ack, }, .handle = handle_level_irq, .flags = IRQ_LEVEL | IRQ_NOPROBE, From 4dc6fad52c5ca66ded0c5c00b2a25dc6c1e6a793 Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Mon, 13 Jun 2022 11:18:54 +0800 Subject: [PATCH 202/299] pinctrl: imx: Add the zero base flag for imx93 [ Upstream commit fbc24ebc65507feb9728dc38197f90486148dda0 ] On i.MX93, the pin mux reg offset is from 0x0, so need to add the 'ZERO_OFFSET_VALID' flag to make sure the pin at mux offset 0 can be found. Signed-off-by: Jacky Bai Link: https://lore.kernel.org/r/20220613031854.1571357-1-ping.bai@nxp.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/freescale/pinctrl-imx93.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/freescale/pinctrl-imx93.c b/drivers/pinctrl/freescale/pinctrl-imx93.c index c0630f69e9954..417e41b37a6fd 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx93.c +++ b/drivers/pinctrl/freescale/pinctrl-imx93.c @@ -239,6 +239,7 @@ static const struct pinctrl_pin_desc imx93_pinctrl_pads[] = { static const struct imx_pinctrl_soc_info imx93_pinctrl_info = { .pins = imx93_pinctrl_pads, .npins = ARRAY_SIZE(imx93_pinctrl_pads), + .flags = ZERO_OFFSET_VALID, .gpr_compatible = "fsl,imx93-iomuxc-gpr", }; From 2334bdfc2da469c9807767002a2831274b82c39a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 30 Jun 2022 09:14:40 +0200 Subject: [PATCH 203/299] x86: Clear .brk area at early boot [ Upstream commit 38fa5479b41376dc9d7f57e71c83514285a25ca0 ] The .brk section has the same properties as .bss: it is an alloc-only section and should be cleared before being used. Not doing so is especially a problem for Xen PV guests, as the hypervisor will validate page tables (check for writable page tables and hypervisor private bits) before accepting them to be used. Make sure .brk is initially zero by letting clear_bss() clear the brk area, too. Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220630071441.28576-3-jgross@suse.com Signed-off-by: Sasha Levin --- arch/x86/kernel/head64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2e10a33778cf2..92eae95f1a0b7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -425,6 +425,8 @@ void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); + memset(__brk_base, 0, + (unsigned long) __brk_limit - (unsigned long) __brk_base); } static unsigned long get_cmd_line_ptr(void) From bd77b8298f851916f4c9fd555dbc65298a4750b6 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 26 Jun 2022 09:43:15 +0200 Subject: [PATCH 204/299] soc: ixp4xx/npe: Fix unused match warning [ Upstream commit 620f83b8326ce9706b1118334f0257ae028ce045 ] The kernel test robot found this inconsistency: drivers/soc/ixp4xx/ixp4xx-npe.c:737:34: warning: 'ixp4xx_npe_of_match' defined but not used [-Wunused-const-variable=] 737 | static const struct of_device_id ixp4xx_npe_of_match[] = { This is because the match is enclosed in the of_match_ptr() which compiles into NULL when OF is disabled and this is unnecessary. Fix it by dropping of_match_ptr() around the match. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220626074315.61209-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- drivers/soc/ixp4xx/ixp4xx-npe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index 613935cb6a488..58240e320c132 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -758,7 +758,7 @@ static const struct of_device_id ixp4xx_npe_of_match[] = { static struct platform_driver ixp4xx_npe_driver = { .driver = { .name = "ixp4xx-npe", - .of_match_table = of_match_ptr(ixp4xx_npe_of_match), + .of_match_table = ixp4xx_npe_of_match, }, .probe = ixp4xx_npe_probe, .remove = ixp4xx_npe_remove, From 2764762d0d6a2fab76f3f68b5298e4ab634053b7 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Fri, 24 Jun 2022 11:27:13 +0200 Subject: [PATCH 205/299] ARM: dts: stm32: use the correct clock source for CEC on stm32mp151 [ Upstream commit 78ece8cce1ba0c3f3e5a7c6c1b914b3794f04c44 ] The peripheral clock of CEC is not LSE but CEC. Signed-off-by: Gabriel Fernandez Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32mp151.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index 9c2bbf115f4cc..de4d651f95752 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -565,7 +565,7 @@ compatible = "st,stm32-cec"; reg = <0x40016000 0x400>; interrupts = ; - clocks = <&rcc CEC_K>, <&clk_lse>; + clocks = <&rcc CEC_K>, <&rcc CEC>; clock-names = "cec", "hdmi-cec"; status = "disabled"; }; From 449f51b5d6e10eeda2a59d1319334e5853e7cb3a Mon Sep 17 00:00:00 2001 From: Srinivas Neeli Date: Thu, 9 Jun 2022 13:54:32 +0530 Subject: [PATCH 206/299] Revert "can: xilinx_can: Limit CANFD brp to 2" [ Upstream commit c6da4590fe819dfe28a4f8037a8dc1e056542fb4 ] This reverts commit 05ca14fdb6fe65614e0652d03e44b02748d25af7. On early silicon engineering samples observed bit shrinking issue when we use brp as 1. Hence updated brp_min as 2. As in production silicon this issue is fixed, so reverting the patch. Link: https://lore.kernel.org/all/20220609082433.1191060-2-srinivas.neeli@xilinx.com Signed-off-by: Srinivas Neeli Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/xilinx_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 43f0c6a064ba1..75b4db4d050b9 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -259,7 +259,7 @@ static const struct can_bittiming_const xcan_bittiming_const_canfd2 = { .tseg2_min = 1, .tseg2_max = 128, .sjw_max = 128, - .brp_min = 2, + .brp_min = 1, .brp_max = 256, .brp_inc = 1, }; @@ -272,7 +272,7 @@ static const struct can_bittiming_const xcan_data_bittiming_const_canfd2 = { .tseg2_min = 1, .tseg2_max = 16, .sjw_max = 16, - .brp_min = 2, + .brp_min = 1, .brp_max = 256, .brp_inc = 1, }; From 9b47d6dfaecb60c490a7c43d896e101e85490257 Mon Sep 17 00:00:00 2001 From: John Veness Date: Fri, 24 Jun 2022 15:07:57 +0100 Subject: [PATCH 207/299] ALSA: usb-audio: Add quirks for MacroSilicon MS2100/MS2106 devices [ Upstream commit 6e2c9105e0b743c92a157389d40f00b81bdd09fe ] Treat the claimed 96kHz 1ch in the descriptors as 48kHz 2ch, so that the audio stream doesn't sound mono. Also fix initial stream alignment, so that left and right channels are in the correct order. Signed-off-by: John Veness Link: https://lore.kernel.org/r/20220624140757.28758-1-john-linux@pelago.org.uk Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks-table.h | 48 ++++++++++++++++++++++++++++++++++++++++ sound/usb/quirks.c | 3 +++ 2 files changed, 51 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 4f56e1784932a..853da162fd18c 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3802,6 +3802,54 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* + * MacroSilicon MS2100/MS2106 based AV capture cards + * + * These claim 96kHz 1ch in the descriptors, but are actually 48kHz 2ch. + * They also need QUIRK_FLAG_ALIGN_TRANSFER, which makes one wonder if + * they pretend to be 96kHz mono as a workaround for stereo being broken + * by that... + * + * They also have an issue with initial stream alignment that causes the + * channels to be swapped and out of phase, which is dealt with in quirks.c. + */ +{ + USB_AUDIO_DEVICE(0x534d, 0x0021), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "MacroSilicon", + .product_name = "MS210x", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + { + .ifnum = 3, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .iface = 3, + .altsetting = 1, + .altset_idx = 1, + .attributes = 0, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + .rate_min = 48000, + .rate_max = 48000, + } + }, + { + .ifnum = -1 + } + } + } +}, + /* * MacroSilicon MS2109 based HDMI capture cards * diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 12ce69b04f634..a7bcae0a2c75b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1478,6 +1478,7 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, case USB_ID(0x041e, 0x3f19): /* E-Mu 0204 USB */ set_format_emu_quirk(subs, fmt); break; + case USB_ID(0x534d, 0x0021): /* MacroSilicon MS2100/MS2106 */ case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ subs->stream_offset_adj = 2; break; @@ -1908,6 +1909,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x534d, 0x0021, /* MacroSilicon MS2100/MS2106 */ + QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ From da7fdaadc13506a902667fb98cfb584d70b10e0a Mon Sep 17 00:00:00 2001 From: Egor Vorontsov Date: Mon, 27 Jun 2022 13:00:34 +0300 Subject: [PATCH 208/299] ALSA: usb-audio: Add quirk for Fiero SC-01 [ Upstream commit 4fb7c24f69c48fdc02ea7858dbd5a60ff08bf7e5 ] Fiero SC-01 is a USB sound card with two mono inputs and a single stereo output. The inputs are composed into a single stereo stream. The device uses a vendor-provided driver on Windows and does not work at all without it. The driver mostly provides ASIO functionality, but also alters the way the sound card is queried for sample rates and clocks. ALSA queries those failing with an EPIPE (same as Windows 10 does). Presumably, the vendor-provided driver does not query it at all, simply matching by VID:PID. Thus, I consider this a buggy firmware and adhere to a set of fixed endpoint quirks instead. The soundcard has an internal clock. Implicit feedback mode is required for the playback. I have updated my device to v1.1.0 from a Windows 10 VM using a vendor- provided binary prior to the development, hoping for it to just begin working. The device provides no obvious way to downgrade the firmware, and regardless, there's no binary available for v1.0.0 anyway. Thus, I will be getting another unit to extend the patch with support for that. Expected to be a simple copy-paste of the existing one, though. There were no previous reports of that device in context of Linux anywhere. Other issues have been reported though, but that's out of the scope. Signed-off-by: Egor Vorontsov Link: https://lore.kernel.org/r/20220627100041.2861494-1-sdoregor@sdore.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks-table.h | 68 ++++++++++++++++++++++++++++++++++++++++ sound/usb/quirks.c | 2 ++ 2 files changed, 70 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 853da162fd18c..7067d314fecd4 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -4167,6 +4167,74 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + /* + * Fiero SC-01 (firmware v1.1.0) + */ + USB_DEVICE(0x2b53, 0x0031), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Fiero", + .product_name = "SC-01", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + /* Playback */ + { + .ifnum = 1, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 1, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_48000 | + SNDRV_PCM_RATE_96000, + .rate_min = 48000, + .rate_max = 96000, + .nr_rates = 2, + .rate_table = (unsigned int[]) { 48000, 96000 }, + .clock = 0x29 + } + }, + /* Capture */ + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 2, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC | + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_48000 | + SNDRV_PCM_RATE_96000, + .rate_min = 48000, + .rate_max = 96000, + .nr_rates = 2, + .rate_table = (unsigned int[]) { 48000, 96000 }, + .clock = 0x29 + } + }, + { + .ifnum = -1 + } + } + } +}, #undef USB_DEVICE_VENDOR_SPEC #undef USB_AUDIO_DEVICE diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index a7bcae0a2c75b..51138350f03c5 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1915,6 +1915,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ From 7bd54d31155e3da12eb357f4a56d2612c942c4db Mon Sep 17 00:00:00 2001 From: Egor Vorontsov Date: Mon, 27 Jun 2022 13:00:35 +0300 Subject: [PATCH 209/299] ALSA: usb-audio: Add quirk for Fiero SC-01 (fw v1.0.0) [ Upstream commit 2307a0e1ca0b5c1337b37ac6302f96e017ebac3c ] The patch applies the same quirks used for SC-01 at firmware v1.1.0 to the ones running v1.0.0, with respect to hard-coded sample rates. I got two more units and successfully tested the patch series with both firmwares. The support is now complete (not accounting ASIO). Signed-off-by: Egor Vorontsov Link: https://lore.kernel.org/r/20220627100041.2861494-2-sdoregor@sdore.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks-table.h | 132 +++++++++++++++++++++++++++++++++++++++ sound/usb/quirks.c | 4 ++ 2 files changed, 136 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 7067d314fecd4..f93201a830b5a 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -4167,6 +4167,138 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + /* + * Fiero SC-01 (firmware v1.0.0 @ 48 kHz) + */ + USB_DEVICE(0x2b53, 0x0023), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Fiero", + .product_name = "SC-01", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + /* Playback */ + { + .ifnum = 1, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 1, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 }, + .clock = 0x29 + } + }, + /* Capture */ + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 2, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC | + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 }, + .clock = 0x29 + } + }, + { + .ifnum = -1 + } + } + } +}, +{ + /* + * Fiero SC-01 (firmware v1.0.0 @ 96 kHz) + */ + USB_DEVICE(0x2b53, 0x0024), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Fiero", + .product_name = "SC-01", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + /* Playback */ + { + .ifnum = 1, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 1, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_96000, + .rate_min = 96000, + .rate_max = 96000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 96000 }, + .clock = 0x29 + } + }, + /* Capture */ + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .channels = 2, + .fmt_bits = 24, + .iface = 2, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC | + USB_ENDPOINT_SYNC_ASYNC | + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_96000, + .rate_min = 96000, + .rate_max = 96000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 96000 }, + .clock = 0x29 + } + }, + { + .ifnum = -1 + } + } + } +}, { /* * Fiero SC-01 (firmware v1.1.0) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 51138350f03c5..968d90caeefa0 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1915,6 +1915,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), From 97f9cba7686e395669ffb2808c4a7b140bfdb5bb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 5 Jul 2022 10:21:02 -0700 Subject: [PATCH 210/299] nvme-pci: phison e16 has bogus namespace ids [ Upstream commit 73029c9b23cf1213e5f54c2b59efce08665199e7 ] Add the quirk. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216049 Reported-by: Chris Egolf Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fe829377c7c2a..ab575fdd8015f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3432,7 +3432,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DISABLE_WRITE_ZEROES| NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, From 3d8b35387e01cab217dc4691a6f770cbb6ed852c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 6 Jul 2022 10:05:05 -0700 Subject: [PATCH 211/299] nvme: use struct group for generic command dwords [ Upstream commit 5c629dc9609dc43492a7bc8060cc6120875bf096 ] This will allow the trace event to know the full size of the data intended to be copied and silence read overflow checks. Reported-by: John Garry Suggested-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/trace.h | 2 +- include/linux/nvme.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index b5f85259461a6..37c7f4c89f92e 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -69,7 +69,7 @@ TRACE_EVENT(nvme_setup_cmd, __entry->metadata = !!blk_integrity_rq(req); __entry->fctype = cmd->fabrics.fctype; __assign_disk_name(__entry->disk, req->q->disk); - memcpy(__entry->cdw10, &cmd->common.cdw10, + memcpy(__entry->cdw10, &cmd->common.cdws, sizeof(__entry->cdw10)); ), TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)", diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f626a445d1a87..99b1b56f0cd39 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -867,12 +867,14 @@ struct nvme_common_command { __le32 cdw2[2]; __le64 metadata; union nvme_data_ptr dptr; + struct_group(cdws, __le32 cdw10; __le32 cdw11; __le32 cdw12; __le32 cdw13; __le32 cdw14; __le32 cdw15; + ); }; struct nvme_rw_command { From d2f02e532200add4dba5ff47d44c218bb9d214ad Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 7 Jul 2022 02:31:52 +0200 Subject: [PATCH 212/299] wireguard: selftests: set fake real time in init [ Upstream commit 829be057dbc1e71383b8d7de8edb31dcf07b4aa0 ] Not all platforms have an RTC, and rather than trying to force one into each, it's much easier to just set a fixed time. This is necessary because WireGuard's latest handshakes parameter is returned in wallclock time, and if the system time isn't set, and the system is really fast, then this returns 0, which trips the test. Turning this on requires setting CONFIG_COMPAT_32BIT_TIME=y, as musl doesn't support settimeofday without it. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../testing/selftests/wireguard/qemu/arch/arm.config | 1 + .../selftests/wireguard/qemu/arch/armeb.config | 1 + .../testing/selftests/wireguard/qemu/arch/i686.config | 1 + .../testing/selftests/wireguard/qemu/arch/m68k.config | 1 + .../testing/selftests/wireguard/qemu/arch/mips.config | 1 + .../selftests/wireguard/qemu/arch/mipsel.config | 1 + .../selftests/wireguard/qemu/arch/powerpc.config | 1 + tools/testing/selftests/wireguard/qemu/init.c | 11 +++++++++++ 8 files changed, 18 insertions(+) diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config index fc7959bef9c25..0579c66be83ee 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/arm.config +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -7,6 +7,7 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config index f3066be81c199..2a3307bbe534f 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -7,6 +7,7 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CPU_BIG_ENDIAN=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config index 6d90892a85a24..cd864b9be6fb5 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/i686.config +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -1,6 +1,7 @@ CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config index 82c925e49beb7..9639bfe060744 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config +++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config @@ -5,5 +5,6 @@ CONFIG_MAC=y CONFIG_SERIAL_PMACZILOG=y CONFIG_SERIAL_PMACZILOG_TTYS=y CONFIG_SERIAL_PMACZILOG_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config index d7ec63c17b30e..2a84402353ab8 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mips.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config @@ -6,6 +6,7 @@ CONFIG_POWER_RESET=y CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config index 18a4982937376..56146a101e7e4 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config @@ -7,6 +7,7 @@ CONFIG_POWER_RESET=y CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config index 5e04882e8e35b..174a9ffe2a362 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config @@ -4,6 +4,7 @@ CONFIG_PPC_85xx=y CONFIG_PHYS_64BIT=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_MATH_EMULATION=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c index 2a0f48fac925a..542c34b00eb06 100644 --- a/tools/testing/selftests/wireguard/qemu/init.c +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,15 @@ static void seed_rng(void) close(fd); } +static void set_time(void) +{ + if (time(NULL)) + return; + pretty_message("[+] Setting fake time..."); + if (stime(&(time_t){1433512680}) < 0) + panic("settimeofday()"); +} + static void mount_filesystems(void) { pretty_message("[+] Mounting filesystems..."); @@ -256,6 +266,7 @@ int main(int argc, char *argv[]) print_banner(); mount_filesystems(); seed_rng(); + set_time(); kmod_selftests(); enable_logging(); clear_leaks(); From 700364130bfa1689baa9d4385a92a7f122093d1e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 7 Jul 2022 02:31:54 +0200 Subject: [PATCH 213/299] wireguard: selftests: always call kernel makefile [ Upstream commit 1a087eec257154e26a81a7a0a15380d7a2431765 ] These selftests are used for much more extensive changes than just the wireguard source files. So always call the kernel's build file, which will do something or nothing after checking the whole tree, per usual. Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/wireguard/qemu/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index bca07b93eeb07..fd060c5d6f39a 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -19,8 +19,6 @@ endif MIRROR := https://download.wireguard.com/qemu-test/distfiles/ KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) -rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) -WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) default: qemu @@ -324,8 +322,9 @@ $(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$( cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) -$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) +$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) +.PHONY: $(KERNEL_BZIMAGE) $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux From db738f76b03beaccfe9de35d838d9ec825226478 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 6 Jul 2022 12:20:59 -0700 Subject: [PATCH 214/299] signal handling: don't use BUG_ON() for debugging [ Upstream commit a382f8fee42ca10c9bfce0d2352d4153f931f5dc ] These are indeed "should not happen" situations, but it turns out recent changes made the 'task_is_stopped_or_trace()' case trigger (fix for that exists, is pending more testing), and the BUG_ON() makes it unnecessarily hard to actually debug for no good reason. It's been that way for a long time, but let's make it clear: BUG_ON() is not good for debugging, and should never be used in situations where you could just say "this shouldn't happen, but we can continue". Use WARN_ON_ONCE() instead to make sure it gets logged, and then just continue running. Instead of making the system basically unusuable because you crashed the machine while potentially holding some very core locks (eg this function is commonly called while holding 'tasklist_lock' for writing). Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/signal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index e43bc2a692f5e..75cc2339d83ed 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2031,12 +2031,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig) bool autoreap = false; u64 utime, stime; - BUG_ON(sig == -1); + WARN_ON_ONCE(sig == -1); - /* do_notify_parent_cldstop should have been called instead. */ - BUG_ON(task_is_stopped_or_traced(tsk)); + /* do_notify_parent_cldstop should have been called instead. */ + WARN_ON_ONCE(task_is_stopped_or_traced(tsk)); - BUG_ON(!tsk->ptrace && + WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); /* Wake up all pidfd waiters */ From f6c04bde97baf7a0a62a7b4954b223db404f9757 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 13 Jul 2022 23:11:01 +0200 Subject: [PATCH 215/299] ACPI: video: Fix acpi_video_handles_brightness_key_presses() [ Upstream commit 5ad26161a371e4aa2d2553286f0cac580987a493 ] Commit 3a0cf7ab8df3 ("ACPI: video: Change how we determine if brightness key-presses are handled") made acpi_video_handles_brightness_key_presses() report false when none of the ACPI Video Devices support backlight control. But it turns out that at least on a Dell Inspiron N4010 there is no ACPI backlight control, yet brightness hotkeys are still reported through the ACPI Video Bus; and since acpi_video_handles_brightness_key_presses() now returns false, brightness keypresses are now reported twice. To fix this rename the has_backlight flag to may_report_brightness_keys and also set it the first time a brightness key press event is received. Depending on the delivery of the other ACPI (WMI) event vs the ACPI Video Bus event this means that the first brightness key press might still get reported twice, but all further keypresses will be filtered as before. Note that this relies on other drivers reporting brightness key events calling acpi_video_handles_brightness_key_presses() when delivering the events (rather then once during driver probe). This is already required and documented in include/acpi/video.h: /* * Note: The value returned by acpi_video_handles_brightness_key_presses() * may change over time and should not be cached. */ Fixes: 3a0cf7ab8df3 ("ACPI: video: Change how we determine if brightness key-presses are handled") Link: https://lore.kernel.org/regressions/CALF=6jEe5G8+r1Wo0vvz4GjNQQhdkLT5p8uCHn6ZXhg4nsOWow@mail.gmail.com/ Reported-and-tested-by: Ben Greening Signed-off-by: Hans de Goede Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20220713211101.85547-2-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/acpi/acpi_video.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index e4ea42b83b512..3bd0de69aa115 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -73,7 +73,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); -static bool has_backlight; +static bool may_report_brightness_keys; static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); @@ -1224,7 +1224,7 @@ acpi_video_bus_get_one_device(struct acpi_device *device, acpi_video_device_find_cap(data); if (data->cap._BCM && data->cap._BCL) - has_backlight = true; + may_report_brightness_keys = true; mutex_lock(&video->device_list_lock); list_add_tail(&data->entry, &video->video_device_list); @@ -1693,6 +1693,9 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data) break; } + if (keycode) + may_report_brightness_keys = true; + acpi_notifier_call_chain(device, event, 0); if (keycode && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS)) { @@ -2254,7 +2257,7 @@ void acpi_video_unregister(void) if (register_count) { acpi_bus_unregister_driver(&acpi_video_bus); register_count = 0; - has_backlight = false; + may_report_brightness_keys = false; } mutex_unlock(®ister_count_mutex); } @@ -2276,7 +2279,7 @@ void acpi_video_unregister_backlight(void) bool acpi_video_handles_brightness_key_presses(void) { - return has_backlight && + return may_report_brightness_keys && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS); } EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses); From 14d2cc21ca622310babf373e3a8f0b40acfe8265 Mon Sep 17 00:00:00 2001 From: Yangxi Xiang Date: Tue, 28 Jun 2022 17:33:22 +0800 Subject: [PATCH 216/299] vt: fix memory overlapping when deleting chars in the buffer commit 39cdb68c64d84e71a4a717000b6e5de208ee60cc upstream. A memory overlapping copy occurs when deleting a long line. This memory overlapping copy can cause data corruption when scr_memcpyw is optimized to memcpy because memcpy does not ensure its behavior if the destination buffer overlaps with the source buffer. The line buffer is not always broken, because the memcpy utilizes the hardware acceleration, whose result is not deterministic. Fix this problem by using replacing the scr_memcpyw with scr_memmovew. Fixes: 81732c3b2fed ("tty vt: Fix line garbage in virtual console on command line edition") Cc: stable Signed-off-by: Yangxi Xiang Link: https://lore.kernel.org/r/20220628093322.5688-1-xyangxi5@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index f8c87c4d73995..dfc1f4b445f3b 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -855,7 +855,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr) unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_delete(vc, nr); - scr_memcpyw(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); + scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; From ceb7a7e7ab0640f6f75d7eaa7611aabed7981c35 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Wed, 6 Jul 2022 17:43:29 -0400 Subject: [PATCH 217/299] s390/ap: fix error handling in __verify_queue_reservations() commit 2f23256c0ea20627c91ea2d468cda945f68c3395 upstream. The AP bus's __verify_queue_reservations function increments the ref count for the device driver passed in as a parameter, but fails to decrement it before returning control to the caller. This will prevents any subsequent removal of the module. Signed-off-by: Tony Krowiak Reported-by: Tony Krowiak Reviewed-by: Harald Freudenberger Fixes: 4f8206b88286 ("s390/ap: driver callback to indicate resource in use") Link: https://lore.kernel.org/r/20220706222619.602094-1-akrowiak@linux.ibm.com Cc: stable@vger.kernel.org [agordeev@linux.ibm.com fixed description, added Fixes and Link] Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/ap_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index fdf16cb708819..eb8645d44a9cf 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1410,7 +1410,7 @@ static int __verify_queue_reservations(struct device_driver *drv, void *data) if (ap_drv->in_use) { rc = ap_drv->in_use(ap_perms.apm, newaqm); if (rc) - return -EBUSY; + rc = -EBUSY; } /* release the driver's module */ From 89c3c9dd7e0331d4e2529e91f273bc31e3c04f33 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 13 Jul 2022 12:53:46 -0500 Subject: [PATCH 218/299] ACPI: CPPC: Fix enabling CPPC on AMD systems with shared memory commit fbd74d16890b9f5d08ea69b5282b123c894f8860 upstream. When commit 72f2ecb7ece7 ("ACPI: bus: Set CPPC _OSC bits for all and when CPPC_LIB is supported") was introduced, we found collateral damage that a number of AMD systems that supported CPPC but didn't advertise support in _OSC stopped having a functional amd-pstate driver. The _OSC was only enforced on Intel systems at that time. This was fixed for the MSR based designs by commit 8b356e536e69f ("ACPI: CPPC: Don't require _OSC if X86_FEATURE_CPPC is supported") but some shared memory based designs also support CPPC but haven't advertised support in the _OSC. Add support for those designs as well by hardcoding the list of systems. Fixes: 72f2ecb7ece7 ("ACPI: bus: Set CPPC _OSC bits for all and when CPPC_LIB is supported") Fixes: 8b356e536e69f ("ACPI: CPPC: Don't require _OSC if X86_FEATURE_CPPC is supported") Link: https://lore.kernel.org/all/3559249.JlDtxWtqDm@natalenko.name/ Cc: 5.18+ # 5.18+ Reported-and-tested-by: Oleksandr Natalenko Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/cppc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 3677df836e910..0961bf38bdb09 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -16,6 +16,12 @@ bool cpc_supported_by_cpu(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: case X86_VENDOR_HYGON: + if (boot_cpu_data.x86 == 0x19 && ((boot_cpu_data.x86_model <= 0x0f) || + (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f))) + return true; + else if (boot_cpu_data.x86 == 0x17 && + boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f) + return true; return boot_cpu_has(X86_FEATURE_CPPC); } return false; From dda20f46124a80e3c832a3aa024befd998f847e1 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Tue, 28 Jun 2022 16:35:15 +0800 Subject: [PATCH 219/299] serial: 8250: fix return error code in serial8250_request_std_resource() commit 6e690d54cfa802f939cefbd2fa2c91bd0b8bd1b6 upstream. If port->mapbase = NULL in serial8250_request_std_resource() , it need return a error code instead of 0. If uart_set_info() fail to request new regions by serial8250_request_std_resource() but the return value of serial8250_request_std_resource() is 0, The system incorrectly considers that the resource application is successful and does not attempt to restore the old setting. A null pointer reference is triggered when the port resource is later invoked. Signed-off-by: Yi Yang Cc: stable Link: https://lore.kernel.org/r/20220628083515.64138-1-yiyang13@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 7e295d2701b28..ddaf35daf316b 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2971,8 +2971,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: - if (!port->mapbase) + if (!port->mapbase) { + ret = -EINVAL; break; + } if (!request_mem_region(port->mapbase, size, "serial")) { ret = -EBUSY; From a762cee5d933fe4e2e1b773d60fc74fb8248d8c4 Mon Sep 17 00:00:00 2001 From: Dorian Rudolph Date: Sat, 14 May 2022 17:23:40 +0200 Subject: [PATCH 220/299] power: supply: core: Fix boundary conditions in interpolation commit 093d27bb6f2d1963f927ef59c9a2d37059175426 upstream. The functions power_supply_temp2resist_simple and power_supply_ocv2cap_simple handle boundary conditions incorrectly. The change was introduced in a4585ba2050f460f749bbaf2b67bd56c41e30283 ("power: supply: core: Use library interpolation"). There are two issues: First, the lines "high = i - 1" and "high = i" in ocv2cap have the wrong order compared to temp2resist. As a consequence, ocv2cap sets high=-1 if ocv>table[0].ocv, which causes an out-of-bounds read. Second, the logic of temp2resist is also not correct. Consider the case table[] = {{20, 100}, {10, 80}, {0, 60}}. For temp=5, we expect a resistance of 70% by interpolation. However, temp2resist sets high=low=2 and returns 60. Cc: stable@vger.kernel.org Signed-off-by: Dorian Rudolph Reviewed-by: Linus Walleij Fixes: a4585ba2050f ("power: supply: core: Use library interpolation") Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/power_supply_core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index fad5890c899e2..470253c337c73 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -846,17 +846,17 @@ int power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *t { int i, high, low; - /* Break loop at table_len - 1 because that is the highest index */ - for (i = 0; i < table_len - 1; i++) + for (i = 0; i < table_len; i++) if (temp > table[i].temp) break; /* The library function will deal with high == low */ - if ((i == 0) || (i == (table_len - 1))) - high = i; + if (i == 0) + high = low = i; + else if (i == table_len) + high = low = i - 1; else - high = i - 1; - low = i; + high = (low = i) - 1; return fixp_linear_interpolate(table[low].temp, table[low].resistance, @@ -958,17 +958,17 @@ int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, { int i, high, low; - /* Break loop at table_len - 1 because that is the highest index */ - for (i = 0; i < table_len - 1; i++) + for (i = 0; i < table_len; i++) if (ocv > table[i].ocv) break; /* The library function will deal with high == low */ - if ((i == 0) || (i == (table_len - 1))) - high = i - 1; + if (i == 0) + high = low = i; + else if (i == table_len) + high = low = i - 1; else - high = i; /* i.e. i == 0 */ - low = i; + high = (low = i) - 1; return fixp_linear_interpolate(table[low].ocv, table[low].capacity, From 484ec3ca92e47d7fb7bad005ddc66d8965485ad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 27 Jun 2022 18:07:52 +0300 Subject: [PATCH 221/299] serial: stm32: Clear prev values before setting RTS delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5c5f44e36217de5ead789ff25da71c31c2331c96 upstream. The code lacks clearing of previous DEAT/DEDT values. Thus, changing values on the fly results in garbage delays tending towards the maximum value as more and more bits are ORed together. (Leaving RS485 mode would have cleared the old values though). Fixes: 1bcda09d2910 ("serial: stm32: add support for RS485 hardware control mode") Cc: stable Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220627150753.34510-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 3c551fd4f3ff8..0fb535b9505cd 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -71,6 +71,8 @@ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, *cr3 |= USART_CR3_DEM; over8 = *cr1 & USART_CR1_OVER8; + *cr1 &= ~(USART_CR1_DEDT_MASK | USART_CR1_DEAT_MASK); + if (over8) rs485_deat_dedt = delay_ADE * baud * 8; else From b2f63fefefc5c1cba4e192949751a67bc166a671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 14 Jun 2022 10:56:37 +0300 Subject: [PATCH 222/299] serial: pl011: UPSTAT_AUTORTS requires .throttle/unthrottle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 211565b100993c90b53bf40851eacaefc830cfe0 upstream. The driver must provide throttle and unthrottle in uart_ops when it sets UPSTAT_AUTORTS. Add them using existing stop_rx & enable_interrupts functions. Fixes: 2a76fa283098 (serial: pl011: Adopt generic flag to store auto RTS status) Cc: stable Cc: Lukas Wunner Reported-by: Nuno Gonçalves Tested-by: Nuno Gonçalves Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220614075637.8558-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 4d11a3e547f94..e69700275e073 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1339,6 +1339,15 @@ static void pl011_stop_rx(struct uart_port *port) pl011_dma_rx_stop(uap); } +static void pl011_throttle_rx(struct uart_port *port) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + pl011_stop_rx(port); + spin_unlock_irqrestore(&port->lock, flags); +} + static void pl011_enable_ms(struct uart_port *port) { struct uart_amba_port *uap = @@ -1760,9 +1769,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap) */ static void pl011_enable_interrupts(struct uart_amba_port *uap) { + unsigned long flags; unsigned int i; - spin_lock_irq(&uap->port.lock); + spin_lock_irqsave(&uap->port.lock, flags); /* Clear out any spuriously appearing RX interrupts */ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); @@ -1784,7 +1794,14 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap) if (!pl011_dma_rx_running(uap)) uap->im |= UART011_RXIM; pl011_write(uap->im, uap, REG_IMSC); - spin_unlock_irq(&uap->port.lock); + spin_unlock_irqrestore(&uap->port.lock, flags); +} + +static void pl011_unthrottle_rx(struct uart_port *port) +{ + struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + + pl011_enable_interrupts(uap); } static int pl011_startup(struct uart_port *port) @@ -2211,6 +2228,8 @@ static const struct uart_ops amba_pl011_pops = { .stop_tx = pl011_stop_tx, .start_tx = pl011_start_tx, .stop_rx = pl011_stop_rx, + .throttle = pl011_throttle_rx, + .unthrottle = pl011_unthrottle_rx, .enable_ms = pl011_enable_ms, .break_ctl = pl011_break_ctl, .startup = pl011_startup, From 5df66302f03f87ae8953785a882d78e911f00c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 29 Jun 2022 12:48:41 +0300 Subject: [PATCH 223/299] serial: 8250: Fix PM usage_count for console handover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f9b11229b79c0fb2100b5bb4628a101b1d37fbf6 upstream. When console is enabled, univ8250_console_setup() calls serial8250_console_setup() before .dev is set to uart_port. Therefore, it will not call pm_runtime_get_sync(). Later, when the actual driver is going to take over univ8250_console_exit() is called. As .dev is already set, serial8250_console_exit() makes pm_runtime_put_sync() call with usage count being zero triggering PM usage count warning (extra debug for univ8250_console_setup(), univ8250_console_exit(), and serial8250_register_ports()): [ 0.068987] univ8250_console_setup ttyS0 nodev [ 0.499670] printk: console [ttyS0] enabled [ 0.717955] printk: console [ttyS0] printing thread started [ 1.960163] serial8250_register_ports assigned dev for ttyS0 [ 1.976830] printk: console [ttyS0] disabled [ 1.976888] printk: console [ttyS0] printing thread stopped [ 1.977073] univ8250_console_exit ttyS0 usage:0 [ 1.977075] serial8250 serial8250: Runtime PM usage count underflow! [ 1.977429] dw-apb-uart.6: ttyS0 at MMIO 0x4010006000 (irq = 33, base_baud = 115200) is a 16550A [ 1.977812] univ8250_console_setup ttyS0 usage:2 [ 1.978167] printk: console [ttyS0] printing thread started [ 1.978203] printk: console [ttyS0] enabled To fix the issue, call pm_runtime_get_sync() in serial8250_register_ports() as soon as .dev is set for an uart_port if it has console enabled. This problem became apparent only recently because 82586a721595 ("PM: runtime: Avoid device usage count underflows") added the warning printout. I confirmed this problem also occurs with v5.18 (w/o the warning printout, obviously). Fixes: bedb404e91bb ("serial: 8250_port: Don't use power management for kernel console") Cc: stable Tested-by: Tony Lindgren Reviewed-by: Andy Shevchenko Reviewed-by: Tony Lindgren Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/b4f428e9-491f-daf2-2232-819928dc276e@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 4 ++++ drivers/tty/serial/serial_core.c | 5 ----- include/linux/serial_core.h | 5 +++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 01d30f6ed8fb5..cbad398f48b25 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -560,6 +561,9 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) up->port.dev = dev; + if (uart_console_enabled(&up->port)) + pm_runtime_get_sync(up->port.dev); + serial8250_apply_quirks(up); uart_add_one_port(drv, &up->port); } diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 6a8963caf954a..95d8d1fcd5437 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1904,11 +1904,6 @@ static int uart_proc_show(struct seq_file *m, void *v) } #endif -static inline bool uart_console_enabled(struct uart_port *port) -{ - return uart_console(port) && (port->cons->flags & CON_ENABLED); -} - static void uart_port_spin_lock_init(struct uart_port *port) { spin_lock_init(&port->lock); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d4828e69087a4..ca57d686d4d1e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -388,6 +388,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif +static inline bool uart_console_enabled(struct uart_port *port) +{ + return uart_console(port) && (port->cons->flags & CON_ENABLED); +} + struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, From ec02687a33e7ef731afbf77c7e60a7561e9cb441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 28 Jun 2022 12:09:22 +0200 Subject: [PATCH 224/299] serial: mvebu-uart: correctly report configured baudrate value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4f532c1e25319e42996ec18a1f473fd50c8e575d upstream. Functions tty_termios_encode_baud_rate() and uart_update_timeout() should be called with the baudrate value which was set to hardware. Linux then report exact values via ioctl(TCGETS2) to userspace. Change mvebu_uart_baud_rate_set() function to return baudrate value which was set to hardware and propagate this value to above mentioned functions. With this change userspace would see precise value in termios c_ospeed field. Fixes: 68a0db1d7da2 ("serial: mvebu-uart: add function to change baudrate") Cc: stable Reviewed-by: Ilpo Järvinen Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20220628100922.10717-1-pali@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 0429c2a542902..93489fe334d0f 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -470,14 +470,14 @@ static void mvebu_uart_shutdown(struct uart_port *port) } } -static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) +static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) { unsigned int d_divisor, m_divisor; unsigned long flags; u32 brdv, osamp; if (!port->uartclk) - return -EOPNOTSUPP; + return 0; /* * The baudrate is derived from the UART clock thanks to divisors: @@ -548,7 +548,7 @@ static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) (m_divisor << 16) | (m_divisor << 24); writel(osamp, port->membase + UART_OSAMP); - return 0; + return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor); } static void mvebu_uart_set_termios(struct uart_port *port, @@ -587,15 +587,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, max_baud = port->uartclk / 80; baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud); - if (mvebu_uart_baud_rate_set(port, baud)) { - /* No clock available, baudrate cannot be changed */ - if (old) - baud = uart_get_baud_rate(port, old, NULL, - min_baud, max_baud); - } else { - tty_termios_encode_baud_rate(termios, baud, baud); - uart_update_timeout(port, termios->c_cflag, baud); - } + baud = mvebu_uart_baud_rate_set(port, baud); + + /* In case baudrate cannot be changed, report previous old value */ + if (baud == 0 && old) + baud = tty_termios_baud_rate(old); /* Only the following flag changes are supported */ if (old) { @@ -606,6 +602,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, termios->c_cflag |= CS8; } + if (baud != 0) { + tty_termios_encode_baud_rate(termios, baud, baud); + uart_update_timeout(port, termios->c_cflag, baud); + } + spin_unlock_irqrestore(&port->lock, flags); } From 99c8a3c41e0791c39482f34ee8ca3f7014ee2e3f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 8 Jul 2022 15:14:56 +0200 Subject: [PATCH 225/299] x86/pat: Fix x86_has_pat_wp() commit 230ec83d4299b30c51a1c133b4f2a669972cc08a upstream. x86_has_pat_wp() is using a wrong test, as it relies on the normal PAT configuration used by the kernel. In case the PAT MSR has been setup by another entity (e.g. Xen hypervisor) it might return false even if the PAT configuration is allowing WP mappings. This due to the fact that when running as Xen PV guest the PAT MSR is setup by the hypervisor and cannot be changed by the guest. This results in the WP related entry to be at a different position when running as Xen PV guest compared to the bare metal or fully virtualized case. The correct way to test for WP support is: 1. Get the PTE protection bits needed to select WP mode by reading __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP] (depending on the PAT MSR setting this might return protection bits for a stronger mode, e.g. UC-) 2. Translate those bits back into the real cache mode selected by those PTE bits by reading __pte2cachemode_tbl[__pte2cm_idx(prot)] 3. Test for the cache mode to be _PAGE_CACHE_MODE_WP Fixes: f88a68facd9a ("x86/mm: Extend early_memremap() support with additional attrs") Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Cc: # 4.14 Link: https://lore.kernel.org/r/20220503132207.17234-1-jgross@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d8cfce221275e..57ba5502aecf9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -77,10 +77,20 @@ static uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -/* Check that the write-protect PAT entry is set for write-protect */ +/* + * Check that the write-protect PAT entry is set for write-protect. + * To do this without making assumptions how PAT has been set up (Xen has + * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache + * mode via the __cachemode2pte_tbl[] into protection bits (those protection + * bits will select a cache mode of WP or better), and then translate the + * protection bits back into the cache mode using __pte2cm_idx() and the + * __pte2cachemode_tbl[] array. This will return the really used cache mode. + */ bool x86_has_pat_wp(void) { - return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP; + uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP]; + + return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP; } enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) From 7642e42a64b09177060c4712fbbcc1e1882fb98e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 12 Jul 2022 18:40:50 +0100 Subject: [PATCH 226/299] drm/i915/ttm: fix 32b build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ced7866db39fc5c59ee05e154d4abc0977a17f6b upstream. Since segment_pages is no longer a compile time constant, it looks the DIV_ROUND_UP(node->size, segment_pages) breaks the 32b build. Simplest is just to use the ULL variant, but really we should need not need more than u32 for the page alignment (also we are limited by that due to the sg->length type), so also make it all u32. Reported-by: Ville Syrjälä Fixes: aff1e0b09b54 ("drm/i915/ttm: fix sg_table construction") Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220712174050.592550-1-matthew.auld@intel.com (cherry picked from commit 9306b2b2dfce6931241ef804783692cee526599c) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_region.c | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.c | 16 ++++++++-------- drivers/gpu/drm/i915/i915_scatterlist.h | 4 ++-- drivers/gpu/drm/i915/intel_region_ttm.c | 2 +- drivers/gpu/drm/i915/intel_region_ttm.h | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 6cf94469d5a84..34cfa88ae0c01 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -57,6 +57,8 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (page_size) default_page_size = page_size; + /* We should be able to fit a page within an sg entry */ + GEM_BUG_ON(overflows_type(default_page_size, u32)); GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); GEM_BUG_ON(default_page_size < PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 342ca303eae4d..6f393f32ac368 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -583,7 +583,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - u64 page_alignment; + u32 page_alignment; if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index f63b50b71e10b..dcc081874ec8d 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -79,10 +79,10 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) */ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, u64 region_start, - u64 page_alignment) + u32 page_alignment) { - const u64 max_segment = round_down(UINT_MAX, page_alignment); - u64 segment_pages = max_segment >> PAGE_SHIFT; + const u32 max_segment = round_down(UINT_MAX, page_alignment); + const u32 segment_pages = max_segment >> PAGE_SHIFT; u64 block_size, offset, prev_end; struct i915_refct_sgt *rsgt; struct sg_table *st; @@ -96,7 +96,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); st = &rsgt->table; - if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages), + if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages), GFP_KERNEL)) { i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); @@ -123,7 +123,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, st->nents++; } - len = min(block_size, max_segment - sg->length); + len = min_t(u64, block_size, max_segment - sg->length); sg->length += len; sg_dma_len(sg) += len; @@ -155,11 +155,11 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, */ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, u64 region_start, - u64 page_alignment) + u32 page_alignment) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->num_pages << PAGE_SHIFT; - const u64 max_segment = round_down(UINT_MAX, page_alignment); + const u32 max_segment = round_down(UINT_MAX, page_alignment); struct drm_buddy *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; struct drm_buddy_block *block; @@ -207,7 +207,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, st->nents++; } - len = min(block_size, max_segment - sg->length); + len = min_t(u64, block_size, max_segment - sg->length); sg->length += len; sg_dma_len(sg) += len; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index b13e4cdea9238..9ddb3e743a3e5 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -214,10 +214,10 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, u64 region_start, - u64 page_alignment); + u32 page_alignment); struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, u64 region_start, - u64 page_alignment); + u32 page_alignment); #endif diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index d896558cf4588..d6bc83adef005 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -162,7 +162,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, struct ttm_resource *res, - u64 page_alignment) + u32 page_alignment) { if (mem->is_range_manager) { struct ttm_range_mgr_node *range_node = diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index b17e494ef79cd..b44183a771afa 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -25,7 +25,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem); struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, struct ttm_resource *res, - u64 page_alignment); + u32 page_alignment); void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res); From 6e3a6dddad55010a893b07e4e9f2ba30ff95b6d8 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 17 Jun 2022 14:10:27 +0200 Subject: [PATCH 227/299] drm/aperture: Run fbdev removal before internal helpers commit bf43e4521ff3223a613f3a496991a22a4d78e04b upstream. Always run fbdev removal first to remove simpledrm via sysfb_disable(). This clears the internal state. The later call to drm_aperture_detach_drivers() then does nothing. Otherwise, with drm_aperture_detach_drivers() running first, the call to sysfb_disable() uses inconsistent state. Example backtrace show below: [ 11.663422] ================================================================== [ 11.663426] BUG: KASAN: use-after-free in device_del+0x79/0x5f0 [ 11.663435] Read of size 8 at addr ffff888108185050 by task systemd-udevd/311 [ 11.663440] CPU: 0 PID: 311 Comm: systemd-udevd Tainted: G E 5 .19.0-rc2-1-default+ #1689 [ 11.663445] Hardware name: HP ProLiant DL120 G7, BIOS J01 04/21/2011 [ 11.663447] Call Trace: [ 11.663449] [ 11.663451] ? device_del+0x79/0x5f0 [ 11.663456] dump_stack_lvl+0x5b/0x73 [ 11.663462] print_address_description.constprop.0+0x1f/0x1b0 [ 11.663468] ? device_del+0x79/0x5f0 [ 11.663471] ? device_del+0x79/0x5f0 [ 11.663475] print_report.cold+0x3c/0x21c [ 11.663481] ? lock_acquired+0x87/0x1e0 [ 11.663484] ? lock_acquired+0x87/0x1e0 [ 11.663489] ? device_del+0x79/0x5f0 [ 11.663492] kasan_report+0xbf/0xf0 [ 11.663498] ? device_del+0x79/0x5f0 [ 11.663503] device_del+0x79/0x5f0 [ 11.663509] ? device_remove_attrs+0x170/0x170 [ 11.663514] ? lock_is_held_type+0xe8/0x140 [ 11.663523] platform_device_del.part.0+0x19/0xe0 [ 11.663530] platform_device_unregister+0x1c/0x30 [ 11.663535] sysfb_disable+0x2d/0x70 [ 11.663540] remove_conflicting_framebuffers+0x1c/0xf0 [ 11.663546] remove_conflicting_pci_framebuffers+0x130/0x1a0 [ 11.663554] drm_aperture_remove_conflicting_pci_framebuffers+0x86/0xb0 [ 11.663561] ? mgag200_pci_remove+0x30/0x30 [mgag200] [ 11.663578] mgag200_pci_probe+0x2d/0x140 [mgag200] Reported-by: Zack Rusin Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Reviewed-by: Zack Rusin Fixes: ee7a69aa38d8 ("fbdev: Disable sysfb device registration when removing conflicting FBs") Cc: Javier Martinez Canillas Cc: Daniel Vetter Cc: Daniel Vetter Cc: Sam Ravnborg Cc: Helge Deller Cc: Thomas Zimmermann Cc: Alex Deucher Cc: Zhen Lei Cc: Changcheng Deng Link: https://patchwork.freedesktop.org/patch/msgid/20220617121027.30273-1-tzimmermann@suse.de (cherry picked from commit fb84efa28a48e30b87fa1122e8aab8016c7347cd) Signed-off-by: Thomas Zimmermann Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_aperture.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_aperture.c b/drivers/gpu/drm/drm_aperture.c index 74bd4a76b253c..059fd71424f6b 100644 --- a/drivers/gpu/drm/drm_aperture.c +++ b/drivers/gpu/drm/drm_aperture.c @@ -329,7 +329,20 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const struct drm_driver *req_driver) { resource_size_t base, size; - int bar, ret = 0; + int bar, ret; + + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. + */ +#if IS_REACHABLE(CONFIG_FB) + ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); + if (ret) + return ret; +#endif + ret = vga_remove_vgacon(pdev); + if (ret) + return ret; for (bar = 0; bar < PCI_STD_NUM_BARS; ++bar) { if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) @@ -339,15 +352,6 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, drm_aperture_detach_drivers(base, size); } - /* - * WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. - */ -#if IS_REACHABLE(CONFIG_FB) - ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); -#endif - if (ret == 0) - ret = vga_remove_vgacon(pdev); - return ret; + return 0; } EXPORT_SYMBOL(drm_aperture_remove_conflicting_pci_framebuffers); From 0283cbd1473380d3ae46f653eac128aebb288423 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 22 Jul 2022 10:21:59 +0200 Subject: [PATCH 228/299] Linux 5.18.13 Link: https://lore.kernel.org/r/20220719114714.247441733@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Bagas Sanjaya Tested-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20220721180421.921249751@linuxfoundation.org Link: https://lore.kernel.org/r/20220721182818.743726259@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: Florian Fainelli Tested-by: Justin M. Forbes Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f8e2445b60447..1f3c753cb28df 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 18 -SUBLEVEL = 12 +SUBLEVEL = 13 EXTRAVERSION = NAME = Superb Owl From a1a70a37ddbbf59e2abb41d0d7a8735b54754a4b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:48 +0800 Subject: [PATCH 229/299] x86/traps: Use pt_regs directly in fixup_bad_iret() commit 0aca53c6b522f8d6e2681ca875acbbe105f5fdcf upstream. Always stash the address error_entry() is going to return to, in %r12 and get rid of the void *error_entry_ret; slot in struct bad_iret_stack which was supposed to account for it and pt_regs pushed on the stack. After this, both fixup_bad_iret() and sync_regs() can work on a struct pt_regs pointer directly. [ bp: Rewrite commit message, touch ups. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-2-jiangshanlai@gmail.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 5 ++++- arch/x86/include/asm/traps.h | 2 +- arch/x86/kernel/traps.c | 19 +++++++------------ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d8376e5fe1afe..c78a0c0d8c645 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1062,9 +1062,12 @@ SYM_CODE_START_LOCAL(error_entry) * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - mov %rsp, %rdi + popq %r12 /* save return addr in %12 */ + movq %rsp, %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret mov %rax, %rsp + ENCODE_FRAME_POINTER + pushq %r12 jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 35317c5c551d9..47ecfff2c83da 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -13,7 +13,7 @@ #ifdef CONFIG_X86_64 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace -struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); +struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs); void __init trap_init(void); asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1563fb9950059..4167215333fd8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -892,14 +892,10 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r } #endif -struct bad_iret_stack { - void *error_entry_ret; - struct pt_regs regs; -}; - -asmlinkage __visible noinstr -struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) +asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs) { + struct pt_regs tmp, *new_stack; + /* * This is called from entry_64.S early in handling a fault * caused by a bad iret to user mode. To handle the fault @@ -908,19 +904,18 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) * just below the IRET frame) and we want to pretend that the * exception came from the IRET target. */ - struct bad_iret_stack tmp, *new_stack = - (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; + new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the temporary storage. */ - __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8); + __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8); /* Copy the remainder of the stack from the current stack. */ - __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip)); + __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip)); /* Update the entry stack */ __memcpy(new_stack, &tmp, sizeof(tmp)); - BUG_ON(!user_mode(&new_stack->regs)); + BUG_ON(!user_mode(new_stack)); return new_stack; } #endif From 6089d278e9ebfb68d7f3612f63521786d3718c2c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:49 +0800 Subject: [PATCH 230/299] x86/entry: Switch the stack after error_entry() returns commit 520a7e80c96d655fbe4650d9cc985bd9d0443389 upstream. error_entry() calls fixup_bad_iret() before sync_regs() if it is a fault from a bad IRET, to copy pt_regs to the kernel stack. It switches to the kernel stack directly after sync_regs(). But error_entry() itself is also a function call, so it has to stash the address it is going to return to, in %r12 which is unnecessarily complicated. Move the stack switching after error_entry() and get rid of the need to handle the return address. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220503032107.680190-3-jiangshanlai@gmail.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c78a0c0d8c645..af0bd1d84c464 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -326,6 +326,8 @@ SYM_CODE_END(ret_from_fork) .macro idtentry_body cfunc has_error_code:req call error_entry + movq %rax, %rsp /* switch to the task stack if from userspace */ + ENCODE_FRAME_POINTER UNWIND_HINT_REGS movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ @@ -1003,14 +1005,10 @@ SYM_CODE_START_LOCAL(error_entry) /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: /* Put us onto the real thread stack. */ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ call sync_regs - movq %rax, %rsp /* switch stack */ - ENCODE_FRAME_POINTER - pushq %r12 RET /* @@ -1042,6 +1040,7 @@ SYM_CODE_START_LOCAL(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY + leaq 8(%rsp), %rax /* return pt_regs pointer */ RET .Lbstep_iret: @@ -1062,12 +1061,9 @@ SYM_CODE_START_LOCAL(error_entry) * Pretend that the exception came from user mode: set up pt_regs * as if we faulted immediately after IRET. */ - popq %r12 /* save return addr in %12 */ - movq %rsp, %rdi /* arg0 = pt_regs pointer */ + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret - mov %rax, %rsp - ENCODE_FRAME_POINTER - pushq %r12 + mov %rax, %rdi jmp .Lerror_entry_from_usermode_after_swapgs SYM_CODE_END(error_entry) From eda94dbf471b5b6b3ea14c7c9323a8c46899e8d1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Apr 2022 22:10:50 +0800 Subject: [PATCH 231/299] x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry() commit ee774dac0da1543376a69fd90840af6aa86879b3 upstream. The macro idtentry() (through idtentry_body()) calls error_entry() unconditionally even on XENPV. But XENPV needs to only push and clear regs. PUSH_AND_CLEAR_REGS in error_entry() makes the stack not return to its original place when the function returns, which means it is not possible to convert it to a C function. Carve out PUSH_AND_CLEAR_REGS out of error_entry() and into a separate function and call it before error_entry() in order to avoid calling error_entry() on XENPV. It will also allow for error_entry() to be converted to C code that can use inlined sync_regs() and save a function call. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-4-jiangshanlai@gmail.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index af0bd1d84c464..f367da666f7e0 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -318,6 +318,14 @@ SYM_CODE_END(ret_from_fork) #endif .endm +/* Save all registers in pt_regs */ +SYM_CODE_START_LOCAL(push_and_clear_regs) + UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + RET +SYM_CODE_END(push_and_clear_regs) + /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -325,6 +333,9 @@ SYM_CODE_END(ret_from_fork) */ .macro idtentry_body cfunc has_error_code:req + call push_and_clear_regs + UNWIND_HINT_REGS + call error_entry movq %rax, %rsp /* switch to the task stack if from userspace */ ENCODE_FRAME_POINTER @@ -986,13 +997,11 @@ SYM_CODE_START_LOCAL(paranoid_exit) SYM_CODE_END(paranoid_exit) /* - * Save all registers in pt_regs, and switch GS if needed. + * Switch GS and CR3 if needed. */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC cld - PUSH_AND_CLEAR_REGS save_ret=1 - ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace From 973ee2729511e96d3a3569c7d85255e4267f99ad Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 May 2022 11:21:06 +0800 Subject: [PATCH 232/299] x86/entry: Don't call error_entry() for XENPV commit 64cbd0acb58203fb769ed2f4eab526d43e243847 upstream. XENPV guests enter already on the task stack and they can't fault for native_iret() nor native_load_gs_index() since they use their own pvop for IRET and load_gs_index(). A CR3 switch is not needed either. So there is no reason to call error_entry() in XENPV. [ bp: Massage commit message. ] Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220503032107.680190-6-jiangshanlai@gmail.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f367da666f7e0..deb36cfe49d72 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -336,8 +336,17 @@ SYM_CODE_END(push_and_clear_regs) call push_and_clear_regs UNWIND_HINT_REGS - call error_entry - movq %rax, %rsp /* switch to the task stack if from userspace */ + /* + * Call error_entry() and switch to the task stack if from userspace. + * + * When in XENPV, it is already in the task stack, and it can't fault + * for native_iret() nor native_load_gs_index() since XENPV uses its + * own pvops for IRET and load_gs_index(). And it doesn't need to + * switch the CR3. So it can skip invoking error_entry(). + */ + ALTERNATIVE "call error_entry; movq %rax, %rsp", \ + "", X86_FEATURE_XENPV + ENCODE_FRAME_POINTER UNWIND_HINT_REGS From c29a2229f5a70c8916c6abefb98aea8a38d763d4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 May 2022 14:14:35 +0200 Subject: [PATCH 233/299] x86/entry: Remove skip_r11rcx commit 1b331eeea7b8676fc5dbdf80d0a07e41be226177 upstream. Yes, r11 and rcx have been restored previously, but since they're being popped anyway (into rsi) might as well pop them into their own regs -- setting them to the value they already are. Less magical code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220506121631.365070674@infradead.org Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 10 +--------- arch/x86/entry/entry_64.S | 3 +-- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index a4c061fb7c6ea..2b12c691fbde4 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -119,27 +119,19 @@ For 32-bit we have the following conventions - kernel is built with CLEAR_REGS .endm -.macro POP_REGS pop_rdi=1 skip_r11rcx=0 +.macro POP_REGS pop_rdi=1 popq %r15 popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx - .if \skip_r11rcx - popq %rsi - .else popq %r11 - .endif popq %r10 popq %r9 popq %r8 popq %rax - .if \skip_r11rcx - popq %rsi - .else popq %rcx - .endif popq %rdx popq %rsi .if \pop_rdi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index deb36cfe49d72..d8cfe744d0f9c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -191,8 +191,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - POP_REGS pop_rdi=0 skip_r11rcx=1 + POP_REGS pop_rdi=0 /* * Now all regs are restored except RSP and RDI. From e492002673b03c636d2297fb869d68ae545c41c4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:32 +0200 Subject: [PATCH 234/299] x86/kvm/vmx: Make noinstr clean commit 742ab6df974ae8384a2dd213db1a3a06cf6d8936 upstream. The recent mmio_stale_data fixes broke the noinstr constraints: vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section make it all happy again. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 6 +++--- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9646ae886b4b5..da103938e62ee 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -383,9 +383,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) if (!vmx->disable_fb_clear) return; - rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); msr |= FB_CLEAR_DIS; - wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); /* Cache the MSR value to avoid reading it later */ vmx->msr_ia32_mcu_opt_ctrl = msr; } @@ -396,7 +396,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) return; vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; - wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); } static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 828f5cf1af459..53b6fdf30c99b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12533,9 +12533,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); -bool kvm_arch_has_assigned_device(struct kvm *kvm) +bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) { - return atomic_read(&kvm->arch.assigned_device_count); + return arch_atomic_read(&kvm->arch.assigned_device_count); } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 34eed5f85ed60..88d94cf515e13 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1511,7 +1511,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) { } -static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) { return false; } From e0ed7445cbb5a10bebec4f582894460453b3c0f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:33 +0200 Subject: [PATCH 235/299] x86/cpufeatures: Move RETPOLINE flags to word 11 commit a883d624aed463c84c22596006e5a96f5b44db31 upstream. In order to extend the RETPOLINE features to 4, move them to word 11 where there is still room. This mostly keeps DISABLE_RETPOLINE simple. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index e17de69faa543..81706f467df85 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ +/* FREE! ( 7*32+12) */ +/* FREE! ( 7*32+13) */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -295,6 +295,10 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +/* FREE! (11*32+10) */ +/* FREE! (11*32+11) */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ From 079c71b6e380c40ee870bc59f176b36d93786db5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:34 +0200 Subject: [PATCH 236/299] x86/retpoline: Cleanup some #ifdefery commit 369ae6ffc41a3c1137cab697635a84d0cc7cdcea upstream. On it's own not much of a cleanup but it prepares for more/similar code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/disabled-features.h | 9 ++++++++- arch/x86/include/asm/nospec-branch.h | 7 +++---- arch/x86/net/bpf_jit_comp.c | 7 +++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 1231d63f836d8..f5db93822fc13 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,13 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -82,7 +89,7 @@ #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index da251a5645b0e..5728539a3e777 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -120,17 +120,16 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#ifdef CONFIG_RETPOLINE - typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +extern retpoline_thunk_t __x86_indirect_thunk_array[]; + +#ifdef CONFIG_RETPOLINE #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; #include #undef GEN -extern retpoline_thunk_t __x86_indirect_thunk_array[]; - #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4c71fa04e784c..2ad01c75863e6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -407,16 +407,15 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; -#ifdef CONFIG_RETPOLINE if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); - } else -#endif - EMIT2(0xFF, 0xE0 + reg); + } else { + EMIT2(0xFF, 0xE0 + reg); + } *pprog = prog; } From 7ce2011c8b28a44ae80d7081dc634eec174650ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:35 +0200 Subject: [PATCH 237/299] x86/retpoline: Swizzle retpoline thunk commit 00e1533325fd1fb5459229fe37f235462649f668 upstream. Put the actual retpoline thunk as the original code so that it can become more complicated. Specifically, it allows RET to be a JMP, which can't be .altinstr_replacement since that doesn't do relocations (except for the very first instruction). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b2b2366885a2b..2cdd62499d547 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE + ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ + __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) .endm From 86fbd2844858c5aef57a28ebc3d53d298f37cc67 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:36 +0200 Subject: [PATCH 238/299] x86/retpoline: Use -mfunction-return commit 0b53c374b9eff2255a386f1f1cfb9a928e52a5ae upstream. Utilize -mfunction-return=thunk-extern when available to have the compiler replace RET instructions with direct JMPs to the symbol __x86_return_thunk. This does not affect assembler (.S) sources, only C sources. -mfunction-return=thunk-extern has been available since gcc 7.3 and clang 15. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Nick Desaulniers Reviewed-by: Josh Poimboeuf Tested-by: Nick Desaulniers Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 2 ++ arch/x86/include/asm/nospec-branch.h | 2 ++ arch/x86/lib/retpoline.S | 13 +++++++++++++ 3 files changed, 17 insertions(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 63d50f65b8283..ca3c0de24bc35 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -15,11 +15,13 @@ endif ifdef CONFIG_CC_IS_GCC RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) +RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) endif ifdef CONFIG_CC_IS_CLANG RETPOLINE_CFLAGS := -mretpoline-external-thunk RETPOLINE_VDSO_CFLAGS := -mretpoline +RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) endif export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5728539a3e777..829c9f827a966 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -123,6 +123,8 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; +extern void __x86_return_thunk(void); + #ifdef CONFIG_RETPOLINE #define GEN(reg) \ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 2cdd62499d547..4467c21215f43 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -67,3 +67,16 @@ SYM_CODE_END(__x86_indirect_thunk_array) #define GEN(reg) EXPORT_THUNK(reg) #include #undef GEN + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + ret + int3 +SYM_CODE_END(__x86_return_thunk) + +__EXPORT_THUNK(__x86_return_thunk) From e0c27dc584f6395e57d67f5c60b3ee2347a45590 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:37 +0200 Subject: [PATCH 239/299] x86: Undo return-thunk damage commit 15e67227c49a57837108acfe1c80570e1bd9f962 upstream. Introduce X86_FEATURE_RETHUNK for those afflicted with needing this. [ bp: Do only INT3 padding - simpler. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: CONFIG_STACK_VALIDATION vs CONFIG_OBJTOOL] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/alternative.h | 1 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 3 +- arch/x86/kernel/alternative.c | 60 ++++++++++++++++++++++++ arch/x86/kernel/module.c | 8 +++- arch/x86/kernel/vmlinux.lds.S | 7 +++ 6 files changed, 78 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 9b10c8c76087a..9542c582d546b 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 81706f467df85..541b5c446fa9e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -299,6 +299,7 @@ /* FREE! (11*32+11) */ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index f5db93822fc13..997b88052e660 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -60,7 +60,8 @@ # define DISABLE_RETPOLINE 0 #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ - (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ + (1 << (X86_FEATURE_RETHUNK & 31))) #endif #ifdef CONFIG_INTEL_IOMMU_SVM diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d374cb3cf024c..14ddac6d9d8cf 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; @@ -507,9 +508,67 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } +/* + * Rewrite the compiler generated return thunk tail-calls. + * + * For example, convert: + * + * JMP __x86_return_thunk + * + * into: + * + * RET + */ +static int patch_return(void *addr, struct insn *insn, u8 *bytes) +{ + int i = 0; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return -1; + + bytes[i++] = RET_INSN_OPCODE; + + for (; i < insn->length;) + bytes[i++] = INT3_INSN_OPCODE; + + return i; +} + +void __init_or_module noinline apply_returns(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op1; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op1 = insn.opcode.bytes[0]; + if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + + len = patch_return(addr, &insn, bytes); + if (len == insn.length) { + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } +} #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ @@ -860,6 +919,7 @@ void __init alternative_instructions(void) * those can rewrite the retpoline thunks. */ apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); /* * Then patch alternatives, such that those paravirt calls that are in diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b98ffcf4d250f..67828d9733890 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL, *ibt_endbr = NULL; + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".return_sites", secstrings + s->sh_name)) + returns = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + if (returns) { + void *rseg = (void *)returns->sh_addr; + apply_returns(rseg, rseg + returns->sh_size); + } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 7fda7f27e7620..70a0a2cce040f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -283,6 +283,13 @@ SECTIONS *(.retpoline_sites) __retpoline_sites_end = .; } + + . = ALIGN(8); + .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { + __return_sites = .; + *(.return_sites) + __return_sites_end = .; + } #endif #ifdef CONFIG_X86_KERNEL_IBT From 262941a05615d39d66dcf47909d6e67ea69d371d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:38 +0200 Subject: [PATCH 240/299] x86,objtool: Create .return_sites commit d9e9d2300681d68a775c28de6aa6e5290ae17796 upstream. Find all the return-thunk sites and record them in a .return_sites section such that the kernel can undo this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/arch/x86/decode.c | 5 ++ tools/objtool/check.c | 74 +++++++++++++++++++++++++ tools/objtool/include/objtool/arch.h | 1 + tools/objtool/include/objtool/elf.h | 1 + tools/objtool/include/objtool/objtool.h | 1 + tools/objtool/objtool.c | 1 + 6 files changed, 83 insertions(+) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 943cb41cddf7c..1ecf50bbd5544 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sym) { return !strncmp(sym->name, "__x86_indirect_", 15); } + +bool arch_is_rethunk(struct symbol *sym) +{ + return !strcmp(sym->name, "__x86_return_thunk"); +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f66e4ac0af948..3b3bfe94537a3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -747,6 +747,52 @@ static int create_retpoline_sites_sections(struct objtool_file *file) return 0; } +static int create_return_sites_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".return_sites"); + if (sec) { + WARN("file already has .return_sites, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->return_thunk_list, call_node) + idx++; + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".return_sites", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .return_sites"); + return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->return_thunk_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .return_sites"); + return -1; + } + + idx++; + } + + return 0; +} + static int create_ibt_endbr_seal_sections(struct objtool_file *file) { struct instruction *insn; @@ -1081,6 +1127,11 @@ __weak bool arch_is_retpoline(struct symbol *sym) return false; } +__weak bool arch_is_rethunk(struct symbol *sym) +{ + return false; +} + #define NEGATIVE_RELOC ((void *)-1L) static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) @@ -1248,6 +1299,18 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in annotate_call_site(file, insn, false); } +static void add_return_call(struct objtool_file *file, struct instruction *insn) +{ + /* + * Return thunk tail calls are really just returns in disguise, + * so convert them accordingly. + */ + insn->type = INSN_RETURN; + insn->retpoline_safe = true; + + list_add_tail(&insn->call_node, &file->return_thunk_list); +} + static bool same_function(struct instruction *insn1, struct instruction *insn2) { return insn1->func->pfunc == insn2->func->pfunc; @@ -1300,6 +1363,9 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->retpoline_thunk) { add_retpoline_call(file, insn); continue; + } else if (reloc->sym->return_thunk) { + add_return_call(file, insn); + continue; } else if (insn->func) { /* * External sibling call or internal sibling call with @@ -2182,6 +2248,9 @@ static int classify_symbols(struct objtool_file *file) if (arch_is_retpoline(func)) func->retpoline_thunk = true; + if (arch_is_rethunk(func)) + func->return_thunk = true; + if (!strcmp(func->name, "__fentry__")) func->fentry = true; @@ -3935,6 +4004,11 @@ int check(struct objtool_file *file) if (ret < 0) goto out; warnings += ret; + + ret = create_return_sites_sections(file); + if (ret < 0) + goto out; + warnings += ret; } if (mcount) { diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 9b19cc3041955..beb2f3aa94ffc 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -89,6 +89,7 @@ const char *arch_ret_insn(int len); int arch_decode_hint_reg(u8 sp_reg, int *base); bool arch_is_retpoline(struct symbol *sym); +bool arch_is_rethunk(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 82e57eb4b4c5d..94a618e2a79e3 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -57,6 +57,7 @@ struct symbol { u8 uaccess_safe : 1; u8 static_call_tramp : 1; u8 retpoline_thunk : 1; + u8 return_thunk : 1; u8 fentry : 1; u8 profiling_func : 1; struct list_head pv_target; diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index a6e72d916807d..7f2d1b0953333 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -24,6 +24,7 @@ struct objtool_file { struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); struct list_head retpoline_call_list; + struct list_head return_thunk_list; struct list_head static_call_list; struct list_head mcount_loc_list; struct list_head endbr_list; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 843ff3c2f28e4..983687345d355 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -126,6 +126,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); INIT_LIST_HEAD(&file.retpoline_call_list); + INIT_LIST_HEAD(&file.return_thunk_list); INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); INIT_LIST_HEAD(&file.endbr_list); From f7b097edd3ebfea46974de5277fd6f6a35481770 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 8 Jul 2022 14:00:07 -0300 Subject: [PATCH 241/299] objtool: skip non-text sections when adding return-thunk sites The .discard.text section is added in order to reserve BRK, with a temporary function just so it can give it a size. This adds a relocation to the return thunk, which objtool will add to the .return_sites section. Linking will then fail as there are references to the .discard.text section. Do not add instructions from non-text sections to the list of return thunk calls, avoiding the reference to .discard.text. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3b3bfe94537a3..f4a807f65a783 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1308,7 +1308,9 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn) insn->type = INSN_RETURN; insn->retpoline_safe = true; - list_add_tail(&insn->call_node, &file->return_thunk_list); + /* Skip the non-text sections, specially .discard ones */ + if (insn->sec->text) + list_add_tail(&insn->call_node, &file->return_thunk_list); } static bool same_function(struct instruction *insn1, struct instruction *insn2) From eb84031e5c599a4b218ede3e10e7b5fd8ccc391a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:39 +0200 Subject: [PATCH 242/299] x86,static_call: Use alternative RET encoding commit ee88d363d15617ff50ac24fab0ffec11113b2aeb upstream. In addition to teaching static_call about the new way to spell 'RET', there is an added complication in that static_call() is allowed to rewrite text before it is known which particular spelling is required. In order to deal with this; have a static_call specific fixup in the apply_return() 'alternative' patching routine that will rewrite the static_call trampoline to match the definite sequence. This in turn creates the problem of uniquely identifying static call trampolines. Currently trampolines are 8 bytes, the first 5 being the jmp.d32/ret sequence and the final 3 a byte sequence that spells out 'SCT'. This sequence is used in __static_call_validate() to ensure it is patching a trampoline and not a random other jmp.d32. That is, false-positives shouldn't be plenty, but aren't a big concern. OTOH the new __static_call_fixup() must not have false-positives, and 'SCT' decodes to the somewhat weird but semi plausible sequence: push %rbx rex.XB push %r12 Additionally, there are SLS concerns with immediate jumps. Combined it seems like a good moment to change the signature to a single 3 byte trap instruction that is unique to this usage and will not ever get generated by accident. As such, change the signature to: '0x0f, 0xb9, 0xcc', which decodes to: ud1 %esp, %ecx Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/static_call.h | 19 +++++++++++++- arch/x86/kernel/alternative.c | 12 ++++++--- arch/x86/kernel/static_call.c | 40 ++++++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 2d8dacd026437..70cc9ccb80298 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -21,6 +21,16 @@ * relative displacement across sections. */ +/* + * The trampoline is 8 bytes and of the general form: + * + * jmp.d32 \func + * ud1 %esp, %ecx + * + * That trailing #UD provides both a speculation stop and serves as a unique + * 3 byte signature identifying static call trampolines. Also see tramp_ud[] + * and __static_call_fixup(). + */ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ @@ -28,7 +38,7 @@ STATIC_CALL_TRAMP_STR(name) ": \n" \ ANNOTATE_NOENDBR \ insns " \n" \ - ".byte 0x53, 0x43, 0x54 \n" \ + ".byte 0x0f, 0xb9, 0xcc \n" \ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ ".popsection \n") @@ -36,8 +46,13 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") +#ifdef CONFIG_RETPOLINE +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") +#else #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#endif #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) @@ -48,4 +63,6 @@ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ ".popsection \n") +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 14ddac6d9d8cf..92bbe4ffd0a13 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -539,18 +539,22 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) s32 *s; for (s = start; s < end; s++) { - void *addr = (void *)s + *s; + void *dest = NULL, *addr = (void *)s + *s; struct insn insn; int len, ret; u8 bytes[16]; - u8 op1; + u8 op; ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) continue; - op1 = insn.opcode.bytes[0]; - if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) + op = insn.opcode.bytes[0]; + if (op == JMP32_INSN_OPCODE) + dest = addr + insn.length + insn.immediate.value; + + if (__static_call_fixup(addr, op, dest) || + WARN_ON_ONCE(dest != &__x86_return_thunk)) continue; DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index aa72cefdd5be6..fe21fe7781852 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -11,6 +11,13 @@ enum insn_type { RET = 3, /* tramp / site cond-tail-call */ }; +/* + * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such + * that there is no false-positive trampoline identification while also being a + * speculation stop. + */ +static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; + /* * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ @@ -43,7 +50,10 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case RET: - code = &retinsn; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + else + code = &retinsn; break; } @@ -60,7 +70,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp) { u8 opcode = *(u8 *)insn; - if (tramp && memcmp(insn+5, "SCT", 3)) { + if (tramp && memcmp(insn+5, tramp_ud, 3)) { pr_err("trampoline signature fail"); BUG(); } @@ -115,3 +125,29 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); + +#ifdef CONFIG_RETPOLINE +/* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as + * having a return trampoline. + * + * The problem is that static_call() is available before determining + * X86_FEATURE_RETHUNK and, by implication, running alternatives. + * + * This means that __static_call_transform() above can have overwritten the + * return trampoline and we now need to fix things up to be consistent. + */ +bool __static_call_fixup(void *tramp, u8 op, void *dest) +{ + if (memcmp(tramp+5, tramp_ud, 3)) { + /* Not a trampoline site, not our problem. */ + return false; + } + + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) + __static_call_transform(tramp, RET, NULL); + + return true; +} +#endif From 0d15b9c30cb222d0e5ac2ff9ba7b93bd9af82d05 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:40 +0200 Subject: [PATCH 243/299] x86/ftrace: Use alternative RET encoding commit 1f001e9da6bbf482311e45e48f53c2bd2179e59c upstream. Use the return thunk in ftrace trampolines, if needed. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/ftrace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1e31c7d21597b..6892ca67d9c6d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -303,7 +303,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) +#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) @@ -359,7 +359,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - memcpy(ip, retq, RET_SIZE); + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + else + memcpy(ip, retq, sizeof(retq)); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { From ebe3ceb43f5b5b88062ffd62c08d19a57f5fa44b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:41 +0200 Subject: [PATCH 244/299] x86/bpf: Use alternative RET encoding commit d77cfe594ad50e0bf95d457e02ccd578791b2a15 upstream. Use the return thunk in eBPF generated code, if needed. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2ad01c75863e6..2dab2816b3f7c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -420,6 +420,21 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) *pprog = prog; } +static void emit_return(u8 **pprog, u8 *ip) +{ + u8 *prog = *pprog; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + emit_jump(&prog, &__x86_return_thunk, ip); + } else { + EMIT1(0xC3); /* ret */ + if (IS_ENABLED(CONFIG_SLS)) + EMIT1(0xCC); /* int3 */ + } + + *pprog = prog; +} + /* * Generate the following code: * @@ -1680,7 +1695,7 @@ st: if (is_imm8(insn->off)) ctx->cleanup_addr = proglen; pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; default: @@ -2157,7 +2172,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, prog); /* Make sure the trampoline generation logic doesn't overflow */ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { ret = -EFAULT; From 3525abdb3a63680b8623b0294bd9614b2352ccce Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:42 +0200 Subject: [PATCH 245/299] x86/kvm: Fix SETcc emulation for return thunks commit af2e140f34208a5dfb6b7a8ad2d56bda88f0524d upstream. Prepare the SETcc fastop stuff for when RET can be larger still. The tricky bit here is that the expressions should not only be constant C expressions, but also absolute GAS expressions. This means no ?: and 'true' is ~0. Also ensure em_setcc() has the same alignment as the actual FOP_SETCC() ops, this ensures there cannot be an alignment hole between em_setcc() and the first op. Additionally, add a .skip directive to the FOP_SETCC() macro to fill any remaining space with INT3 traps; however the primary purpose of this directive is to generate AS warnings when the remaining space goes negative. Which is a very good indication the alignment magic went side-ways. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 89b11e7dca8aa..b01437015f992 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -325,13 +325,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define FOP_RET(name) \ __FOP_RET(#name) -#define FOP_START(op) \ +#define __FOP_START(op, align) \ extern void em_##op(struct fastop *fake); \ asm(".pushsection .text, \"ax\" \n\t" \ ".global em_" #op " \n\t" \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ + ".align " __stringify(align) " \n\t" \ "em_" #op ":\n\t" +#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) + #define FOP_END \ ".popsection") @@ -435,16 +437,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); /* * Depending on .config the SETcc functions look like: * - * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] - * SETcc %al [3 bytes] - * RET [1 byte] - * INT3 [1 byte; CONFIG_SLS] - * - * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the - * next power-of-two alignment they become 4, 8 or 16 resp. + * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] + * SETcc %al [3 bytes] + * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] + * INT3 [1 byte; CONFIG_SLS] */ -#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS)) -#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT) +#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ + IS_ENABLED(CONFIG_SLS)) +#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) +#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) static_assert(SETCC_LENGTH <= SETCC_ALIGN); #define FOP_SETCC(op) \ @@ -453,9 +454,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN); #op ": \n\t" \ ASM_ENDBR \ #op " %al \n\t" \ - __FOP_RET(#op) + __FOP_RET(#op) \ + ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" -FOP_START(setcc) +__FOP_START(setcc, SETCC_ALIGN) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) From 2fc0ed17c526b032c1c416d77ebc491f446f1269 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:43 +0200 Subject: [PATCH 246/299] x86/vsyscall_emu/64: Don't use RET in vsyscall emulation commit 15583e514eb16744b80be85dea0774ece153177d upstream. This is userspace code and doesn't play by the normal kernel rules. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vsyscall/vsyscall_emu_64.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S index 15e35159ebb68..ef2dd18272431 100644 --- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -19,17 +19,20 @@ __vsyscall_page: mov $__NR_gettimeofday, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_time, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_getcpu, %rax syscall - RET + ret + int3 .balign 4096, 0xcc From a302187fb8f6d2707aaadf5e8a558ff046378a80 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Jun 2022 23:15:44 +0200 Subject: [PATCH 247/299] x86/sev: Avoid using __x86_return_thunk commit 0ee9073000e8791f8b134a8ded31bcc767f7f232 upstream. Specifically, it's because __enc_copy() encrypts the kernel after being relocated outside the kernel in sme_encrypt_execute(), and the RET macro's jmp offset isn't amended prior to execution. Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mem_encrypt_boot.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 3d1dba05fce4a..d94dea450fa60 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -65,7 +65,9 @@ SYM_FUNC_START(sme_encrypt_execute) movq %rbp, %rsp /* Restore original stack pointer */ pop %rbp - RET + /* Offset to __x86_return_thunk would be wrong here */ + ret + int3 SYM_FUNC_END(sme_encrypt_execute) SYM_FUNC_START(__enc_copy) @@ -151,6 +153,8 @@ SYM_FUNC_START(__enc_copy) pop %r12 pop %r15 - RET + /* Offset to __x86_return_thunk would be wrong here */ + ret + int3 .L__enc_copy_end: SYM_FUNC_END(__enc_copy) From a05146b2ac6ab1deff475a06441b825d176b320e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:45 +0200 Subject: [PATCH 248/299] x86: Use return-thunk in asm code commit aa3d480315ba6c3025a60958e1981072ea37c3df upstream. Use the return thunk in asm code. If the thunk isn't needed, it will get patched into a RET instruction during boot by apply_returns(). Since alternatives can't handle relocations outside of the first instruction, putting a 'jmp __x86_return_thunk' in one is not valid, therefore carve out the memmove ERMS path into a separate label and jump to it. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: no RANDSTRUCT_CFLAGS] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/Makefile | 1 + arch/x86/include/asm/linkage.h | 8 ++++++++ arch/x86/lib/memmove_64.S | 7 ++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 693f8b9031fb8..e893af5aa8f55 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -92,6 +92,7 @@ endif endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 85865f1645bd3..e3ae331cabb14 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -19,19 +19,27 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif +#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define RET ret; int3 #else #define RET ret #endif +#endif /* CONFIG_RETPOLINE */ #else /* __ASSEMBLY__ */ +#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define ASM_RET "ret; int3\n\t" #else #define ASM_RET "ret\n\t" #endif +#endif /* CONFIG_RETPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index d83cba364e31d..724bbf83eb5b0 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove) /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS + ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS /* * movsq instruction have many startup latency @@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET + +.Lmemmove_erms: + movq %rdx, %rcx + rep movsb + RET SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) From df777869fe2de25b60195561d3b674c9084aaeca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:46 +0200 Subject: [PATCH 249/299] x86/entry: Avoid very early RET commit 7c81c0c9210c9bfab2bae76aab2999de5bad27db upstream. Commit ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") manages to introduce a CALL/RET pair that is before SWITCH_TO_KERNEL_CR3, which means it is before RETBleed can be mitigated. Revert to an earlier version of the commit in Fixes. Down side is that this will bloat .text size somewhat. The alternative is fully reverting it. The purpose of this patch was to allow migrating error_entry() to C, including the whole of kPTI. Much care needs to be taken moving that forward to not re-introduce this problem of early RETs. Fixes: ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d8cfe744d0f9c..2bf721b15e5de 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -317,14 +317,6 @@ SYM_CODE_END(ret_from_fork) #endif .endm -/* Save all registers in pt_regs */ -SYM_CODE_START_LOCAL(push_and_clear_regs) - UNWIND_HINT_FUNC - PUSH_AND_CLEAR_REGS save_ret=1 - ENCODE_FRAME_POINTER 8 - RET -SYM_CODE_END(push_and_clear_regs) - /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -332,8 +324,8 @@ SYM_CODE_END(push_and_clear_regs) */ .macro idtentry_body cfunc has_error_code:req - call push_and_clear_regs - UNWIND_HINT_REGS + PUSH_AND_CLEAR_REGS + ENCODE_FRAME_POINTER /* * Call error_entry() and switch to the task stack if from userspace. From 9d75af6b406702b0af616cee49ae11ec0b2abe3a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:47 +0200 Subject: [PATCH 250/299] objtool: Treat .text.__x86.* as noinstr commit 951ddecf435659553ed15a9214e153a3af43a9a1 upstream. Needed because zen_untrain_ret() will be called from noinstr code. Also makes sense since the thunks MUST NOT contain instrumentation nor be poked with dynamic instrumentation. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f4a807f65a783..ed4ee5a16186d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -374,7 +374,8 @@ static int decode_instructions(struct objtool_file *file) sec->text = true; if (!strcmp(sec->name, ".noinstr.text") || - !strcmp(sec->name, ".entry.text")) + !strcmp(sec->name, ".entry.text") || + !strncmp(sec->name, ".text.__x86.", 12)) sec->noinstr = true; for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { From 64a98375f389bf695e2a2f199175b7a5ece44f45 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 8 Jul 2022 14:00:36 -0300 Subject: [PATCH 251/299] x86: Add magic AMD return-thunk commit a149180fbcf336e97ce4eb2cdc13672727feb94d upstream. Note: needs to be in a section distinct from Retpolines such that the Retpoline RET substitution cannot possibly use immediate jumps. ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a little tricky but works due to the fact that zen_untrain_ret() doesn't have any stack ops and as such will emit a single ORC entry at the start (+0x3f). Meanwhile, unwinding an IP, including the __x86_return_thunk() one (+0x40) will search for the largest ORC entry smaller or equal to the IP, these will find the one ORC entry (+0x3f) and all works. [ Alexandre: SVM part. ] [ bp: Build fix, massages. ] Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: conflicts at arch/x86/entry/entry_64_compat.S] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 6 +++ arch/x86/entry/entry_64_compat.S | 4 ++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 3 +- arch/x86/include/asm/nospec-branch.h | 17 +++++++ arch/x86/kernel/vmlinux.lds.S | 2 +- arch/x86/kvm/svm/vmenter.S | 18 +++++++ arch/x86/lib/retpoline.S | 64 ++++++++++++++++++++++-- tools/objtool/check.c | 21 ++++++-- 9 files changed, 127 insertions(+), 9 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2bf721b15e5de..86ddb6f456fec 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -96,6 +96,7 @@ SYM_CODE_START(entry_SYSCALL_64) SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR + UNTRAIN_RET /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -708,6 +709,7 @@ native_irq_return_ldt: pushq %rdi /* Stash user RDI */ swapgs /* to kernel GS */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ + UNTRAIN_RET movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ @@ -903,6 +905,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * be retrieved from a kernel internal table. */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 + UNTRAIN_RET /* * Handling GSBASE depends on the availability of FSGSBASE. @@ -1013,6 +1016,7 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + UNTRAIN_RET leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: @@ -1065,6 +1069,7 @@ SYM_CODE_START_LOCAL(error_entry) SWAPGS FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + UNTRAIN_RET /* * Pretend that the exception came from user mode: set up pt_regs @@ -1160,6 +1165,7 @@ SYM_CODE_START(asm_exc_nmi) movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 + UNTRAIN_RET pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ pushq 3*8(%rdx) /* pt_regs->flags */ diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 4fdb007cddbd1..e1200c8a12f60 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -72,6 +73,7 @@ SYM_CODE_START(entry_SYSENTER_compat) pushq $__USER32_CS /* pt_regs->cs */ pushq $0 /* pt_regs->ip = 0 (placeholder) */ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + UNTRAIN_RET /* * User tracing code (ptrace or signal handlers) might assume that @@ -215,6 +217,7 @@ SYM_CODE_START(entry_SYSCALL_compat) SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR + UNTRAIN_RET /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -382,6 +385,7 @@ SYM_CODE_START(entry_INT80_compat) pushq (%rdi) /* pt_regs->di */ .Lint80_keep_stack: + UNTRAIN_RET pushq %rsi /* pt_regs->si */ xorl %esi, %esi /* nospec si */ pushq %rdx /* pt_regs->dx */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 541b5c446fa9e..99f3efdf90da4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -300,6 +300,7 @@ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 997b88052e660..65f1d930ff4d7 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -61,7 +61,8 @@ #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ - (1 << (X86_FEATURE_RETHUNK & 31))) + (1 << (X86_FEATURE_RETHUNK & 31)) | \ + (1 << (X86_FEATURE_UNRET & 31))) #endif #ifdef CONFIG_INTEL_IOMMU_SVM diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 829c9f827a966..5ca60ae0d14f1 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -112,6 +112,22 @@ #endif .endm +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * Doesn't clobber any registers but does require a stable stack. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro UNTRAIN_RET +#ifdef CONFIG_RETPOLINE + ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET +#endif +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -124,6 +140,7 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern void __x86_return_thunk(void); +extern void zen_untrain_ret(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 70a0a2cce040f..071faf2c8a77c 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -141,7 +141,7 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.indirect_thunk) + *(.text.__x86.*) __indirect_thunk_end = .; #endif } :text =0xcccc diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index dfaeb47fcf2a7..723f8534986c3 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -110,6 +110,15 @@ SYM_FUNC_START(__svm_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize 'ret' if the return is + * from the kernel. + */ + UNTRAIN_RET + /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that are reloaded @@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize RET if the return is + * from the kernel. + */ + UNTRAIN_RET + pop %_ASM_BX #ifdef CONFIG_X86_64 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 4467c21215f43..fdd16163b996b 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -72,11 +72,67 @@ SYM_CODE_END(__x86_indirect_thunk_array) * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. */ -SYM_CODE_START(__x86_return_thunk) - UNWIND_HINT_EMPTY - ANNOTATE_NOENDBR + .section .text.__x86.return_thunk + +/* + * Safety details here pertain to the AMD Zen{1,2} microarchitecture: + * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * alignment within the BTB. + * 2) The instruction at zen_untrain_ret must contain, and not + * end with, the 0xc3 byte of the RET. + * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread + * from re-poisioning the BTB prediction. + */ + .align 64 + .skip 63, 0xcc +SYM_FUNC_START_NOALIGN(zen_untrain_ret); + + /* + * As executed from zen_untrain_ret, this is: + * + * TEST $0xcc, %bl + * LFENCE + * JMP __x86_return_thunk + * + * Executing the TEST instruction has a side effect of evicting any BTB + * prediction (potentially attacker controlled) attached to the RET, as + * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + */ + .byte 0xf6 + + /* + * As executed from __x86_return_thunk, this is a plain RET. + * + * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. + * + * We subsequently jump backwards and architecturally execute the RET. + * This creates a correct BTB prediction (type=ret), but in the + * meantime we suffer Straight Line Speculation (because the type was + * no branch) which is halted by the INT3. + * + * With SMT enabled and STIBP active, a sibling thread cannot poison + * RET's prediction to a type of its choice, but can evict the + * prediction due to competitive sharing. If the prediction is + * evicted, __x86_return_thunk will suffer Straight Line Speculation + * which will be contained safely by the INT3. + */ +SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) ret int3 SYM_CODE_END(__x86_return_thunk) -__EXPORT_THUNK(__x86_return_thunk) + /* + * Ensure the TEST decoding / BTB invalidation is complete. + */ + lfence + + /* + * Jump back and execute the RET in the middle of the TEST instruction. + * INT3 is for SLS protection. + */ + jmp __x86_return_thunk + int3 +SYM_FUNC_END(zen_untrain_ret) +__EXPORT_THUNK(zen_untrain_ret) + +EXPORT_SYMBOL(__x86_return_thunk) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ed4ee5a16186d..558755043375d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1300,7 +1300,7 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in annotate_call_site(file, insn, false); } -static void add_return_call(struct objtool_file *file, struct instruction *insn) +static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) { /* * Return thunk tail calls are really just returns in disguise, @@ -1310,7 +1310,7 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn) insn->retpoline_safe = true; /* Skip the non-text sections, specially .discard ones */ - if (insn->sec->text) + if (add && insn->sec->text) list_add_tail(&insn->call_node, &file->return_thunk_list); } @@ -1367,7 +1367,7 @@ static int add_jump_destinations(struct objtool_file *file) add_retpoline_call(file, insn); continue; } else if (reloc->sym->return_thunk) { - add_return_call(file, insn); + add_return_call(file, insn, true); continue; } else if (insn->func) { /* @@ -1387,6 +1387,21 @@ static int add_jump_destinations(struct objtool_file *file) jump_dest = find_insn(file, dest_sec, dest_off); if (!jump_dest) { + struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); + + /* + * This is a special case for zen_untrain_ret(). + * It jumps to __x86_return_thunk(), but objtool + * can't find the thunk's starting RET + * instruction, because the RET is also in the + * middle of another instruction. Objtool only + * knows about the outer instruction. + */ + if (sym && sym->return_thunk) { + add_return_call(file, insn, false); + continue; + } + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", insn->sec, insn->offset, dest_sec->name, dest_off); From a70ed95a0b0a15cfa86b1df4004d47f074de7de2 Mon Sep 17 00:00:00 2001 From: Alexandre Chartre Date: Tue, 14 Jun 2022 23:15:49 +0200 Subject: [PATCH 252/299] x86/bugs: Report AMD retbleed vulnerability commit 6b80b59b3555706508008f1f127b5412c89c7fd8 upstream. Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary Speculative Code Execution with Return Instructions) attack. [peterz: add hygon] [kim: invert parity; fam15h] Co-developed-by: Kim Phillips Signed-off-by: Kim Phillips Signed-off-by: Alexandre Chartre Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 13 +++++++++++++ arch/x86/kernel/cpu/common.c | 19 +++++++++++++++++++ drivers/base/cpu.c | 8 ++++++++ include/linux/cpu.h | 2 ++ 5 files changed, 43 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 99f3efdf90da4..498e7f92fc43a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -450,5 +450,6 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a8a9f64063315..425ff2f32669e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1987,6 +1987,11 @@ static ssize_t srbds_show_state(char *buf) return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); } +static ssize_t retbleed_show_state(char *buf) +{ + return sprintf(buf, "Vulnerable\n"); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2032,6 +2037,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_MMIO_STALE_DATA: return mmio_stale_data_show_state(buf); + case X86_BUG_RETBLEED: + return retbleed_show_state(buf); + default: break; } @@ -2088,4 +2096,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at { return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); } + +ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index af5d0c188f7b8..796cc55313f4a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1231,16 +1231,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; +#define VULNBL(vendor, family, model, blacklist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, steppings, \ X86_FEATURE_ANY, issues) +#define VULNBL_AMD(family, blacklist) \ + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) + +#define VULNBL_HYGON(family, blacklist) \ + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) + #define SRBDS BIT(0) /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ #define MMIO BIT(1) /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ #define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1273,6 +1284,11 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), + VULNBL_AMD(0x17, RETBLEED), + VULNBL_HYGON(0x18, RETBLEED), {} }; @@ -1374,6 +1390,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !arch_cap_mmio_immune(ia32_cap)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index a97776ea9d990..4c98849577d4e 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); +static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, &dev_attr_mmio_stale_data.attr, + &dev_attr_retbleed.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2c74773547444..314802f98b9da 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, extern ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From f88b40812b6b3d483fb5de11b72aeb0c2bb73c59 Mon Sep 17 00:00:00 2001 From: Alexandre Chartre Date: Tue, 14 Jun 2022 23:15:50 +0200 Subject: [PATCH 253/299] x86/bugs: Add AMD retbleed= boot parameter commit 7fbf47c7ce50b38a64576b150e7011ae73d54669 upstream. Add the "retbleed=" boot parameter to select a mitigation for RETBleed. Possible values are "off", "auto" and "unret" (JMP2RET mitigation). The default value is "auto". Currently, "retbleed=auto" will select the unret mitigation on AMD and Hygon and no mitigation on Intel (JMP2RET is not effective on Intel). [peterz: rebase; add hygon] [jpoimboe: cleanups] Signed-off-by: Alexandre Chartre Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 15 +++ arch/x86/Kconfig | 3 + arch/x86/kernel/cpu/bugs.c | 108 +++++++++++++++++- 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c4893782055b4..1af5341eb233c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5124,6 +5124,21 @@ retain_initrd [RAM] Keep initrd memory after extraction + retbleed= [X86] Control mitigation of RETBleed (Arbitrary + Speculative Code Execution with Return Instructions) + vulnerability. + + off - unconditionally disable + auto - automatically select a migitation + unret - force enable untrained return thunks, + only effective on AMD Zen {1,2} + based systems. + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. + + Not specifying this option is equivalent to retbleed=auto. + rfkill.default_state= 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, etc. communication is blocked by default. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b2c65f5733538..c429a5c4f945f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -469,6 +469,9 @@ config RETPOLINE config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + config SLS bool "Mitigate Straight-Line-Speculation" depends on CC_HAS_SLS && X86_64 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 425ff2f32669e..5fdf031fd694b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,6 +37,7 @@ #include "cpu.h" static void __init spectre_v1_select_mitigation(void); +static void __init retbleed_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); @@ -120,6 +121,12 @@ void __init check_bugs(void) /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); + retbleed_select_mitigation(); + /* + * spectre_v2_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ spectre_v2_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); @@ -745,6 +752,100 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "RETBleed: " fmt + +enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, +}; + +enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, +}; + +const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", +}; + +static enum retbleed_mitigation retbleed_mitigation __ro_after_init = + RETBLEED_MITIGATION_NONE; +static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + +static int __init retbleed_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + retbleed_cmd = RETBLEED_CMD_OFF; + else if (!strcmp(str, "auto")) + retbleed_cmd = RETBLEED_CMD_AUTO; + else if (!strcmp(str, "unret")) + retbleed_cmd = RETBLEED_CMD_UNRET; + else + pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str); + + return 0; +} +early_param("retbleed", retbleed_parse_cmdline); + +#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" + +static void __init retbleed_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + + switch (retbleed_cmd) { + case RETBLEED_CMD_OFF: + return; + + case RETBLEED_CMD_UNRET: + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + break; + + case RETBLEED_CMD_AUTO: + default: + if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) + break; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + break; + } + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: + + if (!IS_ENABLED(CONFIG_RETPOLINE) || + !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { + pr_err(RETBLEED_COMPILER_MSG); + retbleed_mitigation = RETBLEED_MITIGATION_NONE; + break; + } + + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); + break; + + default: + break; + } + + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); +} + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -1989,7 +2090,12 @@ static ssize_t srbds_show_state(char *buf) static ssize_t retbleed_show_state(char *buf) { - return sprintf(buf, "Vulnerable\n"); + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && + (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) + return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, From c85b5f77d3b224975d5caa329f28b22b7ea5addc Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Jun 2022 23:15:51 +0200 Subject: [PATCH 254/299] x86/bugs: Enable STIBP for JMP2RET commit e8ec1b6e08a2102d8755ccb06fa26d540f26a2fa upstream. For untrained return thunks to be fully effective, STIBP must be enabled or SMT disabled. Co-developed-by: Josh Poimboeuf Signed-off-by: Josh Poimboeuf Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 16 +++-- arch/x86/kernel/cpu/bugs.c | 58 +++++++++++++++---- 2 files changed, 57 insertions(+), 17 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1af5341eb233c..7631757db7995 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5128,11 +5128,17 @@ Speculative Code Execution with Return Instructions) vulnerability. - off - unconditionally disable - auto - automatically select a migitation - unret - force enable untrained return thunks, - only effective on AMD Zen {1,2} - based systems. + off - no mitigation + auto - automatically select a migitation + auto,nosmt - automatically select a mitigation, + disabling SMT if necessary for + the full mitigation (only on Zen1 + and older without STIBP). + unret - force enable untrained return thunks, + only effective on AMD f15h-f17h + based systems. + unret,nosmt - like unret, will disable SMT when STIBP + is not available. Selecting 'auto' will choose a mitigation method at run time according to the CPU. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5fdf031fd694b..50a2c813c3477 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -776,19 +776,34 @@ static enum retbleed_mitigation retbleed_mitigation __ro_after_init = static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = RETBLEED_CMD_AUTO; +static int __ro_after_init retbleed_nosmt = false; + static int __init retbleed_parse_cmdline(char *str) { if (!str) return -EINVAL; - if (!strcmp(str, "off")) - retbleed_cmd = RETBLEED_CMD_OFF; - else if (!strcmp(str, "auto")) - retbleed_cmd = RETBLEED_CMD_AUTO; - else if (!strcmp(str, "unret")) - retbleed_cmd = RETBLEED_CMD_UNRET; - else - pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str); + while (str) { + char *next = strchr(str, ','); + if (next) { + *next = 0; + next++; + } + + if (!strcmp(str, "off")) { + retbleed_cmd = RETBLEED_CMD_OFF; + } else if (!strcmp(str, "auto")) { + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { + pr_err("Ignoring unknown retbleed option (%s).", str); + } + + str = next; + } return 0; } @@ -834,6 +849,10 @@ static void __init retbleed_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); + if (!boot_cpu_has(X86_FEATURE_STIBP) && + (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) pr_err(RETBLEED_UNTRAIN_MSG); @@ -1080,6 +1099,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (mode != SPECTRE_V2_USER_STRICT && + mode != SPECTRE_V2_USER_STRICT_PREFERRED) + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + } + spectre_v2_user_stibp = mode; set_mode: @@ -2090,10 +2116,18 @@ static ssize_t srbds_show_state(char *buf) static ssize_t retbleed_show_state(char *buf) { - if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && - (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) - return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + + return sprintf(buf, "%s; SMT %s\n", + retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); + } return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } From 409586fb4a6e7b2331ecb4edec71e34e21750e05 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:52 +0200 Subject: [PATCH 255/299] x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value commit caa0ff24d5d0e02abce5e65c3d2b7f20a6617be5 upstream. Due to TIF_SSBD and TIF_SPEC_IB the actual IA32_SPEC_CTRL value can differ from x86_spec_ctrl_base. As such, keep a per-CPU value reflecting the current task's MSR content. [jpoimboe: rename] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 28 +++++++++++++++++++++++----- arch/x86/kernel/process.c | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5ca60ae0d14f1..bac243da51300 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -253,6 +253,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +extern void write_spec_ctrl_current(u64 val); /* * With retpoline, we must use IBRS to restrict branch prediction diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 50a2c813c3477..4cda46cb8ab5e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,11 +49,29 @@ static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +/* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* The current value of the SPEC_CTRL MSR with task-specific bits set */ +DEFINE_PER_CPU(u64, x86_spec_ctrl_current); +EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); + static DEFINE_MUTEX(spec_ctrl_mutex); +/* + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). + */ +void write_spec_ctrl_current(u64 val) +{ + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * The vendor and possibly platform specific bits which can be modified in * x86_spec_ctrl_base. @@ -1272,7 +1290,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_eibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base); } switch (mode) { @@ -1327,7 +1345,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1570,7 +1588,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base); } } @@ -1821,7 +1839,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b370767f5b191..0284c7e63f740 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + write_spec_ctrl_current(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) From 47e51d66d93d70d60e478cc81504deb0f4ff67ad Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 8 Jul 2022 13:50:38 -0300 Subject: [PATCH 256/299] x86/entry: Add kernel IBRS implementation commit 2dbb887e875b1de3ca8f40ddf26bcfe55798c609 upstream. Implement Kernel IBRS - currently the only known option to mitigate RSB underflow speculation issues on Skylake hardware. Note: since IBRS_ENTER requires fuller context established than UNTRAIN_RET, it must be placed after it. However, since UNTRAIN_RET itself implies a RET, it must come after IBRS_ENTER. This means IBRS_ENTER needs to also move UNTRAIN_RET. Note 2: KERNEL_IBRS is sub-optimal for XenPV. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: conflict at arch/x86/entry/entry_64_compat.S] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/calling.h | 58 ++++++++++++++++++++++++++++++ arch/x86/entry/entry_64.S | 44 ++++++++++++++++++++--- arch/x86/entry/entry_64_compat.S | 17 ++++++--- arch/x86/include/asm/cpufeatures.h | 2 +- 4 files changed, 111 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 2b12c691fbde4..99d2410c18d1c 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include /* @@ -281,6 +283,62 @@ For 32-bit we have the following conventions - kernel is built with #endif +/* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +.endm + /* * Mitigate Spectre v1 for conditional swapgs code paths. * diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 86ddb6f456fec..34647657ad198 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -96,7 +96,6 @@ SYM_CODE_START(entry_SYSCALL_64) SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR - UNTRAIN_RET /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -113,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) movq %rsp, %rdi /* Sign extend the lower 32bit as syscall numbers are treated as int */ movslq %eax, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET + call do_syscall_64 /* returns with IRQs disabled */ /* @@ -192,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: + IBRS_EXIT POP_REGS pop_rdi=0 /* @@ -596,6 +601,7 @@ __irqentry_text_end: SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + IBRS_EXIT #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -882,6 +888,9 @@ SYM_CODE_END(xen_failsafe_callback) * 1 -> no SWAPGS on exit * * Y GSBASE value at entry, must be restored in paranoid_exit + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC @@ -905,7 +914,6 @@ SYM_CODE_START_LOCAL(paranoid_entry) * be retrieved from a kernel internal table. */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 - UNTRAIN_RET /* * Handling GSBASE depends on the availability of FSGSBASE. @@ -927,7 +935,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * is needed here. */ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - RET + jmp .Lparanoid_gsbase_done .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ @@ -946,8 +954,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) xorl %ebx, %ebx swapgs .Lparanoid_kernel_gsbase: - FENCE_SWAPGS_KERNEL_ENTRY +.Lparanoid_gsbase_done: + + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + UNTRAIN_RET + RET SYM_CODE_END(paranoid_entry) @@ -969,9 +985,19 @@ SYM_CODE_END(paranoid_entry) * 1 -> no SWAPGS on exit * * Y User space GSBASE, must be restored unconditionally + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + /* * The order of operations is important. RESTORE_CR3 requires * kernel GSBASE. @@ -1016,10 +1042,12 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER UNTRAIN_RET leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ call sync_regs RET @@ -1069,6 +1097,7 @@ SYM_CODE_START_LOCAL(error_entry) SWAPGS FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER UNTRAIN_RET /* @@ -1165,7 +1194,6 @@ SYM_CODE_START(asm_exc_nmi) movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 - UNTRAIN_RET pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ pushq 3*8(%rdx) /* pt_regs->flags */ @@ -1176,6 +1204,9 @@ SYM_CODE_START(asm_exc_nmi) PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER + IBRS_ENTER + UNTRAIN_RET + /* * At this point we no longer need to worry about stack damage * due to nesting -- we're on the normal thread stack and we're @@ -1400,6 +1431,9 @@ end_repeat_nmi: movq $-1, %rsi call exc_nmi + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index e1200c8a12f60..46f5a2886fca6 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -4,7 +4,6 @@ * * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include "calling.h" #include #include #include @@ -18,6 +17,8 @@ #include #include +#include "calling.h" + .section .entry.text, "ax" /* @@ -73,7 +74,6 @@ SYM_CODE_START(entry_SYSENTER_compat) pushq $__USER32_CS /* pt_regs->cs */ pushq $0 /* pt_regs->ip = 0 (placeholder) */ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) - UNTRAIN_RET /* * User tracing code (ptrace or signal handlers) might assume that @@ -115,6 +115,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld + IBRS_ENTER + UNTRAIN_RET + /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -217,7 +220,6 @@ SYM_CODE_START(entry_SYSCALL_compat) SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR - UNTRAIN_RET /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -259,6 +261,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) UNWIND_HINT_REGS + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ @@ -273,6 +278,8 @@ sysret32_from_system_call: */ STACKLEAK_ERASE + IBRS_EXIT + movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ @@ -385,7 +392,6 @@ SYM_CODE_START(entry_INT80_compat) pushq (%rdi) /* pt_regs->di */ .Lint80_keep_stack: - UNTRAIN_RET pushq %rsi /* pt_regs->si */ xorl %esi, %esi /* nospec si */ pushq %rdx /* pt_regs->dx */ @@ -418,6 +424,9 @@ SYM_CODE_START(entry_INT80_compat) cld + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_int80_syscall_32 jmp swapgs_restore_regs_and_return_to_usermode diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 498e7f92fc43a..43e2d7d4cbf19 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,7 +203,7 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -/* FREE! ( 7*32+12) */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ /* FREE! ( 7*32+13) */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ From 2c0d8e35807a6086542919e2d044cfa6683476de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:54 +0200 Subject: [PATCH 257/299] x86/bugs: Optimize SPEC_CTRL MSR writes commit c779bc1a9002fa474175b80e72b85c9bf628abb0 upstream. When changing SPEC_CTRL for user control, the WRMSR can be delayed until return-to-user when KERNEL_IBRS has been enabled. This avoids an MSR write during context switch. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------ arch/x86/kernel/process.c | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index bac243da51300..b6abf0c6b41d8 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -253,7 +253,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; -extern void write_spec_ctrl_current(u64 val); +extern void write_spec_ctrl_current(u64 val, bool force); /* * With retpoline, we must use IBRS to restrict branch prediction diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4cda46cb8ab5e..9defb942e9411 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -63,13 +63,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex); * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -void write_spec_ctrl_current(u64 val) +void write_spec_ctrl_current(u64 val, bool force) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; this_cpu_write(x86_spec_ctrl_current, val); - wrmsrl(MSR_IA32_SPEC_CTRL, val); + + /* + * When KERNEL_IBRS this MSR is written on return-to-user, unless + * forced the update can be delayed until that time. + */ + if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + wrmsrl(MSR_IA32_SPEC_CTRL, val); } /* @@ -1290,7 +1296,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_eibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } switch (mode) { @@ -1345,7 +1351,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { - write_spec_ctrl_current(x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1588,7 +1594,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } } @@ -1839,7 +1845,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0284c7e63f740..622dc3673c37f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr); + write_spec_ctrl_current(msr, false); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) From e604d260c633926089e81f8e52c90c91bd797f12 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 14 Jun 2022 23:15:55 +0200 Subject: [PATCH 258/299] x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS commit 7c693f54c873691a4b7da05c7e0f74e67745d144 upstream. Extend spectre_v2= boot option with Kernel IBRS. [jpoimboe: no STIBP with IBRS] Signed-off-by: Pawan Gupta Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 1 + arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 66 +++++++++++++++---- 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7631757db7995..61d9eced3070d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5503,6 +5503,7 @@ eibrs - enhanced IBRS eibrs,retpoline - enhanced IBRS + Retpolines eibrs,lfence - enhanced IBRS + LFENCE + ibrs - use IBRS to protect kernel Not specifying this option is equivalent to spectre_v2=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index b6abf0c6b41d8..e14046daa7ba3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -211,6 +211,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_EIBRS, SPECTRE_V2_EIBRS_RETPOLINE, SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9defb942e9411..bb5eaadeef381 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -965,6 +965,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_EIBRS, SPECTRE_V2_CMD_EIBRS_RETPOLINE, SPECTRE_V2_CMD_EIBRS_LFENCE, + SPECTRE_V2_CMD_IBRS, }; enum spectre_v2_user_cmd { @@ -1037,11 +1038,12 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { - return (mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE); + return mode == SPECTRE_V2_IBRS || + mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; } static void __init @@ -1106,12 +1108,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* - * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not - * required. + * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, + * STIBP is not required. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return; /* @@ -1143,6 +1145,7 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; static const struct { @@ -1160,6 +1163,7 @@ static const struct { { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "ibrs", SPECTRE_V2_CMD_IBRS, false }, }; static void __init spec_v2_print_cond(const char *reason, bool secure) @@ -1222,6 +1226,24 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { + pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; @@ -1261,6 +1283,14 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + mode = SPECTRE_V2_IBRS; + break; + } + mode = spectre_v2_select_retpoline(); break; @@ -1277,6 +1307,10 @@ static void __init spectre_v2_select_mitigation(void) mode = spectre_v2_select_retpoline(); break; + case SPECTRE_V2_CMD_IBRS: + mode = SPECTRE_V2_IBRS; + break; + case SPECTRE_V2_CMD_EIBRS: mode = SPECTRE_V2_EIBRS; break; @@ -1293,7 +1327,7 @@ static void __init spectre_v2_select_mitigation(void) if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); - if (spectre_v2_in_eibrs_mode(mode)) { + if (spectre_v2_in_ibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; write_spec_ctrl_current(x86_spec_ctrl_base, true); @@ -1304,6 +1338,10 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_EIBRS: break; + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + break; + case SPECTRE_V2_LFENCE: case SPECTRE_V2_EIBRS_LFENCE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); @@ -1330,17 +1368,17 @@ static void __init spectre_v2_select_mitigation(void) pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); /* - * Retpoline means the kernel is safe because it has no indirect - * branches. Enhanced IBRS protects firmware too, so, enable restricted - * speculation around firmware calls only when Enhanced IBRS isn't - * supported. + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise + * enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } @@ -2082,7 +2120,7 @@ static ssize_t mmio_stale_data_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { From fb32593f8f383e32bb82fd85cc3dd372c89566ac Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:56 +0200 Subject: [PATCH 259/299] x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation() commit 166115c08a9b0b846b783088808a27d739be6e8d upstream. retbleed will depend on spectre_v2, while spectre_v2_user depends on retbleed. Break this cycle. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bb5eaadeef381..35db9f84739e0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,8 +37,9 @@ #include "cpu.h" static void __init spectre_v1_select_mitigation(void); -static void __init retbleed_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); +static void __init retbleed_select_mitigation(void); +static void __init spectre_v2_user_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); @@ -145,13 +146,19 @@ void __init check_bugs(void) /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); + /* + * retbleed_select_mitigation() relies on the state set by + * spectre_v2_select_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ retbleed_select_mitigation(); /* - * spectre_v2_select_mitigation() relies on the state set by + * spectre_v2_user_select_mitigation() relies on the state set by * retbleed_select_mitigation(); specifically the STIBP selection is * forced for UNRET. */ - spectre_v2_select_mitigation(); + spectre_v2_user_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); md_clear_select_mitigation(); @@ -1006,13 +1013,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) pr_info("spectre_v2_user=%s forced on command line.\n", reason); } +static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; + static enum spectre_v2_user_cmd __init -spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_parse_user_cmdline(void) { char arg[20]; int ret, i; - switch (v2_cmd) { + switch (spectre_v2_cmd) { case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; case SPECTRE_V2_CMD_FORCE: @@ -1047,7 +1056,7 @@ static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) } static void __init -spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_user_select_mitigation(void) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); @@ -1060,7 +1069,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - cmd = spectre_v2_parse_user_cmdline(v2_cmd); + cmd = spectre_v2_parse_user_cmdline(); switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; @@ -1384,7 +1393,7 @@ static void __init spectre_v2_select_mitigation(void) } /* Set up IBPB and STIBP depending on the general spectre V2 command */ - spectre_v2_user_select_mitigation(cmd); + spectre_v2_cmd = cmd; } static void update_stibp_msr(void * __unused) From 5a3037b4de4dd52504c0842aac5f9498b3d450af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jun 2022 13:48:58 +0200 Subject: [PATCH 260/299] x86/bugs: Report Intel retbleed vulnerability commit 6ad0ad2bf8a67e27d1f9d006a1dabb0e1c360cc3 upstream. Skylake suffers from RSB underflow speculation issues; report this vulnerability and it's mitigation (spectre_v2=ibrs). [jpoimboe: cleanups, eibrs] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/bugs.c | 39 +++++++++++++++++++++++++++----- arch/x86/kernel/cpu/common.c | 24 ++++++++++---------- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4425d6773183b..bd283cdd963af 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -91,6 +91,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 35db9f84739e0..37d2973bcb398 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -783,12 +783,17 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + #undef pr_fmt #define pr_fmt(fmt) "RETBleed: " fmt enum retbleed_mitigation { RETBLEED_MITIGATION_NONE, RETBLEED_MITIGATION_UNRET, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, }; enum retbleed_mitigation_cmd { @@ -800,6 +805,8 @@ enum retbleed_mitigation_cmd { const char * const retbleed_strings[] = { [RETBLEED_MITIGATION_NONE] = "Vulnerable", [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", }; static enum retbleed_mitigation retbleed_mitigation __ro_after_init = @@ -842,6 +849,7 @@ early_param("retbleed", retbleed_parse_cmdline); #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" #define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" +#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" static void __init retbleed_select_mitigation(void) { @@ -858,12 +866,15 @@ static void __init retbleed_select_mitigation(void) case RETBLEED_CMD_AUTO: default: - if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) - break; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + + /* + * The Intel mitigation (IBRS) was already selected in + * spectre_v2_select_mitigation(). + */ + break; } @@ -893,15 +904,31 @@ static void __init retbleed_select_mitigation(void) break; } + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + switch (spectre_v2_enabled) { + case SPECTRE_V2_IBRS: + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + break; + case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_EIBRS_LFENCE: + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + break; + default: + pr_err(RETBLEED_INTEL_MSG); + } + } + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); } #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = SPECTRE_V2_USER_NONE; static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 796cc55313f4a..bd15392ffa0ee 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1263,24 +1263,24 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | - BIT(7) | BIT(0xB), MMIO), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), + BIT(7) | BIT(0xB), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), @@ -1390,7 +1390,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !arch_cap_mmio_immune(ia32_cap)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); - if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) + if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) setup_force_cpu_bug(X86_BUG_RETBLEED); if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) From 7b2649892c7728d4ad662d75a887f8b43a209189 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:58 +0200 Subject: [PATCH 261/299] intel_idle: Disable IBRS during long idle commit bf5835bcdb9635c97f85120dba9bfa21e111130f upstream. Having IBRS enabled while the SMT sibling is idle unnecessarily slows down the running sibling. OTOH, disabling IBRS around idle takes two MSR writes, which will increase the idle latency. Therefore, only disable IBRS around deeper idle states. Shallow idle states are bounded by the tick in duration, since NOHZ is not allowed for them by virtue of their short target residency. Only do this for mwait-driven idle, since that keeps interrupts disabled across idle, which makes disabling IBRS vs IRQ-entry a non-issue. Note: C6 is a random threshold, most importantly C1 probably shouldn't disable IBRS, benchmarking needed. Suggested-by: Tim Chen Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 6 ++++ drivers/idle/intel_idle.c | 44 ++++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e14046daa7ba3..ce1acb5571624 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -255,6 +255,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; extern void write_spec_ctrl_current(u64 val, bool force); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 37d2973bcb398..ca548b0f166de 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -79,6 +79,12 @@ void write_spec_ctrl_current(u64 val, bool force) wrmsrl(MSR_IA32_SPEC_CTRL, val); } +u64 spec_ctrl_current(void) +{ + return this_cpu_read(x86_spec_ctrl_current); +} +EXPORT_SYMBOL_GPL(spec_ctrl_current); + /* * The vendor and possibly platform specific bits which can be modified in * x86_spec_ctrl_base. diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c5a019eab5ec9..b463d85bfb358 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -47,11 +47,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -105,6 +107,12 @@ static unsigned int mwait_substates __initdata; */ #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) +/* + * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE + * above. + */ +#define CPUIDLE_FLAG_IBRS BIT(16) + /* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) @@ -159,6 +167,24 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, return ret; } +static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + bool smt_active = sched_smt_active(); + u64 spec_ctrl = spec_ctrl_current(); + int ret; + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + ret = __intel_idle(dev, drv, index); + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); + + return ret; +} + /** * intel_idle_s2idle - Ask the processor to enter the given idle state. * @dev: cpuidle device of the target CPU. @@ -680,7 +706,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, .target_residency = 200, .enter = &intel_idle, @@ -688,7 +714,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C7s", .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, .target_residency = 800, .enter = &intel_idle, @@ -696,7 +722,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C8", .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle, @@ -704,7 +730,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C9", .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, .target_residency = 5000, .enter = &intel_idle, @@ -712,7 +738,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C10", .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, .target_residency = 5000, .enter = &intel_idle, @@ -741,7 +767,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 133, .target_residency = 600, .enter = &intel_idle, @@ -1686,6 +1712,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) drv->states[drv->state_count].enter = intel_idle_irq; + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && + cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { + WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE); + drv->states[drv->state_count].enter = intel_idle_ibrs; + } + if ((disabled_states_mask & BIT(drv->state_count)) || ((icpu->use_acpi || force_use_acpi) && intel_idle_off_by_default(mwait_hint) && From 6864df0932578931f13c8de5006975345f8cea0d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:59 +0200 Subject: [PATCH 262/299] objtool: Update Retpoline validation commit 9bb2ec608a209018080ca262f771e6a9ff203b6f upstream. Update retpoline validation with the new CONFIG_RETPOLINE requirement of not having bare naked RET instructions. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 6 ++++++ arch/x86/mm/mem_encrypt_boot.S | 2 ++ arch/x86/xen/xen-head.S | 1 + tools/objtool/check.c | 19 +++++++++++++------ 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ce1acb5571624..455d79c6c2f39 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -75,6 +75,12 @@ .popsection .endm +/* + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. + */ +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index d94dea450fa60..9de3d900bc927 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -66,6 +66,7 @@ SYM_FUNC_START(sme_encrypt_execute) pop %rbp /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE ret int3 SYM_FUNC_END(sme_encrypt_execute) @@ -154,6 +155,7 @@ SYM_FUNC_START(__enc_copy) pop %r15 /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE ret int3 .L__enc_copy_end: diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 13af6fe453e3f..ffaa62167f6e8 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE ret /* * Xen will write the hypercall page, and sort out ENDBR. diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 558755043375d..43327b127f55d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2114,8 +2114,9 @@ static int read_retpoline_hints(struct objtool_file *file) } if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) { - WARN_FUNC("retpoline_safe hint not an indirect jump/call", + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN) { + WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", insn->sec, insn->offset); return -1; } @@ -3648,7 +3649,8 @@ static int validate_retpoline(struct objtool_file *file) for_each_insn(file, insn) { if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN) continue; if (insn->retpoline_safe) @@ -3663,9 +3665,14 @@ static int validate_retpoline(struct objtool_file *file) if (!strcmp(insn->sec->name, ".init.text") && !module) continue; - WARN_FUNC("indirect %s found in RETPOLINE build", - insn->sec, insn->offset, - insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + if (insn->type == INSN_RETURN) { + WARN_FUNC("'naked' return found in RETPOLINE build", + insn->sec, insn->offset); + } else { + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, + insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + } warnings++; } From 4a691f1e69163dcfb7b064a25a082071da0bb633 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:00 +0200 Subject: [PATCH 263/299] x86/xen: Rename SYS* entry points commit b75b7f8ef1148be1b9321ffc2f6c19238904b438 upstream. Native SYS{CALL,ENTER} entry points are called entry_SYS{CALL,ENTER}_{64,compat}, make sure the Xen versions are named consistently. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/setup.c | 6 +++--- arch/x86/xen/xen-asm.S | 20 ++++++++++---------- arch/x86/xen/xen-ops.h | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 81aa46f770c54..cfa99e8f054be 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -918,7 +918,7 @@ void xen_enable_sysenter(void) if (!boot_cpu_has(sysenter_feature)) return; - ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); + ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); if(ret != 0) setup_clear_cpu_cap(sysenter_feature); } @@ -927,7 +927,7 @@ void xen_enable_syscall(void) { int ret; - ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); if (ret != 0) { printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); /* Pretty fatal; 64-bit userspace has no other @@ -936,7 +936,7 @@ void xen_enable_syscall(void) if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { ret = register_callback(CALLBACKTYPE_syscall32, - xen_syscall32_target); + xen_entry_SYSCALL_compat); if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index caa9bc2fa1008..6bf9d45b91784 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -234,7 +234,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) */ /* Normal 64-bit system call target */ -SYM_CODE_START(xen_syscall_target) +SYM_CODE_START(xen_entry_SYSCALL_64) UNWIND_HINT_EMPTY ENDBR popq %rcx @@ -249,12 +249,12 @@ SYM_CODE_START(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -SYM_CODE_END(xen_syscall_target) +SYM_CODE_END(xen_entry_SYSCALL_64) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -SYM_CODE_START(xen_syscall32_target) +SYM_CODE_START(xen_entry_SYSCALL_compat) UNWIND_HINT_EMPTY ENDBR popq %rcx @@ -269,10 +269,10 @@ SYM_CODE_START(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ -SYM_CODE_START(xen_sysenter_target) +SYM_CODE_START(xen_entry_SYSENTER_compat) UNWIND_HINT_EMPTY ENDBR /* @@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSENTER_compat_after_hwframe -SYM_CODE_END(xen_sysenter_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) #else /* !CONFIG_IA32_EMULATION */ -SYM_CODE_START(xen_syscall32_target) -SYM_CODE_START(xen_sysenter_target) +SYM_CODE_START(xen_entry_SYSCALL_compat) +SYM_CODE_START(xen_entry_SYSENTER_compat) UNWIND_HINT_EMPTY ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -SYM_CODE_END(xen_sysenter_target) -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) +SYM_CODE_END(xen_entry_SYSCALL_compat) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index fd0fec6e92f4c..9a8bb972193d8 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,10 +10,10 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_failsafe_callback[]; -void xen_sysenter_target(void); +void xen_entry_SYSENTER_compat(void); #ifdef CONFIG_X86_64 -void xen_syscall_target(void); -void xen_syscall32_target(void); +void xen_entry_SYSCALL_64(void); +void xen_entry_SYSCALL_compat(void); #endif extern void *xen_initial_gdt; From b75fada7f3cbbaf78beceb1bb71b67c2db3b473d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:01 +0200 Subject: [PATCH 264/299] x86/xen: Add UNTRAIN_RET commit d147553b64bad34d2f92cb7d8ba454ae95c3baac upstream. Ensure the Xen entry also passes through UNTRAIN_RET. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 34647657ad198..0d6cac831de03 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -323,6 +323,12 @@ SYM_CODE_END(ret_from_fork) #endif .endm +SYM_CODE_START_LOCAL(xen_error_entry) + UNWIND_HINT_FUNC + UNTRAIN_RET + RET +SYM_CODE_END(xen_error_entry) + /** * idtentry_body - Macro to emit code calling the C function * @cfunc: C function to be called @@ -342,7 +348,7 @@ SYM_CODE_END(ret_from_fork) * switch the CR3. So it can skip invoking error_entry(). */ ALTERNATIVE "call error_entry; movq %rax, %rsp", \ - "", X86_FEATURE_XENPV + "call xen_error_entry", X86_FEATURE_XENPV ENCODE_FRAME_POINTER UNWIND_HINT_REGS From bbcfdf144d2d9394e3f4aa129463dec8f53bd3b1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:02 +0200 Subject: [PATCH 265/299] x86/bugs: Add retbleed=ibpb commit 3ebc170068885b6fc7bedda6c667bb2c4d533159 upstream. jmp2ret mitigates the easy-to-attack case at relatively low overhead. It mitigates the long speculation windows after a mispredicted RET, but it does not mitigate the short speculation window from arbitrary instruction boundaries. On Zen2, there is a chicken bit which needs setting, which mitigates "arbitrary instruction boundaries" down to just "basic block boundaries". But there is no fix for the short speculation window on basic block boundaries, other than to flush the entire BTB to evict all attacker predictions. On the spectrum of "fast & blurry" -> "safe", there is (on top of STIBP or no-SMT): 1) Nothing System wide open 2) jmp2ret May stop a script kiddy 3) jmp2ret+chickenbit Raises the bar rather further 4) IBPB Only thing which can count as "safe". Tentative numbers put IBPB-on-entry at a 2.5x hit on Zen2, and a 10x hit on Zen1 according to lmbench. [ bp: Fixup feature bit comments, document option, 32-bit build fix. ] Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 3 ++ arch/x86/entry/Makefile | 2 +- arch/x86/entry/entry.S | 22 ++++++++++ arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 8 +++- arch/x86/kernel/cpu/bugs.c | 43 +++++++++++++++---- 6 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 arch/x86/entry/entry.S diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 61d9eced3070d..eb92195ca0155 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5134,6 +5134,9 @@ disabling SMT if necessary for the full mitigation (only on Zen1 and older without STIBP). + ibpb - mitigate short speculation windows on + basic block boundaries too. Safe, highest + perf impact. unret - force enable untrained return thunks, only effective on AMD f15h-f17h based systems. diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 7fec5dcf64386..eeadbd7d92cc5 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_common.o += -fno-stack-protector -obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o obj-y += vdso/ diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 0000000000000..bfb7bcb362bcf --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common place for both 32- and 64-bit entry routines. + */ + +#include +#include +#include + +.pushsection .noinstr.text, "ax" + +SYM_FUNC_START(entry_ibpb) + movl $MSR_IA32_PRED_CMD, %ecx + movl $PRED_CMD_IBPB, %eax + xorl %edx, %edx + wrmsr + RET +SYM_FUNC_END(entry_ibpb) +/* For KVM */ +EXPORT_SYMBOL_GPL(entry_ibpb); + +.popsection diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 43e2d7d4cbf19..d1e977c704faf 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -295,7 +295,7 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -/* FREE! (11*32+10) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ /* FREE! (11*32+11) */ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 455d79c6c2f39..05dd75478d7b3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -123,14 +123,17 @@ * return thunk isn't mapped into the userspace tables (then again, AMD * typically has NO_MELTDOWN). * - * Doesn't clobber any registers but does require a stable stack. + * While zen_untrain_ret() doesn't clobber anything but requires stack, + * entry_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. */ .macro UNTRAIN_RET #ifdef CONFIG_RETPOLINE - ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET + ALTERNATIVE_2 "", \ + "call zen_untrain_ret", X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm @@ -147,6 +150,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern void __x86_return_thunk(void); extern void zen_untrain_ret(void); +extern void entry_ibpb(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ca548b0f166de..5cafe7b2a97f6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -798,6 +798,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = enum retbleed_mitigation { RETBLEED_MITIGATION_NONE, RETBLEED_MITIGATION_UNRET, + RETBLEED_MITIGATION_IBPB, RETBLEED_MITIGATION_IBRS, RETBLEED_MITIGATION_EIBRS, }; @@ -806,11 +807,13 @@ enum retbleed_mitigation_cmd { RETBLEED_CMD_OFF, RETBLEED_CMD_AUTO, RETBLEED_CMD_UNRET, + RETBLEED_CMD_IBPB, }; const char * const retbleed_strings[] = { [RETBLEED_MITIGATION_NONE] = "Vulnerable", [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", + [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", }; @@ -840,6 +843,8 @@ static int __init retbleed_parse_cmdline(char *str) retbleed_cmd = RETBLEED_CMD_AUTO; } else if (!strcmp(str, "unret")) { retbleed_cmd = RETBLEED_CMD_UNRET; + } else if (!strcmp(str, "ibpb")) { + retbleed_cmd = RETBLEED_CMD_IBPB; } else if (!strcmp(str, "nosmt")) { retbleed_nosmt = true; } else { @@ -854,11 +859,13 @@ static int __init retbleed_parse_cmdline(char *str) early_param("retbleed", retbleed_parse_cmdline); #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" -#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" +#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n" #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" static void __init retbleed_select_mitigation(void) { + bool mitigate_smt = false; + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) return; @@ -870,11 +877,21 @@ static void __init retbleed_select_mitigation(void) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; break; + case RETBLEED_CMD_IBPB: + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + break; + case RETBLEED_CMD_AUTO: default: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + + if (IS_ENABLED(CONFIG_RETPOLINE) && + IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + else + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } /* * The Intel mitigation (IBRS) was already selected in @@ -890,26 +907,34 @@ static void __init retbleed_select_mitigation(void) if (!IS_ENABLED(CONFIG_RETPOLINE) || !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { pr_err(RETBLEED_COMPILER_MSG); - retbleed_mitigation = RETBLEED_MITIGATION_NONE; - break; + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + goto retbleed_force_ibpb; } setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); - if (!boot_cpu_has(X86_FEATURE_STIBP) && - (retbleed_nosmt || cpu_mitigations_auto_nosmt())) - cpu_smt_disable(false); - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) pr_err(RETBLEED_UNTRAIN_MSG); + + mitigate_smt = true; + break; + + case RETBLEED_MITIGATION_IBPB: +retbleed_force_ibpb: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; break; default: break; } + if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && + (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + /* * Let IBRS trump all on Intel without affecting the effects of the * retbleed= cmdline option. From 4c7f90f8a9554dd6a7e614529b3d7450a8dc84e2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 15:07:19 -0700 Subject: [PATCH 266/299] x86/bugs: Do IBPB fallback check only once commit 0fe4aeea9c01baabecc8c3afc7889c809d939bc2 upstream. When booting with retbleed=auto, if the kernel wasn't built with CONFIG_CC_HAS_RETURN_THUNK, the mitigation falls back to IBPB. Make sure a warning is printed in that case. The IBPB fallback check is done twice, but it really only needs to be done once. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5cafe7b2a97f6..1d514442cb235 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -884,18 +884,13 @@ static void __init retbleed_select_mitigation(void) case RETBLEED_CMD_AUTO: default: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - - if (IS_ENABLED(CONFIG_RETPOLINE) && - IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) - retbleed_mitigation = RETBLEED_MITIGATION_UNRET; - else - retbleed_mitigation = RETBLEED_MITIGATION_IBPB; - } + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; /* - * The Intel mitigation (IBRS) was already selected in - * spectre_v2_select_mitigation(). + * The Intel mitigation (IBRS or eIBRS) was already selected in + * spectre_v2_select_mitigation(). 'retbleed_mitigation' will + * be set accordingly below. */ break; From a8a370f08eb55359980fe29165569333b1e0c54d Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 11 Jul 2022 10:51:17 -0300 Subject: [PATCH 267/299] objtool: Add entry UNRET validation commit a09a6e2399ba0595c3042b3164f3ca68a3cff33e upstream. Since entry asm is tricky, add a validation pass that ensures the retbleed mitigation has been done before the first actual RET instruction. Entry points are those that either have UNWIND_HINT_ENTRY, which acts as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or those that have UWIND_HINT_IRET_REGS at +0. This is basically a variant of validate_branch() that is intra-function and it will simply follow all branches from marked entry points and ensures that all paths lead to ANNOTATE_UNRET_END. If a path hits RET or an indirection the path is a fail and will be reported. There are 3 ANNOTATE_UNRET_END instances: - UNTRAIN_RET itself - exception from-kernel; this path doesn't need UNTRAIN_RET - all early exceptions; these also don't need UNTRAIN_RET Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov [cascardo: tools/objtool/builtin-check.c no link option validation] [cascardo: tools/objtool/check.c opts.ibt is ibt] [cascardo: tools/objtool/include/objtool/builtin.h leave unret option as bool, no struct opts] [cascardo: objtool is still called from scripts/link-vmlinux.sh] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 3 +- arch/x86/entry/entry_64_compat.S | 6 +- arch/x86/include/asm/nospec-branch.h | 12 ++ arch/x86/include/asm/unwind_hints.h | 4 + arch/x86/kernel/head_64.S | 5 + arch/x86/xen/xen-asm.S | 10 +- include/linux/objtool.h | 3 + scripts/link-vmlinux.sh | 3 + tools/include/linux/objtool.h | 3 + tools/objtool/builtin-check.c | 3 +- tools/objtool/check.c | 177 ++++++++++++++++++++++-- tools/objtool/include/objtool/builtin.h | 2 +- tools/objtool/include/objtool/check.h | 11 +- 13 files changed, 220 insertions(+), 22 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 0d6cac831de03..3f02b3bb54b1e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -85,7 +85,7 @@ */ SYM_CODE_START(entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR swapgs @@ -1088,6 +1088,7 @@ SYM_CODE_START_LOCAL(error_entry) .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY leaq 8(%rsp), %rax /* return pt_regs pointer */ + ANNOTATE_UNRET_END RET .Lbstep_iret: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 46f5a2886fca6..4f479cdc7a408 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -49,7 +49,7 @@ * 0(%ebp) arg6 */ SYM_CODE_START(entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* Interrupts are off on entry. */ SWAPGS @@ -204,7 +204,7 @@ SYM_CODE_END(entry_SYSENTER_compat) * 0(%esp) arg6 */ SYM_CODE_START(entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* Interrupts are off on entry. */ swapgs @@ -353,7 +353,7 @@ SYM_CODE_END(entry_SYSCALL_compat) * ebp arg6 */ SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* * Interrupts are off on entry. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 05dd75478d7b3..bba42bd78edfb 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -81,6 +81,17 @@ */ #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE +/* + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into it's own annotation. + */ +.macro ANNOTATE_UNRET_END +#ifdef CONFIG_DEBUG_ENTRY + ANNOTATE_RETPOLINE_SAFE + nop +#endif +.endm + /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 @@ -131,6 +142,7 @@ */ .macro UNTRAIN_RET #ifdef CONFIG_RETPOLINE + ANNOTATE_UNRET_END ALTERNATIVE_2 "", \ "call zen_untrain_ret", X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 8b33674288ea7..6f70fe4c93f26 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -11,6 +11,10 @@ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 .endm +.macro UNWIND_HINT_ENTRY + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 +.endm + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 .if \base == %rsp .if \indirect diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b8e3019547a5d..3178fd81f93fc 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -334,6 +334,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -393,6 +395,7 @@ SYM_CODE_END(early_idt_handler_array) SYM_CODE_START_LOCAL(early_idt_handler_common) UNWIND_HINT_IRET_REGS offset=16 + ANNOTATE_UNRET_END /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -442,6 +445,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 6bf9d45b91784..6b4fdf6b95422 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR pop %rcx pop %r11 @@ -235,7 +235,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) /* Normal 64-bit system call target */ SYM_CODE_START(xen_entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR popq %rcx popq %r11 @@ -255,7 +255,7 @@ SYM_CODE_END(xen_entry_SYSCALL_64) /* 32-bit compat syscall target */ SYM_CODE_START(xen_entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR popq %rcx popq %r11 @@ -273,7 +273,7 @@ SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ SYM_CODE_START(xen_entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* * NB: Xen is polite and clears TF from EFLAGS for us. This means @@ -297,7 +297,7 @@ SYM_CODE_END(xen_entry_SYSENTER_compat) SYM_CODE_START(xen_entry_SYSCALL_compat) SYM_CODE_START(xen_entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax diff --git a/include/linux/objtool.h b/include/linux/objtool.h index c81ea2264ad8a..47a4e23749cbd 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -32,11 +32,14 @@ struct unwind_hint { * * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 #ifdef CONFIG_STACK_VALIDATION diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 9361a1ef02c99..b5159fe1c4fc9 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -130,6 +130,9 @@ objtool_link() if is_enabled CONFIG_VMLINUX_VALIDATION; then objtoolopt="${objtoolopt} --noinstr" + if is_enabled CONFIG_RETPOLINE; then + objtoolopt="${objtoolopt} --unret" + fi fi if [ -n "${objtoolopt}" ]; then diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index c81ea2264ad8a..47a4e23749cbd 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -32,11 +32,14 @@ struct unwind_hint { * * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 #ifdef CONFIG_STACK_VALIDATION diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index fc6975ab8b06e..c2bcf7703453f 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -21,7 +21,7 @@ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, lto, vmlinux, mcount, noinstr, backup, sls, dryrun, - ibt; + ibt, unret; static const char * const check_usage[] = { "objtool check [] file.o", @@ -37,6 +37,7 @@ const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), + OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 43327b127f55d..f351af3e912ad 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2031,16 +2031,24 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; - if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); - if (sym && sym->bind == STB_GLOBAL && - insn->type != INSN_ENDBR && !insn->noendbr) { - WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", - insn->sec, insn->offset); + if (sym && sym->bind == STB_GLOBAL) { + if (ibt && insn->type != INSN_ENDBR && !insn->noendbr) { + WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", + insn->sec, insn->offset); + } + + insn->entry = 1; } } + if (hint->type == UNWIND_HINT_TYPE_ENTRY) { + hint->type = UNWIND_HINT_TYPE_CALL; + insn->entry = 1; + } + if (hint->type == UNWIND_HINT_TYPE_FUNC) { insn->cfi = &func_cfi; continue; @@ -2115,8 +2123,9 @@ static int read_retpoline_hints(struct objtool_file *file) if (insn->type != INSN_JUMP_DYNAMIC && insn->type != INSN_CALL_DYNAMIC && - insn->type != INSN_RETURN) { - WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop", insn->sec, insn->offset); return -1; } @@ -3412,8 +3421,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - visited = 1 << state.uaccess; - if (insn->visited) { + visited = VISITED_BRANCH << state.uaccess; + if (insn->visited & VISITED_BRANCH_MASK) { if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) return 1; @@ -3642,6 +3651,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) return warnings; } +/* + * Validate rethunk entry constraint: must untrain RET before the first RET. + * + * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes + * before an actual RET instruction. + */ +static int validate_entry(struct objtool_file *file, struct instruction *insn) +{ + struct instruction *next, *dest; + int ret, warnings = 0; + + for (;;) { + next = next_insn_to_validate(file, insn); + + if (insn->visited & VISITED_ENTRY) + return 0; + + insn->visited |= VISITED_ENTRY; + + if (!insn->ignore_alts && !list_empty(&insn->alts)) { + struct alternative *alt; + bool skip_orig = false; + + list_for_each_entry(alt, &insn->alts, list) { + if (alt->skip_orig) + skip_orig = true; + + ret = validate_entry(file, alt->insn); + if (ret) { + if (backtrace) + BT_FUNC("(alt)", insn); + return ret; + } + } + + if (skip_orig) + return 0; + } + + switch (insn->type) { + + case INSN_CALL_DYNAMIC: + case INSN_JUMP_DYNAMIC: + case INSN_JUMP_DYNAMIC_CONDITIONAL: + WARN_FUNC("early indirect call", insn->sec, insn->offset); + return 1; + + case INSN_JUMP_UNCONDITIONAL: + case INSN_JUMP_CONDITIONAL: + if (!is_sibling_call(insn)) { + if (!insn->jump_dest) { + WARN_FUNC("unresolved jump target after linking?!?", + insn->sec, insn->offset); + return -1; + } + ret = validate_entry(file, insn->jump_dest); + if (ret) { + if (backtrace) { + BT_FUNC("(branch%s)", insn, + insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : ""); + } + return ret; + } + + if (insn->type == INSN_JUMP_UNCONDITIONAL) + return 0; + + break; + } + + /* fallthrough */ + case INSN_CALL: + dest = find_insn(file, insn->call_dest->sec, + insn->call_dest->offset); + if (!dest) { + WARN("Unresolved function after linking!?: %s", + insn->call_dest->name); + return -1; + } + + ret = validate_entry(file, dest); + if (ret) { + if (backtrace) + BT_FUNC("(call)", insn); + return ret; + } + /* + * If a call returns without error, it must have seen UNTRAIN_RET. + * Therefore any non-error return is a success. + */ + return 0; + + case INSN_RETURN: + WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset); + return 1; + + case INSN_NOP: + if (insn->retpoline_safe) + return 0; + break; + + default: + break; + } + + if (!next) { + WARN_FUNC("teh end!", insn->sec, insn->offset); + return -1; + } + insn = next; + } + + return warnings; +} + +/* + * Validate that all branches starting at 'insn->entry' encounter UNRET_END + * before RET. + */ +static int validate_unret(struct objtool_file *file) +{ + struct instruction *insn; + int ret, warnings = 0; + + for_each_insn(file, insn) { + if (!insn->entry) + continue; + + ret = validate_entry(file, insn); + if (ret < 0) { + WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset); + return ret; + } + warnings += ret; + } + + return warnings; +} + static int validate_retpoline(struct objtool_file *file) { struct instruction *insn; @@ -4005,6 +4153,17 @@ int check(struct objtool_file *file) goto out; warnings += ret; + if (unret) { + /* + * Must be after validate_branch() and friends, it plays + * further games with insn->visited. + */ + ret = validate_unret(file); + if (ret < 0) + return ret; + warnings += ret; + } + if (ibt) { ret = validate_ibt(file); if (ret < 0) diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index c39dbfaef6dcb..d92abd0c228e8 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -10,7 +10,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, lto, vmlinux, mcount, noinstr, backup, sls, dryrun, - ibt; + ibt, unret; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index f10d7374f388a..0eeedeacbefb0 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -51,8 +51,10 @@ struct instruction { ignore_alts : 1, hint : 1, retpoline_safe : 1, - noendbr : 1; - /* 2 bit hole */ + noendbr : 1, + entry : 1; + /* 1 bit hole */ + s8 instr; u8 visited; /* u8 hole */ @@ -69,6 +71,11 @@ struct instruction { struct cfi_state *cfi; }; +#define VISITED_BRANCH 0x01 +#define VISITED_BRANCH_UACCESS 0x02 +#define VISITED_BRANCH_MASK 0x03 +#define VISITED_ENTRY 0x04 + static inline bool is_static_jump(struct instruction *insn) { return insn->type == INSN_JUMP_CONDITIONAL || From 80f8a9e9d530fec6094641b96fe3e5b5acb44830 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:04 +0200 Subject: [PATCH 268/299] x86/cpu/amd: Add Spectral Chicken commit d7caac991feeef1b871ee6988fd2c9725df09039 upstream. Zen2 uarchs have an undocumented, unnamed, MSR that contains a chicken bit for some speculation behaviour. It needs setting. Note: very belatedly AMD released naming; it's now officially called MSR_AMD64_DE_CFG2 and MSR_AMD64_DE_CFG2_SUPPRESS_NOBR_PRED_BIT but shall remain the SPECTRAL CHICKEN. Suggested-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 3 +++ arch/x86/kernel/cpu/amd.c | 23 ++++++++++++++++++++++- arch/x86/kernel/cpu/cpu.h | 2 ++ arch/x86/kernel/cpu/hygon.c | 6 ++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index bd283cdd963af..0ff05c7882b20 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -553,6 +553,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0c0b09796ced3..8cf0659c05219 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -862,6 +862,26 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +void init_spectral_chicken(struct cpuinfo_x86 *c) +{ + u64 value; + + /* + * On Zen2 we offer this chicken (bit) on the altar of Speculation. + * + * This suppresses speculation from the middle of a basic block, i.e. it + * suppresses non-branch predictions. + * + * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { + if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { + value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } +} + static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); @@ -907,7 +927,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; - case 0x17: fallthrough; + case 0x17: init_spectral_chicken(c); + fallthrough; case 0x19: init_amd_zn(c); break; } diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2a8e584fc9913..7c9b5893c30ab 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -61,6 +61,8 @@ static inline void tsx_init(void) { } static inline void tsx_ap_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ +extern void init_spectral_chicken(struct cpuinfo_x86 *c); + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 3fcdda4c1e114..21fd425088fe5 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c) /* get apicid instead of initial apic id from cpuid */ c->apicid = hard_smp_processor_id(); + /* + * XXX someone from Hygon needs to confirm this DTRT + * + init_spectral_chicken(c); + */ + set_cpu_cap(c, X86_FEATURE_ZEN); set_cpu_cap(c, X86_FEATURE_CPB); From 3d6bdd768577847ae680b27bfb50c6de2037afe7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:05 +0200 Subject: [PATCH 269/299] x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n commit b2620facef4889fefcbf2e87284f34dcd4189bce upstream. If a kernel is built with CONFIG_RETPOLINE=n, but the user still wants to mitigate Spectre v2 using IBRS or eIBRS, the RSB filling will be silently disabled. There's nothing retpoline-specific about RSB buffer filling. Remove the CONFIG_RETPOLINE guards around it. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 2 -- arch/x86/entry/entry_64.S | 2 -- arch/x86/include/asm/nospec-branch.h | 2 -- 3 files changed, 6 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 8874208440664..e309e71560389 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm) movl %ebx, PER_CPU_VAR(__stack_chk_guard) #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* Restore flags or the incoming task to restore AC state. */ popfl diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3f02b3bb54b1e..3d3c682e4e757 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -250,7 +250,6 @@ SYM_FUNC_START(__switch_to_asm) movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -259,7 +258,6 @@ SYM_FUNC_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popq %r15 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index bba42bd78edfb..08b03c12e6c25 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -122,11 +122,9 @@ * monstrosity above, manually. */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) .Lskip_rsb_\@: -#endif .endm /* From 3e89c42462722bbf778ac1e97236dca518fabbf9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:06 +0200 Subject: [PATCH 270/299] x86/speculation: Fix firmware entry SPEC_CTRL handling commit e6aa13622ea8283cc699cac5d018cc40a2ba2010 upstream. The firmware entry code may accidentally clear STIBP or SSBD. Fix that. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 08b03c12e6c25..dee9ef77af138 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -285,18 +285,16 @@ extern u64 spec_ctrl_current(void); */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) From ff110fe719555fd358ac9e0bd0ca549fae3e26e9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:07 +0200 Subject: [PATCH 271/299] x86/speculation: Fix SPEC_CTRL write on SMT state change commit 56aa4d221f1ee2c3a49b45b800778ec6e0ab73c5 upstream. If the SMT state changes, SSBD might get accidentally disabled. Fix that. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1d514442cb235..24fd2ff6e8af6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1451,7 +1451,8 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { - write_spec_ctrl_current(x86_spec_ctrl_base, true); + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + write_spec_ctrl_current(val, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ From 8a95fadc8f3264dc98376d0de66ec59dd9eafb6f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:08 +0200 Subject: [PATCH 272/299] x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit commit bbb69e8bee1bd882784947095ffb2bfe0f7c9470 upstream. There's no need to recalculate the host value for every entry/exit. Just use the cached value in spec_ctrl_current(). Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 24fd2ff6e8af6..c17593310890c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -208,7 +208,7 @@ void __init check_bugs(void) void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = x86_spec_ctrl_base; + u64 msrval, guestval, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); /* Is MSR_SPEC_CTRL implemented ? */ @@ -221,15 +221,6 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) guestval = hostval & ~x86_spec_ctrl_mask; guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) - hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - - /* Conditional STIBP enabled? */ - if (static_branch_unlikely(&switch_to_cond_stibp)) - hostval |= stibp_tif_to_spec_ctrl(ti->flags); - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -1390,7 +1381,6 @@ static void __init spectre_v2_select_mitigation(void) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); if (spectre_v2_in_ibrs_mode(mode)) { - /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; write_spec_ctrl_current(x86_spec_ctrl_base, true); } From 7377eea29dbcad2ad042eee66df17c11b8421654 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 17 Jun 2022 12:12:48 -0700 Subject: [PATCH 273/299] x86/speculation: Remove x86_spec_ctrl_mask commit acac5e98ef8d638a411cfa2ee676c87e1973f126 upstream. This mask has been made redundant by kvm_spec_ctrl_test_value(). And it doesn't even work when MSR interception is disabled, as the guest can just write to SPEC_CTRL directly. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Paolo Bonzini Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c17593310890c..d4a17e77b5f40 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -85,12 +85,6 @@ u64 spec_ctrl_current(void) } EXPORT_SYMBOL_GPL(spec_ctrl_current); -/* - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. - */ -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; - /* * AMD specific MSR info for Speculative Store Bypass control. * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). @@ -146,10 +140,6 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - /* Allow STIBP in MSR_SPEC_CTRL if supported */ - if (boot_cpu_has(X86_FEATURE_STIBP)) - x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -208,19 +198,10 @@ void __init check_bugs(void) void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = spec_ctrl_current(); + u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); - /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - /* - * Restrict guest_spec_ctrl to supported values. Clear the - * modifiable bits in the host base value and or the - * modifiable bits from the guest value. - */ - guestval = hostval & ~x86_spec_ctrl_mask; - guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -1658,16 +1639,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) break; } - /* - * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper - * bit in the mask to allow guests to use the mitigation even in the - * case where the host does not enable it. - */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; - } - /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. From 43827446da732ed012c9008c429424f81e36331b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 24 Jun 2022 12:52:40 +0200 Subject: [PATCH 274/299] objtool: Re-add UNWIND_HINT_{SAVE_RESTORE} commit 8faea26e611189e933ea2281975ff4dc7c1106b6 upstream. Commit c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints") removed the save/restore unwind hints because they were no longer needed. Now they're going to be needed again so re-add them. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/unwind_hints.h | 12 ++++++-- include/linux/objtool.h | 6 ++-- tools/include/linux/objtool.h | 6 ++-- tools/objtool/check.c | 40 +++++++++++++++++++++++++++ tools/objtool/include/objtool/check.h | 19 +++++++------ 5 files changed, 68 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 6f70fe4c93f26..f66fbe6537dd7 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -8,11 +8,11 @@ #ifdef __ASSEMBLY__ .macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 .endm .macro UNWIND_HINT_ENTRY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 + UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 .endm .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 @@ -56,6 +56,14 @@ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + #else #define UNWIND_HINT_FUNC \ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 47a4e23749cbd..376110ead758e 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -40,6 +40,8 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 #define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_STACK_VALIDATION @@ -125,7 +127,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -178,7 +180,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 47a4e23749cbd..376110ead758e 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -40,6 +40,8 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 #define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_STACK_VALIDATION @@ -125,7 +127,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -178,7 +180,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f351af3e912ad..41be9f09d3079 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2031,6 +2031,17 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; + if (hint->type == UNWIND_HINT_TYPE_SAVE) { + insn->hint = false; + insn->save = true; + continue; + } + + if (hint->type == UNWIND_HINT_TYPE_RESTORE) { + insn->restore = true; + continue; + } + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); @@ -3436,6 +3447,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, state.instr += insn->instr; if (insn->hint) { + if (insn->restore) { + struct instruction *save_insn, *i; + + i = insn; + save_insn = NULL; + + sym_for_each_insn_continue_reverse(file, func, i) { + if (i->save) { + save_insn = i; + break; + } + } + + if (!save_insn) { + WARN_FUNC("no corresponding CFI save for CFI restore", + sec, insn->offset); + return 1; + } + + if (!save_insn->visited) { + WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", + sec, insn->offset); + return 1; + } + + insn->cfi = save_insn->cfi; + nr_cfi_reused++; + } + state.cfi = *insn->cfi; } else { /* XXX track if we actually changed state.cfi */ diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 0eeedeacbefb0..036129cebeeef 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -46,18 +46,19 @@ struct instruction { enum insn_type type; unsigned long immediate; - u8 dead_end : 1, - ignore : 1, - ignore_alts : 1, - hint : 1, - retpoline_safe : 1, - noendbr : 1, - entry : 1; - /* 1 bit hole */ + u16 dead_end : 1, + ignore : 1, + ignore_alts : 1, + hint : 1, + save : 1, + restore : 1, + retpoline_safe : 1, + noendbr : 1, + entry : 1; + /* 7 bit hole */ s8 instr; u8 visited; - /* u8 hole */ struct alt_group *alt_group; struct symbol *call_dest; From bcb9508413dc8a73cb8abd761a85dc5c6f9bd911 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:11 +0200 Subject: [PATCH 275/299] KVM: VMX: Flatten __vmx_vcpu_run() commit 8bd200d23ec42d66ccd517a72dd0b9cc6132d2fd upstream. Move the vmx_vm{enter,exit}() functionality into __vmx_vcpu_run(). This will make it easier to do the spec_ctrl handling before the first RET. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmenter.S | 119 ++++++++++++++----------------------- 1 file changed, 46 insertions(+), 73 deletions(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 435c187927c48..c83163fb2e9c7 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -30,68 +30,6 @@ .section .noinstr.text, "ax" -/** - * vmx_vmenter - VM-Enter the current loaded VMCS - * - * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME - * - * Returns: - * %RFLAGS.CF is set on VM-Fail Invalid - * %RFLAGS.ZF is set on VM-Fail Valid - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * Note that VMRESUME/VMLAUNCH fall-through and return directly if - * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump - * to vmx_vmexit. - */ -SYM_FUNC_START_LOCAL(vmx_vmenter) - /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ - je 2f - -1: vmresume - RET - -2: vmlaunch - RET - -3: cmpb $0, kvm_rebooting - je 4f - RET -4: ud2 - - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) - -SYM_FUNC_END(vmx_vmenter) - -/** - * vmx_vmexit - Handle a VMX VM-Exit - * - * Returns: - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump - * here after hardware loads the host's state, i.e. this is the destination - * referred to by VMCS.HOST_RIP. - */ -SYM_FUNC_START(vmx_vmexit) -#ifdef CONFIG_RETPOLINE - ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE - /* Preserve guest's RAX, it's used to stuff the RSB. */ - push %_ASM_AX - - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE - - /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ - or $1, %_ASM_AX - - pop %_ASM_AX -.Lvmexit_skip_rsb: -#endif - RET -SYM_FUNC_END(vmx_vmexit) - /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) @@ -124,8 +62,7 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Copy @launched to BL, _ASM_ARG3 is volatile. */ mov %_ASM_ARG3B, %bl - /* Adjust RSP to account for the CALL to vmx_vmenter(). */ - lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 + lea (%_ASM_SP), %_ASM_ARG2 call vmx_update_host_rsp /* Load @regs to RAX. */ @@ -154,11 +91,37 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Enter guest mode */ - call vmx_vmenter + /* Check EFLAGS.ZF from 'testb' above */ + je .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" + * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. + * So this isn't a typical function and objtool needs to be told to + * save the unwind state here and restore it below. + */ + UNWIND_HINT_SAVE + +/* + * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at + * the 'vmx_vmexit' label below. + */ +.Lvmresume: + vmresume + jmp .Lvmfail + +.Lvmlaunch: + vmlaunch + jmp .Lvmfail + + _ASM_EXTABLE(.Lvmresume, .Lfixup) + _ASM_EXTABLE(.Lvmlaunch, .Lfixup) - /* Jump on VM-Fail. */ - jbe 2f +SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + + /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ + UNWIND_HINT_RESTORE + ENDBR /* Temporarily save guest's RAX. */ push %_ASM_AX @@ -185,9 +148,13 @@ SYM_FUNC_START(__vmx_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* IMPORTANT: RSB must be stuffed before the first return. */ + FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ xor %eax, %eax +.Lclear_regs: /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that are reloaded @@ -197,7 +164,7 @@ SYM_FUNC_START(__vmx_vcpu_run) * free. RSP and RAX are exempt as RSP is restored by hardware during * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. */ -1: xor %ecx, %ecx + xor %ecx, %ecx xor %edx, %edx xor %ebx, %ebx xor %ebp, %ebp @@ -216,8 +183,8 @@ SYM_FUNC_START(__vmx_vcpu_run) /* "POP" @regs. */ add $WORD_SIZE, %_ASM_SP - pop %_ASM_BX + pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 pop %r13 @@ -230,9 +197,15 @@ SYM_FUNC_START(__vmx_vcpu_run) pop %_ASM_BP RET - /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ -2: mov $1, %eax - jmp 1b +.Lfixup: + cmpb $0, kvm_rebooting + jne .Lvmfail + ud2 +.Lvmfail: + /* VM-Fail: set return value to 1 */ + mov $1, %eax + jmp .Lclear_regs + SYM_FUNC_END(__vmx_vcpu_run) From 245800423a576925d0bd571eacf09cc12e94a9ff Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:12 +0200 Subject: [PATCH 276/299] KVM: VMX: Convert launched argument to flags commit bb06650634d3552c0f8557e9d16aa1a408040e28 upstream. Convert __vmx_vcpu_run()'s 'launched' argument to 'flags', in preparation for doing SPEC_CTRL handling immediately after vmexit, which will need another flag. This is much easier than adding a fourth argument, because this code supports both 32-bit and 64-bit, and the fourth argument on 32-bit would have to be pushed on the stack. Note that __vmx_vcpu_run_flags() is called outside of the noinstr critical section because it will soon start calling potentially traceable functions. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/run_flags.h | 7 +++++++ arch/x86/kvm/vmx/vmenter.S | 9 +++++---- arch/x86/kvm/vmx/vmx.c | 17 ++++++++++++++--- arch/x86/kvm/vmx/vmx.h | 5 ++++- 5 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 arch/x86/kvm/vmx/run_flags.h diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ee7df31883cd9..28ccf25c41248 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3091,7 +3091,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) } vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + __vmx_vcpu_run_flags(vmx)); if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h new file mode 100644 index 0000000000000..57f4c664ea9c1 --- /dev/null +++ b/arch/x86/kvm/vmx/run_flags.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_RUN_FLAGS_H +#define __KVM_X86_VMX_RUN_FLAGS_H + +#define VMX_RUN_VMRESUME (1 << 0) + +#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index c83163fb2e9c7..ddc3bf85db33a 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -5,6 +5,7 @@ #include #include #include +#include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) @@ -34,7 +35,7 @@ * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) * @regs: unsigned long * (to guest registers) - * @launched: %true if the VMCS has been launched + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -59,7 +60,7 @@ SYM_FUNC_START(__vmx_vcpu_run) */ push %_ASM_ARG2 - /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + /* Copy @flags to BL, _ASM_ARG3 is volatile. */ mov %_ASM_ARG3B, %bl lea (%_ASM_SP), %_ASM_ARG2 @@ -69,7 +70,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb %bl, %bl + testb $VMX_RUN_VMRESUME, %bl /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -92,7 +93,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov VCPU_RAX(%_ASM_AX), %_ASM_AX /* Check EFLAGS.ZF from 'testb' above */ - je .Lvmlaunch + jz .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index da103938e62ee..a961e276273b0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -839,6 +839,16 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) MSR_IA32_SPEC_CTRL); } +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) +{ + unsigned int flags = 0; + + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + + return flags; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -6827,7 +6837,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) } static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, - struct vcpu_vmx *vmx) + struct vcpu_vmx *vmx, + unsigned long flags) { guest_state_enter_irqoff(); @@ -6846,7 +6857,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, native_write_cr2(vcpu->arch.cr2); vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + flags); vcpu->arch.cr2 = native_read_cr2(); @@ -6954,7 +6965,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); /* The actual VMENTER/EXIT is in the .noinstr.text section. */ - vmx_vcpu_enter_exit(vcpu, vmx); + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); /* * We do not use IBRS in the kernel. If this vCPU has used the diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 8d2342ede0c59..1b1982448aa46 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -13,6 +13,7 @@ #include "vmcs.h" #include "vmx_ops.h" #include "cpuid.h" +#include "run_flags.h" #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 @@ -404,7 +405,9 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); From d58141112c9965092a0f39d354b22394882585b4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:13 +0200 Subject: [PATCH 277/299] KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS commit fc02735b14fff8c6678b521d324ade27b1a3d4cf upstream. On eIBRS systems, the returns in the vmexit return path from __vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks. Fix that by moving the post-vmexit spec_ctrl handling to immediately after the vmexit. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 4 +++ arch/x86/kvm/vmx/run_flags.h | 1 + arch/x86/kvm/vmx/vmenter.S | 49 +++++++++++++++++++++------- arch/x86/kvm/vmx/vmx.c | 48 +++++++++++++++------------ arch/x86/kvm/vmx/vmx.h | 1 + 6 files changed, 73 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dee9ef77af138..ccde87e6eabb0 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -274,6 +274,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +extern u64 x86_spec_ctrl_current; extern void write_spec_ctrl_current(u64 val, bool force); extern u64 spec_ctrl_current(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d4a17e77b5f40..f3079734a5839 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -195,6 +195,10 @@ void __init check_bugs(void) #endif } +/* + * NOTE: For VMX, this function is not called in the vmexit path. + * It uses vmx_spec_ctrl_restore_host() instead. + */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index 57f4c664ea9c1..edc3f16cc1896 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -3,5 +3,6 @@ #define __KVM_X86_VMX_RUN_FLAGS_H #define VMX_RUN_VMRESUME (1 << 0) +#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index ddc3bf85db33a..8641ea74a307d 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -33,9 +33,10 @@ /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode - * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @vmx: struct vcpu_vmx * * @regs: unsigned long * (to guest registers) - * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run) #endif push %_ASM_BX + /* Save @vmx for SPEC_CTRL handling */ + push %_ASM_ARG1 + + /* Save @flags for SPEC_CTRL handling */ + push %_ASM_ARG3 + /* * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and * @regs is needed after VM-Exit to save the guest's register values. @@ -149,25 +156,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) mov %r15, VCPU_R15(%_ASM_AX) #endif - /* IMPORTANT: RSB must be stuffed before the first return. */ - FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE - - /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ - xor %eax, %eax + /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ + xor %ebx, %ebx .Lclear_regs: /* - * Clear all general purpose registers except RSP and RAX to prevent + * Clear all general purpose registers except RSP and RBX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers * could lead to speculative execution with the guest's values. * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially * free. RSP and RAX are exempt as RSP is restored by hardware during - * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return + * value. */ + xor %eax, %eax xor %ecx, %ecx xor %edx, %edx - xor %ebx, %ebx xor %ebp, %ebp xor %esi, %esi xor %edi, %edi @@ -185,6 +190,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) /* "POP" @regs. */ add $WORD_SIZE, %_ASM_SP + /* + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * + * For retpoline, RSB filling is needed to prevent poisoned RSB entries + * and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. But it does need to be enabled + * before the first unbalanced RET. + */ + + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ + + call vmx_spec_ctrl_restore_host + + /* Put return value in AX */ + mov %_ASM_BX, %_ASM_AX + pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 @@ -204,7 +231,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ud2 .Lvmfail: /* VM-Fail: set return value to 1 */ - mov $1, %eax + mov $1, %_ASM_BX jmp .Lclear_regs SYM_FUNC_END(__vmx_vcpu_run) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a961e276273b0..0c9b8a16ea7ec 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -846,6 +846,14 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) if (vmx->loaded_vmcs->launched) flags |= VMX_RUN_VMRESUME; + /* + * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free + * to change it directly without causing a vmexit. In that case read + * it after vmexit and store it in vmx->spec_ctrl. + */ + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + flags |= VMX_RUN_SAVE_SPEC_CTRL; + return flags; } @@ -6824,6 +6832,26 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, + unsigned int flags) +{ + u64 hostval = this_cpu_read(x86_spec_ctrl_current); + + if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) + return; + + if (flags & VMX_RUN_SAVE_SPEC_CTRL) + vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. + */ + if (vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); +} + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { switch (to_vmx(vcpu)->exit_reason.basic) { @@ -6967,26 +6995,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) /* The actual VMENTER/EXIT is in the .noinstr.text section. */ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); - /* All fields are clean at this point */ if (static_branch_unlikely(&enable_evmcs)) { current_evmcs->hv_clean_fields |= diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 1b1982448aa46..da654af12ccba 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -405,6 +405,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, unsigned int flags); From 48fe9931c7ddf18063aa0c8d16c3831f9d9a16c4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:14 +0200 Subject: [PATCH 278/299] KVM: VMX: Fix IBRS handling after vmexit commit bea7e31a5caccb6fe8ed989c065072354f0ecb52 upstream. For legacy IBRS to work, the IBRS bit needs to be always re-written after vmexit, even if it's already on. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0c9b8a16ea7ec..7715462d0123f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6845,8 +6845,13 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, /* * If the guest/host SPEC_CTRL values differ, restore the host value. + * + * For legacy IBRS, the IBRS bit always needs to be written after + * transitioning from a less privileged predictor mode, regardless of + * whether the guest/host values differ. */ - if (vmx->spec_ctrl != hostval) + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || + vmx->spec_ctrl != hostval) native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); barrier_nospec(); From 8c38306e2e9257af4af2819aa287a4711ff36329 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:15 +0200 Subject: [PATCH 279/299] x86/speculation: Fill RSB on vmexit for IBRS commit 9756bba28470722dacb79ffce554336dd1f6a6cd upstream. Prevent RSB underflow/poisoning attacks with RSB. While at it, add a bunch of comments to attempt to document the current state of tribal knowledge about RSB attacks and what exactly is being mitigated. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/bugs.c | 63 +++++++++++++++++++++++++++--- arch/x86/kvm/vmx/vmenter.S | 6 +-- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d1e977c704faf..00aa5f4df1304 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -/* FREE! ( 7*32+13) */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f3079734a5839..e3f2c9a33a4d4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1394,16 +1394,69 @@ static void __init spectre_v2_select_mitigation(void) pr_info("%s\n", spectre_v2_strings[mode]); /* - * If spectre v2 protection has been enabled, unconditionally fill - * RSB during a context switch; this protects against two independent - * issues: + * If Spectre v2 protection has been enabled, fill the RSB during a + * context switch. In general there are two types of RSB attacks + * across context switches, for which the CALLs/RETs may be unbalanced. * - * - RSB underflow (and switch to BTB) on Skylake+ - * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs + * 1) RSB underflow + * + * Some Intel parts have "bottomless RSB". When the RSB is empty, + * speculated return targets may come from the branch predictor, + * which could have a user-poisoned BTB or BHB entry. + * + * AMD has it even worse: *all* returns are speculated from the BTB, + * regardless of the state of the RSB. + * + * When IBRS or eIBRS is enabled, the "user -> kernel" attack + * scenario is mitigated by the IBRS branch prediction isolation + * properties, so the RSB buffer filling wouldn't be necessary to + * protect against this type of attack. + * + * The "user -> user" attack scenario is mitigated by RSB filling. + * + * 2) Poisoned RSB entry + * + * If the 'next' in-kernel return stack is shorter than 'prev', + * 'next' could be tricked into speculating with a user-poisoned RSB + * entry. + * + * The "user -> kernel" attack scenario is mitigated by SMEP and + * eIBRS. + * + * The "user -> user" scenario, also known as SpectreBHB, requires + * RSB clearing. + * + * So to mitigate all cases, unconditionally fill RSB on context + * switches. + * + * FIXME: Is this pointless for retbleed-affected AMD? */ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + /* + * Similar to context switches, there are two types of RSB attacks + * after vmexit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS, on the other hand, has RSB-poisoning protections, so it + * doesn't need RSB clearing after vmexit. + */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) || + boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS * and Enhanced IBRS protect firmware too, so enable IBRS around diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 8641ea74a307d..4c743fa98a1ff 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -194,15 +194,15 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before * the first unbalanced RET after vmexit! * - * For retpoline, RSB filling is needed to prevent poisoned RSB entries - * and (in some cases) RSB underflow. + * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB + * entries and (in some cases) RSB underflow. * * eIBRS has its own protection against poisoned RSB, so it doesn't * need the RSB filling sequence. But it does need to be enabled * before the first unbalanced RET. */ - FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT pop %_ASM_ARG2 /* @flags */ pop %_ASM_ARG1 /* @vmx */ From afd743f6dde87296c6f3414706964c491bb85862 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Jun 2022 23:16:16 +0200 Subject: [PATCH 280/299] KVM: VMX: Prevent RSB underflow before vmenter commit 07853adc29a058c5fd143c14e5ac528448a72ed9 upstream. On VMX, there are some balanced returns between the time the guest's SPEC_CTRL value is written, and the vmenter. Balanced returns (matched by a preceding call) are usually ok, but it's at least theoretically possible an NMI with a deep call stack could empty the RSB before one of the returns. For maximum paranoia, don't allow *any* returns (balanced or otherwise) between the SPEC_CTRL write and the vmenter. [ bp: Fix 32-bit build. ] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov [cascardo: header conflict fixup at arch/x86/kernel/asm-offsets.c] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/asm-offsets.c | 6 ++++++ arch/x86/kernel/cpu/bugs.c | 4 ++-- arch/x86/kvm/vmx/capabilities.h | 4 ++-- arch/x86/kvm/vmx/vmenter.S | 29 +++++++++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.c | 8 -------- arch/x86/kvm/vmx/vmx.h | 4 ++-- arch/x86/kvm/vmx/vmx_ops.h | 2 +- 7 files changed, 42 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 9fb0a2f8b62a2..6434ea9413485 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -18,6 +18,7 @@ #include #include #include +#include "../kvm/vmx/vmx.h" #ifdef CONFIG_XEN #include @@ -90,4 +91,9 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); + + if (IS_ENABLED(CONFIG_KVM_INTEL)) { + BLANK(); + OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); + } } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e3f2c9a33a4d4..31bf5307a7509 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -196,8 +196,8 @@ void __init check_bugs(void) } /* - * NOTE: For VMX, this function is not called in the vmexit path. - * It uses vmx_spec_ctrl_restore_host() instead. + * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is + * done in vmenter.S. */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 3f430e2183753..c0e24826a86f7 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -4,8 +4,8 @@ #include -#include "lapic.h" -#include "x86.h" +#include "../lapic.h" +#include "../x86.h" extern bool __read_mostly enable_vpid; extern bool __read_mostly flexpriority_enabled; diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 4c743fa98a1ff..4182c7ffc9091 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -1,9 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include #include #include #include +#include #include #include "run_flags.h" @@ -73,6 +75,33 @@ SYM_FUNC_START(__vmx_vcpu_run) lea (%_ASM_SP), %_ASM_ARG2 call vmx_update_host_rsp + ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL + + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. + */ + mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI + movl VMX_spec_ctrl(%_ASM_DI), %edi + movl PER_CPU_VAR(x86_spec_ctrl_current), %esi + cmp %edi, %esi + je .Lspec_ctrl_done + mov $MSR_IA32_SPEC_CTRL, %ecx + xor %edx, %edx + mov %edi, %eax + wrmsr + +.Lspec_ctrl_done: + + /* + * Since vmentry is serializing on affected CPUs, there's no need for + * an LFENCE to stop speculation from skipping the wrmsr. + */ + /* Load @regs to RAX. */ mov (%_ASM_SP), %_ASM_AX diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7715462d0123f..4b6a0268c78e3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6989,14 +6989,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) kvm_wait_lapic_expire(vcpu); - /* - * If this vCPU has touched SPEC_CTRL, restore the guest's value if - * it's non-zero. Since vmentry is serialising on affected CPUs, there - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - /* The actual VMENTER/EXIT is in the .noinstr.text section. */ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index da654af12ccba..1e7f9453894b1 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -8,11 +8,11 @@ #include #include "capabilities.h" -#include "kvm_cache_regs.h" +#include "../kvm_cache_regs.h" #include "posted_intr.h" #include "vmcs.h" #include "vmx_ops.h" -#include "cpuid.h" +#include "../cpuid.h" #include "run_flags.h" #define MSR_TYPE_R 1 diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 5e7f412257801..5cfc49ddb1b44 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -8,7 +8,7 @@ #include "evmcs.h" #include "vmcs.h" -#include "x86.h" +#include "../x86.h" asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, From 373e6942143b5ca27b24ee953ae450dd26a0dbfb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jun 2022 14:03:25 +0200 Subject: [PATCH 281/299] x86/common: Stamp out the stepping madness commit 7a05bc95ed1c5a59e47aaade9fb4083c27de9e62 upstream. The whole MMIO/RETBLEED enumeration went overboard on steppings. Get rid of all that and simply use ANY. If a future stepping of these models would not be affected, it had better set the relevant ARCH_CAP_$FOO_NO bit in IA32_ARCH_CAPABILITIES. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 37 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index bd15392ffa0ee..5b1deb3bdf85e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1258,32 +1258,27 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | - BIT(7) | BIT(0xB), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), From 409f6047a43315f2b9661149cb29d6f2ef2440fe Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Fri, 24 Jun 2022 14:41:21 +0100 Subject: [PATCH 282/299] x86/cpu/amd: Enumerate BTC_NO commit 26aae8ccbc1972233afd08fb3f368947c0314265 upstream. BTC_NO indicates that hardware is not susceptible to Branch Type Confusion. Zen3 CPUs don't suffer BTC. Hypervisors are expected to synthesise BTC_NO when it is appropriate given the migration pool, to prevent kernels using heuristics. [ bp: Massage. ] Signed-off-by: Andrew Cooper Signed-off-by: Borislav Petkov [cascardo: no X86_FEATURE_BRS] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/amd.c | 21 +++++++++++++++------ arch/x86/kernel/cpu/common.c | 6 ++++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 00aa5f4df1304..fb425c1159f5a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -321,6 +321,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8cf0659c05219..9cfd11f7ba112 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -890,12 +890,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) node_reclaim_distance = 32; #endif - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. - * Always set it, except when running under a hypervisor. - */ - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) - set_cpu_cap(c, X86_FEATURE_CPB); + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + + /* Erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) + set_cpu_cap(c, X86_FEATURE_CPB); + + /* + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. + */ + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } } static void init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5b1deb3bdf85e..162c422fd1cd6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1385,8 +1385,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !arch_cap_mmio_immune(ia32_cap)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); - if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) - setup_force_cpu_bug(X86_BUG_RETBLEED); + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; From 813423f90f0553c81c5fb4d531fc688a5d506b24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Jun 2022 22:21:17 +0000 Subject: [PATCH 283/299] x86/retbleed: Add fine grained Kconfig knobs commit f43b9876e857c739d407bc56df288b0ebe1a9164 upstream. Do fine-grained Kconfig for all the various retbleed parts. NOTE: if your compiler doesn't support return thunks this will silently 'upgrade' your mitigation to IBPB, you might not like this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov [cascardo: there is no CONFIG_OBJTOOL] [cascardo: objtool calling and option parsing has changed] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 106 ++++++++++++++++++----- arch/x86/Makefile | 8 +- arch/x86/entry/calling.h | 4 + arch/x86/include/asm/disabled-features.h | 18 +++- arch/x86/include/asm/linkage.h | 4 +- arch/x86/include/asm/nospec-branch.h | 10 ++- arch/x86/include/asm/static_call.h | 2 +- arch/x86/kernel/alternative.c | 5 ++ arch/x86/kernel/cpu/amd.c | 2 + arch/x86/kernel/cpu/bugs.c | 42 +++++---- arch/x86/kernel/static_call.c | 2 +- arch/x86/kvm/emulate.c | 4 +- arch/x86/lib/retpoline.S | 4 + scripts/Makefile.build | 1 + scripts/link-vmlinux.sh | 2 +- security/Kconfig | 11 --- tools/objtool/builtin-check.c | 3 +- tools/objtool/check.c | 9 +- tools/objtool/include/objtool/builtin.h | 2 +- 19 files changed, 170 insertions(+), 69 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c429a5c4f945f..4d1d87f76a74f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -457,30 +457,6 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH -config RETPOLINE - bool "Avoid speculative indirect branches in kernel" - default y - help - Compile kernel with the retpoline compiler options to guard against - kernel-to-user data leaks by avoiding speculative indirect - branches. Requires a compiler with -mindirect-branch=thunk-extern - support for full protection. The kernel may run slower. - -config CC_HAS_SLS - def_bool $(cc-option,-mharden-sls=all) - -config CC_HAS_RETURN_THUNK - def_bool $(cc-option,-mfunction-return=thunk-extern) - -config SLS - bool "Mitigate Straight-Line-Speculation" - depends on CC_HAS_SLS && X86_64 - default n - help - Compile the kernel with straight-line-speculation options to guard - against straight line speculation. The kernel image might be slightly - larger. - config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) @@ -2452,6 +2428,88 @@ source "kernel/livepatch/Kconfig" endmenu +config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + +menuconfig SPECULATION_MITIGATIONS + bool "Mitigations for speculative execution vulnerabilities" + default y + help + Say Y here to enable options which enable mitigations for + speculative execution hardware vulnerabilities. + + If you say N, all mitigations will be disabled. You really + should know what you are doing to say so. + +if SPECULATION_MITIGATIONS + +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on (X86_64 || X86_PAE) + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pti.rst for more details. + +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + +config RETHUNK + bool "Enable return-thunks" + depends on RETPOLINE && CC_HAS_RETURN_THUNK + default y + help + Compile the kernel with the return-thunks compiler option to guard + against kernel-to-user data leaks by avoiding return speculation. + Requires a compiler with -mfunction-return=thunk-extern + support for full protection. The kernel may run slower. + +config CPU_UNRET_ENTRY + bool "Enable UNRET on kernel entry" + depends on CPU_SUP_AMD && RETHUNK + default y + help + Compile the kernel with support for the retbleed=unret mitigation. + +config CPU_IBPB_ENTRY + bool "Enable IBPB on kernel entry" + depends on CPU_SUP_AMD + default y + help + Compile the kernel with support for the retbleed=ibpb mitigation. + +config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" + depends on CPU_SUP_INTEL + default y + help + Compile the kernel with support for the spectre_v2=ibrs mitigation. + This mitigates both spectre_v2 and retbleed at great cost to + performance. + +config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 + default n + help + Compile the kernel with straight-line-speculation options to guard + against straight line speculation. The kernel image might be slightly + larger. + +endif + config ARCH_HAS_ADD_PAGES def_bool y depends on ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/x86/Makefile b/arch/x86/Makefile index ca3c0de24bc35..fb0de637411cb 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -15,14 +15,18 @@ endif ifdef CONFIG_CC_IS_GCC RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) -RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) endif ifdef CONFIG_CC_IS_CLANG RETPOLINE_CFLAGS := -mretpoline-external-thunk RETPOLINE_VDSO_CFLAGS := -mretpoline -RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) endif + +ifdef CONFIG_RETHUNK +RETHUNK_CFLAGS := -mfunction-return=thunk-extern +RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) +endif + export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 99d2410c18d1c..b00a3a95fbfab 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -296,6 +296,7 @@ For 32-bit we have the following conventions - kernel is built with * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. */ .macro IBRS_ENTER save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS movl $MSR_IA32_SPEC_CTRL, %ecx @@ -316,6 +317,7 @@ For 32-bit we have the following conventions - kernel is built with shr $32, %rdx wrmsr .Lend_\@: +#endif .endm /* @@ -323,6 +325,7 @@ For 32-bit we have the following conventions - kernel is built with * regs. Must be called after the last RET. */ .macro IBRS_EXIT save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS movl $MSR_IA32_SPEC_CTRL, %ecx @@ -337,6 +340,7 @@ For 32-bit we have the following conventions - kernel is built with shr $32, %rdx wrmsr .Lend_\@: +#endif .endm /* diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 65f1d930ff4d7..f7be189e97232 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -60,9 +60,19 @@ # define DISABLE_RETPOLINE 0 #else # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ - (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ - (1 << (X86_FEATURE_RETHUNK & 31)) | \ - (1 << (X86_FEATURE_UNRET & 31))) + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) #endif #ifdef CONFIG_INTEL_IOMMU_SVM @@ -91,7 +101,7 @@ #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 (DISABLE_RETPOLINE) +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index e3ae331cabb14..73ca200498356 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -19,7 +19,7 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif -#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk #else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS @@ -31,7 +31,7 @@ #else /* __ASSEMBLY__ */ -#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define ASM_RET "jmp __x86_return_thunk\n\t" #else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ccde87e6eabb0..bb05ed4f46bdb 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -127,6 +127,12 @@ .Lskip_rsb_\@: .endm +#ifdef CONFIG_CPU_UNRET_ENTRY +#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#else +#define CALL_ZEN_UNTRAIN_RET "" +#endif + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the * return thunk isn't mapped into the userspace tables (then again, AMD @@ -139,10 +145,10 @@ * where we have a stack but before any RET instruction. */ .macro UNTRAIN_RET -#ifdef CONFIG_RETPOLINE +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) ANNOTATE_UNRET_END ALTERNATIVE_2 "", \ - "call zen_untrain_ret", X86_FEATURE_UNRET, \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 70cc9ccb80298..343b722ccaf21 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -46,7 +46,7 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_RETHUNK #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") #else diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 92bbe4ffd0a13..46427b785bc89 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -508,6 +508,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } +#ifdef CONFIG_RETHUNK /* * Rewrite the compiler generated return thunk tail-calls. * @@ -569,6 +570,10 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) } } } +#else +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +#endif /* CONFIG_RETHUNK */ + #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9cfd11f7ba112..35d5288394cb8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -864,6 +864,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) void init_spectral_chicken(struct cpuinfo_x86 *c) { +#ifdef CONFIG_CPU_UNRET_ENTRY u64 value; /* @@ -880,6 +881,7 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); } } +#endif } static void init_amd_zn(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 31bf5307a7509..5ea9fce997137 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -835,7 +835,6 @@ static int __init retbleed_parse_cmdline(char *str) early_param("retbleed", retbleed_parse_cmdline); #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" -#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n" #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" static void __init retbleed_select_mitigation(void) @@ -850,18 +849,33 @@ static void __init retbleed_select_mitigation(void) return; case RETBLEED_CMD_UNRET: - retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + } else { + pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); + goto do_cmd_auto; + } break; case RETBLEED_CMD_IBPB: - retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto do_cmd_auto; + } break; +do_cmd_auto: case RETBLEED_CMD_AUTO: default: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } /* * The Intel mitigation (IBRS or eIBRS) was already selected in @@ -874,14 +888,6 @@ static void __init retbleed_select_mitigation(void) switch (retbleed_mitigation) { case RETBLEED_MITIGATION_UNRET: - - if (!IS_ENABLED(CONFIG_RETPOLINE) || - !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { - pr_err(RETBLEED_COMPILER_MSG); - retbleed_mitigation = RETBLEED_MITIGATION_IBPB; - goto retbleed_force_ibpb; - } - setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); @@ -893,7 +899,6 @@ static void __init retbleed_select_mitigation(void) break; case RETBLEED_MITIGATION_IBPB: -retbleed_force_ibpb: setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); mitigate_smt = true; break; @@ -1264,6 +1269,12 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", mitigation_options[i].option); @@ -1321,7 +1332,8 @@ static void __init spectre_v2_select_mitigation(void) break; } - if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + boot_cpu_has_bug(X86_BUG_RETBLEED) && retbleed_cmd != RETBLEED_CMD_OFF && boot_cpu_has(X86_FEATURE_IBRS) && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index fe21fe7781852..be7038a0da4de 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -126,7 +126,7 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) } EXPORT_SYMBOL_GPL(arch_static_call_transform); -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_RETHUNK /* * This is called by apply_returns() to fix up static call trampolines, * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b01437015f992..db96bf7d1122d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -439,10 +439,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] * SETcc %al [3 bytes] - * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] + * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ +#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ IS_ENABLED(CONFIG_SLS)) #define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index fdd16163b996b..073289a55f849 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -72,6 +72,8 @@ SYM_CODE_END(__x86_indirect_thunk_array) * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. */ +#ifdef CONFIG_RETHUNK + .section .text.__x86.return_thunk /* @@ -136,3 +138,5 @@ SYM_FUNC_END(zen_untrain_ret) __EXPORT_THUNK(zen_untrain_ret) EXPORT_SYMBOL(__x86_return_thunk) + +#endif /* CONFIG_RETHUNK */ diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 33c1ed5815229..2a0521f77e5f0 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -233,6 +233,7 @@ objtool_args = \ $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ $(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \ $(if $(CONFIG_RETPOLINE), --retpoline) \ + $(if $(CONFIG_RETHUNK), --rethunk) \ $(if $(CONFIG_X86_SMAP), --uaccess) \ $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ $(if $(CONFIG_SLS), --sls) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index b5159fe1c4fc9..d4d028595fb4b 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -130,7 +130,7 @@ objtool_link() if is_enabled CONFIG_VMLINUX_VALIDATION; then objtoolopt="${objtoolopt} --noinstr" - if is_enabled CONFIG_RETPOLINE; then + if is_enabled CONFIG_CPU_UNRET_ENTRY; then objtoolopt="${objtoolopt} --unret" fi fi diff --git a/security/Kconfig b/security/Kconfig index 9b2c4925585a3..34e2d7edd0857 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -54,17 +54,6 @@ config SECURITY_NETWORK implement socket and networking access controls. If you are unsure how to answer this question, answer N. -config PAGE_TABLE_ISOLATION - bool "Remove the kernel mapping in user mode" - default y - depends on (X86_64 || X86_PAE) && !UML - help - This feature reduces the number of hardware side channels by - ensuring that the majority of kernel addresses are not mapped - into userspace. - - See Documentation/x86/pti.rst for more details. - config SECURITY_INFINIBAND bool "Infiniband Security Hooks" depends on SECURITY && INFINIBAND diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c2bcf7703453f..cd4bbc98f8c1f 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -21,7 +21,7 @@ bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, lto, vmlinux, mcount, noinstr, backup, sls, dryrun, - ibt, unret; + ibt, unret, rethunk; static const char * const check_usage[] = { "objtool check [] file.o", @@ -37,6 +37,7 @@ const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), + OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"), OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 41be9f09d3079..57b7a68d3b66a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3854,8 +3854,11 @@ static int validate_retpoline(struct objtool_file *file) continue; if (insn->type == INSN_RETURN) { - WARN_FUNC("'naked' return found in RETPOLINE build", - insn->sec, insn->offset); + if (rethunk) { + WARN_FUNC("'naked' return found in RETHUNK build", + insn->sec, insn->offset); + } else + continue; } else { WARN_FUNC("indirect %s found in RETPOLINE build", insn->sec, insn->offset, @@ -4228,7 +4231,9 @@ int check(struct objtool_file *file) if (ret < 0) goto out; warnings += ret; + } + if (rethunk) { ret = create_return_sites_sections(file); if (ret < 0) goto out; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index d92abd0c228e8..b6bb605faf3f7 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -10,7 +10,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, lto, vmlinux, mcount, noinstr, backup, sls, dryrun, - ibt, unret; + ibt, unret, rethunk; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); From ee02cbcebb0985394910d8868c6eef49184b20f7 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 15:01:15 -0700 Subject: [PATCH 284/299] x86/bugs: Add Cannon lake to RETBleed affected CPU list commit f54d45372c6ac9c993451de5e51312485f7d10bc upstream. Cannon lake is also affected by RETBleed, add it to the list. Fixes: 6ad0ad2bf8a6 ("x86/bugs: Report Intel retbleed vulnerability") Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 162c422fd1cd6..1f43ddf2ffc36 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1268,6 +1268,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), From df6fc784e8db07b8fe5aa1c624411f381f3abeaa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jul 2022 15:33:30 +0200 Subject: [PATCH 285/299] x86/entry: Move PUSH_AND_CLEAR_REGS() back into error_entry commit 2c08b9b38f5b0f4a6c2d29be22b695e4ec4a556b upstream. Commit ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") moved PUSH_AND_CLEAR_REGS out of error_entry, into its own function, in part to avoid calling error_entry() for XenPV. However, commit 7c81c0c9210c ("x86/entry: Avoid very early RET") had to change that because the 'ret' was too early and moved it into idtentry, bloating the text size, since idtentry is expanded for every exception vector. However, with the advent of xen_error_entry() in commit d147553b64bad ("x86/xen: Add UNTRAIN_RET") it became possible to remove PUSH_AND_CLEAR_REGS from idtentry, back into *error_entry(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov [cascardo: error_entry still does cld] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3d3c682e4e757..2ea185d47cfd1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -323,6 +323,8 @@ SYM_CODE_END(ret_from_fork) SYM_CODE_START_LOCAL(xen_error_entry) UNWIND_HINT_FUNC + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 UNTRAIN_RET RET SYM_CODE_END(xen_error_entry) @@ -334,9 +336,6 @@ SYM_CODE_END(xen_error_entry) */ .macro idtentry_body cfunc has_error_code:req - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER - /* * Call error_entry() and switch to the task stack if from userspace. * @@ -1035,6 +1034,10 @@ SYM_CODE_END(paranoid_exit) SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC cld + + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + testb $3, CS+8(%rsp) jz .Lerror_kernelspace From e2fe046fe230c5159660257712566a849847cffa Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 7 Jul 2022 13:41:52 -0300 Subject: [PATCH 286/299] x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported commit 2259da159fbe5dba8ac00b560cf00b6a6537fa18 upstream. There are some VM configurations which have Skylake model but do not support IBPB. In those cases, when using retbleed=ibpb, userspace is going to be killed and kernel is going to panic. If the CPU does not support IBPB, warn and proceed with the auto option. Also, do not fallback to IBPB on AMD/Hygon systems if it is not supported. Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5ea9fce997137..7e6d476b23217 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -858,7 +858,10 @@ static void __init retbleed_select_mitigation(void) break; case RETBLEED_CMD_IBPB: - if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (!boot_cpu_has(X86_FEATURE_IBPB)) { + pr_err("WARNING: CPU does not support IBPB.\n"); + goto do_cmd_auto; + } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { retbleed_mitigation = RETBLEED_MITIGATION_IBPB; } else { pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); @@ -873,7 +876,7 @@ static void __init retbleed_select_mitigation(void) boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; - else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) + else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) retbleed_mitigation = RETBLEED_MITIGATION_IBPB; } From 845351c56ca069162433cf935afb2257a4c021d1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 8 Jul 2022 19:10:11 +0200 Subject: [PATCH 287/299] x86/kexec: Disable RET on kexec commit 697977d8415d61f3acbc4ee6d564c9dcf0309507 upstream. All the invocations unroll to __x86_return_thunk and this file must be PIC independent. This fixes kexec on 64-bit AMD boxes. [ bp: Fix 32-bit build. ] Reported-by: Edward Tran Reported-by: Awais Tanveer Suggested-by: Ankur Arora Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Alexandre Chartre Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/relocate_kernel_32.S | 25 +++++++++++++++++++------ arch/x86/kernel/relocate_kernel_64.S | 23 +++++++++++++++++------ 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index fcc8a7699103a..c7c4b1917336d 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,10 +7,12 @@ #include #include #include +#include #include /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 2) @@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movl %edi, %eax addl $(identity_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %edx, %edx xorl %esi, %esi xorl %ebp, %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popl %edx movl CP_PA_SWAP_PAGE(%edi), %esp addl $PAGE_SIZE, %esp 2: + ANNOTATE_RETPOLINE_SAFE call *%edx /* get the re-entry point of the peer system */ @@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movl %edi, %eax addl $(virtual_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popl %edi popl %esi popl %ebx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) popl %edi popl %ebx popl %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index c1d8626c53b66..4809c0dc4eb0c 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -13,7 +13,8 @@ #include /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 3) @@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* jump to identity mapped page */ addq $(identity_mapped - relocate_kernel), %r8 pushq %r8 - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %r14d, %r14d xorl %r15d, %r15d - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popq %rdx @@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) call swap_pages movq $virtual_mapped, %rax pushq %rax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popq %r12 popq %rbp popq %rbx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) lea PAGE_SIZE(%rax), %rsi jmp 0b 3: - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size From ffdd31e8db4e94f399e68727fadf776fc0a2d1ba Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 8 Jul 2022 13:36:09 -0700 Subject: [PATCH 288/299] x86/speculation: Disable RRSBA behavior commit 4ad3278df6fe2b0852b00d5757fc2ccd8e92c26e upstream. Some Intel processors may use alternate predictors for RETs on RSB-underflow. This condition may be vulnerable to Branch History Injection (BHI) and intramode-BTI. Kernel earlier added spectre_v2 mitigation modes (eIBRS+Retpolines, eIBRS+LFENCE, Retpolines) which protect indirect CALLs and JMPs against such attacks. However, on RSB-underflow, RET target prediction may fallback to alternate predictors. As a result, RET's predicted target may get influenced by branch history. A new MSR_IA32_SPEC_CTRL bit (RRSBA_DIS_S) controls this fallback behavior when in kernel mode. When set, RETs will not take predictions from alternate predictors, hence mitigating RETs as well. Support for this is enumerated by CPUID.7.2.EDX[RRSBA_CTRL] (bit2). For spectre v2 mitigation, when a user selects a mitigation that protects indirect CALLs and JMPs against BHI and intramode-BTI, set RRSBA_DIS_S also to protect RETs for RSB-underflow case. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/msr-index.h | 9 +++++++++ arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/scattered.c | 1 + tools/arch/x86/include/asm/msr-index.h | 9 +++++++++ 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index fb425c1159f5a..5d09ded0c491f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -296,7 +296,7 @@ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ #define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -/* FREE! (11*32+11) */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 0ff05c7882b20..ad084326f24c2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -139,6 +141,13 @@ * bit available to control VERW * behavior. */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7e6d476b23217..f6dfa26ed88b9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1311,6 +1311,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +/* Disable in-kernel use of non-RSB RET predictors */ +static void __init spec_ctrl_disable_kernel_rrsba(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + if (ia32_cap & ARCH_CAP_RRSBA) { + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1405,6 +1421,16 @@ static void __init spectre_v2_select_mitigation(void) break; } + /* + * Disable alternate RSB predictions in kernel when indirect CALLs and + * JMPs gets protection against BHI and Intramode-BTI, but RET + * prediction from a non-RSB predictor is still a risk. + */ + if (mode == SPECTRE_V2_EIBRS_LFENCE || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 4143b1e4c5c6d..fcfb03f5f89b3 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 4425d6773183b..8a0a53cf360d5 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -138,6 +140,13 @@ * bit available to control VERW * behavior. */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* From 6461cc8f22a1266498290b122b56f040d51d9224 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Jul 2022 14:01:06 +0200 Subject: [PATCH 289/299] x86/static_call: Serialize __static_call_fixup() properly commit c27c753ea6fd1237f4f96abf8b623d7bab505513 upstream. __static_call_fixup() invokes __static_call_transform() without holding text_mutex, which causes lockdep to complain in text_poke_bp(). Adding the proper locking cures that, but as this is either used during early boot or during module finalizing, it's not required to use text_poke_bp(). Add an argument to __static_call_transform() which tells it to use text_poke_early() for it. Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/static_call.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index be7038a0da4de..aaaba85d6d7ff 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -25,7 +25,8 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; -static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) +static void __ref __static_call_transform(void *insn, enum insn_type type, + void *func, bool modinit) { const void *emulate = NULL; int size = CALL_INSN_SIZE; @@ -60,7 +61,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void if (memcmp(insn, code, size) == 0) return; - if (unlikely(system_state == SYSTEM_BOOTING)) + if (system_state == SYSTEM_BOOTING || modinit) return text_poke_early(insn, code, size); text_poke_bp(insn, code, size, emulate); @@ -114,12 +115,12 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) if (tramp) { __static_call_validate(tramp, true, true); - __static_call_transform(tramp, __sc_insn(!func, true), func); + __static_call_transform(tramp, __sc_insn(!func, true), func, false); } if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { __static_call_validate(site, tail, false); - __static_call_transform(site, __sc_insn(!func, tail), func); + __static_call_transform(site, __sc_insn(!func, tail), func, false); } mutex_unlock(&text_mutex); @@ -145,8 +146,10 @@ bool __static_call_fixup(void *tramp, u8 op, void *dest) return false; } + mutex_lock(&text_mutex); if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) - __static_call_transform(tramp, RET, NULL); + __static_call_transform(tramp, RET, NULL, true); + mutex_unlock(&text_mutex); return true; } From be6acce256f8ff2f6ff767465a7a6e46f087f9fc Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 13 Jul 2022 11:50:46 +0200 Subject: [PATCH 290/299] x86/asm/32: Fix ANNOTATE_UNRET_SAFE use on 32-bit commit 3131ef39fb03bbde237d0b8260445898f3dfda5b upstream. The build on x86_32 currently fails after commit 9bb2ec608a20 (objtool: Update Retpoline validation) with: arch/x86/kernel/../../x86/xen/xen-head.S:35: Error: no such instruction: `annotate_unret_safe' ANNOTATE_UNRET_SAFE is defined in nospec-branch.h. And head_32.S is missing this include. Fix this. Fixes: 9bb2ec608a20 ("objtool: Update Retpoline validation") Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/63e23f80-033f-f64e-7522-2816debbc367@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_32.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index eb8656bac99b6..9b7acc9c7874c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include From 713fbf23a1a05359512549fb9f5e5a5012ff8d88 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 13 Jul 2022 08:24:37 -0700 Subject: [PATCH 291/299] x86/speculation: Use DECLARE_PER_CPU for x86_spec_ctrl_current commit db886979683a8360ced9b24ab1125ad0c4d2cf76 upstream. Clang warns: arch/x86/kernel/cpu/bugs.c:58:21: error: section attribute is specified on redeclared variable [-Werror,-Wsection] DEFINE_PER_CPU(u64, x86_spec_ctrl_current); ^ arch/x86/include/asm/nospec-branch.h:283:12: note: previous declaration is here extern u64 x86_spec_ctrl_current; ^ 1 error generated. The declaration should be using DECLARE_PER_CPU instead so all attributes stay in sync. Cc: stable@vger.kernel.org Fixes: fc02735b14ff ("KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS") Reported-by: kernel test robot Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index bb05ed4f46bdb..10a3bfc1eb230 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,6 +11,7 @@ #include #include #include +#include #define RETPOLINE_THUNK_SIZE 32 @@ -280,7 +281,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; -extern u64 x86_spec_ctrl_current; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); extern void write_spec_ctrl_current(u64 val, bool force); extern u64 spec_ctrl_current(void); From fcd83bea60a116250c70552af0b334c79903bb54 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 15 Jul 2022 16:45:50 -0300 Subject: [PATCH 292/299] efi/x86: use naked RET on mixed mode call wrapper commit 51a6fa0732d6be6a44e0032752ad2ac10d67c796 upstream. When running with return thunks enabled under 32-bit EFI, the system crashes with: kernel tried to execute NX-protected page - exploit attempt? (uid: 0) BUG: unable to handle page fault for address: 000000005bc02900 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0011) - permissions violation PGD 18f7063 P4D 18f7063 PUD 18ff063 PMD 190e063 PTE 800000005bc02063 Oops: 0011 [#1] PREEMPT SMP PTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.19.0-rc6+ #166 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:0x5bc02900 Code: Unable to access opcode bytes at RIP 0x5bc028d6. RSP: 0018:ffffffffb3203e10 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000048 RDX: 000000000190dfac RSI: 0000000000001710 RDI: 000000007eae823b RBP: ffffffffb3203e70 R08: 0000000001970000 R09: ffffffffb3203e28 R10: 747563657865206c R11: 6c6977203a696665 R12: 0000000000001710 R13: 0000000000000030 R14: 0000000001970000 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8e013ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 0000000080050033 CR2: 000000005bc02900 CR3: 0000000001930000 CR4: 00000000000006f0 Call Trace: ? efi_set_virtual_address_map+0x9c/0x175 efi_enter_virtual_mode+0x4a6/0x53e start_kernel+0x67c/0x71e x86_64_start_reservations+0x24/0x2a x86_64_start_kernel+0xe9/0xf4 secondary_startup_64_no_verify+0xe5/0xeb That's because it cannot jump to the return thunk from the 32-bit code. Using a naked RET and marking it as safe allows the system to proceed booting. Fixes: aa3d480315ba ("x86: Use return-thunk in asm code") Reported-by: Guenter Roeck Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Tested-by: Guenter Roeck Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/efi/efi_thunk_64.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 854dd81804b73..bc740a7c438c9 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -23,6 +23,7 @@ #include #include #include +#include .text .code64 @@ -75,7 +76,9 @@ STACK_FRAME_NON_STANDARD __efi64_thunk 1: movq 0x20(%rsp), %rsp pop %rbx pop %rbp - RET + ANNOTATE_UNRET_SAFE + ret + int3 .code32 2: pushl $__KERNEL_CS From d417d9d84a0406afa422a957d2b47f1e054f9912 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 13 Jul 2022 14:12:41 -0300 Subject: [PATCH 293/299] x86/kvm: fix FASTOP_SIZE when return thunks are enabled commit 84e7051c0bc1f2a13101553959b3a9d9a8e24939 upstream. The return thunk call makes the fastop functions larger, just like IBT does. Consider a 16-byte FASTOP_SIZE when CONFIG_RETHUNK is enabled. Otherwise, functions will be incorrectly aligned and when computing their position for differently sized operators, they will executed in the middle or end of a function, which may as well be an int3, leading to a crash like: [ 36.091116] int3: 0000 [#1] SMP NOPTI [ 36.091119] CPU: 3 PID: 1371 Comm: qemu-system-x86 Not tainted 5.15.0-41-generic #44 [ 36.091120] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 36.091121] RIP: 0010:xaddw_ax_dx+0x9/0x10 [kvm] [ 36.091185] Code: 00 0f bb d0 c3 cc cc cc cc 48 0f bb d0 c3 cc cc cc cc 0f 1f 80 00 00 00 00 0f c0 d0 c3 cc cc cc cc 66 0f c1 d0 c3 cc cc cc cc <0f> 1f 80 00 00 00 00 0f c1 d0 c3 cc cc cc cc 48 0f c1 d0 c3 cc cc [ 36.091186] RSP: 0018:ffffb1f541143c98 EFLAGS: 00000202 [ 36.091188] RAX: 0000000089abcdef RBX: 0000000000000001 RCX: 0000000000000000 [ 36.091188] RDX: 0000000076543210 RSI: ffffffffc073c6d0 RDI: 0000000000000200 [ 36.091189] RBP: ffffb1f541143ca0 R08: ffff9f1803350a70 R09: 0000000000000002 [ 36.091190] R10: ffff9f1803350a70 R11: 0000000000000000 R12: ffff9f1803350a70 [ 36.091190] R13: ffffffffc077fee0 R14: 0000000000000000 R15: 0000000000000000 [ 36.091191] FS: 00007efdfce8d640(0000) GS:ffff9f187dd80000(0000) knlGS:0000000000000000 [ 36.091192] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.091192] CR2: 0000000000000000 CR3: 0000000009b62002 CR4: 0000000000772ee0 [ 36.091195] PKRU: 55555554 [ 36.091195] Call Trace: [ 36.091197] [ 36.091198] ? fastop+0x5a/0xa0 [kvm] [ 36.091222] x86_emulate_insn+0x7b8/0xe90 [kvm] [ 36.091244] x86_emulate_instruction+0x2f4/0x630 [kvm] [ 36.091263] ? kvm_arch_vcpu_load+0x7c/0x230 [kvm] [ 36.091283] ? vmx_prepare_switch_to_host+0xf7/0x190 [kvm_intel] [ 36.091290] complete_emulated_mmio+0x297/0x320 [kvm] [ 36.091310] kvm_arch_vcpu_ioctl_run+0x32f/0x550 [kvm] [ 36.091330] kvm_vcpu_ioctl+0x29e/0x6d0 [kvm] [ 36.091344] ? kvm_vcpu_ioctl+0x120/0x6d0 [kvm] [ 36.091357] ? __fget_files+0x86/0xc0 [ 36.091362] ? __fget_files+0x86/0xc0 [ 36.091363] __x64_sys_ioctl+0x92/0xd0 [ 36.091366] do_syscall_64+0x59/0xc0 [ 36.091369] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091370] ? do_syscall_64+0x69/0xc0 [ 36.091371] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091372] ? __x64_sys_writev+0x1c/0x30 [ 36.091374] ? do_syscall_64+0x69/0xc0 [ 36.091374] ? exit_to_user_mode_prepare+0x37/0xb0 [ 36.091378] ? syscall_exit_to_user_mode+0x27/0x50 [ 36.091379] ? do_syscall_64+0x69/0xc0 [ 36.091379] ? do_syscall_64+0x69/0xc0 [ 36.091380] ? do_syscall_64+0x69/0xc0 [ 36.091381] ? do_syscall_64+0x69/0xc0 [ 36.091381] entry_SYSCALL_64_after_hwframe+0x61/0xcb [ 36.091384] RIP: 0033:0x7efdfe6d1aff [ 36.091390] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00 [ 36.091391] RSP: 002b:00007efdfce8c460 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 36.091393] RAX: ffffffffffffffda RBX: 000000000000ae80 RCX: 00007efdfe6d1aff [ 36.091393] RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 000000000000000c [ 36.091394] RBP: 0000558f1609e220 R08: 0000558f13fb8190 R09: 00000000ffffffff [ 36.091394] R10: 0000558f16b5e950 R11: 0000000000000246 R12: 0000000000000000 [ 36.091394] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 [ 36.091396] [ 36.091397] Modules linked in: isofs nls_iso8859_1 kvm_intel joydev kvm input_leds serio_raw sch_fq_codel dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua ipmi_devintf ipmi_msghandler drm msr ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel virtio_net net_failover crypto_simd ahci xhci_pci cryptd psmouse virtio_blk libahci xhci_pci_renesas failover [ 36.123271] ---[ end trace db3c0ab5a48fabcc ]--- [ 36.123272] RIP: 0010:xaddw_ax_dx+0x9/0x10 [kvm] [ 36.123319] Code: 00 0f bb d0 c3 cc cc cc cc 48 0f bb d0 c3 cc cc cc cc 0f 1f 80 00 00 00 00 0f c0 d0 c3 cc cc cc cc 66 0f c1 d0 c3 cc cc cc cc <0f> 1f 80 00 00 00 00 0f c1 d0 c3 cc cc cc cc 48 0f c1 d0 c3 cc cc [ 36.123320] RSP: 0018:ffffb1f541143c98 EFLAGS: 00000202 [ 36.123321] RAX: 0000000089abcdef RBX: 0000000000000001 RCX: 0000000000000000 [ 36.123321] RDX: 0000000076543210 RSI: ffffffffc073c6d0 RDI: 0000000000000200 [ 36.123322] RBP: ffffb1f541143ca0 R08: ffff9f1803350a70 R09: 0000000000000002 [ 36.123322] R10: ffff9f1803350a70 R11: 0000000000000000 R12: ffff9f1803350a70 [ 36.123323] R13: ffffffffc077fee0 R14: 0000000000000000 R15: 0000000000000000 [ 36.123323] FS: 00007efdfce8d640(0000) GS:ffff9f187dd80000(0000) knlGS:0000000000000000 [ 36.123324] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.123325] CR2: 0000000000000000 CR3: 0000000009b62002 CR4: 0000000000772ee0 [ 36.123327] PKRU: 55555554 [ 36.123328] Kernel panic - not syncing: Fatal exception in interrupt [ 36.123410] Kernel Offset: 0x1400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 36.135305] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: aa3d480315ba ("x86: Use return-thunk in asm code") Signed-off-by: Thadeu Lima de Souza Cascardo Co-developed-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Paolo Bonzini Reported-by: Linux Kernel Functional Testing Message-Id: <20220713171241.184026-1-cascardo@canonical.com> Tested-by: Jack Wang Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index db96bf7d1122d..0a15b0fec6d94 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,8 +189,12 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) +#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) +#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ + IS_ENABLED(CONFIG_SLS)) +#define FASTOP_LENGTH (ENDBR_INSN_SIZE + 7 + RET_LENGTH) +#define FASTOP_SIZE (8 << ((FASTOP_LENGTH > 8) & 1) << ((FASTOP_LENGTH > 16) & 1)) +static_assert(FASTOP_LENGTH <= FASTOP_SIZE); struct opcode { u64 flags; @@ -442,8 +446,6 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ - IS_ENABLED(CONFIG_SLS)) #define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) static_assert(SETCC_LENGTH <= SETCC_ALIGN); From 9db4affd5e5d366d079ef6492c975fe00b926208 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 15 Jul 2022 07:34:55 -0400 Subject: [PATCH 294/299] KVM: emulate: do not adjust size of fastop and setcc subroutines commit 79629181607e801c0b41b8790ac4ee2eb5d7bc3e upstream. Instead of doing complicated calculations to find the size of the subroutines (which are even more complicated because they need to be stringified into an asm statement), just hardcode to 16. It is less dense for a few combinations of IBT/SLS/retbleed, but it has the advantage of being really simple. Cc: stable@vger.kernel.org # 5.15.x: 84e7051c0bc1: x86/kvm: fix FASTOP_SIZE when return thunks are enabled Cc: stable@vger.kernel.org Suggested-by: Linus Torvalds Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0a15b0fec6d94..f8382abe22ff8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,13 +189,6 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ - IS_ENABLED(CONFIG_SLS)) -#define FASTOP_LENGTH (ENDBR_INSN_SIZE + 7 + RET_LENGTH) -#define FASTOP_SIZE (8 << ((FASTOP_LENGTH > 8) & 1) << ((FASTOP_LENGTH > 16) & 1)) -static_assert(FASTOP_LENGTH <= FASTOP_SIZE); - struct opcode { u64 flags; u8 intercept; @@ -310,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * different operand sizes can be reached by calculation, rather than a jump * table (which would be bigger than the code). + * + * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR + * and 1 for the straight line speculation INT3, leaves 7 bytes for the + * body of the function. Currently none is larger than 4. */ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); +#define FASTOP_SIZE 16 + #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ @@ -446,9 +445,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) -#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) -static_assert(SETCC_LENGTH <= SETCC_ALIGN); +#define SETCC_ALIGN 16 #define FOP_SETCC(op) \ ".align " __stringify(SETCC_ALIGN) " \n\t" \ From 699b83c618473c1eeca68c950b832a3e8d3ffddc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Jul 2021 13:32:18 -0300 Subject: [PATCH 295/299] tools arch x86: Sync the msr-index.h copy with the kernel sources commit 91d248c3b903b46a58cbc7e8d38d684d3e4007c2 upstream. To pick up the changes from these csets: 4ad3278df6fe2b08 ("x86/speculation: Disable RRSBA behavior") d7caac991feeef1b ("x86/cpu/amd: Add Spectral Chicken") That cause no changes to tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after $ Just silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Pawan Gupta Cc: Peter Zijlstra Link: https://lore.kernel.org/lkml/YtQTm9wsB3hxQWvy@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/arch/x86/include/asm/msr-index.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 8a0a53cf360d5..ad084326f24c2 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -93,6 +93,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -561,6 +562,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 From 33e0e7fdbc2bfc615ea54b741988b8015c593024 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Jul 2021 13:39:15 -0300 Subject: [PATCH 296/299] tools headers cpufeatures: Sync with the kernel sources commit f098addbdb44c8a565367f5162f3ab170ed9404a upstream. To pick the changes from: f43b9876e857c739 ("x86/retbleed: Add fine grained Kconfig knobs") a149180fbcf336e9 ("x86: Add magic AMD return-thunk") 15e67227c49a5783 ("x86: Undo return-thunk damage") 369ae6ffc41a3c11 ("x86/retpoline: Cleanup some #ifdefery") 4ad3278df6fe2b08 x86/speculation: Disable RRSBA behavior 26aae8ccbc197223 x86/cpu/amd: Enumerate BTC_NO 9756bba28470722d x86/speculation: Fill RSB on vmexit for IBRS 3ebc170068885b6f x86/bugs: Add retbleed=ibpb 2dbb887e875b1de3 x86/entry: Add kernel IBRS implementation 6b80b59b35557065 x86/bugs: Report AMD retbleed vulnerability a149180fbcf336e9 x86: Add magic AMD return-thunk 15e67227c49a5783 x86: Undo return-thunk damage a883d624aed463c8 x86/cpufeatures: Move RETPOLINE flags to word 11 51802186158c74a0 x86/speculation/mmio: Enumerate Processor MMIO Stale Data bug This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Greg Kroah-Hartman --- tools/arch/x86/include/asm/cpufeatures.h | 12 +++++++++-- .../arch/x86/include/asm/disabled-features.h | 21 ++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e17de69faa543..5d09ded0c491f 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -295,6 +295,12 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -315,6 +321,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -444,5 +451,6 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 1231d63f836d8..f7be189e97232 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,25 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -82,7 +101,7 @@ #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 From deacf52d1acaf1d6038a3d450823134847c88ae4 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 8 Jul 2022 16:21:28 -0500 Subject: [PATCH 297/299] x86/bugs: Remove apostrophe typo commit bcf163150cd37348a0cb59e95c916a83a9344b0e upstream. Remove a superfluous ' in the mitigation string. Fixes: e8ec1b6e08a2 ("x86/bugs: Enable STIBP for JMP2RET") Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f6dfa26ed88b9..0b64e894b3838 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1174,7 +1174,7 @@ spectre_v2_user_select_mitigation(void) if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { if (mode != SPECTRE_V2_USER_STRICT && mode != SPECTRE_V2_USER_STRICT_PREFERRED) - pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n"); mode = SPECTRE_V2_USER_STRICT_PREFERRED; } From daf898ab0e75b8ae715adf84e0f1101775c569ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Jul 2022 12:20:19 +0200 Subject: [PATCH 298/299] um: Add missing apply_returns() commit 564d998106397394b6aad260f219b882b3347e62 upstream. Implement apply_returns() stub for UM, just like all the other patching routines. Fixes: 15e67227c49a ("x86: Undo return-thunk damage") Reported-by: Randy Dunlap Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/Ys%2Ft45l%2FgarIrD0u@worktop.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- arch/um/kernel/um_arch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 0760e24f2ebab..9838967d0b2f1 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -432,6 +432,10 @@ void apply_retpolines(s32 *start, s32 *end) { } +void apply_returns(s32 *start, s32 *end) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } From 9aa5a042881d4a99657f82c774e9e15353ebeb2d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 23 Jul 2022 12:57:00 +0200 Subject: [PATCH 299/299] Linux 5.18.14 Link: https://lore.kernel.org/r/20220722090650.665513668@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Rudi Heitbaum Tested-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1f3c753cb28df..d3723b2f6d6ca 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 18 -SUBLEVEL = 13 +SUBLEVEL = 14 EXTRAVERSION = NAME = Superb Owl