From 48de44356e077706d4063c31d584c9b477b2dbf7 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 25 Feb 2019 20:16:01 +0300 Subject: ARC: u-boot args: check that magic number is correct [ Upstream commit edb64bca50cd736c6894cc6081d5263c007ce005 ] In case of devboards we really often disable bootloader and load Linux image in memory via JTAG. Even if kernel tries to verify uboot_tag and uboot_arg there is sill a chance that we treat some garbage in registers as valid u-boot arguments in JTAG case. E.g. it is enough to have '1' in r0 to treat any value in r2 as a boot command line. So check that magic number passed from u-boot is correct and drop u-boot arguments otherwise. That helps to reduce the possibility of using garbage as u-boot arguments in JTAG case. We can safely check U-boot magic value (0x0) in linux passed via r1 register as U-boot pass it from the beginning. So there is no backward-compatibility issues. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/kernel/head.S | 1 + arch/arc/kernel/setup.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 30e090625916..a72bbda2f7aa 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -106,6 +106,7 @@ ENTRY(stext) ; r2 = pointer to uboot provided cmdline or external DTB in mem ; These are handled later in handle_uboot_args() st r0, [@uboot_tag] + st r1, [@uboot_magic] st r2, [@uboot_arg] ; setup "current" tsk and optionally cache it in dedicated r25 diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7b2340996cf8..7b3a7b3b380c 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -36,6 +36,7 @@ unsigned int intr_to_DE_cnt; /* Part of U-boot ABI: see head.S */ int __initdata uboot_tag; +int __initdata uboot_magic; char __initdata *uboot_arg; const struct machine_desc *machine_desc; @@ -497,6 +498,8 @@ static inline bool uboot_arg_invalid(unsigned long addr) #define UBOOT_TAG_NONE 0 #define UBOOT_TAG_CMDLINE 1 #define UBOOT_TAG_DTB 2 +/* We always pass 0 as magic from U-boot */ +#define UBOOT_MAGIC_VALUE 0 void __init handle_uboot_args(void) { @@ -511,6 +514,11 @@ void __init handle_uboot_args(void) goto ignore_uboot_args; } + if (uboot_magic != UBOOT_MAGIC_VALUE) { + pr_warn(IGNORE_ARGS "non zero uboot magic\n"); + goto ignore_uboot_args; + } + if (uboot_tag != UBOOT_TAG_NONE && uboot_arg_invalid((unsigned long)uboot_arg)) { pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg); -- cgit v1.2.3 From 9005b534c2bac39e69c8b87365ee58865e16a97e Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 25 Feb 2019 09:45:38 +0000 Subject: arc: hsdk_defconfig: Enable CONFIG_BLK_DEV_RAM [ Upstream commit 0728aeb7ead99a9b0dac2f3c92b3752b4e02ff97 ] We have now a HSDK device in our kernelci lab, but kernel builded via the hsdk_defconfig lacks ramfs supports, so it cannot boot kernelci jobs yet. So this patch enable CONFIG_BLK_DEV_RAM in hsdk_defconfig. Signed-off-by: Corentin Labbe Acked-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/configs/hsdk_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 87b23b7fb781..aefcf7a4e17a 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -8,6 +8,7 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_RAM=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set -- cgit v1.2.3 From 7f434180c880a89525e68aa65ca1529c90c29db1 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 2 Mar 2019 09:17:32 +0800 Subject: inotify: Fix fsnotify_mark refcount leak in inotify_update_existing_watch() [ Upstream commit 62c9d2674b31d4c8a674bee86b7edc6da2803aea ] Commit 4d97f7d53da7dc83 ("inotify: Add flag IN_MASK_CREATE for inotify_add_watch()") forgot to call fsnotify_put_mark() with IN_MASK_CREATE after fsnotify_find_mark() Fixes: 4d97f7d53da7dc83 ("inotify: Add flag IN_MASK_CREATE for inotify_add_watch()") Signed-off-by: ZhangXiaoxu Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/notify/inotify/inotify_user.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 798f1253141a..3b7b8e95c98a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -519,8 +519,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; - else if (create) - return -EEXIST; + else if (create) { + ret = -EEXIST; + goto out; + } i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); @@ -548,6 +550,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* return the wd */ ret = i_mark->wd; +out: /* match the get from fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); -- cgit v1.2.3 From c5d7b6089be546e5680c450f81f205564cecb22d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Mar 2019 10:52:33 -0800 Subject: perf/core: Restore mmap record type correctly [ Upstream commit d9c1bb2f6a2157b38e8eb63af437cb22701d31ee ] On mmap(), perf_events generates a RECORD_MMAP record and then checks which events are interested in this record. There are currently 2 versions of mmap records: RECORD_MMAP and RECORD_MMAP2. MMAP2 is larger. The event configuration controls which version the user level tool accepts. If the event->attr.mmap2=1 field then MMAP2 record is returned. The perf_event_mmap_output() takes care of this. It checks attr->mmap2 and corrects the record fields before putting it in the sampling buffer of the event. At the end the function restores the modified MMAP record fields. The problem is that the function restores the size but not the type. Thus, if a subsequent event only accepts MMAP type, then it would instead receive an MMAP2 record with a size of MMAP record. This patch fixes the problem by restoring the record type on exit. Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra (Intel) Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Fixes: 13d7a2410fa6 ("perf: Add attr->mmap2 attribute to an event") Link: http://lkml.kernel.org/r/20190307185233.225521-1-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 26d6edab051a..2e2305a81047 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7178,6 +7178,7 @@ static void perf_event_mmap_output(struct perf_event *event, struct perf_output_handle handle; struct perf_sample_data sample; int size = mmap_event->event_id.header.size; + u32 type = mmap_event->event_id.header.type; int ret; if (!perf_event_mmap_match(event, data)) @@ -7221,6 +7222,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_output_end(&handle); out: mmap_event->event_id.header.size = size; + mmap_event->event_id.header.type = type; } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) -- cgit v1.2.3 From 0a7ef682097002cec926777228866149052e4751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Mon, 11 Mar 2019 22:08:22 +0100 Subject: mips: bcm47xx: Enable USB power on Netgear WNDR3400v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cdb8faa00e3fcdd0ad10add743516d616dc7d38e ] Eric has reported on OpenWrt's bug tracking system[1], that he's not able to use USB devices on his WNDR3400v2 device after the boot, until he turns on GPIO #21 manually through sysfs. 1. https://bugs.openwrt.org/index.php?do=details&task_id=2170 Cc: Rafał Miłecki Cc: Hauke Mehrtens Reported-by: Eric Bohlman Tested-by: Eric Bohlman Signed-off-by: Petr Štetiar Signed-off-by: Paul Burton Signed-off-by: Sasha Levin --- arch/mips/bcm47xx/workarounds.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/bcm47xx/workarounds.c b/arch/mips/bcm47xx/workarounds.c index 46eddbec8d9f..0ab95dd431b3 100644 --- a/arch/mips/bcm47xx/workarounds.c +++ b/arch/mips/bcm47xx/workarounds.c @@ -24,6 +24,7 @@ void __init bcm47xx_workarounds(void) case BCM47XX_BOARD_NETGEAR_WNR3500L: bcm47xx_workarounds_enable_usb_power(12); break; + case BCM47XX_BOARD_NETGEAR_WNDR3400V2: case BCM47XX_BOARD_NETGEAR_WNDR3400_V3: bcm47xx_workarounds_enable_usb_power(21); break; -- cgit v1.2.3 From e239811047165d8ff4ccb9e1202bee49f25ef780 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 14 Mar 2019 23:46:05 -0400 Subject: ext4: avoid panic during forced reboot [ Upstream commit 1dc1097ff60e4105216da7cd0aa99032b039a994 ] When admin calls "reboot -f" - i.e., does a hard system reboot by directly calling reboot(2) - ext4 filesystem mounted with errors=panic can panic the system. This happens because the underlying device gets disabled without unmounting the filesystem and thus some syscall running in parallel to reboot(2) can result in the filesystem getting IO errors. This is somewhat surprising to the users so try improve the behavior by switching to errors=remount-ro behavior when the system is running reboot(2). Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/super.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb12d3c17c1b..b9bca7298f96 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -430,6 +430,12 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) spin_unlock(&sbi->s_md_lock); } +static bool system_going_down(void) +{ + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF + || system_state == SYSTEM_RESTART; +} + /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. * @@ -460,7 +466,12 @@ static void ext4_handle_error(struct super_block *sb) if (journal) jbd2_journal_abort(journal, -EIO); } - if (test_opt(sb, ERRORS_RO)) { + /* + * We force ERRORS_RO behavior when system is rebooting. Otherwise we + * could panic during 'reboot -f' as the underlying device got already + * disabled. + */ + if (test_opt(sb, ERRORS_RO) || system_going_down()) { ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); /* * Make sure updated value of ->s_mount_flags will be visible @@ -468,8 +479,7 @@ static void ext4_handle_error(struct super_block *sb) */ smp_wmb(); sb->s_flags |= SB_RDONLY; - } - if (test_opt(sb, ERRORS_PANIC)) { + } else if (test_opt(sb, ERRORS_PANIC)) { if (EXT4_SB(sb)->s_journal && !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) return; -- cgit v1.2.3 From d45fc2ba0e398bf41dd219ab0a84fb80228e0069 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 15 Mar 2019 00:15:32 -0400 Subject: ext4: add missing brelse() in add_new_gdb_meta_bg() [ Upstream commit d64264d6218e6892edd832dc3a5a5857c2856c53 ] Currently in add_new_gdb_meta_bg() there is a missing brelse of gdb_bh in case ext4_journal_get_write_access() fails. Additionally kvfree() is missing in the same error path. Fix it by moving the ext4_journal_get_write_access() before the ext4 sb update as Ted suggested and release n_group_desc and gdb_bh in case it fails. Fixes: 61a9c11e5e7a ("ext4: add missing brelse() add_new_gdb_meta_bg()'s error path") Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/resize.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3d9b18505c0c..90061c3d048b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -932,11 +932,18 @@ static int add_new_gdb_meta_bg(struct super_block *sb, memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); n_group_desc[gdb_num] = gdb_bh; + + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (err) { + kvfree(n_group_desc); + brelse(gdb_bh); + return err; + } + EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; kvfree(o_group_desc); - BUFFER_TRACE(gdb_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gdb_bh); return err; } -- cgit v1.2.3 From 2c20533ea60224116b4d5d03d5ff6fff276e420b Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 15 Mar 2019 00:22:28 -0400 Subject: ext4: report real fs size after failed resize [ Upstream commit 6c7328400e0488f7d49e19e02290ba343b6811b2 ] Currently when the file system resize using ext4_resize_fs() fails it will report into log that "resized filesystem to ". However this may not be true in the case of failure. Use the current block count as returned by ext4_blocks_count() to report the block count. Additionally, report a warning that "error occurred during file system resize" Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/resize.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 90061c3d048b..e7ae26e36c9c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2080,6 +2080,10 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); + if (err) + ext4_warning(sb, "error (%d) occurred during " + "file system resize", err); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", + ext4_blocks_count(es)); return err; } -- cgit v1.2.3 From b3a964aea086db87c18da7eac30ea1061e90df8b Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 22:58:29 -0500 Subject: ALSA: echoaudio: add a check for ioremap_nocache [ Upstream commit 6ade657d6125ec3ec07f95fa51e28138aef6208f ] In case ioremap_nocache fails, the fix releases chip and returns an error code upstream to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/echoaudio/echoaudio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 907cf1a46712..3ef2b27ebbe8 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1954,6 +1954,11 @@ static int snd_echo_create(struct snd_card *card, } chip->dsp_registers = (volatile u32 __iomem *) ioremap_nocache(chip->dsp_registers_phys, sz); + if (!chip->dsp_registers) { + dev_err(chip->card->dev, "ioremap failed\n"); + snd_echo_free(chip); + return -ENOMEM; + } if (request_irq(pci->irq, snd_echo_interrupt, IRQF_SHARED, KBUILD_MODNAME, chip)) { -- cgit v1.2.3 From 4c12b50fc86a4bf37441eb7c3765e8c55578cff8 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 23:04:14 -0500 Subject: ALSA: sb8: add a check for request_region [ Upstream commit dcd0feac9bab901d5739de51b3f69840851f8919 ] In case request_region fails, the fix returns an error code to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/isa/sb/sb8.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index d77dcba276b5..1eb8b61a185b 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -111,6 +111,10 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev) /* block the 0x388 port to avoid PnP conflicts */ acard->fm_res = request_region(0x388, 4, "SoundBlaster FM"); + if (!acard->fm_res) { + err = -EBUSY; + goto _err; + } if (port[dev] != SNDRV_AUTO_PORT) { if ((err = snd_sbdsp_create(card, port[dev], irq[dev], -- cgit v1.2.3 From 33550275bbcf90f236aa744bbb883d464c34def4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Mar 2019 16:44:28 +0200 Subject: auxdisplay: hd44780: Fix memory leak on ->remove() [ Upstream commit 41c8d0adf3c4df1867d98cee4a2c4531352a33ad ] We have to free on ->remove() the allocated resources on ->probe(). Fixes: d47d88361fee ("auxdisplay: Add HD44780 Character LCD support") Reviewed-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko Signed-off-by: Miguel Ojeda Signed-off-by: Sasha Levin --- drivers/auxdisplay/hd44780.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c index 9ad93ea42fdc..3cde351fb5c9 100644 --- a/drivers/auxdisplay/hd44780.c +++ b/drivers/auxdisplay/hd44780.c @@ -280,6 +280,8 @@ static int hd44780_remove(struct platform_device *pdev) struct charlcd *lcd = platform_get_drvdata(pdev); charlcd_unregister(lcd); + + kfree(lcd); return 0; } -- cgit v1.2.3 From b21723eda4cccb9b833d123e4e92f393af67dbad Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 15 Mar 2019 11:37:20 +1000 Subject: drm/udl: use drm_gem_object_put_unlocked. [ Upstream commit 8f3b487685b2acf71b42bb30d68fd9271bec8695 ] When Daniel removed struct_mutex he didn't fix this call to the unlocked variant which is required since we no longer use struct mutex. This fixes a bunch of: WARNING: CPU: 4 PID: 1370 at drivers/gpu/drm/drm_gem.c:931 drm_gem_object_put+0x2b/0x30 [drm] Modules linked in: udl xt_CHECKSUM ipt_MASQUERADE tun bridge stp llc nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t> CPU: 4 PID: 1370 Comm: Xorg Not tainted 5.0.0+ #2 backtraces when you plug in a udl device. Fixes: ae358dacd217 (drm/udl: Get rid of dev->struct_mutex usage) Reviewed-by: Daniel Vetter Cc: Sean Paul Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/udl/udl_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index d5a23295dd80..bb7b58407039 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -224,7 +224,7 @@ int udl_gem_mmap(struct drm_file *file, struct drm_device *dev, *offset = drm_vma_node_offset_addr(&gobj->base.vma_node); out: - drm_gem_object_put(&gobj->base); + drm_gem_object_put_unlocked(&gobj->base); unlock: mutex_unlock(&udl->gem_lock); return ret; -- cgit v1.2.3 From 221b45319d05ce2be91aba36c398f3e2efde4591 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 6 Mar 2019 19:17:56 +0200 Subject: IB/mlx4: Fix race condition between catas error reset and aliasguid flows [ Upstream commit 587443e7773e150ae29e643ee8f41a1eed226565 ] Code review revealed a race condition which could allow the catas error flow to interrupt the alias guid query post mechanism at random points. Thiis is fixed by doing cancel_delayed_work_sync() instead of cancel_delayed_work() during the alias guid mechanism destroy flow. Fixes: a0c64a17aba8 ("mlx4: Add alias_guid mechanism") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 782499abcd98..2a0b59a4b6eb 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -804,8 +804,8 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) unsigned long flags; for (i = 0 ; i < dev->num_ports; i++) { - cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); det = &sriov->alias_guid.ports_guid[i]; + cancel_delayed_work_sync(&det->alias_guid_work); spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); while (!list_empty(&det->cb_list)) { cb_ctx = list_entry(det->cb_list.next, -- cgit v1.2.3 From e52431f7547cd3d208b270fdf845bdb54e051543 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 14 Mar 2019 18:37:29 +0800 Subject: i40iw: Avoid panic when handling the inetdev event [ Upstream commit ec4fe4bcc584b55e24e8d1768f5510a62c0fd619 ] There is a panic reported that on a system with x722 ethernet, when doing the operations like: # ip link add br0 type bridge # ip link set eno1 master br0 # systemctl restart systemd-networkd The system will panic "BUG: unable to handle kernel null pointer dereference at 0000000000000034", with call chain: i40iw_inetaddr_event notifier_call_chain blocking_notifier_call_chain notifier_call_chain __inet_del_ifa inet_rtm_deladdr rtnetlink_rcv_msg netlink_rcv_skb rtnetlink_rcv netlink_unicast netlink_sendmsg sock_sendmsg __sys_sendto It is caused by "local_ipaddr = ntohl(in->ifa_list->ifa_address)", while the in->ifa_list is NULL. So add a check for the "in->ifa_list == NULL" case, and skip the ARP operation accordingly. Signed-off-by: Feng Tang Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 59e978141ad4..e99177533930 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -173,7 +173,12 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, rcu_read_lock(); in = __in_dev_get_rcu(upper_dev); - local_ipaddr = ntohl(in->ifa_list->ifa_address); + + if (!in->ifa_list) + local_ipaddr = 0; + else + local_ipaddr = ntohl(in->ifa_list->ifa_address); + rcu_read_unlock(); } else { local_ipaddr = ntohl(ifa->ifa_address); @@ -185,6 +190,11 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, case NETDEV_UP: /* Fall through */ case NETDEV_CHANGEADDR: + + /* Just skip if no need to handle ARP cache */ + if (!local_ipaddr) + break; + i40iw_manage_arp_cache(iwdev, netdev->dev_addr, &local_ipaddr, -- cgit v1.2.3 From bdb43acc8858df05170ae16749c8350285b8ecb1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 11:10:11 +0100 Subject: mmc: davinci: remove extraneous __init annotation [ Upstream commit 9ce58dd7d9da3ca0d7cb8c9568f1c6f4746da65a ] Building with clang finds a mistaken __init tag: WARNING: vmlinux.o(.text+0x5e4250): Section mismatch in reference from the function davinci_mmcsd_probe() to the function .init.text:init_mmcsd_host() The function davinci_mmcsd_probe() references the function __init init_mmcsd_host(). This is often because davinci_mmcsd_probe lacks a __init annotation or the annotation of init_mmcsd_host is wrong. Signed-off-by: Arnd Bergmann Acked-by: Wolfram Sang Reviewed-by: Nathan Chancellor Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/davinci_mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index 9e68c3645e22..e6f14257a7d0 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1117,7 +1117,7 @@ static inline void mmc_davinci_cpufreq_deregister(struct mmc_davinci_host *host) { } #endif -static void __init init_mmcsd_host(struct mmc_davinci_host *host) +static void init_mmcsd_host(struct mmc_davinci_host *host) { mmc_davinci_reset_ctrl(host, 1); -- cgit v1.2.3 From 1f208b0adaeee47ec4c58d0d57c83ae91a3a1644 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 17 Mar 2019 23:21:24 +0000 Subject: ALSA: opl3: fix mismatch between snd_opl3_drum_switch definition and declaration [ Upstream commit b4748e7ab731e436cf5db4786358ada5dd2db6dd ] The function snd_opl3_drum_switch declaration in the header file has the order of the two arguments on_off and vel swapped when compared to the definition arguments of vel and on_off. Fix this by swapping them around to match the definition. This error predates the git history, so no idea when this error was introduced. Signed-off-by: Colin Ian King Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/drivers/opl3/opl3_voice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/opl3/opl3_voice.h b/sound/drivers/opl3/opl3_voice.h index 5b02bd49fde4..4e4ecc21760b 100644 --- a/sound/drivers/opl3/opl3_voice.h +++ b/sound/drivers/opl3/opl3_voice.h @@ -41,7 +41,7 @@ void snd_opl3_timer_func(struct timer_list *t); /* Prototypes for opl3_drums.c */ void snd_opl3_load_drums(struct snd_opl3 *opl3); -void snd_opl3_drum_switch(struct snd_opl3 *opl3, int note, int on_off, int vel, struct snd_midi_channel *chan); +void snd_opl3_drum_switch(struct snd_opl3 *opl3, int note, int vel, int on_off, struct snd_midi_channel *chan); /* Prototypes for opl3_oss.c */ #if IS_ENABLED(CONFIG_SND_SEQUENCER_OSS) -- cgit v1.2.3 From 77323732005afb86d9e99b4c95993493ec955f8a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Mar 2019 08:08:43 -0600 Subject: paride/pf: cleanup queues when detection fails [ Upstream commit 6ce59025f1182125e75c8d121daf44056b65dd1f ] The driver allocates queues for all the units it potentially supports. But if we fail to detect any drives, then we fail loading the module without cleaning up those queues. This is now evident with the switch to blk-mq, though the bug has been there forever as far as I can tell. Also fix cleanup through regular module exit. Reported-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/paride/pf.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index e92e7a8eeeb2..103b617cdc31 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -761,8 +761,12 @@ static int pf_detect(void) return 0; printk("%s: No ATAPI disk detected\n", name); - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) + for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + blk_cleanup_queue(pf->disk->queue); + pf->disk->queue = NULL; + blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); + } pi_unregister_driver(par_drv); return -1; } @@ -1047,13 +1051,15 @@ static void __exit pf_exit(void) int unit; unregister_blkdev(major, name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { - if (!pf->present) - continue; - del_gendisk(pf->disk); + if (pf->present) + del_gendisk(pf->disk); + blk_cleanup_queue(pf->disk->queue); blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); - pi_release(pf->pi); + + if (pf->present) + pi_release(pf->pi); } } -- cgit v1.2.3 From 6e684bd65a5cd88115fa1802664ba90449190cbc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Mar 2019 08:10:32 -0600 Subject: paride/pcd: cleanup queues when detection fails [ Upstream commit 81b74ac68c28fddb3589ad5d4d5e587baf4bb781 ] The driver allocates queues for all the units it potentially supports. But if we fail to detect any drives, then we fail loading the module without cleaning up those queues. This is now evident with the switch to blk-mq, though the bug has been there forever as far as I can tell. Also fix cleanup through regular module exit. Reported-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/paride/pcd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 96670eefaeb2..377a694dc228 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -749,8 +749,12 @@ static int pcd_detect(void) return 0; printk("%s: No CD-ROM drive found\n", name); - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) + for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + blk_cleanup_queue(cd->disk->queue); + cd->disk->queue = NULL; + blk_mq_free_tag_set(&cd->tag_set); put_disk(cd->disk); + } pi_unregister_driver(par_drv); return -1; } -- cgit v1.2.3 From 74920ee161d47462dd54f4d16601015e82621093 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sat, 19 Jan 2019 17:15:23 +0100 Subject: thermal/intel_powerclamp: fix __percpu declaration of worker_data [ Upstream commit aa36e3616532f82a920b5ebf4e059fbafae63d88 ] This variable is declared as: static struct powerclamp_worker_data * __percpu worker_data; In other words, a percpu pointer to struct ... But this variable not used like so but as a pointer to a percpu struct powerclamp_worker_data. So fix the declaration as: static struct powerclamp_worker_data __percpu *worker_data; This also quiets Sparse's warnings from __verify_pcpu_ptr(), like: 494:49: warning: incorrect type in initializer (different address spaces) 494:49: expected void const [noderef] *__vpp_verify 494:49: got struct powerclamp_worker_data * Signed-off-by: Luc Van Oostenryck Reviewed-by: Petr Mladek Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/intel/intel_powerclamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 7571f7c2e7c9..b12ecd436e23 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -101,7 +101,7 @@ struct powerclamp_worker_data { bool clamping; }; -static struct powerclamp_worker_data * __percpu worker_data; +static struct powerclamp_worker_data __percpu *worker_data; static struct thermal_cooling_device *cooling_dev; static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu * clamping kthread worker -- cgit v1.2.3 From 481c8a89e89a373b8f157ac2b0df6cee07e2bbfb Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 22 Jan 2019 16:47:41 +0100 Subject: thermal: samsung: Fix incorrect check after code merge [ Upstream commit 3b5236cc5d086dd3ddd01113ee9255421aab9fab ] Merge commit 19785cf93b6c ("Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal") broke the code introduced by commit ffe6e16f14fa ("thermal: exynos: Reduce severity of too early temperature read"). Restore the original code from the mentioned commit to finally fix the warning message during boot: thermal thermal_zone0: failed to read out thermal zone (-22) Reported-by: Marian Mihailescu Signed-off-by: Marek Szyprowski Fixes: 19785cf93b6c ("Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/samsung/exynos_tmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 48eef552cba4..fc9399d9c082 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -666,7 +666,7 @@ static int exynos_get_temp(void *p, int *temp) struct exynos_tmu_data *data = p; int value, ret = 0; - if (!data || !data->tmu_read || !data->enabled) + if (!data || !data->tmu_read) return -EINVAL; else if (!data->enabled) /* -- cgit v1.2.3 From f1a315ca06be80dddab380c7af1ddace3ee5f110 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Tue, 29 Jan 2019 09:55:57 +0000 Subject: thermal: bcm2835: Fix crash in bcm2835_thermal_debugfs [ Upstream commit 35122495a8c6683e863acf7b05a7036b2be64c7a ] "cat /sys/kernel/debug/bcm2835_thermal/regset" causes a NULL pointer dereference in bcm2835_thermal_debugfs. The driver makes use of the implementation details of the thermal framework to retrieve a pointer to its private data from a struct thermal_zone_device, and gets it wrong - leading to the crash. Instead, store its private data as the drvdata and retrieve the thermal_zone_device pointer from it. Fixes: bcb7dd9ef206 ("thermal: bcm2835: add thermal driver for bcm2835 SoC") Signed-off-by: Phil Elwell Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/broadcom/bcm2835_thermal.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c index 720760cd493f..ba39647a690c 100644 --- a/drivers/thermal/broadcom/bcm2835_thermal.c +++ b/drivers/thermal/broadcom/bcm2835_thermal.c @@ -119,8 +119,7 @@ static const struct debugfs_reg32 bcm2835_thermal_regs[] = { static void bcm2835_thermal_debugfs(struct platform_device *pdev) { - struct thermal_zone_device *tz = platform_get_drvdata(pdev); - struct bcm2835_thermal_data *data = tz->devdata; + struct bcm2835_thermal_data *data = platform_get_drvdata(pdev); struct debugfs_regset32 *regset; data->debugfsdir = debugfs_create_dir("bcm2835_thermal", NULL); @@ -266,7 +265,7 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) data->tz = tz; - platform_set_drvdata(pdev, tz); + platform_set_drvdata(pdev, data); /* * Thermal_zone doesn't enable hwmon as default, @@ -290,8 +289,8 @@ err_clk: static int bcm2835_thermal_remove(struct platform_device *pdev) { - struct thermal_zone_device *tz = platform_get_drvdata(pdev); - struct bcm2835_thermal_data *data = tz->devdata; + struct bcm2835_thermal_data *data = platform_get_drvdata(pdev); + struct thermal_zone_device *tz = data->tz; debugfs_remove_recursive(data->debugfsdir); thermal_zone_of_sensor_unregister(&pdev->dev, tz); -- cgit v1.2.3 From 891fea677cca1dfd0cc2bb7d1960672e7ab43001 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 10 Oct 2018 01:30:06 -0700 Subject: thermal/int340x_thermal: Add additional UUIDs [ Upstream commit 16fc8eca1975358111dbd7ce65e4ce42d1a848fb ] Add more supported DPTF policies than the driver currently exposes. Signed-off-by: Matthew Garrett Cc: Nisha Aram Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 61ca7ce3624e..e0f39cacbc18 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -22,6 +22,13 @@ enum int3400_thermal_uuid { INT3400_THERMAL_PASSIVE_1, INT3400_THERMAL_ACTIVE, INT3400_THERMAL_CRITICAL, + INT3400_THERMAL_ADAPTIVE_PERFORMANCE, + INT3400_THERMAL_EMERGENCY_CALL_MODE, + INT3400_THERMAL_PASSIVE_2, + INT3400_THERMAL_POWER_BOSS, + INT3400_THERMAL_VIRTUAL_SENSOR, + INT3400_THERMAL_COOLING_MODE, + INT3400_THERMAL_HARDWARE_DUTY_CYCLING, INT3400_THERMAL_MAXIMUM_UUID, }; @@ -29,6 +36,13 @@ static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { "42A441D6-AE6A-462b-A84B-4A8CE79027D3", "3A95C389-E4B8-4629-A526-C52C88626BAE", "97C68AE7-15FA-499c-B8C9-5DA81D606E0A", + "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D", + "5349962F-71E6-431D-9AE8-0A635B710AEE", + "9E04115A-AE87-4D1C-9500-0F3E340BFE75", + "F5A35014-C209-46A4-993A-EB56DE7530A1", + "6ED722A7-9240-48A5-B479-31EEF723D7CF", + "16CAF1B7-DD38-40ED-B1C1-1B8A1913D531", + "BE84BABF-C4D4-403D-B495-3128FD44dAC1", }; struct int3400_thermal_priv { -- cgit v1.2.3 From 800e3fd7bfce5398ada8e58a8e67a1c63fc27e7a Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 10 Oct 2018 01:30:07 -0700 Subject: thermal/int340x_thermal: fix mode setting [ Upstream commit 396ee4d0cd52c13b3f6421b8d324d65da5e7e409 ] int3400 only pushes the UUID into the firmware when the mode is flipped to "enable". The current code only exposes the mode flag if the firmware supports the PASSIVE_1 UUID, which not all machines do. Remove the restriction. Signed-off-by: Matthew Garrett Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index e0f39cacbc18..5f3ed24e26ec 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -313,10 +313,9 @@ static int int3400_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - if (priv->uuid_bitmap & 1 << INT3400_THERMAL_PASSIVE_1) { - int3400_thermal_ops.get_mode = int3400_thermal_get_mode; - int3400_thermal_ops.set_mode = int3400_thermal_set_mode; - } + int3400_thermal_ops.get_mode = int3400_thermal_get_mode; + int3400_thermal_ops.set_mode = int3400_thermal_set_mode; + priv->thermal = thermal_zone_device_register("INT3400 Thermal", 0, 0, priv, &int3400_thermal_ops, &int3400_thermal_params, 0, 0); -- cgit v1.2.3 From b0b05ab719c36fc84810337693d9e9aa3b32e77f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 18 Mar 2019 22:26:33 +0800 Subject: thermal/intel_powerclamp: fix truncated kthread name [ Upstream commit e925b5be5751f6a7286bbd9a4cbbc4ac90cc5fa6 ] kthread name only allows 15 characters (TASK_COMMON_LEN is 16). Thus rename the kthreads created by intel_powerclamp driver from "kidle_inject/ + decimal cpuid" to "kidle_inj/ + decimal cpuid" to avoid truncated kthead name for cpu 100 and later. Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/intel/intel_powerclamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index b12ecd436e23..ac7256b5f020 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -494,7 +494,7 @@ static void start_power_clamp_worker(unsigned long cpu) struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); struct kthread_worker *worker; - worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inject/%ld", cpu); + worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu); if (IS_ERR(worker)) return; -- cgit v1.2.3 From 7aa9be519579e43c19bf341d61510ce0b7702b92 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 28 Jan 2019 15:24:42 +0100 Subject: scsi: iscsi: flush running unbind operations when removing a session [ Upstream commit 165aa2bfb42904b1bec4bf2fa257c8c603c14a06 ] In some cases, the iscsi_remove_session() function is called while an unbind_work operation is still running. This may cause a situation where sysfs objects are removed in an incorrect order, triggering a kernel warning. [ 605.249442] ------------[ cut here ]------------ [ 605.259180] sysfs group 'power' not found for kobject 'target2:0:0' [ 605.321371] WARNING: CPU: 1 PID: 26794 at fs/sysfs/group.c:235 sysfs_remove_group+0x76/0x80 [ 605.341266] Modules linked in: dm_service_time target_core_user target_core_pscsi target_core_file target_core_iblock iscsi_target_mod target_core_mod nls_utf8 isofs ppdev bochs_drm nfit ttm libnvdimm drm_kms_helper syscopyarea sysfillrect sysimgblt joydev pcspkr fb_sys_fops drm i2c_piix4 sg parport_pc parport xfs libcrc32c dm_multipath sr_mod sd_mod cdrom ata_generic 8021q garp mrp ata_piix stp crct10dif_pclmul crc32_pclmul llc libata crc32c_intel virtio_net net_failover ghash_clmulni_intel serio_raw failover sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi [ 605.627479] CPU: 1 PID: 26794 Comm: kworker/u32:2 Not tainted 4.18.0-60.el8.x86_64 #1 [ 605.721401] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180724_192412-buildhw-07.phx2.fedoraproject.org-1.fc29 04/01/2014 [ 605.823651] Workqueue: scsi_wq_2 __iscsi_unbind_session [scsi_transport_iscsi] [ 605.830940] RIP: 0010:sysfs_remove_group+0x76/0x80 [ 605.922907] Code: 48 89 df 5b 5d 41 5c e9 38 c4 ff ff 48 89 df e8 e0 bf ff ff eb cb 49 8b 14 24 48 8b 75 00 48 c7 c7 38 73 cb a7 e8 24 77 d7 ff <0f> 0b 5b 5d 41 5c c3 0f 1f 00 0f 1f 44 00 00 41 56 41 55 41 54 55 [ 606.122304] RSP: 0018:ffffbadcc8d1bda8 EFLAGS: 00010286 [ 606.218492] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 606.326381] RDX: ffff98bdfe85eb40 RSI: ffff98bdfe856818 RDI: ffff98bdfe856818 [ 606.514498] RBP: ffffffffa7ab73e0 R08: 0000000000000268 R09: 0000000000000007 [ 606.529469] R10: 0000000000000000 R11: ffffffffa860d9ad R12: ffff98bdf978e838 [ 606.630535] R13: ffff98bdc2cd4010 R14: ffff98bdc2cd3ff0 R15: ffff98bdc2cd4000 [ 606.824707] FS: 0000000000000000(0000) GS:ffff98bdfe840000(0000) knlGS:0000000000000000 [ 607.018333] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 607.117844] CR2: 00007f84b78ac024 CR3: 000000002c00a003 CR4: 00000000003606e0 [ 607.117844] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 607.420926] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 607.524236] Call Trace: [ 607.530591] device_del+0x56/0x350 [ 607.624393] ? ata_tlink_match+0x30/0x30 [libata] [ 607.727805] ? attribute_container_device_trigger+0xb4/0xf0 [ 607.829911] scsi_target_reap_ref_release+0x39/0x50 [ 607.928572] scsi_remove_target+0x1a2/0x1d0 [ 608.017350] __iscsi_unbind_session+0xb3/0x160 [scsi_transport_iscsi] [ 608.117435] process_one_work+0x1a7/0x360 [ 608.132917] worker_thread+0x30/0x390 [ 608.222900] ? pwq_unbound_release_workfn+0xd0/0xd0 [ 608.323989] kthread+0x112/0x130 [ 608.418318] ? kthread_bind+0x30/0x30 [ 608.513821] ret_from_fork+0x35/0x40 [ 608.613909] ---[ end trace 0b98c310c8a6138c ]--- Signed-off-by: Maurizio Lombardi Acked-by: Chris Leech Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_iscsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0508831d6fb9..0a82e93566dc 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2200,6 +2200,8 @@ void iscsi_remove_session(struct iscsi_cls_session *session) scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE); /* flush running scans then delete devices */ flush_work(&session->scan_work); + /* flush running unbind operations */ + flush_work(&session->unbind_work); __iscsi_unbind_session(&session->unbind_work); /* hw iscsi may not have removed all connections from session */ -- cgit v1.2.3 From 02abd369fa77b4bd8d256b1b6aa8b9ec4bd68879 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Mar 2019 09:32:02 +0100 Subject: sched/cpufreq: Fix 32-bit math overflow [ Upstream commit a23314e9d88d89d49e69db08f60b7caa470f04e1 ] Vincent Wang reported that get_next_freq() has a mult overflow bug on 32-bit platforms in the IOWAIT boost case, since in that case {util,max} are in freq units instead of capacity units. Solve this by moving the IOWAIT boost to capacity units. And since this means @max is constant; simplify the code. Reported-by: Vincent Wang Tested-by: Vincent Wang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rafael J. Wysocki Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Chunyan Zhang Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Quentin Perret Cc: Rafael J. Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190305083202.GU32494@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/cpufreq_schedutil.c | 59 +++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 033ec7c45f13..1ccf77f6d346 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -48,10 +48,10 @@ struct sugov_cpu { bool iowait_boost_pending; unsigned int iowait_boost; - unsigned int iowait_boost_max; u64 last_update; unsigned long bw_dl; + unsigned long min; unsigned long max; /* The field below is for single-CPU policies only: */ @@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, if (delta_ns <= TICK_NSEC) return false; - sg_cpu->iowait_boost = set_iowait_boost - ? sg_cpu->sg_policy->policy->min : 0; + sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0; sg_cpu->iowait_boost_pending = set_iowait_boost; return true; @@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, /* Double the boost at each request */ if (sg_cpu->iowait_boost) { - sg_cpu->iowait_boost <<= 1; - if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + sg_cpu->iowait_boost = + min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE); return; } /* First wakeup after IO: start with minimum boost */ - sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min; + sg_cpu->iowait_boost = sg_cpu->min; } /** @@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, * This mechanism is designed to boost high frequently IO waiting tasks, while * being more conservative on tasks which does sporadic IO operations. */ -static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, - unsigned long *util, unsigned long *max) +static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, + unsigned long util, unsigned long max) { - unsigned int boost_util, boost_max; + unsigned long boost; /* No boost currently required */ if (!sg_cpu->iowait_boost) - return; + return util; /* Reset boost if the CPU appears to have been idle enough */ if (sugov_iowait_reset(sg_cpu, time, false)) - return; + return util; - /* - * An IO waiting task has just woken up: - * allow to further double the boost value - */ - if (sg_cpu->iowait_boost_pending) { - sg_cpu->iowait_boost_pending = false; - } else { + if (!sg_cpu->iowait_boost_pending) { /* - * Otherwise: reduce the boost value and disable it when we - * reach the minimum. + * No boost pending; reduce the boost value. */ sg_cpu->iowait_boost >>= 1; - if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) { + if (sg_cpu->iowait_boost < sg_cpu->min) { sg_cpu->iowait_boost = 0; - return; + return util; } } + sg_cpu->iowait_boost_pending = false; + /* - * Apply the current boost value: a CPU is boosted only if its current - * utilization is smaller then the current IO boost level. + * @util is already in capacity scale; convert iowait_boost + * into the same scale so we can compare. */ - boost_util = sg_cpu->iowait_boost; - boost_max = sg_cpu->iowait_boost_max; - if (*util * boost_max < *max * boost_util) { - *util = boost_util; - *max = boost_max; - } + boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT; + return max(boost, util); } #ifdef CONFIG_NO_HZ_COMMON @@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, util = sugov_get_util(sg_cpu); max = sg_cpu->max; - sugov_iowait_apply(sg_cpu, time, &util, &max); + util = sugov_iowait_apply(sg_cpu, time, util, max); next_f = get_next_freq(sg_policy, util, max); /* * Do not reduce the frequency if the CPU has not been idle @@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) j_util = sugov_get_util(j_sg_cpu); j_max = j_sg_cpu->max; - sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max); + j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max); if (j_util * max > j_max * util) { util = j_util; @@ -837,7 +826,9 @@ static int sugov_start(struct cpufreq_policy *policy) memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + sg_cpu->min = + (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) / + policy->cpuinfo.max_freq; } for_each_cpu(cpu, policy->cpus) { -- cgit v1.2.3 From 8764542aa21c4659d85aff7ca61243d08041240b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 6 Mar 2019 20:11:42 +0300 Subject: sched/core: Fix buffer overflow in cgroup2 property cpu.max [ Upstream commit 4c47acd824aaaa8fc6dc519fb4e08d1522105b7a ] Add limit into sscanf format string for on-stack buffer. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Johannes Weiner Cc: Li Zefan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Fixes: 0d5936344f30 ("sched: Implement interface for cgroup unified hierarchy") Link: https://lkml.kernel.org/r/155189230232.2620.13120481613524200065.stgit@buzz Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 01a2489de94e..62cc29364fba 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6942,7 +6942,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf, { char tok[21]; /* U64_MAX */ - if (!sscanf(buf, "%s %llu", tok, periodp)) + if (sscanf(buf, "%20s %llu", tok, periodp) < 1) return -EINVAL; *periodp *= NSEC_PER_USEC; -- cgit v1.2.3 From 4a45137bb2eb6270fee2471881526927ada2b734 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 18 Mar 2019 22:24:03 +0100 Subject: x86/mm: Don't leak kernel addresses [ Upstream commit a3151724437f54076cc10bc02b1c4f0003ae36cd ] Since commit: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") at boot "____ptrval____" is printed instead of actual addresses: found SMP MP-table at [mem 0x000f5cc0-0x000f5ccf] mapped at [(____ptrval____)] Instead of changing the print to "%px", and leaking a kernel addresses, just remove the print completely, like in: 071929dbdd865f77 ("arm64: Stop printing the virtual memory layout"). Signed-off-by: Matteo Croce Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3482460d984d..1bfe5c6e6cfe 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -598,8 +598,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) mpf_base = base; mpf_found = true; - pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", - base, base + sizeof(*mpf) - 1, mpf); + pr_info("found SMP MP-table at [mem %#010lx-%#010lx]\n", + base, base + sizeof(*mpf) - 1); memblock_reserve(base, sizeof(*mpf)); if (mpf->physptr) -- cgit v1.2.3 From 638bf55940b966047555ba1caac93799749bb350 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Tue, 12 Feb 2019 09:34:39 -0500 Subject: tools/power turbostat: return the exit status of a command [ Upstream commit 2a95496634a017c19641f26f00907af75b962f01 ] turbostat failed to return a non-zero exit status even though the supplied command (turbostat ) failed. Currently when turbostat forks a command it returns zero instead of the actual exit status of the command. Modify the code to return the exit status. Signed-off-by: David Arcari Acked-by: Len Brown Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9327c0ddc3a5..c3fad065c89c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5077,6 +5077,9 @@ int fork_it(char **argv) signal(SIGQUIT, SIG_IGN); if (waitpid(child_pid, &status, 0) == -1) err(status, "waitpid"); + + if (WIFEXITED(status)) + status = WEXITSTATUS(status); } /* * n.b. fork_it() does not check for errors from for_all_cpus() -- cgit v1.2.3 From 9ec3c84936f7206bb68cbba3c543cfafcaab9c63 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 Mar 2019 09:29:26 -0700 Subject: scsi: core: Also call destroy_rcu_head() for passthrough requests [ Upstream commit db983f6eef57a9d78af79bc32389b7e60eb3c47d ] cmd->rcu is initialized by scsi_initialize_rq(). For passthrough requests, blk_get_request() calls scsi_initialize_rq(). For filesystem requests, scsi_init_command() calls scsi_initialize_rq(). Make sure that destroy_rcu_head() is called for passthrough requests. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ewan D. Milne Cc: Johannes Thumshirn Reported-by: Ewan D. Milne Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_lib.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 5a6e8e12701a..655ad26106e4 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -598,9 +598,16 @@ static bool scsi_end_request(struct request *req, blk_status_t error, if (!blk_rq_is_scsi(req)) { WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED)); cmd->flags &= ~SCMD_INITIALIZED; - destroy_rcu_head(&cmd->rcu); } + /* + * Calling rcu_barrier() is not necessary here because the + * SCSI error handler guarantees that the function called by + * call_rcu() has been called before scsi_end_request() is + * called. + */ + destroy_rcu_head(&cmd->rcu); + /* * In the MQ case the command gets freed by __blk_mq_end_request, * so we have to do all cleanup that depends on it earlier. -- cgit v1.2.3 From c957d798c11ca0e09bc27273a4ba554b419a6ff3 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Fri, 15 Mar 2019 15:04:19 -0700 Subject: scsi: qla2xxx: Fix NULL pointer crash due to stale CPUID [ Upstream commit ac444b4f0ace05d7c4c99f6b1e5b0cae0852f025 ] This patch fixes crash due to NULL pointer derefrence because CPU pointer is not set and used by driver. Instead, driver is passes CPU as tag via ha->isp_ops->{lun_reset|target_reset} [ 30.160780] qla2xxx [0000:a0:00.1]-8038:9: Cable is unplugged... [ 69.984045] qla2xxx [0000:a0:00.0]-8009:8: DEVICE RESET ISSUED nexus=8:0:0 cmd=00000000b0d62f46. [ 69.992849] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 [ 70.000680] PGD 0 P4D 0 [ 70.003232] Oops: 0000 [#1] SMP PTI [ 70.006727] CPU: 2 PID: 6714 Comm: sg_reset Kdump: loaded Not tainted 4.18.0-67.el8.x86_64 #1 [ 70.015258] Hardware name: NEC Express5800/T110j [N8100-2758Y]/MX32-PH0-NJ, BIOS F11 02/13/2019 [ 70.024016] RIP: 0010:blk_mq_rq_cpu+0x9/0x10 [ 70.028315] Code: 01 58 01 00 00 48 83 c0 28 48 3d 80 02 00 00 75 ab c3 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 47 08 <8b> 40 40 c3 0f 1f 00 0f 1f 44 00 00 48 83 ec 10 48 c7 c6 20 6e 7c [ 70.047087] RSP: 0018:ffff99a481487d58 EFLAGS: 00010246 [ 70.052322] RAX: 0000000000000000 RBX: ffffffffc041b08b RCX: 0000000000000000 [ 70.059466] RDX: 0000000000000000 RSI: ffff8d10b6b16898 RDI: ffff8d10b341e400 [ 70.066615] RBP: ffffffffc03a6bd0 R08: 0000000000000415 R09: 0000000000aaaaaa [ 70.073765] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8d10b341e528 [ 70.080914] R13: ffff8d10aadefc00 R14: ffff8d0f64efa998 R15: ffff8d0f64efa000 [ 70.088083] FS: 00007f90a201e540(0000) GS:ffff8d10b6b00000(0000) knlGS:0000000000000000 [ 70.096188] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 70.101959] CR2: 0000000000000040 CR3: 0000000268886005 CR4: 00000000003606e0 [ 70.109127] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 70.116277] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 70.123425] Call Trace: [ 70.125896] __qla2xxx_eh_generic_reset+0xb1/0x220 [qla2xxx] [ 70.131572] scsi_ioctl_reset+0x1f5/0x2a0 [ 70.135600] scsi_ioctl+0x18e/0x397 [ 70.139099] ? sd_ioctl+0x7c/0x100 [sd_mod] [ 70.143287] blkdev_ioctl+0x32b/0x9f0 [ 70.146954] ? __check_object_size+0xa3/0x181 [ 70.151323] block_ioctl+0x39/0x40 [ 70.154735] do_vfs_ioctl+0xa4/0x630 [ 70.158322] ? syscall_trace_enter+0x1d3/0x2c0 [ 70.162769] ksys_ioctl+0x60/0x90 [ 70.166104] __x64_sys_ioctl+0x16/0x20 [ 70.169859] do_syscall_64+0x5b/0x1b0 [ 70.173532] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 70.178587] RIP: 0033:0x7f90a1b3445b [ 70.182183] Code: 0f 1e fa 48 8b 05 2d aa 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fd a9 2c 00 f7 d8 64 89 01 48 [ 70.200956] RSP: 002b:00007fffdca88b68 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 70.208535] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f90a1b3445b [ 70.215684] RDX: 00007fffdca88b84 RSI: 0000000000002284 RDI: 0000000000000003 [ 70.222833] RBP: 00007fffdca88ca8 R08: 00007fffdca88b84 R09: 0000000000000000 [ 70.229981] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fffdca88b84 [ 70.237131] R13: 0000000000000000 R14: 000055ab09b0bd28 R15: 0000000000000000 [ 70.244284] Modules linked in: nft_chain_route_ipv4 xt_CHECKSUM nft_chain_nat_ipv4 ipt_MASQUERADE nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c ipt_REJECT nf_reject_ipv4 nft_counter nft_compat tun bridge stp llc nf_tables nfnetli nk devlink sunrpc vfat fat intel_rapl intel_pmc_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm wmi_bmof iTCO_wdt iTCO_ vendor_support irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_ssif intel_cstate intel_uncore intel_rapl_perf ipmi_si jo ydev pcspkr ipmi_devintf sg wmi ipmi_msghandler video acpi_power_meter acpi_pad mei_me i2c_i801 mei ip_tables ext4 mbcache jbd2 sr_mod cd rom sd_mod qla2xxx ast i2c_algo_bit drm_kms_helper nvme_fc syscopyarea sysfillrect uas sysimgblt fb_sys_fops nvme_fabrics ttm [ 70.314805] usb_storage nvme_core crc32c_intel scsi_transport_fc ahci drm libahci tg3 libata megaraid_sas pinctrl_cannonlake pinctrl_ intel [ 70.327335] CR2: 0000000000000040 Fixes: 9cf2bab630765 ("block: kill request ->cpu member") Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7e35ce2162d0..503fda4e7e8e 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1459,7 +1459,7 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type, goto eh_reset_failed; } err = 2; - if (do_reset(fcport, cmd->device->lun, blk_mq_rq_cpu(cmd->request) + 1) + if (do_reset(fcport, cmd->device->lun, 1) != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x800c, "do_reset failed for cmd=%p.\n", cmd); -- cgit v1.2.3 From 0c6568492019a881d9233dae813ee6b693413f90 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:50:01 -0700 Subject: perf stat: Fix --no-scale [ Upstream commit 75998bb263bf48c1c85d78cd2d2f3a97d3747cab ] The -c option to enable multiplex scaling has been useless for quite some time because scaling is default. It's only useful as --no-scale to disable scaling. But the non scaling code path has bitrotted and doesn't print anything because perf output code relies on value run/ena information. Also even when we don't want to scale a value it's still useful to show its multiplex percentage. This patch: - Fixes help and documentation to show --no-scale instead of -c - Removes -c, only keeps the long option because -c doesn't support negatives. - Enables running/enabled even with --no-scale - And fixes some other problems in the no-scale output. Before: $ perf stat --no-scale -e cycles true Performance counter stats for 'true': cycles 0.000984154 seconds time elapsed After: $ ./perf stat --no-scale -e cycles true Performance counter stats for 'true': 706,070 cycles 0.001219821 seconds time elapsed Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo LPU-Reference: 20190314225002.30108-9-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-xggjvwcdaj2aqy8ib3i4b1g6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Documentation/perf-stat.txt | 5 ++--- tools/perf/builtin-stat.c | 3 ++- tools/perf/util/evsel.c | 3 +-- tools/perf/util/stat.c | 12 ++++-------- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4bc2085e5197..39c05f89104e 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -72,9 +72,8 @@ report:: --all-cpus:: system-wide collection from all CPUs (default if no target is specified) --c:: ---scale:: - scale/normalize counter values +--no-scale:: + Don't scale/normalize counter values -d:: --detailed:: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 63a3afc7f32b..a52295dbad2b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -728,7 +728,8 @@ static struct option stat_options[] = { "system-wide collection from all CPUs"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), - OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), + OPT_BOOLEAN(0, "scale", &stat_config.scale, + "Use --no-scale to disable counter scaling for multiplexing"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &stat_config.run_count, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbc0466db368..142e1ca94992 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1341,8 +1341,7 @@ void perf_counts_values__scale(struct perf_counts_values *count, scaled = 1; count->val = (u64)((double) count->val * count->ena / count->run + 0.5); } - } else - count->ena = count->run = 0; + } if (pscaled) *pscaled = scaled; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4d40515307b8..2856cc9d5a31 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -291,10 +291,8 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel break; case AGGR_GLOBAL: aggr->val += count->val; - if (config->scale) { - aggr->ena += count->ena; - aggr->run += count->run; - } + aggr->ena += count->ena; + aggr->run += count->run; case AGGR_UNSET: default: break; @@ -442,10 +440,8 @@ int create_perf_stat_counter(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; struct perf_evsel *leader = evsel->leader; - if (config->scale) { - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - } + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; /* * The event is part of non trivial group, let's enable -- cgit v1.2.3 From 84b2a2ca6d4142568a4bba52bfcef4c2be944f4e Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:42 +0800 Subject: perf list: Don't forget to drop the reference to the allocated thread_map [ Upstream commit 39df730b09774bd860e39ea208a48d15078236cb ] Detected via gcc's ASan: Direct leak of 2048 byte(s) in 64 object(s) allocated from: 6 #0 0x7f606512e370 in __interceptor_realloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee370) 7 #1 0x556b0f1d7ddd in thread_map__realloc util/thread_map.c:43 8 #2 0x556b0f1d84c7 in thread_map__new_by_tid util/thread_map.c:85 9 #3 0x556b0f0e045e in is_event_supported util/parse-events.c:2250 10 #4 0x556b0f0e1aa1 in print_hwcache_events util/parse-events.c:2382 11 #5 0x556b0f0e3231 in print_events util/parse-events.c:2514 12 #6 0x556b0ee0a66e in cmd_list /home/changbin/work/linux/tools/perf/builtin-list.c:58 13 #7 0x556b0f01e0ae in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 14 #8 0x556b0f01e859 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 15 #9 0x556b0f01edc8 in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 16 #10 0x556b0f01f71f in main /home/changbin/work/linux/tools/perf/perf.c:520 17 #11 0x7f6062ccf09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 89896051f8da ("perf tools: Do not put a variable sized type not at the end of a struct") Link: http://lkml.kernel.org/r/20190316080556.3075-3-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/parse-events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 920e1e6551dd..9afd0d2beef5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2271,6 +2271,7 @@ static bool is_event_supported(u8 type, unsigned config) perf_evsel__delete(evsel); } + thread_map__put(tmap); return ret; } -- cgit v1.2.3 From d41f87ee413f3701a53413ad7c31cb6a00cb6200 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:43 +0800 Subject: perf tools: Fix errors under optimization level '-Og' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 11c1ea6f1a9bc97bf857fd12f72eacb6c69794e2 ] Optimization level '-Og' offers a reasonable level of optimization while maintaining fast compilation and a good debugging experience. This patch tries to make it work. $ make DEBUG=1 EXTRA_CFLAGS='-Og' bench/epoll-ctl.c: In function ‘do_threads’: bench/epoll-ctl.c:274:9: error: ‘ret’ may be used uninitialized in this function [-Werror=maybe-uninitialized] return ret; ^~~ ... Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lkml.kernel.org/r/20190316080556.3075-4-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 2 +- tools/perf/bench/epoll-ctl.c | 2 +- tools/perf/bench/epoll-wait.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 169e347c76f6..9ba1a2e1ed7a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -627,7 +627,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) bool strict = !(flags & MAPS_RELAX_COMPAT); int i, map_idx, map_def_sz, nr_maps = 0; Elf_Scn *scn; - Elf_Data *data; + Elf_Data *data = NULL; Elf_Data *symbols = obj->efile.symbols; if (obj->efile.maps_shndx < 0) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 0c0a6e824934..2af067859966 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -224,7 +224,7 @@ static int do_threads(struct worker *worker, struct cpu_map *cpu) pthread_attr_t thread_attr, *attrp = NULL; cpu_set_t cpuset; unsigned int i, j; - int ret; + int ret = 0; if (!noaffinity) pthread_attr_init(&thread_attr); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 5a11534e96a0..fe85448abd45 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -293,7 +293,7 @@ static int do_threads(struct worker *worker, struct cpu_map *cpu) pthread_attr_t thread_attr, *attrp = NULL; cpu_set_t cpuset; unsigned int i, j; - int ret, events = EPOLLIN; + int ret = 0, events = EPOLLIN; if (oneshot) events |= EPOLLONESHOT; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 6d598cc071ae..1a9c3becf5ff 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -18,7 +18,7 @@ static void testcase(void) int i; for (i = 0; i < NR_ITERS; i++) { - char proc_name[10]; + char proc_name[15]; snprintf(proc_name, sizeof(proc_name), "p:%d\n", i); prctl(PR_SET_NAME, proc_name); -- cgit v1.2.3 From 5bb92662f2f109ec0898139b0bf1257679890ca8 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:44 +0800 Subject: perf config: Fix an error in the config template documentation [ Upstream commit 9b40dff7ba3caaf0d1919f98e136fa3400bd34aa ] The option 'sort-order' should be 'sort_order'. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 893c5c798be9 ("perf config: Show default report configuration in example and docs") Link: http://lkml.kernel.org/r/20190316080556.3075-5-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Documentation/perf-config.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 4ac7775fbc11..4851285ba00c 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -114,7 +114,7 @@ Given a $HOME/.perfconfig like this: [report] # Defaults - sort-order = comm,dso,symbol + sort_order = comm,dso,symbol percent-limit = 0 queue-size = 0 children = true -- cgit v1.2.3 From 86cb58f1a12fdf845a56066cf25a38a4a0a8a87d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:45 +0800 Subject: perf config: Fix a memory leak in collect_config() [ Upstream commit 54569ba4b06d5baedae4614bde33a25a191473ba ] Detected with gcc's ASan: Direct leak of 66 byte(s) in 5 object(s) allocated from: #0 0x7ff3b1f32070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x560c8761034d in collect_config util/config.c:597 #2 0x560c8760d9cb in get_value util/config.c:169 #3 0x560c8760dfd7 in perf_parse_file util/config.c:285 #4 0x560c8760e0d2 in perf_config_from_file util/config.c:476 #5 0x560c876108fd in perf_config_set__init util/config.c:661 #6 0x560c87610c72 in perf_config_set__new util/config.c:709 #7 0x560c87610d2f in perf_config__init util/config.c:718 #8 0x560c87610e5d in perf_config util/config.c:730 #9 0x560c875ddea0 in main /home/changbin/work/linux/tools/perf/perf.c:442 #10 0x7ff3afb8609a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Cc: Taeung Song Fixes: 20105ca1240c ("perf config: Introduce perf_config_set class") Link: http://lkml.kernel.org/r/20190316080556.3075-6-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/config.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 1ea8f898f1a1..9ecdbd5986b3 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -632,11 +632,10 @@ static int collect_config(const char *var, const char *value, } ret = set_value(item, value); - return ret; out_free: free(key); - return -1; + return ret; } int perf_config_set__collect(struct perf_config_set *set, const char *file_name, -- cgit v1.2.3 From 29dddb32f56b9855bd6608d4f6e81c48e4ee3aa8 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:46 +0800 Subject: perf build-id: Fix memory leak in print_sdt_events() [ Upstream commit 8bde8516893da5a5fdf06121f74d11b52ab92df5 ] Detected with gcc's ASan: Direct leak of 4356 byte(s) in 120 object(s) allocated from: #0 0x7ff1a2b5a070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x55719aef4814 in build_id_cache__origname util/build-id.c:215 #2 0x55719af649b6 in print_sdt_events util/parse-events.c:2339 #3 0x55719af66272 in print_events util/parse-events.c:2542 #4 0x55719ad1ecaa in cmd_list /home/changbin/work/linux/tools/perf/builtin-list.c:58 #5 0x55719aec745d in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #6 0x55719aec7d1a in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #7 0x55719aec8184 in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #8 0x55719aeca41a in main /home/changbin/work/linux/tools/perf/perf.c:520 #9 0x7ff1a07ae09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 40218daea1db ("perf list: Show SDT and pre-cached events") Link: http://lkml.kernel.org/r/20190316080556.3075-7-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/build-id.c | 1 + tools/perf/util/parse-events.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 04b1d53e4bf9..1d352621bd48 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -183,6 +183,7 @@ char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size) return bf; } +/* The caller is responsible to free the returned buffer. */ char *build_id_cache__origname(const char *sbuild_id) { char *linkname; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9afd0d2beef5..03860313313c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2342,6 +2342,7 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, printf(" %-50s [%s]\n", buf, "SDT event"); free(buf); } + free(path); } else printf(" %-50s [%s]\n", nd->s, "SDT event"); if (nd2) { -- cgit v1.2.3 From 4a66a027c1ba59192b887d794c79ece8f23c61df Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:48 +0800 Subject: perf top: Fix error handling in cmd_top() [ Upstream commit 70c819e4bf1c5f492768b399d898d458ccdad2b6 ] We should go to the cleanup path, to avoid leaks, detected using gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lkml.kernel.org/r/20190316080556.3075-9-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-top.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f64e312db787..616408251e25 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1633,8 +1633,9 @@ int cmd_top(int argc, const char **argv) annotation_config__init(); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); - if (symbol__init(NULL) < 0) - return -1; + status = symbol__init(NULL); + if (status < 0) + goto out_delete_evlist; sort__setup_elide(stdout); -- cgit v1.2.3 From d86bf97d119d9dfdb2c2ac9f3f5602c4107bdb57 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:49 +0800 Subject: perf hist: Add missing map__put() in error case [ Upstream commit cb6186aeffda4d27e56066c79e9579e7831541d3 ] We need to map__put() before returning from failure of sample__resolve_callchain(). Detected with gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Krister Johansen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 9c68ae98c6f7 ("perf callchain: Reference count maps") Link: http://lkml.kernel.org/r/20190316080556.3075-10-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/hist.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 8aad8330e392..e416e76f5600 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1048,8 +1048,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); - if (err) + if (err) { + map__put(alm); return err; + } err = iter->ops->prepare_entry(iter, al); if (err) -- cgit v1.2.3 From 60b7f41c4aea4d09bc36c957014e4bc7aa056597 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:50 +0800 Subject: perf map: Remove map from 'names' tree in __maps__remove() [ Upstream commit b49265e04410b97b31a5ee66ef6782c1b2d6cd2c ] There are two trees for each map inserted by maps__insert(), so remove it from the 'names' tree in __maps__remove(). Detected with gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section") Link: http://lkml.kernel.org/r/20190316080556.3075-11-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6751301a755c..750ecc3dad50 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -911,6 +911,9 @@ static void __maps__remove(struct maps *maps, struct map *map) { rb_erase_init(&map->rb_node, &maps->entries); map__put(map); + + rb_erase_init(&map->rb_node_name, &maps->names); + map__put(map); } void maps__remove(struct maps *maps, struct map *map) -- cgit v1.2.3 From 940df86f505d30aafc0620b2fdc9e52282c79dcc Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:51 +0800 Subject: perf maps: Purge all maps from the 'names' tree [ Upstream commit da3a53a7390a89391bd63bead0c2e9af4c5ef3d6 ] Add function __maps__purge_names() to purge all maps from the names tree. We need to cleanup the names tree in maps__exit(). Detected with gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section") Link: http://lkml.kernel.org/r/20190316080556.3075-12-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/map.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 750ecc3dad50..2b37f56f0549 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -571,10 +571,25 @@ static void __maps__purge(struct maps *maps) } } +static void __maps__purge_names(struct maps *maps) +{ + struct rb_root *root = &maps->names; + struct rb_node *next = rb_first(root); + + while (next) { + struct map *pos = rb_entry(next, struct map, rb_node_name); + + next = rb_next(&pos->rb_node_name); + rb_erase_init(&pos->rb_node_name, root); + map__put(pos); + } +} + static void maps__exit(struct maps *maps) { down_write(&maps->lock); __maps__purge(maps); + __maps__purge_names(maps); up_write(&maps->lock); } -- cgit v1.2.3 From 05fe1d5b6ed181e881d73de9aa0a865695fc685f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:52 +0800 Subject: perf top: Fix global-buffer-overflow issue [ Upstream commit 1e5b0cf8672e622257df024074e6e09bfbcb7750 ] The array str[] should have six elements. ================================================================= ==4322==ERROR: AddressSanitizer: global-buffer-overflow on address 0x56463844e300 at pc 0x564637e7ad0d bp 0x7f30c8c89d10 sp 0x7f30c8c89d00 READ of size 8 at 0x56463844e300 thread T9 #0 0x564637e7ad0c in __ordered_events__flush util/ordered-events.c:316 #1 0x564637e7b0e4 in ordered_events__flush util/ordered-events.c:338 #2 0x564637c6a57d in process_thread /home/changbin/work/linux/tools/perf/builtin-top.c:1073 #3 0x7f30d173a163 in start_thread (/lib/x86_64-linux-gnu/libpthread.so.0+0x8163) #4 0x7f30cfffbdee in __clone (/lib/x86_64-linux-gnu/libc.so.6+0x11adee) 0x56463844e300 is located 32 bytes to the left of global variable 'flags' defined in 'util/trace-event-parse.c:229:26' (0x56463844e320) of size 192 0x56463844e300 is located 0 bytes to the right of global variable 'str' defined in 'util/ordered-events.c:268:28' (0x56463844e2e0) of size 32 SUMMARY: AddressSanitizer: global-buffer-overflow util/ordered-events.c:316 in __ordered_events__flush Shadow bytes around the buggy address: 0x0ac947081c10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c50: 00 00 00 00 00 00 00 00 f9 f9 f9 f9 00 00 00 00 =>0x0ac947081c60:[f9]f9 f9 f9 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c70: 00 00 00 00 00 00 00 00 00 00 00 00 f9 f9 f9 f9 0x0ac947081c80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081ca0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081cb0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb Thread T9 created by T0 here: #0 0x7f30d179de5f in __interceptor_pthread_create (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x4ae5f) #1 0x564637c6b954 in __cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1253 #2 0x564637c7173c in cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1642 #3 0x564637d85038 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #4 0x564637d85577 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #5 0x564637d8597b in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #6 0x564637d860e9 in main /home/changbin/work/linux/tools/perf/perf.c:520 #7 0x7f30cff0509a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Cc: Jiri Olsa Fixes: 16c66bc167cc ("perf top: Add processing thread") Fixes: 68ca5d07de20 ("perf ordered_events: Add ordered_events__flush_time interface") Link: http://lkml.kernel.org/r/20190316080556.3075-13-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/ordered-events.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index ea523d3b248f..989fed6f43b5 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -270,6 +270,8 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how, "FINAL", "ROUND", "HALF ", + "TOP ", + "TIME ", }; int err; bool show_progress = false; -- cgit v1.2.3 From ece1fd3f4023b3a777801a7a2e0aa62f38916454 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Mar 2019 16:41:28 -0300 Subject: perf evsel: Free evsel->counts in perf_evsel__exit() [ Upstream commit 42dfa451d825a2ad15793c476f73e7bbc0f9d312 ] Using gcc's ASan, Changbin reports: ================================================================= ==7494==ERROR: LeakSanitizer: detected memory leaks Direct leak of 48 byte(s) in 1 object(s) allocated from: #0 0x7f0333a89138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x5625e5330a5e in zalloc util/util.h:23 #2 0x5625e5330a9b in perf_counts__new util/counts.c:10 #3 0x5625e5330ca0 in perf_evsel__alloc_counts util/counts.c:47 #4 0x5625e520d8e5 in __perf_evsel__read_on_cpu util/evsel.c:1505 #5 0x5625e517a985 in perf_evsel__read_on_cpu /home/work/linux/tools/perf/util/evsel.h:347 #6 0x5625e517ad1a in test__openat_syscall_event tests/openat-syscall.c:47 #7 0x5625e51528e6 in run_test tests/builtin-test.c:358 #8 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #9 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #10 0x5625e515572f in cmd_test tests/builtin-test.c:722 #11 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #12 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #13 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #14 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #15 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Indirect leak of 72 byte(s) in 1 object(s) allocated from: #0 0x7f0333a89138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x5625e532560d in zalloc util/util.h:23 #2 0x5625e532566b in xyarray__new util/xyarray.c:10 #3 0x5625e5330aba in perf_counts__new util/counts.c:15 #4 0x5625e5330ca0 in perf_evsel__alloc_counts util/counts.c:47 #5 0x5625e520d8e5 in __perf_evsel__read_on_cpu util/evsel.c:1505 #6 0x5625e517a985 in perf_evsel__read_on_cpu /home/work/linux/tools/perf/util/evsel.h:347 #7 0x5625e517ad1a in test__openat_syscall_event tests/openat-syscall.c:47 #8 0x5625e51528e6 in run_test tests/builtin-test.c:358 #9 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #10 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #11 0x5625e515572f in cmd_test tests/builtin-test.c:722 #12 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #13 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #14 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #15 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #16 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) His patch took care of evsel->prev_raw_counts, but the above backtraces are about evsel->counts, so fix that instead. Reported-by: Changbin Du Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lkml.kernel.org/n/tip-hd1x13g59f0nuhe4anxhsmfp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 142e1ca94992..50c933044f88 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1289,6 +1289,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); assert(evsel->evlist == NULL); + perf_evsel__free_counts(evsel); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); perf_evsel__free_config_terms(evsel); -- cgit v1.2.3 From 5ffefcfe9764e13924ff27a9b431f1a8a3cc9efc Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:54 +0800 Subject: perf tests: Fix a memory leak of cpu_map object in the openat_syscall_event_on_all_cpus test [ Upstream commit 93faa52e8371f0291ee1ff4994edae2b336b6233 ] ================================================================= ==7497==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f0333a88f30 in __interceptor_malloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xedf30) #1 0x5625e5326213 in cpu_map__trim_new util/cpumap.c:45 #2 0x5625e5326703 in cpu_map__read util/cpumap.c:103 #3 0x5625e53267ef in cpu_map__read_all_cpu_map util/cpumap.c:120 #4 0x5625e5326915 in cpu_map__new util/cpumap.c:135 #5 0x5625e517b355 in test__openat_syscall_event_on_all_cpus tests/openat-syscall-all-cpus.c:36 #6 0x5625e51528e6 in run_test tests/builtin-test.c:358 #7 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #8 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #9 0x5625e515572f in cmd_test tests/builtin-test.c:722 #10 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #11 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #12 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #13 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #14 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: f30a79b012e5 ("perf tools: Add reference counting for cpu_map object") Link: http://lkml.kernel.org/r/20190316080556.3075-15-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/openat-syscall-all-cpus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index c531e6deb104..493ecb611540 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -45,7 +45,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); - goto out_thread_map_delete; + goto out_cpu_map_delete; } if (perf_evsel__open(evsel, cpus, threads) < 0) { @@ -119,6 +119,8 @@ out_close_fd: perf_evsel__close_fd(evsel); out_evsel_delete: perf_evsel__delete(evsel); +out_cpu_map_delete: + cpu_map__put(cpus); out_thread_map_delete: thread_map__put(threads); return err; -- cgit v1.2.3 From 26980cd03ea6f83b82639550a637865bdf002f23 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:55 +0800 Subject: perf tests: Fix memory leak by expr__find_other() in test__expr() [ Upstream commit f97a8991d3b998e518f56794d879f645964de649 ] ================================================================= ==7506==ERROR: LeakSanitizer: detected memory leaks Direct leak of 13 byte(s) in 3 object(s) allocated from: #0 0x7f03339d6070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x5625e53aaef0 in expr__find_other util/expr.y:221 #2 0x5625e51bcd3f in test__expr tests/expr.c:52 #3 0x5625e51528e6 in run_test tests/builtin-test.c:358 #4 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #5 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #6 0x5625e515572f in cmd_test tests/builtin-test.c:722 #7 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #8 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #9 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #10 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #11 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 075167363f8b ("perf tools: Add a simple expression parser for JSON") Link: http://lkml.kernel.org/r/20190316080556.3075-16-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/expr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 01f0706995a9..9acc1e80b936 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -19,7 +19,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) const char *p; const char **other; double val; - int ret; + int i, ret; struct parse_ctx ctx; int num_other; @@ -56,6 +56,9 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); TEST_ASSERT_VAL("find other", other[3] == NULL); + + for (i = 0; i < num_other; i++) + free((void *)other[i]); free((void *)other); return 0; -- cgit v1.2.3 From fd9f338db8679c4d053c2f73a0863d7309801689 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:56 +0800 Subject: perf tests: Fix a memory leak in test__perf_evsel__tp_sched_test() [ Upstream commit d982b33133284fa7efa0e52ae06b88f9be3ea764 ] ================================================================= ==20875==ERROR: LeakSanitizer: detected memory leaks Direct leak of 1160 byte(s) in 1 object(s) allocated from: #0 0x7f1b6fc84138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x55bd50005599 in zalloc util/util.h:23 #2 0x55bd500068f5 in perf_evsel__newtp_idx util/evsel.c:327 #3 0x55bd4ff810fc in perf_evsel__newtp /home/work/linux/tools/perf/util/evsel.h:216 #4 0x55bd4ff81608 in test__perf_evsel__tp_sched_test tests/evsel-tp-sched.c:69 #5 0x55bd4ff528e6 in run_test tests/builtin-test.c:358 #6 0x55bd4ff52baf in test_and_print tests/builtin-test.c:388 #7 0x55bd4ff543fe in __cmd_test tests/builtin-test.c:583 #8 0x55bd4ff5572f in cmd_test tests/builtin-test.c:722 #9 0x55bd4ffc4087 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #10 0x55bd4ffc45c6 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #11 0x55bd4ffc49ca in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #12 0x55bd4ffc5138 in main /home/changbin/work/linux/tools/perf/perf.c:520 #13 0x7f1b6e34809a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Indirect leak of 19 byte(s) in 1 object(s) allocated from: #0 0x7f1b6fc83f30 in __interceptor_malloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xedf30) #1 0x7f1b6e3ac30f in vasprintf (/lib/x86_64-linux-gnu/libc.so.6+0x8830f) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 6a6cd11d4e57 ("perf test: Add test for the sched tracepoint format fields") Link: http://lkml.kernel.org/r/20190316080556.3075-17-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/evsel-tp-sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index ea7acf403727..71f60c0f9faa 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -85,5 +85,6 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; + perf_evsel__delete(evsel); return ret; } -- cgit v1.2.3 From 66495ebfd4dff8434b494193e78aeeeba0c5c358 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 18 Mar 2019 21:47:09 +0300 Subject: ACPI / utils: Drop reference in test for device presence [ Upstream commit 54e3aca84e571559915998aa6cc05e5ac37c043b ] When commit 8661423eea1a ("ACPI / utils: Add new acpi_dev_present helper") introduced acpi_dev_present(), it missed the fact that bus_find_device() took a reference on the device found by it and the callers of acpi_dev_present() don't drop that reference. Drop the reference on the device in acpi_dev_present(). Fixes: 8661423eea1a ("ACPI / utils: Add new acpi_dev_present helper") Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 78db97687f26..c4b06cc075f9 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -800,6 +800,7 @@ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) match.hrv = hrv; dev = bus_find_device(&acpi_bus_type, NULL, &match, acpi_dev_match_cb); + put_device(dev); return !!dev; } EXPORT_SYMBOL(acpi_dev_present); -- cgit v1.2.3 From 8bd30e5e0ec594d6200448f0c2b889bd8bd28a5a Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Tue, 12 Mar 2019 15:51:28 +0900 Subject: PM / Domains: Avoid a potential deadlock [ Upstream commit 2071ac985d37efe496782c34318dbead93beb02f ] Lockdep warns that prepare_lock and genpd->mlock can cause a deadlock the deadlock scenario is like following: First thread is probing cs2000 cs2000_probe() clk_register() __clk_core_init() clk_prepare_lock() ----> acquires prepare_lock cs2000_recalc_rate() i2c_smbus_read_byte_data() rcar_i2c_master_xfer() dma_request_chan() rcar_dmac_of_xlate() rcar_dmac_alloc_chan_resources() pm_runtime_get_sync() __pm_runtime_resume() rpm_resume() rpm_callback() genpd_runtime_resume() ----> acquires genpd->mlock Second thread is attaching any device to the same PM domain genpd_add_device() genpd_lock() ----> acquires genpd->mlock cpg_mssr_attach_dev() of_clk_get_from_provider() __of_clk_get_from_provider() __clk_create_clk() clk_prepare_lock() ----> acquires prepare_lock Since currently no PM provider access genpd's critical section in .attach_dev, and .detach_dev callbacks, so there is no need to protect these two callbacks with genpd->mlock. This patch avoids a potential deadlock by moving out .attach_dev and .detach_dev from genpd->mlock, so that genpd->mlock won't be held when prepare_lock is acquired in .attach_dev and .detach_dev Signed-off-by: Jiada Wang Reviewed-by: Ulf Hansson Tested-by: Geert Uytterhoeven Reviewed-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/base/power/domain.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 500de1dee967..a00ca6b8117b 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1467,12 +1467,12 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); - genpd_lock(genpd); - ret = genpd->attach_dev ? genpd->attach_dev(genpd, dev) : 0; if (ret) goto out; + genpd_lock(genpd); + dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1480,9 +1480,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); - out: genpd_unlock(genpd); - + out: if (ret) genpd_free_dev_data(dev, gpd_data); else @@ -1531,15 +1530,15 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; - if (genpd->detach_dev) - genpd->detach_dev(genpd, dev); - dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); genpd_unlock(genpd); + if (genpd->detach_dev) + genpd->detach_dev(genpd, dev); + genpd_free_dev_data(dev, gpd_data); return 0; -- cgit v1.2.3 From c43003451a010ce1dcdbb05f53a265be1004931c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 20 Mar 2019 13:15:01 -0700 Subject: blk-iolatency: #include "blk.h" [ Upstream commit 373e915cd8e84544609eced57a44fbc084f8d60f ] This patch avoids that the following warning is reported when building with W=1: block/blk-iolatency.c:734:5: warning: no previous prototype for 'blk_iolatency_init' [-Wmissing-prototypes] Cc: Josef Bacik Fixes: d70675121546 ("block: introduce blk-iolatency io controller") # v4.19 Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-iolatency.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 2620baa1f699..507212d75ee2 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -75,6 +75,7 @@ #include #include "blk-rq-qos.h" #include "blk-stat.h" +#include "blk.h" #define DEFAULT_SCALE_COOKIE 1000000U -- cgit v1.2.3 From c24b1f67cea0b9cf52c024c68e97d3d948342f7d Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 19 Mar 2019 14:05:11 +0100 Subject: drm/exynos/mixer: fix MIXER shadow registry synchronisation code [ Upstream commit 6a3b45ada960ac475ec2b4103d43e57943b2b8d3 ] MIXER on Exynos5 SoCs uses different synchronisation method than Exynos4 to update internal state (shadow registers). Apparently the driver implements it incorrectly. The rule should be as follows: - do not request updating registers until previous request was finished, ie. MXR_CFG_LAYER_UPDATE_COUNT must be 0. - before setting registers synchronisation on VSYNC should be turned off, ie. MXR_STATUS_SYNC_ENABLE should be reset, - after finishing MXR_STATUS_SYNC_ENABLE should be set again. The patch hopefully implements it correctly. Below sample kernel log from page fault caused by the bug: [ 25.670038] exynos-sysmmu 14650000.sysmmu: 14450000.mixer: PAGE FAULT occurred at 0x2247b800 [ 25.677888] ------------[ cut here ]------------ [ 25.682164] kernel BUG at ../drivers/iommu/exynos-iommu.c:450! [ 25.687971] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM [ 25.693778] Modules linked in: [ 25.696816] CPU: 5 PID: 1553 Comm: fb-release_test Not tainted 5.0.0-rc7-01157-g5f86b1566bdd #136 [ 25.705646] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 25.711710] PC is at exynos_sysmmu_irq+0x1c0/0x264 [ 25.716470] LR is at lock_is_held_type+0x44/0x64 v2: added missing MXR_CFG_LAYER_UPDATE bit setting in mixer_enable_sync Reported-by: Marian Mihailescu Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos_mixer.c | 110 ++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 0573eab0e190..f35e4ab55b27 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -20,6 +20,7 @@ #include "regs-vp.h" #include +#include #include #include #include @@ -352,15 +353,62 @@ static void mixer_cfg_vp_blend(struct mixer_context *ctx, unsigned int alpha) mixer_reg_write(ctx, MXR_VIDEO_CFG, val); } -static void mixer_vsync_set_update(struct mixer_context *ctx, bool enable) +static bool mixer_is_synced(struct mixer_context *ctx) { - /* block update on vsync */ - mixer_reg_writemask(ctx, MXR_STATUS, enable ? - MXR_STATUS_SYNC_ENABLE : 0, MXR_STATUS_SYNC_ENABLE); + u32 base, shadow; + if (ctx->mxr_ver == MXR_VER_16_0_33_0 || + ctx->mxr_ver == MXR_VER_128_0_0_184) + return !(mixer_reg_read(ctx, MXR_CFG) & + MXR_CFG_LAYER_UPDATE_COUNT_MASK); + + if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) && + vp_reg_read(ctx, VP_SHADOW_UPDATE)) + return false; + + base = mixer_reg_read(ctx, MXR_CFG); + shadow = mixer_reg_read(ctx, MXR_CFG_S); + if (base != shadow) + return false; + + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); + shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); + if (base != shadow) + return false; + + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(1)); + shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(1)); + if (base != shadow) + return false; + + return true; +} + +static int mixer_wait_for_sync(struct mixer_context *ctx) +{ + ktime_t timeout = ktime_add_us(ktime_get(), 100000); + + while (!mixer_is_synced(ctx)) { + usleep_range(1000, 2000); + if (ktime_compare(ktime_get(), timeout) > 0) + return -ETIMEDOUT; + } + return 0; +} + +static void mixer_disable_sync(struct mixer_context *ctx) +{ + mixer_reg_writemask(ctx, MXR_STATUS, 0, MXR_STATUS_SYNC_ENABLE); +} + +static void mixer_enable_sync(struct mixer_context *ctx) +{ + if (ctx->mxr_ver == MXR_VER_16_0_33_0 || + ctx->mxr_ver == MXR_VER_128_0_0_184) + mixer_reg_writemask(ctx, MXR_CFG, ~0, MXR_CFG_LAYER_UPDATE); + mixer_reg_writemask(ctx, MXR_STATUS, ~0, MXR_STATUS_SYNC_ENABLE); if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags)) - vp_reg_write(ctx, VP_SHADOW_UPDATE, enable ? - VP_SHADOW_UPDATE_ENABLE : 0); + vp_reg_write(ctx, VP_SHADOW_UPDATE, VP_SHADOW_UPDATE_ENABLE); } static void mixer_cfg_scan(struct mixer_context *ctx, int width, int height) @@ -498,7 +546,6 @@ static void vp_video_buffer(struct mixer_context *ctx, spin_lock_irqsave(&ctx->reg_slock, flags); - vp_reg_write(ctx, VP_SHADOW_UPDATE, 1); /* interlace or progressive scan mode */ val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0); vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP); @@ -553,11 +600,6 @@ static void vp_video_buffer(struct mixer_context *ctx, vp_regs_dump(ctx); } -static void mixer_layer_update(struct mixer_context *ctx) -{ - mixer_reg_writemask(ctx, MXR_CFG, ~0, MXR_CFG_LAYER_UPDATE); -} - static void mixer_graph_buffer(struct mixer_context *ctx, struct exynos_drm_plane *plane) { @@ -640,11 +682,6 @@ static void mixer_graph_buffer(struct mixer_context *ctx, mixer_cfg_layer(ctx, win, priority, true); mixer_cfg_gfx_blend(ctx, win, pixel_alpha, state->base.alpha); - /* layer update mandatory for mixer 16.0.33.0 */ - if (ctx->mxr_ver == MXR_VER_16_0_33_0 || - ctx->mxr_ver == MXR_VER_128_0_0_184) - mixer_layer_update(ctx); - spin_unlock_irqrestore(&ctx->reg_slock, flags); mixer_regs_dump(ctx); @@ -709,7 +746,7 @@ static void mixer_win_reset(struct mixer_context *ctx) static irqreturn_t mixer_irq_handler(int irq, void *arg) { struct mixer_context *ctx = arg; - u32 val, base, shadow; + u32 val; spin_lock(&ctx->reg_slock); @@ -723,26 +760,9 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg) val &= ~MXR_INT_STATUS_VSYNC; /* interlace scan need to check shadow register */ - if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { - if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) && - vp_reg_read(ctx, VP_SHADOW_UPDATE)) - goto out; - - base = mixer_reg_read(ctx, MXR_CFG); - shadow = mixer_reg_read(ctx, MXR_CFG_S); - if (base != shadow) - goto out; - - base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); - shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); - if (base != shadow) - goto out; - - base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(1)); - shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(1)); - if (base != shadow) - goto out; - } + if (test_bit(MXR_BIT_INTERLACE, &ctx->flags) + && !mixer_is_synced(ctx)) + goto out; drm_crtc_handle_vblank(&ctx->crtc->base); } @@ -917,12 +937,14 @@ static void mixer_disable_vblank(struct exynos_drm_crtc *crtc) static void mixer_atomic_begin(struct exynos_drm_crtc *crtc) { - struct mixer_context *mixer_ctx = crtc->ctx; + struct mixer_context *ctx = crtc->ctx; - if (!test_bit(MXR_BIT_POWERED, &mixer_ctx->flags)) + if (!test_bit(MXR_BIT_POWERED, &ctx->flags)) return; - mixer_vsync_set_update(mixer_ctx, false); + if (mixer_wait_for_sync(ctx)) + dev_err(ctx->dev, "timeout waiting for VSYNC\n"); + mixer_disable_sync(ctx); } static void mixer_update_plane(struct exynos_drm_crtc *crtc, @@ -964,7 +986,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc) if (!test_bit(MXR_BIT_POWERED, &mixer_ctx->flags)) return; - mixer_vsync_set_update(mixer_ctx, true); + mixer_enable_sync(mixer_ctx); exynos_crtc_handle_event(crtc); } @@ -979,7 +1001,7 @@ static void mixer_enable(struct exynos_drm_crtc *crtc) exynos_drm_pipe_clk_enable(crtc, true); - mixer_vsync_set_update(ctx, false); + mixer_disable_sync(ctx); mixer_reg_writemask(ctx, MXR_STATUS, ~0, MXR_STATUS_SOFT_RESET); @@ -992,7 +1014,7 @@ static void mixer_enable(struct exynos_drm_crtc *crtc) mixer_commit(ctx); - mixer_vsync_set_update(ctx, true); + mixer_enable_sync(ctx); set_bit(MXR_BIT_POWERED, &ctx->flags); } -- cgit v1.2.3 From d79220ee57b8796fab5e8cbb3e505994405fd708 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Thu, 7 Mar 2019 19:40:35 +0100 Subject: irqchip/stm32: Don't clear rising/falling config registers at init [ Upstream commit 0dda09666f50eae9c5b794dd89b1fd8a8d89d714 ] Falling and rising configuration and status registers are not banked. As they are shared with M4 co-processor, they should not be cleared at probe time, else M4 co-processor configuration will be lost. Fixes: f9fc1745501e ("irqchip/stm32: Add host and driver data structures") Signed-off-by: Loic Pallardy Signed-off-by: Fabien Dessenne Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-stm32-exti.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index a93296b9b45d..162a7f547f9e 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -735,11 +735,6 @@ stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data, */ writel_relaxed(0, base + stm32_bank->imr_ofst); writel_relaxed(0, base + stm32_bank->emr_ofst); - writel_relaxed(0, base + stm32_bank->rtsr_ofst); - writel_relaxed(0, base + stm32_bank->ftsr_ofst); - writel_relaxed(~0UL, base + stm32_bank->rpr_ofst); - if (stm32_bank->fpr_ofst != UNDEF_REG) - writel_relaxed(~0UL, base + stm32_bank->fpr_ofst); pr_info("%pOF: bank%d\n", h_data->node, bank_idx); -- cgit v1.2.3 From dc60ac49b0403073505612ea08e07d5b08ae41a8 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Thu, 7 Mar 2019 19:40:36 +0100 Subject: irqchip/stm32: Don't set rising configuration registers at init [ Upstream commit 6a77623d78b307b34d4cf7886da6a907689bf388 ] The rising configuration status register (rtsr) is not banked. As it is shared with the co-processor, it should not be written at probe time, else the co-processor configuration will be lost. Fixes: f9fc1745501e ("irqchip/stm32: Add host and driver data structures") Signed-off-by: Fabien Dessenne Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-stm32-exti.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index 162a7f547f9e..7bd1d4cb2e19 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -716,7 +716,6 @@ stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data, const struct stm32_exti_bank *stm32_bank; struct stm32_exti_chip_data *chip_data; void __iomem *base = h_data->base; - u32 irqs_mask; stm32_bank = h_data->drv_data->exti_banks[bank_idx]; chip_data = &h_data->chips_data[bank_idx]; @@ -725,10 +724,6 @@ stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data, raw_spin_lock_init(&chip_data->rlock); - /* Determine number of irqs supported */ - writel_relaxed(~0UL, base + stm32_bank->rtsr_ofst); - irqs_mask = readl_relaxed(base + stm32_bank->rtsr_ofst); - /* * This IP has no reset, so after hot reboot we should * clear registers to avoid residue -- cgit v1.2.3 From 1512c986c5788f34939d0d1e391455ca4a3f8880 Mon Sep 17 00:00:00 2001 From: Jianguo Chen Date: Wed, 20 Mar 2019 18:54:21 +0000 Subject: irqchip/mbigen: Don't clear eventid when freeing an MSI [ Upstream commit fca269f201a8d9985c0a31fb60b15d4eb57cef80 ] mbigen_write_msg clears eventid bits of a mbigen register when free a interrupt, because msi_domain_deactivate memset struct msg to zero. Then multiple mbigen pins with zero eventid will report the same interrupt number. The eventid clear call trace: free_irq __free_irq irq_shutdown irq_domain_deactivate_irq __irq_domain_deactivate_irq __irq_domain_deactivate_irq msi_domain_deactivate platform_msi_write_msg mbigen_write_msg Signed-off-by: Jianguo Chen [maz: massaged subject] Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-mbigen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 567b29c47608..98b6e1d4b1a6 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -161,6 +161,9 @@ static void mbigen_write_msg(struct msi_desc *desc, struct msi_msg *msg) void __iomem *base = d->chip_data; u32 val; + if (!msg->address_lo && !msg->address_hi) + return; + base += get_mbigen_vec_reg(d->hwirq); val = readl_relaxed(base); -- cgit v1.2.3 From 105d043fedcb86bf04e15c67892f7343ec3e4e0f Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 18 Mar 2019 21:19:56 -0500 Subject: x86/hpet: Prevent potential NULL pointer dereference [ Upstream commit 2e84f116afca3719c9d0a1a78b47b48f75fd5724 ] hpet_virt_address may be NULL when ioremap_nocache fail, but the code lacks a check. Add a check to prevent NULL pointer dereference. Signed-off-by: Aditya Pakki Signed-off-by: Thomas Gleixner Cc: kjlu@umn.edu Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Kees Cook Cc: Joe Perches Cc: Nicolai Stange Cc: Roland Dreier Link: https://lkml.kernel.org/r/20190319021958.17275-1-pakki001@umn.edu Signed-off-by: Sasha Levin --- arch/x86/kernel/hpet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dfd3aca82c61..fb32925a2e62 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -905,6 +905,8 @@ int __init hpet_enable(void) return 0; hpet_set_mapping(); + if (!hpet_virt_address) + return 0; /* * Read the period and check for a sane value: -- cgit v1.2.3 From aaddd952f3db382b4d0786a39140ca2f8bd7334b Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 00:46:51 -0500 Subject: x86/hyperv: Prevent potential NULL pointer dereference [ Upstream commit 534c89c22e26b183d838294f0937ee092c82ad3a ] The page allocation in hv_cpu_init() can fail, but the code does not have a check for that. Add a check and return -ENOMEM when the allocation fails. [ tglx: Massaged changelog ] Signed-off-by: Kangjie Lu Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Acked-by: "K. Y. Srinivasan" Cc: pakki001@umn.edu Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Sasha Levin Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: linux-hyperv@vger.kernel.org Link: https://lkml.kernel.org/r/20190314054651.1315-1-kjlu@umn.edu Signed-off-by: Sasha Levin --- arch/x86/hyperv/hv_init.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index d3f42b6bbdac..8a9cff1f129d 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -102,9 +102,13 @@ static int hv_cpu_init(unsigned int cpu) u64 msr_vp_index; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; void **input_arg; + struct page *pg; input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - *input_arg = page_address(alloc_page(GFP_KERNEL)); + pg = alloc_page(GFP_KERNEL); + if (unlikely(!pg)) + return -ENOMEM; + *input_arg = page_address(pg); hv_get_vp_index(msr_vp_index); -- cgit v1.2.3 From f0a085e99ff77b741130bcb26980472912968e4f Mon Sep 17 00:00:00 2001 From: Matthew Whitehead Date: Thu, 14 Mar 2019 16:46:00 -0400 Subject: x86/cpu/cyrix: Use correct macros for Cyrix calls on Geode processors [ Upstream commit 18fb053f9b827bd98cfc64f2a35df8ab19745a1d ] There are comments in processor-cyrix.h advising you to _not_ make calls using the deprecated macros in this style: setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); This is because it expands the macro into a non-functioning calling sequence. The calling order must be: outb(CX86_CCR2, 0x22); inb(0x23); From the comments: * When using the old macros a line like * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); * gets expanded to: * do { * outb((CX86_CCR2), 0x22); * outb((({ * outb((CX86_CCR2), 0x22); * inb(0x23); * }) | 0x88), 0x23); * } while (0); The new macros fix this problem, so use them instead. Tested on an actual Geode processor. Signed-off-by: Matthew Whitehead Signed-off-by: Thomas Gleixner Cc: luto@kernel.org Link: https://lkml.kernel.org/r/1552596361-8967-2-git-send-email-tedheadster@gmail.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/cyrix.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index d12226f60168..1d9b8aaea06c 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -124,7 +124,7 @@ static void set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -135,11 +135,11 @@ static void set_cx86_memwb(void) pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } /* @@ -153,14 +153,14 @@ static void geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -296,7 +296,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -319,7 +319,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); } else { /* A 6x86MX - it has the bug. */ set_cpu_bug(c, X86_BUG_COMA); -- cgit v1.2.3 From d9fb98c921a89a178cfa36c8efcb675493644314 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Feb 2019 20:24:59 +0800 Subject: drm/nouveau/debugfs: Fix check of pm_runtime_get_sync failure [ Upstream commit 909e9c9c428376e2a43d178ed4b0a2d5ba9cb7d3 ] pm_runtime_get_sync returns negative on failure. Fixes: eaeb9010bb4b ("drm/nouveau/debugfs: Wake up GPU before doing any reclocking") Signed-off-by: YueHaibing Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 88a52f6b39fe..7dfbbbc1beea 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -181,7 +181,7 @@ nouveau_debugfs_pstate_set(struct file *file, const char __user *ubuf, } ret = pm_runtime_get_sync(drm->dev); - if (IS_ERR_VALUE(ret) && ret != -EACCES) + if (ret < 0 && ret != -EACCES) return ret; ret = nvif_mthd(ctrl, NVIF_CONTROL_PSTATE_USER, &args, sizeof(args)); pm_runtime_put_autosuspend(drm->dev); -- cgit v1.2.3 From d965161274983e34caf8f38af287f0a1e2adbb27 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 20 Mar 2019 09:58:33 +0800 Subject: iommu/vt-d: Check capability before disabling protected memory [ Upstream commit 5bb71fc790a88d063507dc5d445ab8b14e845591 ] The spec states in 10.4.16 that the Protected Memory Enable Register should be treated as read-only for implementations not supporting protected memory regions (PLMR and PHMR fields reported as Clear in the Capability register). Cc: Jacob Pan Cc: mark gross Suggested-by: Ashok Raj Fixes: f8bab73515ca5 ("intel-iommu: PMEN support") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index dbd6824dfffa..68a21c49c562 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1534,6 +1534,9 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) u32 pmen; unsigned long flags; + if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap)) + return; + raw_spin_lock_irqsave(&iommu->register_lock, flags); pmen = readl(iommu->reg + DMAR_PMEN_REG); pmen &= ~DMA_PMEN_EPM; -- cgit v1.2.3 From aca4bd1a1cc6084f74f2dba955270629a3fbf7b9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 20 Mar 2019 09:58:34 +0800 Subject: iommu/vt-d: Save the right domain ID used by hardware [ Upstream commit 84c11e4df5aa4955acaa441f0cf1cb2e50daf64b ] The driver sets a default domain id (FLPT_DEFAULT_DID) in the first level only pasid entry, but saves a different domain id in @sdev->did. The value saved in @sdev->did will be used to invalidate the translation caches. Hence, the driver might result in invalidating the caches with a wrong domain id. Cc: Ashok Raj Cc: Jacob Pan Fixes: 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode") Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 68a21c49c562..53b1fbadc496 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5331,7 +5331,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo = context[0].lo; - sdev->did = domain->iommu_did[iommu->seq_id]; + sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); if (!(ctx_lo & CONTEXT_PASIDE)) { -- cgit v1.2.3 From 92b646e276775041ad698c2079be6844378d1372 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 14:27:56 -0700 Subject: x86/hw_breakpoints: Make default case in hw_breakpoint_arch_parse() return an error [ Upstream commit e898e69d6b9475bf123f99b3c5d1a67bb7cb2361 ] When building with -Wsometimes-uninitialized, Clang warns: arch/x86/kernel/hw_breakpoint.c:355:2: warning: variable 'align' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized] The default cannot be reached because arch_build_bp_info() initializes hw->len to one of the specified cases. Nevertheless the warning is valid and returning -EINVAL makes sure that this cannot be broken by future modifications. Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Gleixner Reviewed-by: Nick Desaulniers Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: clang-built-linux@googlegroups.com Link: https://github.com/ClangBuiltLinux/linux/issues/392 Link: https://lkml.kernel.org/r/20190307212756.4648-1-natechancellor@gmail.com Signed-off-by: Sasha Levin --- arch/x86/kernel/hw_breakpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 34a5c1715148..2882fe1d2a78 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -357,6 +357,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, #endif default: WARN_ON_ONCE(1); + return -EINVAL; } /* -- cgit v1.2.3 From 21edc981053f290aa03c7f1787d39963ee2cfccb Mon Sep 17 00:00:00 2001 From: Xiaoli Feng Date: Sat, 16 Mar 2019 12:11:54 +0800 Subject: cifs: fix that return -EINVAL when do dedupe operation [ Upstream commit b073a08016a10f01dfb0d0b6c7fa89da0d544963 ] dedupe_file_range operations is combiled into remap_file_range. But it's always skipped for dedupe operations in function cifs_remap_file_range. Example to test: Before this patch: # dd if=/dev/zero of=cifs/file bs=1M count=1 # xfs_io -c "dedupe cifs/file 4k 64k 4k" cifs/file XFS_IOC_FILE_EXTENT_SAME: Invalid argument After this patch: # dd if=/dev/zero of=cifs/file bs=1M count=1 # xfs_io -c "dedupe cifs/file 4k 64k 4k" cifs/file XFS_IOC_FILE_EXTENT_SAME: Operation not supported Influence for xfstests: generic/091 generic/112 generic/127 generic/263 These tests report this error "do_copy_range:: Invalid argument" instead of "FIDEDUPERANGE: Invalid argument". Because there are still two bugs cause these test failed. https://bugzilla.kernel.org/show_bug.cgi?id=202935 https://bugzilla.kernel.org/show_bug.cgi?id=202785 Signed-off-by: Xiaoli Feng Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 07cad54b84f1..e8e125acd712 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1010,7 +1010,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; - if (remap_flags & ~REMAP_FILE_ADVISORY) + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; cifs_dbg(FYI, "clone range\n"); -- cgit v1.2.3 From a419571b2da5683cc1bddf0988612e7eecc9e98f Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 17 Mar 2019 15:58:38 -0500 Subject: fix incorrect error code mapping for OBJECTID_NOT_FOUND [ Upstream commit 85f9987b236cf46e06ffdb5c225cf1f3c0acb789 ] It was mapped to EIO which can be confusing when user space queries for an object GUID for an object for which the server file system doesn't support (or hasn't saved one). As Amir Goldstein suggested this is similar to ENOATTR (equivalently ENODATA in Linux errno definitions) so changing NT STATUS code mapping for OBJECTID_NOT_FOUND to ENODATA. Signed-off-by: Steve French CC: Amir Goldstein Signed-off-by: Sasha Levin --- fs/cifs/smb2maperror.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 924269cec135..e32c264e3adb 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -1036,7 +1036,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_UNFINISHED_CONTEXT_DELETED, -EIO, "STATUS_UNFINISHED_CONTEXT_DELETED"}, {STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"}, - {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"}, + /* Note that ENOATTTR and ENODATA are the same errno */ + {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"}, {STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"}, {STATUS_WRONG_CREDENTIAL_HANDLE, -EIO, "STATUS_WRONG_CREDENTIAL_HANDLE"}, -- cgit v1.2.3 From 14bec2dda7a0fc2628afef3aa282cbc8ebf7b72d Mon Sep 17 00:00:00 2001 From: "Paulo Alcantara (SUSE)" Date: Thu, 21 Mar 2019 19:31:22 -0300 Subject: cifs: Fix slab-out-of-bounds when tracing SMB tcon [ Upstream commit 68ddb496800acdb46172b4981dc3753ea9b39c25 ] This patch fixes the following KASAN report: [ 779.044746] BUG: KASAN: slab-out-of-bounds in string+0xab/0x180 [ 779.044750] Read of size 1 at addr ffff88814f327968 by task trace-cmd/2812 [ 779.044756] CPU: 1 PID: 2812 Comm: trace-cmd Not tainted 5.1.0-rc1+ #62 [ 779.044760] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-0-ga698c89-prebuilt.qemu.org 04/01/2014 [ 779.044761] Call Trace: [ 779.044769] dump_stack+0x5b/0x90 [ 779.044775] ? string+0xab/0x180 [ 779.044781] print_address_description+0x6c/0x23c [ 779.044787] ? string+0xab/0x180 [ 779.044792] ? string+0xab/0x180 [ 779.044797] kasan_report.cold.3+0x1a/0x32 [ 779.044803] ? string+0xab/0x180 [ 779.044809] string+0xab/0x180 [ 779.044816] ? widen_string+0x160/0x160 [ 779.044822] ? vsnprintf+0x5bf/0x7f0 [ 779.044829] vsnprintf+0x4e7/0x7f0 [ 779.044836] ? pointer+0x4a0/0x4a0 [ 779.044841] ? seq_buf_vprintf+0x79/0xc0 [ 779.044848] seq_buf_vprintf+0x62/0xc0 [ 779.044855] trace_seq_printf+0x113/0x210 [ 779.044861] ? trace_seq_puts+0x110/0x110 [ 779.044867] ? trace_raw_output_prep+0xd8/0x110 [ 779.044876] trace_raw_output_smb3_tcon_class+0x9f/0xc0 [ 779.044882] print_trace_line+0x377/0x890 [ 779.044888] ? tracing_buffers_read+0x300/0x300 [ 779.044893] ? ring_buffer_read+0x58/0x70 [ 779.044899] s_show+0x6e/0x140 [ 779.044906] seq_read+0x505/0x6a0 [ 779.044913] vfs_read+0xaf/0x1b0 [ 779.044919] ksys_read+0xa1/0x130 [ 779.044925] ? kernel_write+0xa0/0xa0 [ 779.044931] ? __do_page_fault+0x3d5/0x620 [ 779.044938] do_syscall_64+0x63/0x150 [ 779.044944] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 779.044949] RIP: 0033:0x7f62c2c2db31 [ 779.044955] Code: fe ff ff 48 8d 3d 17 9e 09 00 48 83 ec 08 e8 96 02 02 00 66 0f 1f 44 00 00 8b 05 fa fc 2c 00 48 63 ff 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 55 53 48 89 d5 48 89 [ 779.044958] RSP: 002b:00007ffd6e116678 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 779.044964] RAX: ffffffffffffffda RBX: 0000560a38be9260 RCX: 00007f62c2c2db31 [ 779.044966] RDX: 0000000000002000 RSI: 00007ffd6e116710 RDI: 0000000000000003 [ 779.044966] RDX: 0000000000002000 RSI: 00007ffd6e116710 RDI: 0000000000000003 [ 779.044969] RBP: 00007f62c2ef5420 R08: 0000000000000000 R09: 0000000000000003 [ 779.044972] R10: ffffffffffffffa8 R11: 0000000000000246 R12: 00007ffd6e116710 [ 779.044975] R13: 0000000000002000 R14: 0000000000000d68 R15: 0000000000002000 [ 779.044981] Allocated by task 1257: [ 779.044987] __kasan_kmalloc.constprop.5+0xc1/0xd0 [ 779.044992] kmem_cache_alloc+0xad/0x1a0 [ 779.044997] getname_flags+0x6c/0x2a0 [ 779.045003] user_path_at_empty+0x1d/0x40 [ 779.045008] do_faccessat+0x12a/0x330 [ 779.045012] do_syscall_64+0x63/0x150 [ 779.045017] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 779.045019] Freed by task 1257: [ 779.045023] __kasan_slab_free+0x12e/0x180 [ 779.045029] kmem_cache_free+0x85/0x1b0 [ 779.045034] filename_lookup.part.70+0x176/0x250 [ 779.045039] do_faccessat+0x12a/0x330 [ 779.045043] do_syscall_64+0x63/0x150 [ 779.045048] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 779.045052] The buggy address belongs to the object at ffff88814f326600 which belongs to the cache names_cache of size 4096 [ 779.045057] The buggy address is located 872 bytes to the right of 4096-byte region [ffff88814f326600, ffff88814f327600) [ 779.045058] The buggy address belongs to the page: [ 779.045062] page:ffffea00053cc800 count:1 mapcount:0 mapping:ffff88815b191b40 index:0x0 compound_mapcount: 0 [ 779.045067] flags: 0x200000000010200(slab|head) [ 779.045075] raw: 0200000000010200 dead000000000100 dead000000000200 ffff88815b191b40 [ 779.045081] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 779.045083] page dumped because: kasan: bad access detected [ 779.045085] Memory state around the buggy address: [ 779.045089] ffff88814f327800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045093] ffff88814f327880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045097] >ffff88814f327900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045099] ^ [ 779.045103] ffff88814f327980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045107] ffff88814f327a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045109] ================================================================== [ 779.045110] Disabling lock debugging due to kernel taint Correctly assign tree name str for smb3_tcon event. Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index 59be48206932..b49bc925fb4f 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -378,19 +378,19 @@ DECLARE_EVENT_CLASS(smb3_tcon_class, __field(unsigned int, xid) __field(__u32, tid) __field(__u64, sesid) - __field(const char *, unc_name) + __string(name, unc_name) __field(int, rc) ), TP_fast_assign( __entry->xid = xid; __entry->tid = tid; __entry->sesid = sesid; - __entry->unc_name = unc_name; + __assign_str(name, unc_name); __entry->rc = rc; ), TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d", __entry->xid, __entry->sesid, __entry->tid, - __entry->unc_name, __entry->rc) + __get_str(name), __entry->rc) ) #define DEFINE_SMB3_TCON_EVENT(name) \ -- cgit v1.2.3 From 642530739f198fe6a9f419b3825b1af54aa7d83f Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 8 Mar 2019 11:05:08 +0800 Subject: x86/gart: Exclude GART aperture from kcore [ Upstream commit ffc8599aa9763f39f6736a79da4d1575e7006f9a ] On machines where the GART aperture is mapped over physical RAM, /proc/kcore contains the GART aperture range. Accessing the GART range via /proc/kcore results in a kernel crash. vmcore used to have the same issue, until it was fixed with commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")', leveraging existing hook infrastructure in vmcore to let /proc/vmcore return zeroes when attempting to read the aperture region, and so it won't read from the actual memory. Apply the same workaround for kcore. First implement the same hook infrastructure for kcore, then reuse the hook functions introduced in the previous vmcore fix. Just with some minor adjustment, rename some functions for more general usage, and simplify the hook infrastructure a bit as there is no module usage yet. Suggested-by: Baoquan He Signed-off-by: Kairui Song Signed-off-by: Thomas Gleixner Reviewed-by: Jiri Bohac Acked-by: Baoquan He Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Omar Sandoval Cc: Dave Young Link: https://lkml.kernel.org/r/20190308030508.13548-1-kasong@redhat.com Signed-off-by: Sasha Levin --- arch/x86/kernel/aperture_64.c | 20 +++++++++++++------- fs/proc/kcore.c | 27 +++++++++++++++++++++++++++ include/linux/kcore.h | 2 ++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 58176b56354e..294ed4392a0e 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "AGP: " fmt #include +#include #include #include #include @@ -57,7 +58,7 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; -#ifdef CONFIG_PROC_VMCORE +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE) /* * If the first kernel maps the aperture over e820 RAM, the kdump kernel will * use the same range because it will remain configured in the northbridge. @@ -66,20 +67,25 @@ int fix_aperture __initdata = 1; */ static unsigned long aperture_pfn_start, aperture_page_count; -static int gart_oldmem_pfn_is_ram(unsigned long pfn) +static int gart_mem_pfn_is_ram(unsigned long pfn) { return likely((pfn < aperture_pfn_start) || (pfn >= aperture_pfn_start + aperture_page_count)); } -static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +static void __init exclude_from_core(u64 aper_base, u32 aper_order) { aperture_pfn_start = aper_base >> PAGE_SHIFT; aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; - WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram)); +#ifdef CONFIG_PROC_VMCORE + WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram)); +#endif +#ifdef CONFIG_PROC_KCORE + WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram)); +#endif } #else -static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +static void exclude_from_core(u64 aper_base, u32 aper_order) { } #endif @@ -474,7 +480,7 @@ out: * may have allocated the range over its e820 RAM * and fixed up the northbridge */ - exclude_from_vmcore(last_aper_base, last_aper_order); + exclude_from_core(last_aper_base, last_aper_order); return 1; } @@ -520,7 +526,7 @@ out: * overlap with the first kernel's memory. We can't access the * range through vmcore even though it should be part of the dump. */ - exclude_from_vmcore(aper_alloc, aper_order); + exclude_from_core(aper_alloc, aper_order); /* Fix up the north bridges */ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index bbcc185062bb..d29d869abec1 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head); static DECLARE_RWSEM(kclist_lock); static int kcore_need_update = 1; +/* + * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error + * Same as oldmem_pfn_is_ram in vmcore + */ +static int (*mem_pfn_is_ram)(unsigned long pfn); + +int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)) +{ + if (mem_pfn_is_ram) + return -EBUSY; + mem_pfn_is_ram = fn; + return 0; +} + +static int pfn_is_ram(unsigned long pfn) +{ + if (mem_pfn_is_ram) + return mem_pfn_is_ram(pfn); + else + return 1; +} + /* This doesn't grab kclist_lock, so it should only be used at init time. */ void __init kclist_add(struct kcore_list *new, void *addr, size_t size, int type) @@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) goto out; } m = NULL; /* skip the list anchor */ + } else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) { + if (clear_user(buffer, tsz)) { + ret = -EFAULT; + goto out; + } } else if (m->type == KCORE_VMALLOC) { vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ diff --git a/include/linux/kcore.h b/include/linux/kcore.h index 8c3f8c14eeaa..c843f4a9c512 100644 --- a/include/linux/kcore.h +++ b/include/linux/kcore.h @@ -44,6 +44,8 @@ void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz) m->vaddr = (unsigned long)vaddr; kclist_add(m, addr, sz, KCORE_REMAP); } + +extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)); #else static inline void kclist_add(struct kcore_list *new, void *addr, size_t size, int type) -- cgit v1.2.3 From 8bc6ef890c869099514bdaf72354f84c8517b7c2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 23 Mar 2019 12:10:29 -0400 Subject: ext4: prohibit fstrim in norecovery mode [ Upstream commit 18915b5873f07e5030e6fb108a050fa7c71c59fb ] The ext4 fstrim implementation uses the block bitmaps to find free space that can be discarded. If we haven't replayed the journal, the bitmaps will be stale and we absolutely *cannot* use stale metadata to zap the underlying storage. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 2e76fb55d94a..5f24fdc140ad 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -999,6 +999,13 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; + /* + * We haven't replayed the journal, so we cannot use our + * block-bitmap-guided storage zapping commands. + */ + if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) + return -EROFS; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; -- cgit v1.2.3 From 8167ea40725deb8617d66a3f63c08c9d924a6b04 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 7 Nov 2018 20:14:10 +0000 Subject: lkdtm: Print real addresses [ Upstream commit 4c411157a42f122051ae3469bee0b5cabe89e139 ] Today, when doing a lkdtm test before the readiness of the random generator, (ptrval) is printed instead of the address at which it perform the fault: [ 1597.337030] lkdtm: Performing direct entry EXEC_USERSPACE [ 1597.337142] lkdtm: attempting ok execution at (ptrval) [ 1597.337398] lkdtm: attempting bad execution at (ptrval) [ 1597.337460] kernel tried to execute user page (77858000) -exploit attempt? (uid: 0) [ 1597.344769] Unable to handle kernel paging request for instruction fetch [ 1597.351392] Faulting instruction address: 0x77858000 [ 1597.356312] Oops: Kernel access of bad area, sig: 11 [#1] If the lkdtm test is done later on, it prints an hashed address. In both cases this is pointless. The purpose of the test is to ensure the kernel generates an Oops at the expected address, so real addresses needs to be printed. This patch fixes that. Signed-off-by: Christophe Leroy Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- drivers/misc/lkdtm/perms.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 53b85c9d16b8..fa54add6375a 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -47,7 +47,7 @@ static noinline void execute_location(void *dst, bool write) { void (*func)(void) = dst; - pr_info("attempting ok execution at %p\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing); do_nothing(); if (write == CODE_WRITE) { @@ -55,7 +55,7 @@ static noinline void execute_location(void *dst, bool write) flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); } - pr_info("attempting bad execution at %p\n", func); + pr_info("attempting bad execution at %px\n", func); func(); } @@ -66,14 +66,14 @@ static void execute_user_location(void *dst) /* Intentionally crossing kernel/user memory boundary. */ void (*func)(void) = dst; - pr_info("attempting ok execution at %p\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing); do_nothing(); copied = access_process_vm(current, (unsigned long)dst, do_nothing, EXEC_SIZE, FOLL_WRITE); if (copied < EXEC_SIZE) return; - pr_info("attempting bad execution at %p\n", func); + pr_info("attempting bad execution at %px\n", func); func(); } @@ -82,7 +82,7 @@ void lkdtm_WRITE_RO(void) /* Explicitly cast away "const" for the test. */ unsigned long *ptr = (unsigned long *)&rodata; - pr_info("attempting bad rodata write at %p\n", ptr); + pr_info("attempting bad rodata write at %px\n", ptr); *ptr ^= 0xabcd1234; } @@ -100,7 +100,7 @@ void lkdtm_WRITE_RO_AFTER_INIT(void) return; } - pr_info("attempting bad ro_after_init write at %p\n", ptr); + pr_info("attempting bad ro_after_init write at %px\n", ptr); *ptr ^= 0xabcd1234; } @@ -112,7 +112,7 @@ void lkdtm_WRITE_KERN(void) size = (unsigned long)do_overwritten - (unsigned long)do_nothing; ptr = (unsigned char *)do_overwritten; - pr_info("attempting bad %zu byte write at %p\n", size, ptr); + pr_info("attempting bad %zu byte write at %px\n", size, ptr); memcpy(ptr, (unsigned char *)do_nothing, size); flush_icache_range((unsigned long)ptr, (unsigned long)(ptr + size)); @@ -185,11 +185,11 @@ void lkdtm_ACCESS_USERSPACE(void) ptr = (unsigned long *)user_addr; - pr_info("attempting bad read at %p\n", ptr); + pr_info("attempting bad read at %px\n", ptr); tmp = *ptr; tmp += 0xc0dec0de; - pr_info("attempting bad write at %p\n", ptr); + pr_info("attempting bad write at %px\n", ptr); *ptr = tmp; vm_munmap(user_addr, PAGE_SIZE); -- cgit v1.2.3 From e7ab5c78e51653c411af66308dada30f15b02413 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Dec 2018 15:26:20 +0000 Subject: lkdtm: Add tests for NULL pointer dereference [ Upstream commit 59a12205d3c32aee4c13ca36889fdf7cfed31126 ] Introduce lkdtm tests for NULL pointer dereference: check access or exec at NULL address, since these errors tend to be reported differently from the general fault error text. For example from x86: pr_alert("BUG: unable to handle kernel %s at %px\n", address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", (void *)address); Signed-off-by: Christophe Leroy Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- drivers/misc/lkdtm/core.c | 2 ++ drivers/misc/lkdtm/lkdtm.h | 2 ++ drivers/misc/lkdtm/perms.c | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 2837dc77478e..f0f9eb30bd2b 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -152,7 +152,9 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(EXEC_VMALLOC), CRASHTYPE(EXEC_RODATA), CRASHTYPE(EXEC_USERSPACE), + CRASHTYPE(EXEC_NULL), CRASHTYPE(ACCESS_USERSPACE), + CRASHTYPE(ACCESS_NULL), CRASHTYPE(WRITE_RO), CRASHTYPE(WRITE_RO_AFTER_INIT), CRASHTYPE(WRITE_KERN), diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index 3c6fd327e166..b69ee004a3f7 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -45,7 +45,9 @@ void lkdtm_EXEC_KMALLOC(void); void lkdtm_EXEC_VMALLOC(void); void lkdtm_EXEC_RODATA(void); void lkdtm_EXEC_USERSPACE(void); +void lkdtm_EXEC_NULL(void); void lkdtm_ACCESS_USERSPACE(void); +void lkdtm_ACCESS_NULL(void); /* lkdtm_refcount.c */ void lkdtm_REFCOUNT_INC_OVERFLOW(void); diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index fa54add6375a..62f76d506f04 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -164,6 +164,11 @@ void lkdtm_EXEC_USERSPACE(void) vm_munmap(user_addr, PAGE_SIZE); } +void lkdtm_EXEC_NULL(void) +{ + execute_location(NULL, CODE_AS_IS); +} + void lkdtm_ACCESS_USERSPACE(void) { unsigned long user_addr, tmp = 0; @@ -195,6 +200,19 @@ void lkdtm_ACCESS_USERSPACE(void) vm_munmap(user_addr, PAGE_SIZE); } +void lkdtm_ACCESS_NULL(void) +{ + unsigned long tmp; + unsigned long *ptr = (unsigned long *)NULL; + + pr_info("attempting bad read at %px\n", ptr); + tmp = *ptr; + tmp += 0xc0dec0de; + + pr_info("attempting bad write at %px\n", ptr); + *ptr = tmp; +} + void __init lkdtm_perms_init(void) { /* Make sure we can write to __ro_after_init values during __init */ -- cgit v1.2.3 From 3fcb0274953088d0e32092cac0706235bdce7ff6 Mon Sep 17 00:00:00 2001 From: wentalou Date: Tue, 18 Dec 2018 15:42:08 +0800 Subject: drm/amdgpu: psp_ring_destroy cause psp->km_ring.ring_mem NULL [ Upstream commit 14d20ec7f31ef96a2e7dcf7880b13dde1d473b56 ] psp_ring_destroy inside psp_load_fw cause psp->km_ring.ring_mem NULL. Call Trace occurred when psp_cmd_submit. should be psp_ring_stop instead. Reviewed-by: Xiangliang Yu Signed-off-by: Wentao Lou Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3a9b48b227ac..a7208ca0bfe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -546,7 +546,7 @@ static int psp_load_fw(struct amdgpu_device *adev) struct psp_context *psp = &adev->psp; if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) { - psp_ring_destroy(psp, PSP_RING_TYPE__KM); + psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */ goto skip_memalloc; } -- cgit v1.2.3 From f1a2397542f643c99d6584fc508f5dae490b9c57 Mon Sep 17 00:00:00 2001 From: "Hsin-Yi, Wang" Date: Wed, 9 Jan 2019 14:59:22 +0800 Subject: drm/panel: panel-innolux: set display off in innolux_panel_unprepare [ Upstream commit 46f3ceaffa81e846677bca8668e0ad40e643cffd ] Move mipi_dsi_dcs_set_display_off() from innolux_panel_disable() to innolux_panel_unprepare(), so they are consistent with innolux_panel_enable() and innolux_panel_prepare(). This also fixes some mode check and irq timeout issue in MTK dsi code. Since some dsi code (e.g. mtk_dsi) have following call trace: 1. drm_panel_disable(), which calls innolux_panel_disable() 2. switch to cmd mode 3. drm_panel_unprepare(), which calls innolux_panel_unprepare() However, mtk_dsi needs to be in cmd mode to be able to send commands (e.g. mipi_dsi_dcs_set_display_off() and mipi_dsi_dcs_enter_sleep_mode()), so we need these functions to be called after the switch to cmd mode happens, i.e. in innolux_panel_unprepare. Signed-off-by: Hsin-Yi, Wang Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190109065922.231753-1-hsinyi@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-innolux-p079zca.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-innolux-p079zca.c b/drivers/gpu/drm/panel/panel-innolux-p079zca.c index ca4ae45dd307..8e5724b63f1f 100644 --- a/drivers/gpu/drm/panel/panel-innolux-p079zca.c +++ b/drivers/gpu/drm/panel/panel-innolux-p079zca.c @@ -70,18 +70,12 @@ static inline struct innolux_panel *to_innolux_panel(struct drm_panel *panel) static int innolux_panel_disable(struct drm_panel *panel) { struct innolux_panel *innolux = to_innolux_panel(panel); - int err; if (!innolux->enabled) return 0; backlight_disable(innolux->backlight); - err = mipi_dsi_dcs_set_display_off(innolux->link); - if (err < 0) - DRM_DEV_ERROR(panel->dev, "failed to set display off: %d\n", - err); - innolux->enabled = false; return 0; @@ -95,6 +89,11 @@ static int innolux_panel_unprepare(struct drm_panel *panel) if (!innolux->prepared) return 0; + err = mipi_dsi_dcs_set_display_off(innolux->link); + if (err < 0) + DRM_DEV_ERROR(panel->dev, "failed to set display off: %d\n", + err); + err = mipi_dsi_dcs_enter_sleep_mode(innolux->link); if (err < 0) { DRM_DEV_ERROR(panel->dev, "failed to enter sleep mode: %d\n", -- cgit v1.2.3 From b9563b52e8029e9d37b515a8aa64192bda62b9a5 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 31 Jan 2019 04:55:46 +0800 Subject: net: hns3: Fix NULL deref when unloading driver [ Upstream commit c8a8045b2d0a974149d65bbe6a7acbcde93cf85b ] When the driver is unloading, if there is a calling of ndo_open occurs between phy_disconnect() and unregister_netdev(), it will end up causing the kernel to eventually hit a NULL deref: [14942.417828] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000048 [14942.529878] Mem abort info: [14942.551166] ESR = 0x96000006 [14942.567070] Exception class = DABT (current EL), IL = 32 bits [14942.623081] SET = 0, FnV = 0 [14942.639112] EA = 0, S1PTW = 0 [14942.643628] Data abort info: [14942.659227] ISV = 0, ISS = 0x00000006 [14942.674870] CM = 0, WnR = 0 [14942.679449] user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000224ad6ad [14942.695595] [0000000000000048] pgd=00000021e6673003, pud=00000021dbf01003, pmd=0000000000000000 [14942.723163] Internal error: Oops: 96000006 [#1] PREEMPT SMP [14942.729358] Modules linked in: hns3(O) hclge(O) pv680_mii(O) hnae3(O) [last unloaded: hclge] [14942.738907] CPU: 1 PID: 26629 Comm: kworker/u4:13 Tainted: G O 4.18.0-rc1-12928-ga960791-dirty #145 [14942.749491] Hardware name: Huawei Technologies Co., Ltd. D05/D05, BIOS Hi1620 FPGA TB BOOT BIOS B763 08/17/2018 [14942.760392] Workqueue: events_power_efficient phy_state_machine [14942.766644] pstate: 80c00009 (Nzcv daif +PAN +UAO) [14942.771918] pc : test_and_set_bit+0x18/0x38 [14942.776589] lr : netif_carrier_off+0x24/0x70 [14942.781033] sp : ffff0000121abd20 [14942.784518] x29: ffff0000121abd20 x28: 0000000000000000 [14942.790208] x27: ffff0000164d3cd8 x26: ffff8021da68b7b8 [14942.795832] x25: 0000000000000000 x24: ffff8021eb407800 [14942.801445] x23: 0000000000000000 x22: 0000000000000000 [14942.807046] x21: 0000000000000001 x20: 0000000000000000 [14942.812672] x19: 0000000000000000 x18: ffff000009781708 [14942.818284] x17: 00000000004970e8 x16: ffff00000816ad48 [14942.823900] x15: 0000000000000000 x14: 0000000000000008 [14942.829528] x13: 0000000000000000 x12: 0000000000000f65 [14942.835149] x11: 0000000000000001 x10: 00000000000009d0 [14942.840753] x9 : ffff0000121abaa0 x8 : 0000000000000000 [14942.846360] x7 : ffff000009781708 x6 : 0000000000000003 [14942.851970] x5 : 0000000000000020 x4 : 0000000000000004 [14942.857575] x3 : 0000000000000002 x2 : 0000000000000001 [14942.863180] x1 : 0000000000000048 x0 : 0000000000000000 [14942.868875] Process kworker/u4:13 (pid: 26629, stack limit = 0x00000000c909dbf3) [14942.876464] Call trace: [14942.879200] test_and_set_bit+0x18/0x38 [14942.883376] phy_link_change+0x38/0x78 [14942.887378] phy_state_machine+0x3dc/0x4f8 [14942.891968] process_one_work+0x158/0x470 [14942.896223] worker_thread+0x50/0x470 [14942.900219] kthread+0x104/0x130 [14942.903905] ret_from_fork+0x10/0x1c [14942.907755] Code: d2800022 8b400c21 f9800031 9ac32044 (c85f7c22) [14942.914185] ---[ end trace 968c9e12eb740b23 ]--- So this patch fixes it by modifying the timing to do phy_connect_direct() and phy_disconnect(). Fixes: 256727da7395 ("net: hns3: Add MDIO support to HNS3 Ethernet driver for hip08 SoC") Signed-off-by: Huazhong Tan Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 ++ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 28 ++++++++++++++++++++++ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 20 ++-------------- .../ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 8 +++++-- .../ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h | 4 ++-- 5 files changed, 40 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 09c774fe8853..854a55d4332a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -463,6 +463,8 @@ struct hnae3_ae_ops { int (*set_gro_en)(struct hnae3_handle *handle, int enable); u16 (*get_global_queue_id)(struct hnae3_handle *handle, u16 queue_id); void (*set_timer_task)(struct hnae3_handle *handle, bool enable); + int (*mac_connect_phy)(struct hnae3_handle *handle); + void (*mac_disconnect_phy)(struct hnae3_handle *handle); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index d84c50068f66..40b69eaf2cb3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3519,6 +3519,25 @@ static int hns3_init_mac_addr(struct net_device *netdev, bool init) return ret; } +static int hns3_init_phy(struct net_device *netdev) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + int ret = 0; + + if (h->ae_algo->ops->mac_connect_phy) + ret = h->ae_algo->ops->mac_connect_phy(h); + + return ret; +} + +static void hns3_uninit_phy(struct net_device *netdev) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (h->ae_algo->ops->mac_disconnect_phy) + h->ae_algo->ops->mac_disconnect_phy(h); +} + static int hns3_restore_fd_rules(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); @@ -3627,6 +3646,10 @@ static int hns3_client_init(struct hnae3_handle *handle) goto out_init_ring_data; } + ret = hns3_init_phy(netdev); + if (ret) + goto out_init_phy; + ret = register_netdev(netdev); if (ret) { dev_err(priv->dev, "probe register netdev fail!\n"); @@ -3651,6 +3674,9 @@ static int hns3_client_init(struct hnae3_handle *handle) return ret; out_reg_netdev_fail: + hns3_uninit_phy(netdev); +out_init_phy: + hns3_uninit_all_ring(priv); out_init_ring_data: (void)hns3_nic_uninit_vector_data(priv); out_init_vector_data: @@ -3685,6 +3711,8 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_force_clear_all_rx_ring(handle); + hns3_uninit_phy(netdev); + ret = hns3_nic_uninit_vector_data(priv); if (ret) netdev_err(netdev, "uninit vector error\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f7637c08bb3a..cb7571747af7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6959,16 +6959,6 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle, *tp_mdix = ETH_TP_MDI; } -static int hclge_init_instance_hw(struct hclge_dev *hdev) -{ - return hclge_mac_connect_phy(hdev); -} - -static void hclge_uninit_instance_hw(struct hclge_dev *hdev) -{ - hclge_mac_disconnect_phy(hdev); -} - static int hclge_init_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { @@ -6988,13 +6978,6 @@ static int hclge_init_client_instance(struct hnae3_client *client, if (ret) goto clear_nic; - ret = hclge_init_instance_hw(hdev); - if (ret) { - client->ops->uninit_instance(&vport->nic, - 0); - goto clear_nic; - } - hnae3_set_client_init_flag(client, ae_dev, 1); if (hdev->roce_client && @@ -7079,7 +7062,6 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, if (client->type == HNAE3_CLIENT_ROCE) return; if (hdev->nic_client && client->ops->uninit_instance) { - hclge_uninit_instance_hw(hdev); client->ops->uninit_instance(&vport->nic, 0); hdev->nic_client = NULL; vport->nic.client = NULL; @@ -8012,6 +7994,8 @@ static const struct hnae3_ae_ops hclge_ops = { .set_gro_en = hclge_gro_en, .get_global_queue_id = hclge_covert_handle_qid_global, .set_timer_task = hclge_set_timer_task, + .mac_connect_phy = hclge_mac_connect_phy, + .mac_disconnect_phy = hclge_mac_disconnect_phy, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index dabb8437f8dc..84f28785ba28 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -195,8 +195,10 @@ static void hclge_mac_adjust_link(struct net_device *netdev) netdev_err(netdev, "failed to configure flow control.\n"); } -int hclge_mac_connect_phy(struct hclge_dev *hdev) +int hclge_mac_connect_phy(struct hnae3_handle *handle) { + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; struct net_device *netdev = hdev->vport[0].nic.netdev; struct phy_device *phydev = hdev->hw.mac.phydev; __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; @@ -229,8 +231,10 @@ int hclge_mac_connect_phy(struct hclge_dev *hdev) return 0; } -void hclge_mac_disconnect_phy(struct hclge_dev *hdev) +void hclge_mac_disconnect_phy(struct hnae3_handle *handle) { + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; struct phy_device *phydev = hdev->hw.mac.phydev; if (!phydev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index 5fbf7dddb5d9..ef095d9c566f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -5,8 +5,8 @@ #define __HCLGE_MDIO_H int hclge_mac_mdio_config(struct hclge_dev *hdev); -int hclge_mac_connect_phy(struct hclge_dev *hdev); -void hclge_mac_disconnect_phy(struct hclge_dev *hdev); +int hclge_mac_connect_phy(struct hnae3_handle *handle); +void hclge_mac_disconnect_phy(struct hnae3_handle *handle); void hclge_mac_start_phy(struct hclge_dev *hdev); void hclge_mac_stop_phy(struct hclge_dev *hdev); -- cgit v1.2.3 From 41c3883dfadb73fb44188bd089f4eb29cc794574 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Wed, 23 Jan 2019 12:59:42 +0100 Subject: crypto: axis - fix for recursive locking from bottom half [ Upstream commit c34a83820f59bb275e5f2d55cd5ea99c64f6ef23 ] Clients may submit a new requests from the completion callback context. The driver was not prepared to receive a request in this state because it already held the request queue lock and a recursive lock error is triggered. Now all completions are queued up until we are ready to drop the queue lock and then delivered. The fault was triggered by TCP over an IPsec connection in the LTP test suite: LTP: starting tcp4_ipsec02 (tcp_ipsec.sh -p ah -m transport -s "100 1000 65535") BUG: spinlock recursion on CPU#1, genload/943 lock: 0xbf3c3094, .magic: dead4ead, .owner: genload/943, .owner_cpu: 1 CPU: 1 PID: 943 Comm: genload Tainted: G O 4.9.62-axis5-devel #6 Hardware name: Axis ARTPEC-6 Platform (unwind_backtrace) from [<8010d134>] (show_stack+0x18/0x1c) (show_stack) from [<803a289c>] (dump_stack+0x84/0x98) (dump_stack) from [<8016e164>] (do_raw_spin_lock+0x124/0x128) (do_raw_spin_lock) from [<804de1a4>] (artpec6_crypto_submit+0x2c/0xa0) (artpec6_crypto_submit) from [<804def38>] (artpec6_crypto_prepare_submit_hash+0xd0/0x54c) (artpec6_crypto_prepare_submit_hash) from [<7f3165f0>] (ah_output+0x2a4/0x3dc [ah4]) (ah_output [ah4]) from [<805df9bc>] (xfrm_output_resume+0x178/0x4a4) (xfrm_output_resume) from [<805d283c>] (xfrm4_output+0xac/0xbc) (xfrm4_output) from [<80587928>] (ip_queue_xmit+0x140/0x3b4) (ip_queue_xmit) from [<805a13b4>] (tcp_transmit_skb+0x4c4/0x95c) (tcp_transmit_skb) from [<8059f218>] (tcp_rcv_state_process+0xdf4/0xdfc) (tcp_rcv_state_process) from [<805a7530>] (tcp_v4_do_rcv+0x64/0x1ac) (tcp_v4_do_rcv) from [<805a9724>] (tcp_v4_rcv+0xa34/0xb74) (tcp_v4_rcv) from [<80581d34>] (ip_local_deliver_finish+0x78/0x2b0) (ip_local_deliver_finish) from [<8058259c>] (ip_local_deliver+0xe4/0x104) (ip_local_deliver) from [<805d23ec>] (xfrm4_transport_finish+0xf4/0x144) (xfrm4_transport_finish) from [<805df564>] (xfrm_input+0x4f4/0x74c) (xfrm_input) from [<804de420>] (artpec6_crypto_task+0x208/0x38c) (artpec6_crypto_task) from [<801271b0>] (tasklet_action+0x60/0xec) (tasklet_action) from [<801266d4>] (__do_softirq+0xcc/0x3a4) (__do_softirq) from [<80126d20>] (irq_exit+0xf4/0x15c) (irq_exit) from [<801741e8>] (__handle_domain_irq+0x68/0xbc) (__handle_domain_irq) from [<801014f0>] (gic_handle_irq+0x50/0x94) (gic_handle_irq) from [<80657370>] (__irq_usr+0x50/0x80) Signed-off-by: Lars Persson Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/axis/artpec6_crypto.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index f3442c2bdbdc..3c70004240d6 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -284,6 +284,7 @@ enum artpec6_crypto_hash_flags { struct artpec6_crypto_req_common { struct list_head list; + struct list_head complete_in_progress; struct artpec6_crypto_dma_descriptors *dma; struct crypto_async_request *req; void (*complete)(struct crypto_async_request *req); @@ -2045,7 +2046,8 @@ static int artpec6_crypto_prepare_aead(struct aead_request *areq) return artpec6_crypto_dma_map_descs(common); } -static void artpec6_crypto_process_queue(struct artpec6_crypto *ac) +static void artpec6_crypto_process_queue(struct artpec6_crypto *ac, + struct list_head *completions) { struct artpec6_crypto_req_common *req; @@ -2056,7 +2058,7 @@ static void artpec6_crypto_process_queue(struct artpec6_crypto *ac) list_move_tail(&req->list, &ac->pending); artpec6_crypto_start_dma(req); - req->req->complete(req->req, -EINPROGRESS); + list_add_tail(&req->complete_in_progress, completions); } /* @@ -2086,6 +2088,11 @@ static void artpec6_crypto_task(unsigned long data) struct artpec6_crypto *ac = (struct artpec6_crypto *)data; struct artpec6_crypto_req_common *req; struct artpec6_crypto_req_common *n; + struct list_head complete_done; + struct list_head complete_in_progress; + + INIT_LIST_HEAD(&complete_done); + INIT_LIST_HEAD(&complete_in_progress); if (list_empty(&ac->pending)) { pr_debug("Spurious IRQ\n"); @@ -2119,19 +2126,30 @@ static void artpec6_crypto_task(unsigned long data) pr_debug("Completing request %p\n", req); - list_del(&req->list); + list_move_tail(&req->list, &complete_done); artpec6_crypto_dma_unmap_all(req); artpec6_crypto_copy_bounce_buffers(req); ac->pending_count--; artpec6_crypto_common_destroy(req); - req->complete(req->req); } - artpec6_crypto_process_queue(ac); + artpec6_crypto_process_queue(ac, &complete_in_progress); spin_unlock_bh(&ac->queue_lock); + + /* Perform the completion callbacks without holding the queue lock + * to allow new request submissions from the callbacks. + */ + list_for_each_entry_safe(req, n, &complete_done, list) { + req->complete(req->req); + } + + list_for_each_entry_safe(req, n, &complete_in_progress, + complete_in_progress) { + req->req->complete(req->req, -EINPROGRESS); + } } static void artpec6_crypto_complete_crypto(struct crypto_async_request *req) -- cgit v1.2.3 From 5ada4de039433d9d42299e933c7bd425b7c9d264 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 1 Feb 2019 14:13:41 +0800 Subject: Revert "ACPI / EC: Remove old CLEAR_ON_RESUME quirk" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b6a3e1475b0220378ad32bdf4d8692f058b1fc03 ] On some Samsung hardware, it is necessary to clear events accumulated by the EC during sleep. These ECs stop reporting GPEs until they are manually polled, if too many events are accumulated. Thus the CLEAR_ON_RESUME quirk is introduced to send EC query commands unconditionally after resume to clear all the EC query events on those platforms. Later, commit 4c237371f290 ("ACPI / EC: Remove old CLEAR_ON_RESUME quirk") removes the CLEAR_ON_RESUME quirk because we thought the new EC IRQ polling logic should handle this case. Now it has been proved that the EC IRQ Polling logic does not fix the issue actually because we got regression report on these Samsung platforms after removing the quirk. Thus revert commit 4c237371f290 ("ACPI / EC: Remove old CLEAR_ON_RESUME quirk") to introduce back the Samsung quirk in this patch. Link: https://bugzilla.kernel.org/show_bug.cgi?id=44161 Tested-by: Ortwin Glück Tested-by: Francisco Cribari Tested-by: Balazs Varga Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/ec.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 9d66a47d32fb..49e16f009095 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -194,6 +194,7 @@ static struct workqueue_struct *ec_query_wq; static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */ static int EC_FLAGS_CORRECT_ECDT; /* Needs ECDT port address correction */ static int EC_FLAGS_IGNORE_DSDT_GPE; /* Needs ECDT GPE as correction setting */ +static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ /* -------------------------------------------------------------------------- * Logging/Debugging @@ -499,6 +500,26 @@ static inline void __acpi_ec_disable_event(struct acpi_ec *ec) ec_log_drv("event blocked"); } +/* + * Process _Q events that might have accumulated in the EC. + * Run with locked ec mutex. + */ +static void acpi_ec_clear(struct acpi_ec *ec) +{ + int i, status; + u8 value = 0; + + for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) { + status = acpi_ec_query(ec, &value); + if (status || !value) + break; + } + if (unlikely(i == ACPI_EC_CLEAR_MAX)) + pr_warn("Warning: Maximum of %d stale EC events cleared\n", i); + else + pr_info("%d stale EC events cleared\n", i); +} + static void acpi_ec_enable_event(struct acpi_ec *ec) { unsigned long flags; @@ -507,6 +528,10 @@ static void acpi_ec_enable_event(struct acpi_ec *ec) if (acpi_ec_started(ec)) __acpi_ec_enable_event(ec); spin_unlock_irqrestore(&ec->lock, flags); + + /* Drain additional events if hardware requires that */ + if (EC_FLAGS_CLEAR_ON_RESUME) + acpi_ec_clear(ec); } #ifdef CONFIG_PM_SLEEP @@ -1820,6 +1845,31 @@ static int ec_flag_query_handshake(const struct dmi_system_id *id) } #endif +/* + * On some hardware it is necessary to clear events accumulated by the EC during + * sleep. These ECs stop reporting GPEs until they are manually polled, if too + * many events are accumulated. (e.g. Samsung Series 5/9 notebooks) + * + * https://bugzilla.kernel.org/show_bug.cgi?id=44161 + * + * Ideally, the EC should also be instructed NOT to accumulate events during + * sleep (which Windows seems to do somehow), but the interface to control this + * behaviour is not known at this time. + * + * Models known to be affected are Samsung 530Uxx/535Uxx/540Uxx/550Pxx/900Xxx, + * however it is very likely that other Samsung models are affected. + * + * On systems which don't accumulate _Q events during sleep, this extra check + * should be harmless. + */ +static int ec_clear_on_resume(const struct dmi_system_id *id) +{ + pr_debug("Detected system needing EC poll on resume.\n"); + EC_FLAGS_CLEAR_ON_RESUME = 1; + ec_event_clearing = ACPI_EC_EVT_TIMING_STATUS; + return 0; +} + /* * Some ECDTs contain wrong register addresses. * MSI MS-171F @@ -1869,6 +1919,9 @@ static const struct dmi_system_id ec_dmi_table[] __initconst = { ec_honor_ecdt_gpe, "ASUS X580VD", { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X580VD"),}, NULL}, + { + ec_clear_on_resume, "Samsung hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL}, {}, }; -- cgit v1.2.3 From cf56bb03ffa3f595b6f2a305acdd2ade23c49a42 Mon Sep 17 00:00:00 2001 From: "Wei Hu (Xavier)" Date: Sun, 3 Feb 2019 20:43:13 +0800 Subject: RDMA/hns: Fix the Oops during rmmod or insmod ko when reset occurs [ Upstream commit d061effc36f7bd38a12912977a37a50ac9140d11 ] In the reset process, the hns3 NIC driver notifies the RoCE driver to perform reset related processing by calling the .reset_notify() interface registered by the RoCE driver in hip08 SoC. In the current version, if a reset occurs simultaneously during the execution of rmmod or insmod ko, there may be Oops error as below: Internal error: Oops: 86000007 [#1] PREEMPT SMP Modules linked in: hns_roce(O) hns3(O) hclge(O) hnae3(O) [last unloaded: hns_roce_hw_v2] CPU: 0 PID: 14 Comm: kworker/0:1 Tainted: G O 4.19.0-ge00d540 #1 Hardware name: Huawei Technologies Co., Ltd. Workqueue: events hclge_reset_service_task [hclge] pstate: 60c00009 (nZCv daif +PAN +UAO) pc : 0xffff00000100b0b8 lr : 0xffff00000100aea0 sp : ffff000009afbab0 x29: ffff000009afbab0 x28: 0000000000000800 x27: 0000000000007ff0 x26: ffff80002f90c004 x25: 00000000000007ff x24: ffff000008f97000 x23: ffff80003efee0a8 x22: 0000000000001000 x21: ffff80002f917ff0 x20: ffff8000286ea070 x19: 0000000000000800 x18: 0000000000000400 x17: 00000000c4d3225d x16: 00000000000021b8 x15: 0000000000000400 x14: 0000000000000400 x13: 0000000000000000 x12: ffff80003fac6e30 x11: 0000800036303000 x10: 0000000000000001 x9 : 0000000000000000 x8 : ffff80003016d000 x7 : 0000000000000000 x6 : 000000000000003f x5 : 0000000000000040 x4 : 0000000000000000 x3 : 0000000000000004 x2 : 00000000000007ff x1 : 0000000000000000 x0 : 0000000000000000 Process kworker/0:1 (pid: 14, stack limit = 0x00000000af8f0ad9) Call trace: 0xffff00000100b0b8 0xffff00000100b3a0 hns_roce_init+0x624/0xc88 [hns_roce] 0xffff000001002df8 0xffff000001006960 hclge_notify_roce_client+0x74/0xe0 [hclge] hclge_reset_service_task+0xa58/0xbc0 [hclge] process_one_work+0x1e4/0x458 worker_thread+0x40/0x450 kthread+0x12c/0x130 ret_from_fork+0x10/0x18 Code: bad PC value In the reset process, we will release the resources firstly, and after the hardware reset is completed, we will reapply resources and reconfigure the hardware. We can solve this problem by modifying both the NIC and the RoCE driver. We can modify the concurrent processing in the NIC driver to avoid calling the .reset_notify and .uninit_instance ops at the same time. And we need to modify the RoCE driver to record the reset stage and the driver's init/uninit state, and check the state in the .reset_notify, .init_instance. and uninit_instance functions to avoid NULL pointer operation. Fixes: cb7a94c9c808 ("RDMA/hns: Add reset process for RoCE in hip08") Signed-off-by: Wei Hu (Xavier) Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_device.h | 21 ++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 103 ++++++++++++++++++++++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + 3 files changed, 112 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 509e467843f6..f4cac63194d9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -216,6 +216,26 @@ enum { HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4 }; +enum hns_roce_reset_stage { + HNS_ROCE_STATE_NON_RST, + HNS_ROCE_STATE_RST_BEF_DOWN, + HNS_ROCE_STATE_RST_DOWN, + HNS_ROCE_STATE_RST_UNINIT, + HNS_ROCE_STATE_RST_INIT, + HNS_ROCE_STATE_RST_INITED, +}; + +enum hns_roce_instance_state { + HNS_ROCE_STATE_NON_INIT, + HNS_ROCE_STATE_INIT, + HNS_ROCE_STATE_INITED, + HNS_ROCE_STATE_UNINIT, +}; + +enum { + HNS_ROCE_RST_DIRECT_RETURN = 0, +}; + #define HNS_ROCE_CMD_SUCCESS 1 #define HNS_ROCE_PORT_DOWN 0 @@ -898,6 +918,7 @@ struct hns_roce_dev { spinlock_t bt_cmd_lock; bool active; bool is_reset; + unsigned long reset_cnt; struct hns_roce_ib_iboe iboe; struct list_head pgdir_list; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 543fa1504cd3..7ac06576d791 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5800,6 +5800,7 @@ MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { + struct hns_roce_v2_priv *priv = hr_dev->priv; const struct pci_device_id *id; int i; @@ -5830,10 +5831,13 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->cmd_mod = 1; hr_dev->loop_idc = 0; + hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle); + priv->handle = handle; + return 0; } -static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) +static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) { struct hns_roce_dev *hr_dev; int ret; @@ -5850,7 +5854,6 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) hr_dev->pci_dev = handle->pdev; hr_dev->dev = &handle->pdev->dev; - handle->priv = hr_dev; ret = hns_roce_hw_v2_get_cfg(hr_dev, handle); if (ret) { @@ -5864,6 +5867,8 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_get_cfg; } + handle->priv = hr_dev; + return 0; error_failed_get_cfg: @@ -5875,7 +5880,7 @@ error_failed_kzalloc: return ret; } -static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, +static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset) { struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; @@ -5883,24 +5888,78 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, if (!hr_dev) return; + handle->priv = NULL; hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); } +static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) +{ + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + struct device *dev = &handle->pdev->dev; + int ret; + + handle->rinfo.instance_state = HNS_ROCE_STATE_INIT; + + if (ops->ae_dev_resetting(handle) || ops->get_hw_reset_stat(handle)) { + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; + goto reset_chk_err; + } + + ret = __hns_roce_hw_v2_init_instance(handle); + if (ret) { + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; + dev_err(dev, "RoCE instance init failed! ret = %d\n", ret); + if (ops->ae_dev_resetting(handle) || + ops->get_hw_reset_stat(handle)) + goto reset_chk_err; + else + return ret; + } + + handle->rinfo.instance_state = HNS_ROCE_STATE_INITED; + + + return 0; + +reset_chk_err: + dev_err(dev, "Device is busy in resetting state.\n" + "please retry later.\n"); + + return -EBUSY; +} + +static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, + bool reset) +{ + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) + return; + + handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT; + + __hns_roce_hw_v2_uninit_instance(handle, reset); + + handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; +} static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) { - struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + struct hns_roce_dev *hr_dev; struct ib_event event; - if (!hr_dev) { - dev_err(&handle->pdev->dev, - "Input parameter handle->priv is NULL!\n"); - return -EINVAL; + if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) { + set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); + return 0; } + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_DOWN; + clear_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); + + hr_dev = (struct hns_roce_dev *)handle->priv; + if (!hr_dev) + return 0; + hr_dev->active = false; - hr_dev->is_reset = true; event.event = IB_EVENT_DEVICE_FATAL; event.device = &hr_dev->ib_dev; @@ -5912,17 +5971,29 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle) { + struct device *dev = &handle->pdev->dev; int ret; - ret = hns_roce_hw_v2_init_instance(handle); + if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN, + &handle->rinfo.state)) { + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; + return 0; + } + + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INIT; + + dev_info(&handle->pdev->dev, "In reset process RoCE client reinit.\n"); + ret = __hns_roce_hw_v2_init_instance(handle); if (ret) { /* when reset notify type is HNAE3_INIT_CLIENT In reset notify * callback function, RoCE Engine reinitialize. If RoCE reinit * failed, we should inform NIC driver. */ handle->priv = NULL; - dev_err(&handle->pdev->dev, - "In reset process RoCE reinit failed %d.\n", ret); + dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret); + } else { + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; + dev_info(dev, "Reset done, RoCE client reinit finished.\n"); } return ret; @@ -5930,8 +6001,14 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle) static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle) { + if (test_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state)) + return 0; + + handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT; + dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n"); msleep(100); - hns_roce_hw_v2_uninit_instance(handle, false); + __hns_roce_hw_v2_uninit_instance(handle, false); + return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index b72d0443c835..5398aa718cfc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1546,6 +1546,7 @@ struct hns_roce_link_table_entry { #define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20) struct hns_roce_v2_priv { + struct hnae3_handle *handle; struct hns_roce_v2_cmq cmq; struct hns_roce_link_table tsq; struct hns_roce_link_table tpq; -- cgit v1.2.3 From dce48c5878ab9374bab4e5079759c27529cda6e9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 5 Feb 2019 16:24:53 -0700 Subject: coresight: cpu-debug: Support for CA73 CPUs [ Upstream commit a0f890aba2be33377f4eb24e13633c4a76a68f38 ] This patch is to add the AMBA device ID for CA73 CPU, so that CPU debug module can be initialized successfully when a SoC contain CA73 CPUs. This patch has been verified on 96boards Hikey960. Signed-off-by: Leo Yan Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-cpu-debug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index 45b2460f3166..e8819d750938 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -668,6 +668,10 @@ static const struct amba_id debug_ids[] = { .id = 0x000bbd08, .mask = 0x000fffff, }, + { /* Debug for Cortex-A73 */ + .id = 0x000bbd09, + .mask = 0x000fffff, + }, { 0, 0 }, }; -- cgit v1.2.3 From 75e3256e2309dbc3063623f2d9b55ceaa889559f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 31 Jan 2019 19:38:56 +0300 Subject: PCI: Blacklist power management of Gigabyte X299 DESIGNARE EX PCIe ports [ Upstream commit 85b0cae89d5266e6a7abb2e83c6f716326fc494c ] Gigabyte X299 DESIGNARE EX motherboard has one PCIe root port that is connected to an Alpine Ridge Thunderbolt controller. This port has slot implemented bit set in the config space but other than that it is not hotplug capable in the sense we are expecting in Linux (it has dev->is_hotplug_bridge set to 0): 00:1c.4 PCI bridge: Intel Corporation 200 Series PCH PCI Express Root Port #5 Bus: primary=00, secondary=05, subordinate=46, sec-latency=0 Memory behind bridge: 78000000-8fffffff [size=384M] Prefetchable memory behind bridge: 00003800f8000000-00003800ffffffff [size=128M] ... Capabilities: [40] Express (v2) Root Port (Slot+), MSI 00 ... SltCap: AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug- Surprise- Slot #8, PowerLimit 25.000W; Interlock- NoCompl+ SltCtl: Enable: AttnBtn- PwrFlt- MRL- PresDet- CmdCplt- HPIrq- LinkChg- Control: AttnInd Unknown, PwrInd Unknown, Power- Interlock- SltSta: Status: AttnBtn- PowerFlt- MRL- CmdCplt- PresDet- Interlock- Changed: MRL- PresDet+ LinkState+ This system is using ACPI based hotplug to notify the OS that it needs to rescan the PCI bus (ACPI hotplug). If there is nothing connected in any of the Thunderbolt ports the root port will not have any runtime PM active children and is thus automatically runtime suspended pretty soon after boot by PCI PM core. Now, when a device is connected the BIOS SMI handler responsible for enumerating newly added devices is not able to find anything because the port is in D3. Prevent this from happening by blacklisting PCI power management of this particular Gigabyte system. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202031 Reported-by: Kedar A Dongre Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c25acace7d91..5a94a3cf61cd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2501,6 +2501,25 @@ void pci_config_pm_runtime_put(struct pci_dev *pdev) pm_runtime_put_sync(parent); } +static const struct dmi_system_id bridge_d3_blacklist[] = { +#ifdef CONFIG_X86 + { + /* + * Gigabyte X299 root port is not marked as hotplug capable + * which allows Linux to power manage it. However, this + * confuses the BIOS SMI handler so don't power manage root + * ports on that system. + */ + .ident = "X299 DESIGNARE EX-CF", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), + DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"), + }, + }, +#endif + { } +}; + /** * pci_bridge_d3_possible - Is it possible to put the bridge into D3 * @bridge: Bridge to check @@ -2546,6 +2565,9 @@ bool pci_bridge_d3_possible(struct pci_dev *bridge) if (bridge->is_hotplug_bridge) return false; + if (dmi_check_system(bridge_d3_blacklist)) + return false; + /* * It should be safe to put PCIe ports from 2015 or newer * to D3. -- cgit v1.2.3 From b851a25507e2c90ff3ec64079d238c94d6d3d634 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 9 Jan 2019 08:22:08 -0600 Subject: PCI/ASPM: Save LTR Capability for suspend/resume [ Upstream commit dbbfadf2319005cf528b0f15f12a05d4e4644303 ] Latency Tolerance Reporting (LTR) allows Endpoints and Switch Upstream Ports to report their latency requirements to upstream components. If ASPM L1 PM substates are enabled, the LTR information helps determine when a Link enters L1.2 [1]. Software must set the maximum latency values in the LTR Capability based on characteristics of the platform, then set LTR Mechanism Enable in the Device Control 2 register in the PCIe Capability. The device can then use LTR to report its latency tolerance. If the device reports a maximum latency value of zero, that means the device requires the highest possible performance and the ASPM L1.2 substate is effectively disabled. We put devices in D3 for suspend, and we assume their internal state is lost. On resume, previously we did not restore the LTR Capability, but we did restore the LTR Mechanism Enable bit, so devices would request the highest possible performance and ASPM L1.2 wouldn't be used. [1] PCIe r4.0, sec 5.5.1 Link: https://bugzilla.kernel.org/show_bug.cgi?id=201469 Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5a94a3cf61cd..e91005d0f20c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1233,7 +1233,6 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pcie_capability_write_word(dev, PCI_EXP_SLTCTL2, cap[i++]); } - static int pci_save_pcix_state(struct pci_dev *dev) { int pos; @@ -1270,6 +1269,45 @@ static void pci_restore_pcix_state(struct pci_dev *dev) pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]); } +static void pci_save_ltr_state(struct pci_dev *dev) +{ + int ltr; + struct pci_cap_saved_state *save_state; + u16 *cap; + + if (!pci_is_pcie(dev)) + return; + + ltr = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!ltr) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_LTR); + if (!save_state) { + pci_err(dev, "no suspend buffer for LTR; ASPM issues possible after resume\n"); + return; + } + + cap = (u16 *)&save_state->cap.data[0]; + pci_read_config_word(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, cap++); + pci_read_config_word(dev, ltr + PCI_LTR_MAX_NOSNOOP_LAT, cap++); +} + +static void pci_restore_ltr_state(struct pci_dev *dev) +{ + struct pci_cap_saved_state *save_state; + int ltr; + u16 *cap; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_LTR); + ltr = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!save_state || !ltr) + return; + + cap = (u16 *)&save_state->cap.data[0]; + pci_write_config_word(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, *cap++); + pci_write_config_word(dev, ltr + PCI_LTR_MAX_NOSNOOP_LAT, *cap++); +} /** * pci_save_state - save the PCI configuration space of a device before suspending @@ -1291,6 +1329,7 @@ int pci_save_state(struct pci_dev *dev) if (i != 0) return i; + pci_save_ltr_state(dev); pci_save_dpc_state(dev); return pci_save_vc_state(dev); } @@ -1390,7 +1429,12 @@ void pci_restore_state(struct pci_dev *dev) if (!dev->state_saved) return; - /* PCI Express register must be restored first */ + /* + * Restore max latencies (in the LTR capability) before enabling + * LTR itself (in the PCIe capability). + */ + pci_restore_ltr_state(dev); + pci_restore_pcie_state(dev); pci_restore_pasid_state(dev); pci_restore_pri_state(dev); @@ -3020,6 +3064,11 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) if (error) pci_err(dev, "unable to preallocate PCI-X save buffer\n"); + error = pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_LTR, + 2 * sizeof(u16)); + if (error) + pci_err(dev, "unable to allocate suspend buffer for LTR\n"); + pci_allocate_vc_save_buffers(dev); } -- cgit v1.2.3 From 3b2457ce9997efdbb333553c6037ef11cb2fdda9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Jan 2019 14:04:33 -0800 Subject: f2fs: sync filesystem after roll-forward recovery [ Upstream commit 812a95977fd2f0d1f220c716a98a7f22e22f488d ] Some works after roll-forward recovery can get an error which will release all the data structures. Let's flush them in order to make it clean. One possible corruption came from: [ 90.400500] list_del corruption. prev->next should be ffffffed1f566208, but was (null) [ 90.675349] Call trace: [ 90.677869] __list_del_entry_valid+0x94/0xb4 [ 90.682351] remove_dirty_inode+0xac/0x114 [ 90.686563] __f2fs_write_data_pages+0x6a8/0x6c8 [ 90.691302] f2fs_write_data_pages+0x40/0x4c [ 90.695695] do_writepages+0x80/0xf0 [ 90.699372] __writeback_single_inode+0xdc/0x4ac [ 90.704113] writeback_sb_inodes+0x280/0x440 [ 90.708501] wb_writeback+0x1b8/0x3d0 [ 90.712267] wb_workfn+0x1a8/0x4d4 [ 90.715765] process_one_work+0x1c0/0x3d4 [ 90.719883] worker_thread+0x224/0x344 [ 90.723739] kthread+0x120/0x130 [ 90.727055] ret_from_fork+0x10/0x18 Reported-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 5 +++-- fs/f2fs/node.c | 4 +++- fs/f2fs/super.c | 44 +++++++++++++++++++++++++++++++++----------- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f955cd3e0677..ccccf0ce2f06 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* collect a number of dirty meta pages and write together */ - if (wbc->for_kupdate || - get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_META) < + nr_pages_to_skip(sbi, META)) goto skip_write; /* if locked failed, cp will flush dirty pages instead */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4f450e573312..f6ff84e29749 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, f2fs_balance_fs_bg(sbi); /* collect a number of dirty node pages and write together */ - if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) + if (wbc->sync_mode != WB_SYNC_ALL && + get_pages(sbi, F2FS_DIRTY_NODES) < + nr_pages_to_skip(sbi, NODE)) goto skip_write; if (wbc->sync_mode == WB_SYNC_ALL) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5892fa3c885f..144ffba3ec5a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1460,9 +1460,16 @@ static int f2fs_enable_quotas(struct super_block *sb); static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) { + unsigned int s_flags = sbi->sb->s_flags; struct cp_control cpc; - int err; + int err = 0; + int ret; + if (s_flags & SB_RDONLY) { + f2fs_msg(sbi->sb, KERN_ERR, + "checkpoint=disable on readonly fs"); + return -EINVAL; + } sbi->sb->s_flags |= SB_ACTIVE; f2fs_update_time(sbi, DISABLE_TIME); @@ -1470,18 +1477,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) while (!f2fs_time_over(sbi, DISABLE_TIME)) { mutex_lock(&sbi->gc_mutex); err = f2fs_gc(sbi, true, false, NULL_SEGNO); - if (err == -ENODATA) + if (err == -ENODATA) { + err = 0; break; + } if (err && err != -EAGAIN) - return err; + break; } - err = sync_filesystem(sbi->sb); - if (err) - return err; + ret = sync_filesystem(sbi->sb); + if (ret || err) { + err = ret ? ret: err; + goto restore_flag; + } - if (f2fs_disable_cp_again(sbi)) - return -EAGAIN; + if (f2fs_disable_cp_again(sbi)) { + err = -EAGAIN; + goto restore_flag; + } mutex_lock(&sbi->gc_mutex); cpc.reason = CP_PAUSE; @@ -1490,7 +1503,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) sbi->unusable_block_count = 0; mutex_unlock(&sbi->gc_mutex); - return 0; +restore_flag: + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + return err; } static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) @@ -3359,7 +3374,7 @@ skip_recovery: if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); if (err) - goto free_meta; + goto sync_free_meta; } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { f2fs_enable_checkpoint(sbi); } @@ -3372,7 +3387,7 @@ skip_recovery: /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) - goto free_meta; + goto sync_free_meta; } kvfree(options); @@ -3394,6 +3409,11 @@ skip_recovery: f2fs_update_time(sbi, REQ_TIME); return 0; +sync_free_meta: + /* safe to flush all the data */ + sync_filesystem(sbi->sb); + retry = false; + free_meta: #ifdef CONFIG_QUOTA f2fs_truncate_quota_inode_pages(sb); @@ -3407,6 +3427,8 @@ free_meta: * falls into an infinite loop in f2fs_sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); + /* evict some inodes being cached by GC */ + evict_inodes(sb); f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); -- cgit v1.2.3 From 36f268740bce29bfbae99e472918af34f7985ece Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 13 Jan 2019 17:50:10 -0500 Subject: drm/nouveau/volt/gf117: fix speedo readout register [ Upstream commit fc782242749fa4235592854fafe1a1297583c1fb ] GF117 appears to use the same register as GK104 (but still with the general Fermi readout mechanism). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108980 Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c | 60 ++++++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h index 8a0f85f5fc1a..6a765682fbfa 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h @@ -38,6 +38,7 @@ int nvkm_volt_set_id(struct nvkm_volt *, u8 id, u8 min_id, u8 temp, int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gf100_volt_new(struct nvkm_device *, int, struct nvkm_volt **); +int gf117_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gm20b_volt_new(struct nvkm_device *, int, struct nvkm_volt **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index d9edb5785813..d75fa7678483 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1613,7 +1613,7 @@ nvd7_chipset = { .pci = gf106_pci_new, .therm = gf119_therm_new, .timer = nv41_timer_new, - .volt = gf100_volt_new, + .volt = gf117_volt_new, .ce[0] = gf100_ce_new, .disp = gf119_disp_new, .dma = gf119_dma_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild index bcd179ba11d0..146adcdd316a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild @@ -2,6 +2,7 @@ nvkm-y += nvkm/subdev/volt/base.o nvkm-y += nvkm/subdev/volt/gpio.o nvkm-y += nvkm/subdev/volt/nv40.o nvkm-y += nvkm/subdev/volt/gf100.o +nvkm-y += nvkm/subdev/volt/gf117.o nvkm-y += nvkm/subdev/volt/gk104.o nvkm-y += nvkm/subdev/volt/gk20a.o nvkm-y += nvkm/subdev/volt/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c new file mode 100644 index 000000000000..547a58f0aeac --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c @@ -0,0 +1,60 @@ +/* + * Copyright 2019 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ilia Mirkin + */ +#include "priv.h" + +#include + +static int +gf117_volt_speedo_read(struct nvkm_volt *volt) +{ + struct nvkm_device *device = volt->subdev.device; + struct nvkm_fuse *fuse = device->fuse; + + if (!fuse) + return -EINVAL; + + return nvkm_fuse_read(fuse, 0x3a8); +} + +static const struct nvkm_volt_func +gf117_volt = { + .oneinit = gf100_volt_oneinit, + .vid_get = nvkm_voltgpio_get, + .vid_set = nvkm_voltgpio_set, + .speedo_read = gf117_volt_speedo_read, +}; + +int +gf117_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt) +{ + struct nvkm_volt *volt; + int ret; + + ret = nvkm_volt_new_(&gf117_volt, device, index, &volt); + *pvolt = volt; + if (ret) + return ret; + + return nvkm_voltgpio_init(volt); +} -- cgit v1.2.3 From ed3a6901a3d7e358b5a09c7b22a2750f1bf046c6 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Thu, 14 Feb 2019 17:27:12 +0530 Subject: platform/x86: intel_pmc_core: Quirk to ignore XTAL shutdown [ Upstream commit 238f9c11351f8af8534ae0318b4d9acc77b09ee8 ] On some platforms such as HP Elite-x2-1013-g3, the platform BIOS enforces XTAL to remain off before S0ix state can be achieved. This may not be optimum when we want to enable use cases like Low Power Audio, Wake on Voice etc which always need 24mhz clock. This introduces a new quirk to allow S0ix entry when all other conditions except for XTAL clock are good on a given platform. The extra power consumed by XTAL clock is about 2mw but it saves much more platform power compared to the system that remains in just PC10. Link: https://bit.ly/2UmnrFf Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201579 Tested-by: "David E. Box" Reported-and-tested-by: russianneuromancer Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Andy Shevchenko Signed-off-by: Darren Hart (VMware) Signed-off-by: Sasha Levin --- drivers/platform/x86/intel_pmc_core.c | 34 ++++++++++++++++++++++++++++++++++ drivers/platform/x86/intel_pmc_core.h | 5 +++++ 2 files changed, 39 insertions(+) diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c index c37e74ee609d..a9cbe5be277b 100644 --- a/drivers/platform/x86/intel_pmc_core.c +++ b/drivers/platform/x86/intel_pmc_core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -139,6 +140,7 @@ static const struct pmc_reg_map spt_reg_map = { .pm_cfg_offset = SPT_PMC_PM_CFG_OFFSET, .pm_read_disable_bit = SPT_PMC_READ_DISABLE_BIT, .ltr_ignore_max = SPT_NUM_IP_IGN_ALLOWED, + .pm_vric1_offset = SPT_PMC_VRIC1_OFFSET, }; /* Cannonlake: PGD PFET Enable Ack Status Register(s) bitmap */ @@ -751,6 +753,37 @@ static const struct pci_device_id pmc_pci_ids[] = { { 0, }, }; +/* + * This quirk can be used on those platforms where + * the platform BIOS enforces 24Mhx Crystal to shutdown + * before PMC can assert SLP_S0#. + */ +int quirk_xtal_ignore(const struct dmi_system_id *id) +{ + struct pmc_dev *pmcdev = &pmc; + u32 value; + + value = pmc_core_reg_read(pmcdev, pmcdev->map->pm_vric1_offset); + /* 24MHz Crystal Shutdown Qualification Disable */ + value |= SPT_PMC_VRIC1_XTALSDQDIS; + /* Low Voltage Mode Enable */ + value &= ~SPT_PMC_VRIC1_SLPS0LVEN; + pmc_core_reg_write(pmcdev, pmcdev->map->pm_vric1_offset, value); + return 0; +} + +static const struct dmi_system_id pmc_core_dmi_table[] = { + { + .callback = quirk_xtal_ignore, + .ident = "HP Elite x2 1013 G3", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite x2 1013 G3"), + }, + }, + {} +}; + static int __init pmc_core_probe(void) { struct pmc_dev *pmcdev = &pmc; @@ -792,6 +825,7 @@ static int __init pmc_core_probe(void) return err; } + dmi_check_system(pmc_core_dmi_table); pr_info(" initialized\n"); return 0; } diff --git a/drivers/platform/x86/intel_pmc_core.h b/drivers/platform/x86/intel_pmc_core.h index 1a0104d2cbf0..9bc16d7d2917 100644 --- a/drivers/platform/x86/intel_pmc_core.h +++ b/drivers/platform/x86/intel_pmc_core.h @@ -25,6 +25,7 @@ #define SPT_PMC_MTPMC_OFFSET 0x20 #define SPT_PMC_MFPMC_OFFSET 0x38 #define SPT_PMC_LTR_IGNORE_OFFSET 0x30C +#define SPT_PMC_VRIC1_OFFSET 0x31c #define SPT_PMC_MPHY_CORE_STS_0 0x1143 #define SPT_PMC_MPHY_CORE_STS_1 0x1142 #define SPT_PMC_MPHY_COM_STS_0 0x1155 @@ -135,6 +136,9 @@ enum ppfear_regs { #define SPT_PMC_BIT_MPHY_CMN_LANE2 BIT(2) #define SPT_PMC_BIT_MPHY_CMN_LANE3 BIT(3) +#define SPT_PMC_VRIC1_SLPS0LVEN BIT(13) +#define SPT_PMC_VRIC1_XTALSDQDIS BIT(22) + /* Cannonlake Power Management Controller register offsets */ #define CNP_PMC_SLPS0_DBG_OFFSET 0x10B4 #define CNP_PMC_PM_CFG_OFFSET 0x1818 @@ -217,6 +221,7 @@ struct pmc_reg_map { const int pm_read_disable_bit; const u32 slps0_dbg_offset; const u32 ltr_ignore_max; + const u32 pm_vric1_offset; }; /** -- cgit v1.2.3 From 3835c46e6ff51d24698487ef1e073573264dcfd6 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 13 Feb 2019 17:14:23 +0100 Subject: ARM: 8839/1: kprobe: make patch_lock a raw_spinlock_t [ Upstream commit 143c2a89e0e5fda6c6fd08d7bc1126438c19ae90 ] When running kprobe on -rt kernel, the below bug is caught: |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931 |in_atomic(): 1, irqs_disabled(): 128, pid: 14, name: migration/0 |Preemption disabled at:[<802f2b98>] cpu_stopper_thread+0xc0/0x140 |CPU: 0 PID: 14 Comm: migration/0 Tainted: G O 4.8.3-rt2 #1 |Hardware name: Freescale LS1021A |[<8025a43c>] (___might_sleep) |[<80b5b324>] (rt_spin_lock) |[<80b5c31c>] (__patch_text_real) |[<80b5c3ac>] (patch_text_stop_machine) |[<802f2920>] (multi_cpu_stop) Since patch_text_stop_machine() is called in stop_machine() which disables IRQ, sleepable lock should be not used in this atomic context, so replace patch_lock to raw lock. Signed-off-by: Yang Shi Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Arnd Bergmann Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/patch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index a50dc00d79a2..d0a05a3bdb96 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -16,7 +16,7 @@ struct patch { unsigned int insn; }; -static DEFINE_SPINLOCK(patch_lock); +static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) __acquires(&patch_lock) @@ -33,7 +33,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) return addr; if (flags) - spin_lock_irqsave(&patch_lock, *flags); + raw_spin_lock_irqsave(&patch_lock, *flags); else __acquire(&patch_lock); @@ -48,7 +48,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) clear_fixmap(fixmap); if (flags) - spin_unlock_irqrestore(&patch_lock, *flags); + raw_spin_unlock_irqrestore(&patch_lock, *flags); else __release(&patch_lock); } -- cgit v1.2.3 From a33383757975c454f5f8ffaf984939b5650b42fd Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Fri, 22 Feb 2019 12:36:49 +0800 Subject: drm/amdkfd: use init_mqd function to allocate object for hid_mqd (CI) [ Upstream commit cac734c2dbd2514f14c8c6a17caba1990d83bf1d ] if use the legacy method to allocate object, when mqd_hiq need to run uninit code, it will be cause WARNING call trace. eg: (s3 suspend test) [ 34.918944] Call Trace: [ 34.918948] [] dump_stack+0x19/0x1b [ 34.918950] [] __warn+0xd8/0x100 [ 34.918951] [] warn_slowpath_null+0x1d/0x20 [ 34.918991] [] uninit_mqd_hiq_sdma+0x4e/0x50 [amdgpu] [ 34.919028] [] uninitialize+0x37/0xe0 [amdgpu] [ 34.919064] [] kernel_queue_uninit+0x16/0x30 [amdgpu] [ 34.919086] [] pm_uninit+0x12/0x20 [amdgpu] [ 34.919107] [] stop_nocpsch+0x15/0x20 [amdgpu] [ 34.919129] [] kgd2kfd_suspend.part.4+0x2e/0x50 [amdgpu] [ 34.919150] [] kgd2kfd_suspend+0x17/0x20 [amdgpu] [ 34.919171] [] amdgpu_amdkfd_suspend+0x1a/0x20 [amdgpu] [ 34.919187] [] amdgpu_device_suspend+0x88/0x3a0 [amdgpu] [ 34.919189] [] ? enqueue_entity+0x2ef/0xbe0 [ 34.919205] [] amdgpu_pmops_suspend+0x20/0x30 [amdgpu] [ 34.919207] [] pci_pm_suspend+0x6f/0x150 [ 34.919208] [] ? pci_pm_freeze+0xf0/0xf0 [ 34.919210] [] dpm_run_callback+0x46/0x90 [ 34.919212] [] __device_suspend+0xfb/0x2a0 [ 34.919213] [] async_suspend+0x1f/0xa0 [ 34.919214] [] async_run_entry_fn+0x3f/0x130 [ 34.919216] [] process_one_work+0x17f/0x440 [ 34.919217] [] worker_thread+0x126/0x3c0 [ 34.919218] [] ? manage_workers.isra.25+0x2a0/0x2a0 [ 34.919220] [] kthread+0xd1/0xe0 [ 34.919221] [] ? insert_kthread_work+0x40/0x40 [ 34.919222] [] ret_from_fork_nospec_begin+0x7/0x21 [ 34.919224] [] ? insert_kthread_work+0x40/0x40 [ 34.919224] ---[ end trace 38cd9f65c963adad ]--- Signed-off-by: Kevin Wang Reviewed-by: Oak Zeng Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 52 +----------------------- 1 file changed, 1 insertion(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 47243165a082..ae90a99909ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -323,57 +323,7 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) { - uint64_t addr; - struct cik_mqd *m; - int retval; - - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), - mqd_mem_obj); - - if (retval != 0) - return -ENOMEM; - - m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; - addr = (*mqd_mem_obj)->gpu_addr; - - memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); - - m->header = 0xC0310800; - m->compute_pipelinestat_enable = 1; - m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; - - m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE | - PRELOAD_REQ; - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | - QUANTUM_DURATION(10); - - m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; - m->cp_mqd_base_addr_lo = lower_32_bits(addr); - m->cp_mqd_base_addr_hi = upper_32_bits(addr); - - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; - - /* - * Pipe Priority - * Identifies the pipe relative priority when this queue is connected - * to the pipeline. The pipe priority is against the GFX pipe and HP3D. - * In KFD we are using a fixed pipe priority set to CS_MEDIUM. - * 0 = CS_LOW (typically below GFX) - * 1 = CS_MEDIUM (typically between HP3D and GFX - * 2 = CS_HIGH (typically above HP3D) - */ - m->cp_hqd_pipe_priority = 1; - m->cp_hqd_queue_priority = 15; - - *mqd = m; - if (gart_addr) - *gart_addr = addr; - retval = mm->update_mqd(mm, m, q); - - return retval; + return init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q); } static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, -- cgit v1.2.3 From 2fbb0171b13a2fcdd7deb682527784b47cd8d624 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 1 Mar 2019 10:57:57 +0800 Subject: appletalk: Fix use-after-free in atalk_proc_exit [ Upstream commit 6377f787aeb945cae7abbb6474798de129e1f3ac ] KASAN report this: BUG: KASAN: use-after-free in pde_subdir_find+0x12d/0x150 fs/proc/generic.c:71 Read of size 8 at addr ffff8881f41fe5b0 by task syz-executor.0/2806 CPU: 0 PID: 2806 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xfa/0x1ce lib/dump_stack.c:113 print_address_description+0x65/0x270 mm/kasan/report.c:187 kasan_report+0x149/0x18d mm/kasan/report.c:317 pde_subdir_find+0x12d/0x150 fs/proc/generic.c:71 remove_proc_entry+0xe8/0x420 fs/proc/generic.c:667 atalk_proc_exit+0x18/0x820 [appletalk] atalk_exit+0xf/0x5a [appletalk] __do_sys_delete_module kernel/module.c:1018 [inline] __se_sys_delete_module kernel/module.c:961 [inline] __x64_sys_delete_module+0x3dc/0x5e0 kernel/module.c:961 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fb2de6b9c58 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000200001c0 RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb2de6ba6bc R13: 00000000004bccaa R14: 00000000006f6bc8 R15: 00000000ffffffff Allocated by task 2806: set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc.constprop.3+0xa0/0xd0 mm/kasan/common.c:496 slab_post_alloc_hook mm/slab.h:444 [inline] slab_alloc_node mm/slub.c:2739 [inline] slab_alloc mm/slub.c:2747 [inline] kmem_cache_alloc+0xcf/0x250 mm/slub.c:2752 kmem_cache_zalloc include/linux/slab.h:730 [inline] __proc_create+0x30f/0xa20 fs/proc/generic.c:408 proc_mkdir_data+0x47/0x190 fs/proc/generic.c:469 0xffffffffc10c01bb 0xffffffffc10c0166 do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 2806: set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:458 slab_free_hook mm/slub.c:1409 [inline] slab_free_freelist_hook mm/slub.c:1436 [inline] slab_free mm/slub.c:2986 [inline] kmem_cache_free+0xa6/0x2a0 mm/slub.c:3002 pde_put+0x6e/0x80 fs/proc/generic.c:647 remove_proc_entry+0x1d3/0x420 fs/proc/generic.c:684 0xffffffffc10c031c 0xffffffffc10c0166 do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8881f41fe500 which belongs to the cache proc_dir_entry of size 256 The buggy address is located 176 bytes inside of 256-byte region [ffff8881f41fe500, ffff8881f41fe600) The buggy address belongs to the page: page:ffffea0007d07f80 count:1 mapcount:0 mapping:ffff8881f6e69a00 index:0x0 flags: 0x2fffc0000000200(slab) raw: 02fffc0000000200 dead000000000100 dead000000000200 ffff8881f6e69a00 raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881f41fe480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8881f41fe500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8881f41fe580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881f41fe600: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8881f41fe680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb It should check the return value of atalk_proc_init fails, otherwise atalk_exit will trgger use-after-free in pde_subdir_find while unload the module.This patch fix error cleanup path of atalk_init Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/atalk.h | 2 +- net/appletalk/atalk_proc.c | 2 +- net/appletalk/ddp.c | 37 +++++++++++++++++++++++++++++++------ net/appletalk/sysctl_net_atalk.c | 5 ++++- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 840cf92307ba..d5cfc0b15b76 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -158,7 +158,7 @@ extern int sysctl_aarp_retransmit_limit; extern int sysctl_aarp_resolve_time; #ifdef CONFIG_SYSCTL -extern void atalk_register_sysctl(void); +extern int atalk_register_sysctl(void); extern void atalk_unregister_sysctl(void); #else static inline int atalk_register_sysctl(void) diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 8006295f8bd7..dda73991bb54 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -255,7 +255,7 @@ out_interface: goto out; } -void __exit atalk_proc_exit(void) +void atalk_proc_exit(void) { remove_proc_entry("interface", atalk_proc_dir); remove_proc_entry("route", atalk_proc_dir); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 9b6bc5abe946..795fbc6c06aa 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1910,12 +1910,16 @@ static const char atalk_err_snap[] __initconst = /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { - int rc = proto_register(&ddp_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&ddp_proto, 0); + if (rc) goto out; - (void)sock_register(&atalk_family_ops); + rc = sock_register(&atalk_family_ops); + if (rc) + goto out_proto; + ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); if (!ddp_dl) printk(atalk_err_snap); @@ -1923,12 +1927,33 @@ static int __init atalk_init(void) dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); - register_netdevice_notifier(&ddp_notifier); + rc = register_netdevice_notifier(&ddp_notifier); + if (rc) + goto out_sock; + aarp_proto_init(); - atalk_proc_init(); - atalk_register_sysctl(); + rc = atalk_proc_init(); + if (rc) + goto out_aarp; + + rc = atalk_register_sysctl(); + if (rc) + goto out_proc; out: return rc; +out_proc: + atalk_proc_exit(); +out_aarp: + aarp_cleanup_module(); + unregister_netdevice_notifier(&ddp_notifier); +out_sock: + dev_remove_pack(&ppptalk_packet_type); + dev_remove_pack(<alk_packet_type); + unregister_snap_client(ddp_dl); + sock_unregister(PF_APPLETALK); +out_proto: + proto_unregister(&ddp_proto); + goto out; } module_init(atalk_init); diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index c744a853fa5f..d945b7c0176d 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -45,9 +45,12 @@ static struct ctl_table atalk_table[] = { static struct ctl_table_header *atalk_table_header; -void atalk_register_sysctl(void) +int __init atalk_register_sysctl(void) { atalk_table_header = register_net_sysctl(&init_net, "net/appletalk", atalk_table); + if (!atalk_table_header) + return -ENOMEM; + return 0; } void atalk_unregister_sysctl(void) -- cgit v1.2.3 From f0f1c97f38b890c5eea33dfd60fb2d12657ba0ef Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 7 Feb 2019 15:48:44 +1000 Subject: cifs: return -ENODATA when deleting an xattr that does not exist [ Upstream commit 2109464184919f81efd593b4008291448c522815 ] BUGZILLA: https://bugzilla.kernel.org/show_bug.cgi?id=202007 When deleting an xattr/EA: SMB2/3 servers will return SUCCESS when clients delete non-existing EAs. This means that we need to first QUERY the server and check if the EA exists or not so that we can return -ENODATA correctly when this happens. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b29f711ab965..ea56b1cdbdde 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -949,6 +949,16 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; memset(rsp_iov, 0, sizeof(rsp_iov)); + if (ses->server->ops->query_all_EAs) { + if (!ea_value) { + rc = ses->server->ops->query_all_EAs(xid, tcon, path, + ea_name, NULL, 0, + cifs_sb); + if (rc == -ENODATA) + goto sea_exit; + } + } + /* Open */ memset(&open_iov, 0, sizeof(open_iov)); rqst[0].rq_iov = open_iov; -- cgit v1.2.3 From 78154e3198885b037aa30bbf9539747c2692656a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 7 Mar 2019 16:28:18 -0800 Subject: lib/div64.c: off by one in shift [ Upstream commit cdc94a37493135e355dfc0b0e086d84e3eadb50d ] fls counts bits starting from 1 to 32 (returns 0 for zero argument). If we add 1 we shift right one bit more and loose precision from divisor, what cause function incorect results with some numbers. Corrected code was tested in user-space, see bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202391 Link: http://lkml.kernel.org/r/1548686944-11891-1-git-send-email-sgruszka@redhat.com Fixes: 658716d19f8f ("div64_u64(): improve precision on 32bit platforms") Signed-off-by: Stanislaw Gruszka Reported-by: Siarhei Volkau Tested-by: Siarhei Volkau Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/div64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/div64.c b/lib/div64.c index 01c8602bb6ff..ee146bb4c558 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -109,7 +109,7 @@ u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) quot = div_u64_rem(dividend, divisor, &rem32); *remainder = rem32; } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) @@ -147,7 +147,7 @@ u64 div64_u64(u64 dividend, u64 divisor) if (high == 0) { quot = div_u64(dividend, divisor); } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) -- cgit v1.2.3 From 4fb70c2107e99b897d68b0bd820bebad6e5b1614 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 8 Mar 2019 12:48:39 +0000 Subject: rxrpc: Fix client call connect/disconnect race [ Upstream commit 930c9f9125c85b5134b3e711bc252ecc094708e3 ] rxrpc_disconnect_client_call() reads the call's connection ID protocol value (call->cid) as part of that function's variable declarations. This is bad because it's not inside the locked section and so may race with someone granting use of the channel to the call. This manifests as an assertion failure (see below) where the call in the presumed channel (0 because call->cid wasn't set when we read it) doesn't match the call attached to the channel we were actually granted (if 1, 2 or 3). Fix this by moving the read and dependent calculations inside of the channel_lock section. Also, only set the channel number and pointer variables if cid is not zero (ie. unset). This problem can be induced by injecting an occasional error in rxrpc_wait_for_channel() before the call to schedule(). Make two further changes also: (1) Add a trace for wait failure in rxrpc_connect_call(). (2) Drop channel_lock before BUG'ing in the case of the assertion failure. The failure causes a trace akin to the following: rxrpc: Assertion failed - 18446612685268945920(0xffff8880beab8c00) == 18446612685268621312(0xffff8880bea69800) is false ------------[ cut here ]------------ kernel BUG at net/rxrpc/conn_client.c:824! ... RIP: 0010:rxrpc_disconnect_client_call+0x2bf/0x99d ... Call Trace: rxrpc_connect_call+0x902/0x9b3 ? wake_up_q+0x54/0x54 rxrpc_new_client_call+0x3a0/0x751 ? rxrpc_kernel_begin_call+0x141/0x1bc ? afs_alloc_call+0x1b5/0x1b5 rxrpc_kernel_begin_call+0x141/0x1bc afs_make_call+0x20c/0x525 ? afs_alloc_call+0x1b5/0x1b5 ? __lock_is_held+0x40/0x71 ? lockdep_init_map+0xaf/0x193 ? lockdep_init_map+0xaf/0x193 ? __lock_is_held+0x40/0x71 ? yfs_fs_fetch_data+0x33b/0x34a yfs_fs_fetch_data+0x33b/0x34a afs_fetch_data+0xdc/0x3b7 afs_read_dir+0x52d/0x97f afs_dir_iterate+0xa0/0x661 ? iterate_dir+0x63/0x141 iterate_dir+0xa2/0x141 ksys_getdents64+0x9f/0x11b ? filldir+0x111/0x111 ? do_syscall_64+0x3e/0x1a0 __x64_sys_getdents64+0x16/0x19 do_syscall_64+0x7d/0x1a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects") Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/conn_client.c | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 5b50fe4906d2..7b60fd186cfe 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -76,6 +76,7 @@ enum rxrpc_client_trace { rxrpc_client_chan_disconnect, rxrpc_client_chan_pass, rxrpc_client_chan_unstarted, + rxrpc_client_chan_wait_failed, rxrpc_client_cleanup, rxrpc_client_count, rxrpc_client_discard, @@ -276,6 +277,7 @@ enum rxrpc_tx_point { EM(rxrpc_client_chan_disconnect, "ChDisc") \ EM(rxrpc_client_chan_pass, "ChPass") \ EM(rxrpc_client_chan_unstarted, "ChUnst") \ + EM(rxrpc_client_chan_wait_failed, "ChWtFl") \ EM(rxrpc_client_cleanup, "Clean ") \ EM(rxrpc_client_count, "Count ") \ EM(rxrpc_client_discard, "Discar") \ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5cf6d9f4761d..83797b3949e2 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -704,6 +704,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, ret = rxrpc_wait_for_channel(call, gfp); if (ret < 0) { + trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); rxrpc_disconnect_client_call(call); goto out; } @@ -774,16 +775,22 @@ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) */ void rxrpc_disconnect_client_call(struct rxrpc_call *call) { - unsigned int channel = call->cid & RXRPC_CHANNELMASK; struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_channel *chan = NULL; struct rxrpc_net *rxnet = conn->params.local->rxnet; + unsigned int channel = -1; + u32 cid; + spin_lock(&conn->channel_lock); + + cid = call->cid; + if (cid) { + channel = cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + } trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); call->conn = NULL; - spin_lock(&conn->channel_lock); - /* Calls that have never actually been assigned a channel can simply be * discarded. If the conn didn't get used either, it will follow * immediately unless someone else grabs it in the meantime. @@ -807,7 +814,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out; } - ASSERTCMP(rcu_access_pointer(chan->call), ==, call); + if (rcu_access_pointer(chan->call) != call) { + spin_unlock(&conn->channel_lock); + BUG(); + } /* If a client call was exposed to the world, we save the result for * retransmission. -- cgit v1.2.3 From c55d13d94f8ced4f3fc585b9e2ee12d9df5f3fc4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 23 Feb 2019 09:48:27 +0800 Subject: f2fs: fix to dirty inode for i_mode recovery [ Upstream commit ca597bddedd94906cd761d8be6a3ad21292725de ] As Seulbae Kim reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202637 We didn't recover permission field correctly after sudden power-cut, the reason is in setattr we didn't add inode into global dirty list once i_mode is changed, so latter checkpoint triggered by fsync will not flush last i_mode into disk, result in this problem, fix it. Reported-by: Seulbae Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/file.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ae2b45e75847..30ed43bce110 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -768,7 +768,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int err; - bool size_changed = false; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; @@ -843,8 +842,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) down_write(&F2FS_I(inode)->i_sem); F2FS_I(inode)->last_disk_size = i_size_read(inode); up_write(&F2FS_I(inode)->i_sem); - - size_changed = true; } __setattr_copy(inode, attr); @@ -858,7 +855,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } /* file size may changed here */ - f2fs_mark_inode_dirty_sync(inode, size_changed); + f2fs_mark_inode_dirty_sync(inode, true); /* inode change will produce dirty node pages flushed by checkpoint */ f2fs_balance_fs(F2FS_I_SB(inode), true); -- cgit v1.2.3 From b5f51f7abb4373b9cb5677aba23ec45e13f21b02 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 5 Mar 2019 17:52:33 +0800 Subject: f2fs: fix to use kvfree instead of kzfree [ Upstream commit 2a6a7e722e7a78d774ce02b847c5b183a3ff2672 ] As Jiqun Li reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202747 System can panic due to using wrong allocate/free function pair in xattr interface: - use kvmalloc to allocate memory - use kzfree to free memory Let's fix to use kvfree instead of kzfree, BTW, we are safe to get rid of kzfree, since there is no such confidential data stored as xattr, we don't need to zero it before free memory. Fixes: 5222595d093e ("f2fs: use kvmalloc, if kmalloc is failed") Reported-by: Jiqun Li Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/xattr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 73b92985198b..6b6fe6431a64 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -347,7 +347,7 @@ check: *base_addr = txattr_addr; return 0; out: - kzfree(txattr_addr); + kvfree(txattr_addr); return err; } @@ -390,7 +390,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, *base_addr = txattr_addr; return 0; fail: - kzfree(txattr_addr); + kvfree(txattr_addr); return err; } @@ -517,7 +517,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, } error = size; out: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -563,7 +563,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) } error = buffer_size - rest; cleanup: - kzfree(base_addr); + kvfree(base_addr); return error; } @@ -694,7 +694,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (!error && S_ISDIR(inode->i_mode)) set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); exit: - kzfree(base_addr); + kvfree(base_addr); return error; } -- cgit v1.2.3 From 1c108a1b645930bfaac6d10b0e1374554ff125c8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Mar 2019 17:30:59 +0800 Subject: f2fs: fix to add refcount once page is tagged PG_private [ Upstream commit 240a59156d9bcfabceddb66be449e7b32fb5dc4a ] As Gao Xiang reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202749 f2fs may skip pageout() due to incorrect page reference count. The problem here is that MM defined the rule [1] very clearly that once page was set with PG_private flag, we should increment the refcount in that page, also main flows like pageout(), migrate_page() will assume there is one additional page reference count if page_has_private() returns true. But currently, f2fs won't add/del refcount when changing PG_private flag. Anyway, f2fs should follow MM's rule to make MM's related flows running as expected. [1] https://lore.kernel.org/lkml/2b19b3c4-2bc4-15fa-15cc-27a13e5c7af1@aol.com/ Reported-by: Gao Xiang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 21 ++++++++------------- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 21 +++++++++++++++++++++ fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 9 +++------ 6 files changed, 36 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ccccf0ce2f06..7743fa83b895 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -406,7 +406,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } @@ -957,7 +957,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page) inode_inc_dirty_pages(inode); spin_unlock(&sbi->inode_lock[type]); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f91d8630c9a2..c99aab23efea 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2711,8 +2711,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, if (IS_ATOMIC_WRITTEN_PAGE(page)) return f2fs_drop_inmem_page(inode, page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); } int f2fs_release_page(struct page *page, gfp_t wait) @@ -2726,8 +2725,7 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 0; clear_cold_data(page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); return 1; } @@ -2795,12 +2793,8 @@ int f2fs_migrate_page(struct address_space *mapping, return -EAGAIN; } - /* - * A reference is expected if PagePrivate set when move mapping, - * however F2FS breaks this for maintaining dirty page counts when - * truncating pages. So here adjusting the 'extra_count' make it work. - */ - extra_count = (atomic_written ? 1 : 0) - page_has_private(page); + /* one extra reference was held for atomic_write page */ + extra_count = atomic_written ? 1 : 0; rc = migrate_page_move_mapping(mapping, newpage, page, mode, extra_count); if (rc != MIGRATEPAGE_SUCCESS) { @@ -2821,9 +2815,10 @@ int f2fs_migrate_page(struct address_space *mapping, get_page(newpage); } - if (PagePrivate(page)) - SetPagePrivate(newpage); - set_page_private(newpage, page_private(page)); + if (PagePrivate(page)) { + f2fs_set_page_private(newpage, page_private(page)); + f2fs_clear_page_private(page); + } if (mode != MIGRATE_SYNC_NO_COPY) migrate_page_copy(newpage, page); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 50d0d36280fa..99a6063c2327 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -728,7 +728,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, !f2fs_truncate_hole(dir, page->index, page->index + 1)) { f2fs_clear_page_cache_dirty_tag(page); clear_page_dirty_for_io(page); - ClearPagePrivate(page); + f2fs_clear_page_private(page); ClearPageUptodate(page); clear_cold_data(page); inode_dec_dirty_pages(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 279bc00489cc..6d9186a6528c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2825,6 +2825,27 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, return true; } +static inline void f2fs_set_page_private(struct page *page, + unsigned long data) +{ + if (PagePrivate(page)) + return; + + get_page(page); + SetPagePrivate(page); + set_page_private(page, data); +} + +static inline void f2fs_clear_page_private(struct page *page) +{ + if (!PagePrivate(page)) + return; + + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 0); +} + /* * file.c */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f6ff84e29749..3f99ab288695 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1961,7 +1961,7 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); - SetPagePrivate(page); + f2fs_set_page_private(page, 0); f2fs_trace_pid(page); return 1; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e1b1d390b329..b6c8b0696ef6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -191,8 +191,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page) f2fs_trace_pid(page); - set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); - SetPagePrivate(page); + f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); @@ -280,8 +279,7 @@ next: ClearPageUptodate(page); clear_cold_data(page); } - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); f2fs_put_page(page, 1); list_del(&cur->list); @@ -370,8 +368,7 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page) kmem_cache_free(inmem_entry_slab, cur); ClearPageUptodate(page); - set_page_private(page, 0); - ClearPagePrivate(page); + f2fs_clear_page_private(page); f2fs_put_page(page, 0); trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); -- cgit v1.2.3 From d05fb50b3d0c58bafcae154c597ebde9e5eab318 Mon Sep 17 00:00:00 2001 From: Pi-Hsun Shih Date: Wed, 13 Mar 2019 11:44:33 -0700 Subject: include/linux/swap.h: use offsetof() instead of custom __swapoffset macro [ Upstream commit a4046c06be50a4f01d435aa7fe57514818e6cc82 ] Use offsetof() to calculate offset of a field to take advantage of compiler built-in version when possible, and avoid UBSAN warning when compiling with Clang: UBSAN: Undefined behaviour in mm/swapfile.c:3010:38 member access within null pointer of type 'union swap_header' CPU: 6 PID: 1833 Comm: swapon Tainted: G S 4.19.23 #43 Call trace: dump_backtrace+0x0/0x194 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0x70/0x94 ubsan_epilogue+0x14/0x44 ubsan_type_mismatch_common+0xf4/0xfc __ubsan_handle_type_mismatch_v1+0x34/0x54 __se_sys_swapon+0x654/0x1084 __arm64_sys_swapon+0x1c/0x24 el0_svc_common+0xa8/0x150 el0_svc_compat_handler+0x2c/0x38 el0_svc_compat+0x8/0x18 Link: http://lkml.kernel.org/r/20190312081902.223764-1-pihsun@chromium.org Signed-off-by: Pi-Hsun Shih Acked-by: Michal Hocko Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 622025ac1461..f1146ed21062 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -157,9 +157,9 @@ struct swap_extent { /* * Max bad pages in the new format.. */ -#define __swapoffset(x) ((unsigned long)&((union swap_header *)0)->x) #define MAX_SWAP_BADPAGES \ - ((__swapoffset(magic.magic) - __swapoffset(info.badpages)) / sizeof(int)) + ((offsetof(union swap_header, magic.magic) - \ + offsetof(union swap_header, info.badpages)) / sizeof(int)) enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ -- cgit v1.2.3 From b05baa9f19d070674dc6863f8341c5207c18c0e2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Mar 2019 15:54:43 +0100 Subject: bpf: fix use after free in bpf_evict_inode [ Upstream commit 1da6c4d9140cb7c13e87667dc4e1488d6c8fc10f ] syzkaller was able to generate the following UAF in bpf: BUG: KASAN: use-after-free in lookup_last fs/namei.c:2269 [inline] BUG: KASAN: use-after-free in path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318 Read of size 1 at addr ffff8801c4865c47 by task syz-executor2/9423 CPU: 0 PID: 9423 Comm: syz-executor2 Not tainted 4.20.0-rc1-next-20181109+ #110 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x244/0x39d lib/dump_stack.c:113 print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.8+0x242/0x309 mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 lookup_last fs/namei.c:2269 [inline] path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318 filename_lookup+0x26a/0x520 fs/namei.c:2348 user_path_at_empty+0x40/0x50 fs/namei.c:2608 user_path include/linux/namei.h:62 [inline] do_mount+0x180/0x1ff0 fs/namespace.c:2980 ksys_mount+0x12d/0x140 fs/namespace.c:3258 __do_sys_mount fs/namespace.c:3272 [inline] __se_sys_mount fs/namespace.c:3269 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3269 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457569 Code: fd b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fde6ed96c78 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000457569 RDX: 0000000020000040 RSI: 0000000020000000 RDI: 0000000000000000 RBP: 000000000072bf00 R08: 0000000020000340 R09: 0000000000000000 R10: 0000000000200000 R11: 0000000000000246 R12: 00007fde6ed976d4 R13: 00000000004c2c24 R14: 00000000004d4990 R15: 00000000ffffffff Allocated by task 9424: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc mm/slab.c:3722 [inline] __kmalloc_track_caller+0x157/0x760 mm/slab.c:3737 kstrdup+0x39/0x70 mm/util.c:49 bpf_symlink+0x26/0x140 kernel/bpf/inode.c:356 vfs_symlink+0x37a/0x5d0 fs/namei.c:4127 do_symlinkat+0x242/0x2d0 fs/namei.c:4154 __do_sys_symlink fs/namei.c:4173 [inline] __se_sys_symlink fs/namei.c:4171 [inline] __x64_sys_symlink+0x59/0x80 fs/namei.c:4171 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 9425: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3817 bpf_evict_inode+0x11f/0x150 kernel/bpf/inode.c:565 evict+0x4b9/0x980 fs/inode.c:558 iput_final fs/inode.c:1550 [inline] iput+0x674/0xa90 fs/inode.c:1576 do_unlinkat+0x733/0xa30 fs/namei.c:4069 __do_sys_unlink fs/namei.c:4110 [inline] __se_sys_unlink fs/namei.c:4108 [inline] __x64_sys_unlink+0x42/0x50 fs/namei.c:4108 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe In this scenario path lookup under RCU is racing with the final unlink in case of symlinks. As Linus puts it in his analysis: [...] We actually RCU-delay the inode freeing itself, but when we do the final iput(), the "evict()" function is called synchronously. Now, the simple fix would seem to just RCU-delay the kfree() of the symlink data in bpf_evict_inode(). Maybe that's the right thing to do. [...] Al suggested to piggy-back on the ->destroy_inode() callback in order to implement RCU deferral there which can then kfree() the inode->i_link eventually right before putting inode back into inode cache. By reusing free_inode_nonrcu() from there we can avoid the need for our own inode cache and just reuse generic one as we currently do. And in-fact on top of all this we should just get rid of the bpf_evict_inode() entirely. This means truncate_inode_pages_final() and clear_inode() will then simply be called by the fs core via evict(). Dropping the reference should really only be done when inode is unhashed and nothing reachable anymore, so it's better also moved into the final ->destroy_inode() callback. Fixes: 0f98621bef5d ("bpf, inode: add support for symlinks and fix mtime/ctime") Reported-by: syzbot+fb731ca573367b7f6564@syzkaller.appspotmail.com Reported-by: syzbot+a13e5ead792d6df37818@syzkaller.appspotmail.com Reported-by: syzbot+7a8ba368b47fdefca61e@syzkaller.appspotmail.com Suggested-by: Al Viro Analyzed-by: Linus Torvalds Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Linus Torvalds Acked-by: Al Viro Link: https://lore.kernel.org/lkml/0000000000006946d2057bbd0eef@google.com/T/ Signed-off-by: Sasha Levin (Microsoft) --- kernel/bpf/inode.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 2ada5e21dfa6..4a8f390a2b82 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -554,19 +554,6 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ } EXPORT_SYMBOL(bpf_prog_get_type_path); -static void bpf_evict_inode(struct inode *inode) -{ - enum bpf_type type; - - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); - - if (S_ISLNK(inode->i_mode)) - kfree(inode->i_link); - if (!bpf_inode_type(inode, &type)) - bpf_any_put(inode->i_private, type); -} - /* * Display the mount options in /proc/mounts. */ @@ -579,11 +566,28 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void bpf_destroy_inode_deferred(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + enum bpf_type type; + + if (S_ISLNK(inode->i_mode)) + kfree(inode->i_link); + if (!bpf_inode_type(inode, &type)) + bpf_any_put(inode->i_private, type); + free_inode_nonrcu(inode); +} + +static void bpf_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); +} + static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, - .evict_inode = bpf_evict_inode, + .destroy_inode = bpf_destroy_inode, }; enum { -- cgit v1.2.3 From b59d92ac8a328d9b0212ba5fcd3318cdf8a69409 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:19 -0700 Subject: IB/hfi1: Failed to drain send queue when QP is put into error state commit 662d66466637862ef955f7f6e78a286d8cf0ebef upstream. When a QP is put into error state, all pending requests in the send work queue should be drained. The following sequence of events could lead to a failure, causing a request to hang: (1) The QP builds a packet and tries to send through SDMA engine. However, PIO engine is still busy. Consequently, this packet is put on the QP's tx list and the QP is put on the PIO waiting list. The field qp->s_flags is set with HFI1_S_WAIT_PIO_DRAIN; (2) The QP is put into error state by the user application and notify_error_qp() is called, which removes the QP from the PIO waiting list and the packet from the QP's tx list. In addition, qp->s_flags is cleared of RVT_S_ANY_WAIT_IO bits, which does not include HFI1_S_WAIT_PIO_DRAIN bit; (3) The hfi1_schdule_send() function is called to drain the QP's send queue. Subsequently, hfi1_do_send() is called. Since the flag bit HFI1_S_WAIT_PIO_DRAIN is set in qp->s_flags, hfi1_send_ok() fails. As a result, hfi1_do_send() bails out without draining any request from the send queue; (4) The PIO engine completes the sending and tries to wake up any QP on its waiting list. But the QP has been removed from the PIO waiting list and therefore is kept in sleep forever. The fix is to clear qp->s_flags of HFI1_S_ANY_WAIT_IO bits in step (2). HFI1_S_ANY_WAIT_IO includes RVT_S_ANY_WAIT_IO and HFI1_S_WAIT_PIO_DRAIN. Fixes: 2e2ba09e48b7 ("IB/rdmavt, IB/hfi1: Create device dependent s_flags") Cc: # 4.19.x+ Reviewed-by: Mike Marciniszyn Reviewed-by: Alex Estrin Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 5344e8993b28..5866f358ea04 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -833,7 +833,7 @@ void notify_error_qp(struct rvt_qp *qp) write_seqlock(lock); if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { - qp->s_flags &= ~RVT_S_ANY_WAIT_IO; + qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; rvt_put_qp(qp); -- cgit v1.2.3 From e78434f4dcd2136c537f7b430f7a5a8c45ec406e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 3 Apr 2019 11:37:07 +0800 Subject: paride/pf: Fix potential NULL pointer dereference [ Upstream commit 58ccd2d31e502c37e108b285bf3d343eb00c235b ] Syzkaller report this: pf: pf version 1.04, major 47, cluster 64, nice 0 pf: No ATAPI disk detected kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 0 PID: 9887 Comm: syz-executor.0 Tainted: G C 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:pf_init+0x7af/0x1000 [pf] Code: 46 77 d2 48 89 d8 48 c1 e8 03 80 3c 28 00 74 08 48 89 df e8 03 25 a6 d2 4c 8b 23 49 8d bc 24 80 05 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 74 05 e8 e6 24 a6 d2 49 8b bc 24 80 05 00 00 e8 79 34 RSP: 0018:ffff8881abcbf998 EFLAGS: 00010202 RAX: 00000000000000b0 RBX: ffffffffc1e4a8a8 RCX: ffffffffaec50788 RDX: 0000000000039b10 RSI: ffffc9000153c000 RDI: 0000000000000580 RBP: dffffc0000000000 R08: ffffed103ee44e59 R09: ffffed103ee44e59 R10: 0000000000000001 R11: ffffed103ee44e58 R12: 0000000000000000 R13: ffffffffc1e4b028 R14: 0000000000000000 R15: 0000000000000020 FS: 00007f1b78a91700(0000) GS:ffff8881f7200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6d72b207f8 CR3: 00000001d5790004 CR4: 00000000007606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? 0xffffffffc1e50000 do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f1b78a90c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007f1b78a90c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f1b78a916bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: pf(+) paride gpio_tps65218 tps65218 i2c_cht_wc ati_remote dc395x act_meta_skbtcindex act_ife ife ecdh_generic rc_xbox_dvd sky81452_regulator v4l2_fwnode leds_blinkm snd_usb_hiface comedi(C) aes_ti slhc cfi_cmdset_0020 mtd cfi_util sx8654 mdio_gpio of_mdio fixed_phy mdio_bitbang libphy alcor_pci matrix_keymap hid_uclogic usbhid scsi_transport_fc videobuf2_v4l2 videobuf2_dma_sg snd_soc_pcm179x_spi snd_soc_pcm179x_codec i2c_demux_pinctrl mdev snd_indigodj isl6405 mii enc28j60 cmac adt7316_i2c(C) adt7316(C) fmc_trivial fmc nf_reject_ipv4 authenc rc_dtt200u rtc_ds1672 dvb_usb_dibusb_mc dvb_usb_dibusb_mc_common dib3000mc dibx000_common dvb_usb_dibusb_common dvb_usb dvb_core videobuf2_common videobuf2_vmalloc videobuf2_memops regulator_haptic adf7242 mac802154 ieee802154 s5h1409 da9034_ts snd_intel8x0m wmi cx24120 usbcore sdhci_cadence sdhci_pltfm sdhci mmc_core joydev i2c_algo_bit scsi_transport_iscsi iscsi_boot_sysfs ves1820 lockd grace nfs_acl auth_rpcgss sunrp c ip_vs snd_soc_adau7002 snd_cs4281 snd_rawmidi gameport snd_opl3_lib snd_seq_device snd_hwdep snd_ac97_codec ad7418 hid_primax hid snd_soc_cs4265 snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer ac97_bus snd_compress snd soundcore ti_adc108s102 eeprom_93cx6 i2c_algo_pca mlxreg_hotplug st_pressure st_sensors industrialio_triggered_buffer kfifo_buf industrialio v4l2_common videodev media snd_soc_adau_utils rc_pinnacle_grey rc_core pps_gpio leds_lm3692x nandcore ledtrig_pattern iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel ide_pci_generic aes_x86_64 piix crypto_simd input_leds psmouse cryp td glue_helper ide_core intel_agp serio_raw intel_gtt agpgart ata_generic i2c_piix4 pata_acpi parport_pc parport rtc_cmos floppy sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: paride] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 7a818cf5f210d79e ]--- If alloc_disk fails in pf_init_units, pf->disk will be NULL, however in pf_detect and pf_exit, it's not check this before free.It may result a NULL pointer dereference. Also when register_blkdev failed, blk_cleanup_queue() and blk_mq_free_tag_set() should be called to free resources. Reported-by: Hulk Robot Fixes: 6ce59025f118 ("paride/pf: cleanup queues when detection fails") Signed-off-by: YueHaibing Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/paride/pf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 103b617cdc31..35e6e271b219 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -762,6 +762,8 @@ static int pf_detect(void) printk("%s: No ATAPI disk detected\n", name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; blk_cleanup_queue(pf->disk->queue); pf->disk->queue = NULL; blk_mq_free_tag_set(&pf->tag_set); @@ -1029,8 +1031,13 @@ static int __init pf_init(void) pf_busy = 0; if (register_blkdev(major, name)) { - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) + for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; + blk_cleanup_queue(pf->disk->queue); + blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); + } return -EBUSY; } @@ -1051,6 +1058,9 @@ static void __exit pf_exit(void) int unit; unregister_blkdev(major, name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + if (!pf->disk) + continue; + if (pf->present) del_gendisk(pf->disk); -- cgit v1.2.3 From 2db9f8d63d74969fc97017136b347ed2a5fb4b66 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 5 Apr 2019 10:14:58 +0800 Subject: paride/pcd: Fix potential NULL pointer dereference and mem leak [ Upstream commit f0d1762554014ce0ae347b9f0d088f2c157c8c72 ] Syzkaller report this: pcd: pcd version 1.07, major 46, nice 0 pcd0: Autoprobe failed pcd: No CD-ROM drive found kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 4525 Comm: syz-executor.0 Not tainted 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:pcd_init+0x95c/0x1000 [pcd] Code: c4 ab f7 48 89 d8 48 c1 e8 03 80 3c 28 00 74 08 48 89 df e8 56 a3 da f7 4c 8b 23 49 8d bc 24 80 05 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 74 05 e8 39 a3 da f7 49 8b bc 24 80 05 00 00 e8 cc b2 RSP: 0018:ffff8881e84df880 EFLAGS: 00010202 RAX: 00000000000000b0 RBX: ffffffffc155a088 RCX: ffffffffc1508935 RDX: 0000000000040000 RSI: ffffc900014f0000 RDI: 0000000000000580 RBP: dffffc0000000000 R08: ffffed103ee658b8 R09: ffffed103ee658b8 R10: 0000000000000001 R11: ffffed103ee658b7 R12: 0000000000000000 R13: ffffffffc155a778 R14: ffffffffc155a4a8 R15: 0000000000000003 FS: 00007fe71bee3700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a7334441a8 CR3: 00000001e9674003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? 0xffffffffc1508000 ? 0xffffffffc1508000 do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe71bee2c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007fe71bee2c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe71bee36bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: pcd(+) paride solos_pci atm ts_fsm rtc_mt6397 mac80211 nhc_mobility nhc_udp nhc_ipv6 nhc_hop nhc_dest nhc_fragment nhc_routing 6lowpan rtc_cros_ec memconsole intel_xhci_usb_role_switch roles rtc_wm8350 usbcore industrialio_triggered_buffer kfifo_buf industrialio asc7621 dm_era dm_persistent_data dm_bufio dm_mod tpm gnss_ubx gnss_serial serdev gnss max2165 cpufreq_dt hid_penmount hid menf21bmc_wdt rc_core n_tracesink ide_gd_mod cdns_csi2tx v4l2_fwnode videodev media pinctrl_lewisburg pinctrl_intel iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 crypto_simd ide_pci_generic piix input_leds cryptd glue_helper psmouse ide_core intel_agp serio_raw intel_gtt ata_generic i2c_piix4 agpgart pata_acpi parport_pc parport floppy rtc_cmos sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: bmc150_magn] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace d873691c3cd69f56 ]--- If alloc_disk fails in pcd_init_units, cd->disk will be NULL, however in pcd_detect and pcd_exit, it's not check this before free.It may result a NULL pointer dereference. Also when register_blkdev failed, blk_cleanup_queue() and blk_mq_free_tag_set() should be called to free resources. Reported-by: Hulk Robot Fixes: 81b74ac68c28 ("paride/pcd: cleanup queues when detection fails") Signed-off-by: YueHaibing Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/paride/pcd.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 377a694dc228..6d415b20fb70 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -314,6 +314,7 @@ static void pcd_init_units(void) disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops, 1, BLK_MQ_F_SHOULD_MERGE); if (IS_ERR(disk->queue)) { + put_disk(disk); disk->queue = NULL; continue; } @@ -750,6 +751,8 @@ static int pcd_detect(void) printk("%s: No CD-ROM drive found\n", name); for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; blk_cleanup_queue(cd->disk->queue); cd->disk->queue = NULL; blk_mq_free_tag_set(&cd->tag_set); @@ -1010,8 +1013,14 @@ static int __init pcd_init(void) pcd_probe_capabilities(); if (register_blkdev(major, name)) { - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) + for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; + + blk_cleanup_queue(cd->disk->queue); + blk_mq_free_tag_set(&cd->tag_set); put_disk(cd->disk); + } return -EBUSY; } @@ -1032,6 +1041,9 @@ static void __exit pcd_exit(void) int unit; for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; + if (cd->present) { del_gendisk(cd->disk); pi_release(cd->pi); -- cgit v1.2.3 From e4abcebedac3415cf347e95749209a4a7b6f3074 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 20 Apr 2019 09:17:05 +0200 Subject: Linux 5.0.9 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f7666051de66..ef192ca04330 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 8 +SUBLEVEL = 9 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3 From 7e2c712830d63c3f8c3691829cd19a40295856f1 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 12 Apr 2019 15:04:10 +0200 Subject: bonding: fix event handling for stacked bonds [ Upstream commit 92480b3977fd3884649d404cbbaf839b70035699 ] When a bond is enslaved to another bond, bond_netdev_event() only handles the event as if the bond is a master, and skips treating the bond as a slave. This leads to a refcount leak on the slave, since we don't remove the adjacency to its master and the master holds a reference on the slave. Reproducer: ip link add bondL type bond ip link add bondU type bond ip link set bondL master bondU ip link del bondL No "Fixes:" tag, this code is older than git history. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 537c90c8eb0a..f89fc6ea6078 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3214,8 +3214,12 @@ static int bond_netdev_event(struct notifier_block *this, return NOTIFY_DONE; if (event_dev->flags & IFF_MASTER) { + int ret; + netdev_dbg(event_dev, "IFF_MASTER\n"); - return bond_master_netdev_event(event, event_dev); + ret = bond_master_netdev_event(event, event_dev); + if (ret != NOTIFY_DONE) + return ret; } if (event_dev->flags & IFF_SLAVE) { -- cgit v1.2.3 From b633f6580b6265d4d160cbfad39bd3abbfda0196 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Mon, 8 Apr 2019 19:45:27 -0400 Subject: failover: allow name change on IFF_UP slave interfaces [ Upstream commit 8065a779f17e94536a1c4dcee4f9d88011672f97 ] When a netdev appears through hot plug then gets enslaved by a failover master that is already up and running, the slave will be opened right away after getting enslaved. Today there's a race that userspace (udev) may fail to rename the slave if the kernel (net_failover) opens the slave earlier than when the userspace rename happens. Unlike bond or team, the primary slave of failover can't be renamed by userspace ahead of time, since the kernel initiated auto-enslavement is unable to, or rather, is never meant to be synchronized with the rename request from userspace. As the failover slave interfaces are not designed to be operated directly by userspace apps: IP configuration, filter rules with regard to network traffic passing and etc., should all be done on master interface. In general, userspace apps only care about the name of master interface, while slave names are less important as long as admin users can see reliable names that may carry other information describing the netdev. For e.g., they can infer that "ens3nsby" is a standby slave of "ens3", while for a name like "eth0" they can't tell which master it belongs to. Historically the name of IFF_UP interface can't be changed because there might be admin script or management software that is already relying on such behavior and assumes that the slave name can't be changed once UP. But failover is special: with the in-kernel auto-enslavement mechanism, the userspace expectation for device enumeration and bring-up order is already broken. Previously initramfs and various userspace config tools were modified to bypass failover slaves because of auto-enslavement and duplicate MAC address. Similarly, in case that users care about seeing reliable slave name, the new type of failover slaves needs to be taken care of specifically in userspace anyway. It's less risky to lift up the rename restriction on failover slave which is already UP. Although it's possible this change may potentially break userspace component (most likely configuration scripts or management software) that assumes slave name can't be changed while UP, it's relatively a limited and controllable set among all userspace components, which can be fixed specifically to listen for the rename events on failover slaves. Userspace component interacting with slaves is expected to be changed to operate on failover master interface instead, as the failover slave is dynamic in nature which may come and go at any point. The goal is to make the role of failover slaves less relevant, and userspace components should only deal with failover master in the long run. Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module") Signed-off-by: Si-Wei Liu Reviewed-by: Liran Alon Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 16 +++++++++++++++- net/core/failover.c | 6 +++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 848b54b7ec91..7df56decae37 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1484,6 +1484,7 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device + * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1516,6 +1517,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, + IFF_LIVE_RENAME_OK = 1<<30, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1547,6 +1549,7 @@ enum netdev_priv_flags { #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER +#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK /** * struct net_device - The DEVICE structure. diff --git a/net/core/dev.c b/net/core/dev.c index 12824e007e06..7277dd393c00 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *dev, const char *newname) BUG_ON(!dev_net(dev)); net = dev_net(dev); - if (dev->flags & IFF_UP) + + /* Some auto-enslaved devices e.g. failover slaves are + * special, as userspace might rename the device after + * the interface had been brought up and running since + * the point kernel initiated auto-enslavement. Allow + * live name change even when these slave devices are + * up and running. + * + * Typically, users of these auto-enslaving devices + * don't actually care about slave name change, as + * they are supposed to operate on master interface + * directly. + */ + if (dev->flags & IFF_UP && + likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK))) return -EBUSY; write_seqcount_begin(&devnet_rename_seq); diff --git a/net/core/failover.c b/net/core/failover.c index 4a92a98ccce9..b5cd3c727285 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -80,14 +80,14 @@ static int failover_slave_register(struct net_device *slave_dev) goto err_upper_link; } - slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; + slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); err_upper_link: netdev_rx_handler_unregister(slave_dev); done: @@ -121,7 +121,7 @@ int failover_slave_unregister(struct net_device *slave_dev) netdev_rx_handler_unregister(slave_dev); netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) -- cgit v1.2.3 From 11ba95c4503b09247b6940280aecd975ab842e4c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 15 Apr 2019 15:57:23 -0500 Subject: net: atm: Fix potential Spectre v1 vulnerabilities [ Upstream commit 899537b73557aafbdd11050b501cf54b4f5c45af ] arg is controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/atm/lec.c:715 lec_mcast_attach() warn: potential spectre issue 'dev_lec' [r] (local cap) Fix this by sanitizing arg before using it to index dev_lec. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/atm/lec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index d7f5cf5b7594..ad4f829193f0 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + if (arg < 0 || arg >= MAX_LEC_ITF) + return -EINVAL; + arg = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); @@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) i = arg; if (arg >= MAX_LEC_ITF) return -EINVAL; + i = array_index_nospec(arg, MAX_LEC_ITF); if (!dev_lec[i]) { int size; -- cgit v1.2.3 From 88c58435304bc68a549b8a4db51b35f7dd887049 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 11 Apr 2019 13:56:39 +0300 Subject: net: bridge: fix per-port af_packet sockets [ Upstream commit 3b2e2904deb314cc77a2192f506f2fd44e3d10d0 ] When the commit below was introduced it changed two visible things: - the skb was no longer passed through the protocol handlers with the original device - the skb was passed up the stack with skb->dev = bridge The first change broke af_packet sockets on bridge ports. For example we use them for hostapd which listens for ETH_P_PAE packets on the ports. We discussed two possible fixes: - create a clone and pass it through NF_HOOK(), act on the original skb based on the result - somehow signal to the caller from the okfn() that it was called, meaning the skb is ok to be passed, which this patch is trying to implement via returning 1 from the bridge link-local okfn() Note that we rely on the fact that NF_QUEUE/STOLEN would return 0 and drop/error would return < 0 thus the okfn() is called only when the return was 1, so we signal to the caller that it was called by preserving the return value from nf_hook(). Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_input.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ea7e56119c1..ba303ee99b9b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -197,13 +197,10 @@ static void __br_handle_local_finish(struct sk_buff *skb) /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_bridge_port *p = br_port_get_rcu(skb->dev); - __br_handle_local_finish(skb); - BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; - br_pass_frame_up(skb); - return 0; + /* return 1 to signal the okfn() was called so it's ok to use the skb */ + return 1; } /* @@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } - /* Deliver packet to local host only */ - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), - NULL, skb, skb->dev, NULL, br_handle_local_finish); - return RX_HANDLER_CONSUMED; + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) + * Thus return 1 from the okfn() to signal the skb is ok to pass + */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish) == 1) { + return RX_HANDLER_PASS; + } else { + return RX_HANDLER_CONSUMED; + } } forward: -- cgit v1.2.3 From 88f561ab1bd4915b67cadd960c7a7339473d3ed6 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 11 Apr 2019 15:08:25 +0300 Subject: net: bridge: multicast: use rcu to access port list from br_multicast_start_querier [ Upstream commit c5b493ce192bd7a4e7bd073b5685aad121eeef82 ] br_multicast_start_querier() walks over the port list but it can be called from a timer with only multicast_lock held which doesn't protect the port list, so use RCU to walk over it. Fixes: c83b8fab06fc ("bridge: Restart queries when last querier expires") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index e4777614a8a0..61ff0d497da6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1916,7 +1916,8 @@ static void br_multicast_start_querier(struct net_bridge *br, __br_multicast_open(br, query); - list_for_each_entry(port, &br->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; @@ -1928,6 +1929,7 @@ static void br_multicast_start_querier(struct net_bridge *br, br_multicast_enable(&port->ip6_own_query); #endif } + rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val) -- cgit v1.2.3 From a7a3382b15648c792264f664eb2d4cecf9c622d5 Mon Sep 17 00:00:00 2001 From: Andy Duan Date: Tue, 9 Apr 2019 03:40:56 +0000 Subject: net: fec: manage ahb clock in runtime pm [ Upstream commit d7c3a206e6338e4ccdf030719dec028e26a521d5 ] Some SOC like i.MX6SX clock have some limits: - ahb clock should be disabled before ipg. - ahb and ipg clocks are required for MAC MII bus. So, move the ahb clock to runtime management together with ipg clock. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 697c2427f2b7..a96ad20ee484 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1840,13 +1840,9 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) int ret; if (enable) { - ret = clk_prepare_enable(fep->clk_ahb); - if (ret) - return ret; - ret = clk_prepare_enable(fep->clk_enet_out); if (ret) - goto failed_clk_enet_out; + return ret; if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1866,7 +1862,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) phy_reset_after_clk_enable(ndev->phydev); } else { - clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1885,8 +1880,6 @@ failed_clk_ref: failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); -failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ahb); return ret; } @@ -3470,6 +3463,9 @@ fec_probe(struct platform_device *pdev) ret = clk_prepare_enable(fep->clk_ipg); if (ret) goto failed_clk_ipg; + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + goto failed_clk_ahb; fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { @@ -3563,6 +3559,9 @@ failed_reset: pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); failed_regulator: + clk_disable_unprepare(fep->clk_ahb); +failed_clk_ahb: + clk_disable_unprepare(fep->clk_ipg); failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: @@ -3686,6 +3685,7 @@ static int __maybe_unused fec_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); return 0; @@ -3695,8 +3695,20 @@ static int __maybe_unused fec_runtime_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + int ret; - return clk_prepare_enable(fep->clk_ipg); + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + return ret; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + + return 0; + +failed_clk_ipg: + clk_disable_unprepare(fep->clk_ahb); + return ret; } static const struct dev_pm_ops fec_pm_ops = { -- cgit v1.2.3 From 49f472ba481bcbd6f0c98345a08876b01a0f7b7d Mon Sep 17 00:00:00 2001 From: Yuya Kusakabe Date: Tue, 16 Apr 2019 10:22:28 +0900 Subject: net: Fix missing meta data in skb with vlan packet [ Upstream commit d85e8be2a5a02869f815dd0ac2d743deb4cd7957 ] skb_reorder_vlan_header() should move XDP meta data with ethernet header if XDP meta data exists. Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") Signed-off-by: Yuya Kusakabe Signed-off-by: Takeru Hayasaka Co-developed-by: Takeru Hayasaka Reviewed-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef2cd5712098..40796b8bf820 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { - int mac_len; + int mac_len, meta_len; + void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); @@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } + + meta_len = skb_metadata_len(skb); + if (meta_len) { + meta = skb_metadata_end(skb) - meta_len; + memmove(meta + VLAN_HLEN, meta, meta_len); + } + skb->mac_header += VLAN_HLEN; return skb; } -- cgit v1.2.3 From 1274905c8e1b9cfb631fdb428307a6e55161d785 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 9 Apr 2019 11:47:20 +0200 Subject: net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv [ Upstream commit 988dc4a9a3b66be75b30405a5494faf0dc7cffb6 ] gue tunnels run iptunnel_pull_offloads on received skbs. This can determine a possible use-after-free accessing guehdr pointer since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device) Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fou.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 79e98e21cdd7..12ce6c526d72 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) struct guehdr *guehdr; void *data; u16 doffset = 0; + u8 proto_ctype; if (!fou) return 1; @@ -212,13 +213,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(guehdr->control)) return gue_control_message(skb, guehdr); + proto_ctype = guehdr->proto_ctype; __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); if (iptunnel_pull_offloads(skb)) goto drop; - return -guehdr->proto_ctype; + return -proto_ctype; drop: kfree_skb(skb); -- cgit v1.2.3 From 8aa965e92750828202567a26a0072f596d5e6d99 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Apr 2019 10:55:20 -0700 Subject: tcp: tcp_grow_window() needs to respect tcp_space() [ Upstream commit 50ce163a72d817a99e8974222dcf2886d5deb1ae ] For some reason, tcp_grow_window() correctly tests if enough room is present before attempting to increase tp->rcv_ssthresh, but does not prevent it to grow past tcp_space() This is causing hard to debug issues, like failing the (__tcp_select_window(sk) >= tp->rcv_wnd) test in __tcp_ack_snd_check(), causing ACK delays and possibly slow flows. Depending on tcp_rmem[2], MTU, skb->len/skb->truesize ratio, we can see the problem happening on "netperf -t TCP_RR -- -r 2000,2000" after about 60 round trips, when the active side no longer sends immediate acks. This bug predates git history. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Wei Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7b1ef897b398..95b2e31fff08 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + int room; + + room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; /* Check #1 */ - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_under_memory_pressure(sk)) { + if (room > 0 && !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) if (incr) { incr = max_t(int, incr, 2 * skb->len); - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, - tp->window_clamp); + tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } } -- cgit v1.2.3 From aad7db2b23c1275360ba8f5dff32ab36f8428bf1 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 8 Apr 2019 16:45:17 +0800 Subject: team: set slave to promisc if team is already in promisc mode [ Upstream commit 43c2adb9df7ddd6560fd3546d925b42cef92daa0 ] After adding a team interface to bridge, the team interface will enter promisc mode. Then if we add a new slave to team0, the slave will keep promisc off. Fix it by setting slave to promisc on if team master is already in promisc mode, also do the same for allmulti. v2: add promisc and allmulti checking when delete ports Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6ce3f666d142..1283632091d5 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1247,6 +1247,23 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_option_port_add; } + /* set promiscuity level to new slave */ + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(port_dev, 1); + if (err) + goto err_set_slave_promisc; + } + + /* set allmulti level to new slave */ + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(port_dev, 1); + if (err) { + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(port_dev, -1); + goto err_set_slave_promisc; + } + } + netif_addr_lock_bh(dev); dev_uc_sync_multiple(port_dev, dev); dev_mc_sync_multiple(port_dev, dev); @@ -1263,6 +1280,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return 0; +err_set_slave_promisc: + __team_option_inst_del_port(team, port); + err_option_port_add: team_upper_dev_unlink(team, port); @@ -1308,6 +1328,12 @@ static int team_port_del(struct team *team, struct net_device *port_dev) team_port_disable(team, port); list_del_rcu(&port->list); + + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(port_dev, -1); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(port_dev, -1); + team_upper_dev_unlink(team, port); netdev_rx_handler_unregister(port_dev); team_port_disable_netpoll(port); -- cgit v1.2.3 From e35fa001678806e1424c7469bebeced5c6c96478 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Tue, 9 Apr 2019 14:59:24 +0700 Subject: tipc: missing entries in name table of publications [ Upstream commit d1841533e54876f152a30ac398a34f47ad6590b1 ] When binding multiple services with specific type 1Ki, 2Ki.., this leads to some entries in the name table of publications missing when listed out via 'tipc name show'. The problem is at identify zero last_type conditional provided via netlink. The first is initial 'type' when starting name table dummping. The second is continuously with zero type (node state service type). Then, lookup function failure to finding node state service type in next iteration. To solve this, adding more conditional to marked as dirty type and lookup correct service type for the next iteration instead of select the first service as initial 'type' zero. Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/name_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bff241f03525..89993afe0fbd 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, for (; i < TIPC_NAMETBL_SIZE; i++) { head = &tn->nametbl->services[i]; - if (*last_type) { + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { service = tipc_service_find(net, *last_type); if (!service) return -EPIPE; -- cgit v1.2.3 From 8cc6c4767e499c2a3e50b90cee4036a23839d7c9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 9 Apr 2019 12:10:25 +0800 Subject: vhost: reject zero size iova range [ Upstream commit 813dbeb656d6c90266f251d8bd2b02d445afa63f ] We used to accept zero size iova range which will lead a infinite loop in translate_desc(). Fixing this by failing the request in this case. Reported-by: syzbot+d21e6e297322a900c128@syzkaller.appspotmail.com Fixes: 6b1e6cc7 ("vhost: new device IOTLB API") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a2e5dc7716e2..674cfc5a4084 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -911,8 +911,12 @@ static int vhost_new_umem_range(struct vhost_umem *umem, u64 start, u64 size, u64 end, u64 userspace_addr, int perm) { - struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); + struct vhost_umem_node *tmp, *node; + if (!size) + return -EFAULT; + + node = kmalloc(sizeof(*node), GFP_ATOMIC); if (!node) return -ENOMEM; -- cgit v1.2.3 From 6bd1ee0a993fc9574ae43c1994c54a60cb23a380 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Fri, 12 Apr 2019 16:19:27 -0400 Subject: ipv4: recompile ip options in ipv4_link_failure [ Upstream commit ed0de45a1008991fdaa27a0152befcb74d126a8b ] Recompile IP options since IPCB may not be valid anymore when ipv4_link_failure is called from arp_error_report. Refer to the commit 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") and the commit before that (9ef6b42ad6fd) for a similar issue. Signed-off-by: Stephen Suryaputra Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e04cdb58a602..6bc427adcb06 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1186,8 +1186,16 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; + struct ip_options opt; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + /* Recompile ip options since IPCB may not be valid anymore. + */ + memset(&opt, 0, sizeof(opt)); + opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) + return; + + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); rt = skb_rtable(skb); if (rt) -- cgit v1.2.3 From ed112abe6795d646db5445fd19016afc30a6ba18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 13 Apr 2019 17:32:21 -0700 Subject: ipv4: ensure rcu_read_lock() in ipv4_link_failure() [ Upstream commit c543cb4a5f07e09237ec0fc2c60c9f131b2c79ad ] fib_compute_spec_dst() needs to be called under rcu protection. syzbot reported : WARNING: suspicious RCU usage 5.1.0-rc4+ #165 Not tainted include/linux/inetdevice.h:220 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by swapper/0/0: #0: 0000000051b67925 ((&n->timer)){+.-.}, at: lockdep_copy_map include/linux/lockdep.h:170 [inline] #0: 0000000051b67925 ((&n->timer)){+.-.}, at: call_timer_fn+0xda/0x720 kernel/time/timer.c:1315 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.1.0-rc4+ #165 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 lockdep_rcu_suspicious+0x153/0x15d kernel/locking/lockdep.c:5162 __in_dev_get_rcu include/linux/inetdevice.h:220 [inline] fib_compute_spec_dst+0xbbd/0x1030 net/ipv4/fib_frontend.c:294 spec_dst_fill net/ipv4/ip_options.c:245 [inline] __ip_options_compile+0x15a7/0x1a10 net/ipv4/ip_options.c:343 ipv4_link_failure+0x172/0x400 net/ipv4/route.c:1195 dst_link_failure include/net/dst.h:427 [inline] arp_error_report+0xd1/0x1c0 net/ipv4/arp.c:297 neigh_invalidate+0x24b/0x570 net/core/neighbour.c:995 neigh_timer_handler+0xc35/0xf30 net/core/neighbour.c:1081 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 invoke_softirq kernel/softirq.c:374 [inline] irq_exit+0x180/0x1d0 kernel/softirq.c:414 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Stephen Suryaputra Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6bc427adcb06..25d9bef27d03 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1185,14 +1185,20 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { - struct rtable *rt; struct ip_options opt; + struct rtable *rt; + int res; /* Recompile ip options since IPCB may not be valid anymore. */ memset(&opt, 0, sizeof(opt)); opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) + + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); -- cgit v1.2.3 From 6f180e411c3e551ef905c9424982d90b9bb26f6a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:12 +0000 Subject: mlxsw: spectrum_switchdev: Add MDB entries in prepare phase [ Upstream commit d4d0e40977ac450f32f2db5e4d8e23c9d2578899 ] The driver cannot guarantee in the prepare phase that it will be able to write an MDB entry to the device. In case the driver returned success during the prepare phase, but then failed to add the entry in the commit phase, a WARNING [1] will be generated by the switchdev core. Fix this by doing the work in the prepare phase instead. [1] [ 358.544486] swp12s0: Commit of object (id=2) failed. [ 358.550061] WARNING: CPU: 0 PID: 30 at net/switchdev/switchdev.c:281 switchdev_port_obj_add_now+0x9b/0xe0 [ 358.560754] CPU: 0 PID: 30 Comm: kworker/0:1 Not tainted 5.0.0-custom-13382-gf2449babf221 #1350 [ 358.570472] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 358.580582] Workqueue: events switchdev_deferred_process_work [ 358.587001] RIP: 0010:switchdev_port_obj_add_now+0x9b/0xe0 ... [ 358.614109] RSP: 0018:ffffa6b900d6fe18 EFLAGS: 00010286 [ 358.619943] RAX: 0000000000000000 RBX: ffff8b00797ff000 RCX: 0000000000000000 [ 358.627912] RDX: ffff8b00b7a1d4c0 RSI: ffff8b00b7a152e8 RDI: ffff8b00b7a152e8 [ 358.635881] RBP: ffff8b005c3f5bc0 R08: 000000000000022b R09: 0000000000000000 [ 358.643850] R10: 0000000000000000 R11: ffffa6b900d6fcc8 R12: 0000000000000000 [ 358.651819] R13: dead000000000100 R14: ffff8b00b65a23c0 R15: 0ffff8b00b7a2200 [ 358.659790] FS: 0000000000000000(0000) GS:ffff8b00b7a00000(0000) knlGS:0000000000000000 [ 358.668820] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 358.675228] CR2: 00007f00aad90de0 CR3: 00000001ca80d000 CR4: 00000000001006f0 [ 358.683188] Call Trace: [ 358.685918] switchdev_port_obj_add_deferred+0x13/0x60 [ 358.691655] switchdev_deferred_process+0x6b/0xf0 [ 358.696907] switchdev_deferred_process_work+0xa/0x10 [ 358.702548] process_one_work+0x1f5/0x3f0 [ 358.707022] worker_thread+0x28/0x3c0 [ 358.711099] ? process_one_work+0x3f0/0x3f0 [ 358.715768] kthread+0x10d/0x130 [ 358.719369] ? __kthread_create_on_node+0x180/0x180 [ 358.724815] ret_from_fork+0x35/0x40 Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel Reported-by: Alex Kushnarov Tested-by: Alex Kushnarov Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c772109b638d..f5a10e286400 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1654,7 +1654,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_index; int err = 0; - if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_commit(trans)) return 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); -- cgit v1.2.3 From 565e18a0be1051935f7e36cf470c87c6badbc36e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:13 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for EMAD workqueue [ Upstream commit a8c133b06183c529c51cd0d54eb57d6b7078370c ] The EMAD workqueue is used to handle retransmission of EMAD packets that contain configuration data for the device's firmware. Given the workers need to allocate these packets and that the code is not called as part of memory reclaim path, remove the WQ_MEM_RECLAIM flag. Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index ddedf8ab5b64..3be9da4b8f73 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; - emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); if (!emad_wq) return -ENOMEM; mlxsw_core->emad_wq = emad_wq; -- cgit v1.2.3 From 87ffb893490abd1ce05c4cf84a1a00b32e143b70 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:14 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw ordered workqueue [ Upstream commit 4af0699782e2cc7d0d89db9eb6f8844dd3df82dc ] The ordered workqueue is used to offload various objects such as routes and neighbours in the order they are notified. It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. This can also result in a warning [1], if a worker tries to flush a non-WQ_MEM_RECLAIM workqueue. [1] [97703.542861] workqueue: WQ_MEM_RECLAIM mlxsw_core_ordered:mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] is flushing !WQ_MEM_RECLAIM events:rht_deferred_worker [97703.542884] WARNING: CPU: 1 PID: 32492 at kernel/workqueue.c:2605 check_flush_dependency+0xb5/0x130 ... [97703.542988] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 [97703.543049] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] [97703.543061] RIP: 0010:check_flush_dependency+0xb5/0x130 ... [97703.543071] RSP: 0018:ffffb3f08137bc00 EFLAGS: 00010086 [97703.543076] RAX: 0000000000000000 RBX: ffff96e07740ae00 RCX: 0000000000000000 [97703.543080] RDX: 0000000000000094 RSI: ffffffff82dc1934 RDI: 0000000000000046 [97703.543084] RBP: ffffb3f08137bc20 R08: ffffffff82dc18a0 R09: 00000000000225c0 [97703.543087] R10: 0000000000000000 R11: 0000000000007eec R12: ffffffff816e4ee0 [97703.543091] R13: ffff96e06f6a5c00 R14: ffff96e077ba7700 R15: ffffffff812ab0c0 [97703.543097] FS: 0000000000000000(0000) GS:ffff96e077a80000(0000) knlGS:0000000000000000 [97703.543101] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [97703.543104] CR2: 00007f8cd135b280 CR3: 00000001e860e003 CR4: 00000000003606e0 [97703.543109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [97703.543112] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [97703.543115] Call Trace: [97703.543129] __flush_work+0xbd/0x1e0 [97703.543137] ? __cancel_work_timer+0x136/0x1b0 [97703.543145] ? pwq_dec_nr_in_flight+0x49/0xa0 [97703.543154] __cancel_work_timer+0x136/0x1b0 [97703.543175] ? mlxsw_reg_trans_bulk_wait+0x145/0x400 [mlxsw_core] [97703.543184] cancel_work_sync+0x10/0x20 [97703.543191] rhashtable_free_and_destroy+0x23/0x140 [97703.543198] rhashtable_destroy+0xd/0x10 [97703.543254] mlxsw_sp_fib_destroy+0xb1/0xf0 [mlxsw_spectrum] [97703.543310] mlxsw_sp_vr_put+0xa8/0xc0 [mlxsw_spectrum] [97703.543364] mlxsw_sp_fib_node_put+0xbf/0x140 [mlxsw_spectrum] [97703.543418] ? mlxsw_sp_fib6_entry_destroy+0xe8/0x110 [mlxsw_spectrum] [97703.543475] mlxsw_sp_router_fib6_event_work+0x6cd/0x7f0 [mlxsw_spectrum] [97703.543484] process_one_work+0x1fd/0x400 [97703.543493] worker_thread+0x34/0x410 [97703.543500] kthread+0x121/0x140 [97703.543507] ? process_one_work+0x400/0x400 [97703.543512] ? kthread_park+0x90/0x90 [97703.543523] ret_from_fork+0x35/0x40 Fixes: a3832b31898f ("mlxsw: core: Create an ordered workqueue for FIB offload") Signed-off-by: Ido Schimmel Reported-by: Semion Lisyansky Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 3be9da4b8f73..0cd58359e0c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1915,7 +1915,7 @@ static int __init mlxsw_core_module_init(void) mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); if (!mlxsw_wq) return -ENOMEM; - mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM, + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, mlxsw_core_driver_name); if (!mlxsw_owq) { err = -ENOMEM; -- cgit v1.2.3 From 61647856f32ae24db9dadb2c61457adb327df281 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:15 +0000 Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw workqueue [ Upstream commit b442fed1b724af0de087912a5718ddde1b87acbb ] The workqueue is used to periodically update the networking stack about activity / statistics of various objects such as neighbours and TC actions. It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. Fixes: 3d5479e92087 ("mlxsw: core: Remove deprecated create_workqueue") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 0cd58359e0c3..fc643fde5a4a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1912,7 +1912,7 @@ static int __init mlxsw_core_module_init(void) { int err; - mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); if (!mlxsw_wq) return -ENOMEM; mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, -- cgit v1.2.3 From 9be4e8224cd6ba2ae515a3d6c0ad2281b1dcdb22 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:15 +0000 Subject: mlxsw: spectrum_router: Do not check VRF MAC address [ Upstream commit 972fae683cbad5cf348268e76abc6d55cfb3ba87 ] Commit 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") enabled the driver to veto router interface (RIF) MAC addresses that it cannot support. This check should only be performed for interfaces for which the driver actually configures a RIF. A VRF upper is not one of them, so ignore it. Without this patch it is not possible to set an IP address on the VRF device and use it as a loopback. Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Tested-by: Alexander Petrovskiy Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 98e5ffd71b91..2f6afbfd689f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6745,7 +6745,7 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, /* A RIF is not created for macvlan netdevs. Their MAC is used to * populate the FDB */ - if (netif_is_macvlan(dev)) + if (netif_is_macvlan(dev) || netif_is_l3_master(dev)) return 0; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { -- cgit v1.2.3 From a902fe6dd585a75168bcd8b560a498492f03e262 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 11 Apr 2019 12:26:32 +0200 Subject: net: thunderx: raise XDP MTU to 1508 [ Upstream commit 5ee15c101f29e0093ffb5448773ccbc786eb313b ] The thunderx driver splits frames bigger than 1530 bytes to multiple pages, making impossible to run an eBPF program on it. This leads to a maximum MTU of 1508 if QinQ is in use. The thunderx driver forbids to load an eBPF program if the MTU is higher than 1500 bytes. Raise the limit to 1508 so it is possible to use L2 protocols which need some more headroom. Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") Signed-off-by: Matteo Croce Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index d4ee9f9c8c34..4fed3e88874c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -32,6 +32,13 @@ #define DRV_NAME "nicvf" #define DRV_VERSION "1.0" +/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs + * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed + * this value, keeping headroom for the 14 byte Ethernet header and two + * VLAN tags (for QinQ) + */ +#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2) + /* Supported devices */ static const struct pci_device_id nicvf_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, @@ -1830,8 +1837,10 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) bool bpf_attached = false; int ret = 0; - /* For now just support only the usual MTU sized frames */ - if (prog && (dev->mtu > 1500)) { + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (prog && dev->mtu > MAX_XDP_MTU) { netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", dev->mtu); return -EOPNOTSUPP; -- cgit v1.2.3 From ae924f4e1c4598b3d6be8c48e7f4f025e91f3661 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 11 Apr 2019 12:26:33 +0200 Subject: net: thunderx: don't allow jumbo frames with XDP [ Upstream commit 1f227d16083b2e280b7dde4ca78883d75593f2fd ] The thunderx driver forbids to load an eBPF program if the MTU is too high, but this can be circumvented by loading the eBPF, then raising the MTU. Fix this by limiting the MTU if an eBPF program is already loaded. Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") Signed-off-by: Matteo Croce Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 4fed3e88874c..36263c77df46 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1589,6 +1589,15 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) struct nicvf *nic = netdev_priv(netdev); int orig_mtu = netdev->mtu; + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) { + netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + netdev->mtu); + return -EINVAL; + } + netdev->mtu = new_mtu; if (!netif_running(netdev)) -- cgit v1.2.3 From b30d9457b1dc79e1cad6a3ebeea9728c8e905c9d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:30 -0700 Subject: net/tls: fix the IV leaks [ Upstream commit 5a03bc73abed6ae196c15e9950afde19d48be12c ] Commit f66de3ee2c16 ("net/tls: Split conf to rx + tx") made freeing of IV and record sequence number conditional to SW path only, but commit e8f69799810c ("net/tls: Add generic NIC offload infrastructure") also allocates that state for the device offload configuration. Remember to free it. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index d753e362d2d9..4a7f4da69b34 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock); static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) + if (ctx->tx_conf == TLS_HW) { kfree(tls_offload_ctx_tx(ctx)); + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); -- cgit v1.2.3 From 96418eaa8e6aba1f4bdbdf058ea5f1d085599fd7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:31 -0700 Subject: net/tls: don't leak partially sent record in device mode [ Upstream commit 35b71a34ada62c9573847a324bf06a133fe11b11 ] David reports that tls triggers warnings related to sk->sk_forward_alloc not being zero at destruction time: WARNING: CPU: 5 PID: 6831 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110 WARNING: CPU: 5 PID: 6831 at net/ipv4/af_inet.c:160 inet_sock_destruct+0x15b/0x170 When sender fills up the write buffer and dies from SIGPIPE. This is due to the device implementation not cleaning up the partially_sent_record. This is because commit a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") moved the partial record cleanup to the SW-only path. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: David Beckett Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tls.h | 2 ++ net/tls/tls_device.c | 7 +++++++ net/tls/tls_main.c | 22 ++++++++++++++++++++++ net/tls/tls_sw.c | 15 +-------------- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 1486b60c4de8..02418e72b8b2 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -289,6 +289,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_device_sk_destruct(struct sock *sk); +void tls_device_free_resources_tx(struct sock *sk); void tls_device_init(void); void tls_device_cleanup(void); int tls_tx_records(struct sock *sk, int flags); @@ -312,6 +313,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx, int flags); int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, int flags); +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx); int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, int flags, long *timeo); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4a7f4da69b34..4b5ff3d44912 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -219,6 +219,13 @@ void tls_device_sk_destruct(struct sock *sk) } EXPORT_SYMBOL(tls_device_sk_destruct); +void tls_device_free_resources_tx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + tls_free_partial_record(sk, tls_ctx); +} + static void tls_append_frag(struct tls_record_info *record, struct page_frag *pfrag, int size) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 78cb4a584080..8f3df7da99d9 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -220,6 +220,26 @@ int tls_push_pending_closed_record(struct sock *sk, return tls_ctx->push_pending_record(sk, flags); } +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx) +{ + struct scatterlist *sg; + + sg = ctx->partially_sent_record; + if (!sg) + return false; + + while (1) { + put_page(sg_page(sg)); + sk_mem_uncharge(sk, sg->length); + + if (sg_is_last(sg)) + break; + sg++; + } + ctx->partially_sent_record = NULL; + return true; +} + static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); @@ -278,6 +298,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); + } else if (ctx->tx_conf == TLS_HW) { + tls_device_free_resources_tx(sk); } if (ctx->rx_conf == TLS_SW) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index bf5b54b513bc..d2d4f7c0d4be 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1804,20 +1804,7 @@ void tls_sw_free_resources_tx(struct sock *sk) /* Free up un-sent records in tx_list. First, free * the partially sent record if any at head of tx_list. */ - if (tls_ctx->partially_sent_record) { - struct scatterlist *sg = tls_ctx->partially_sent_record; - - while (1) { - put_page(sg_page(sg)); - sk_mem_uncharge(sk, sg->length); - - if (sg_is_last(sg)) - break; - sg++; - } - - tls_ctx->partially_sent_record = NULL; - + if (tls_free_partial_record(sk, tls_ctx)) { rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); list_del(&rec->list); -- cgit v1.2.3 From 5e7171bf8db70e2f0de91d47e206bf2e0e7805cb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:32 -0700 Subject: net: strparser: partially revert "strparser: Call skb_unclone conditionally" [ Upstream commit 4a9c2e3746e6151fd5d077259d79ce9ca86d47d7 ] This reverts the first part of commit 4e485d06bb8c ("strparser: Call skb_unclone conditionally"). To build a message with multiple fragments we need our own root of frag_list. We can't simply use the frag_list of orig_skb, because it will lead to linking all orig_skbs together creating very long frag chains, and causing stack overflow on kfree_skb() (which is called recursively on the frag_lists). BUG: stack guard page was hit at 00000000d40fad41 (stack is 0000000029dde9f4..000000008cce03d5) kernel stack overflow (double-fault): 0000 [#1] PREEMPT SMP RIP: 0010:free_one_page+0x2b/0x490 Call Trace: __free_pages_ok+0x143/0x2c0 skb_release_data+0x8e/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 [...] skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 __kfree_skb+0xe/0x20 tcp_disconnect+0xd6/0x4d0 tcp_close+0xf4/0x430 ? tcp_check_oom+0xf0/0xf0 tls_sk_proto_close+0xe4/0x1e0 [tls] inet_release+0x36/0x60 __sock_release+0x37/0xa0 sock_close+0x11/0x20 __fput+0xa2/0x1d0 task_work_run+0x89/0xb0 exit_to_usermode_loop+0x9a/0xa0 do_syscall_64+0xc0/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Let's leave the second unclone conditional, as I'm not entirely sure what is its purpose :) Fixes: 4e485d06bb8c ("strparser: Call skb_unclone conditionally") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/strparser/strparser.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index da1a676860ca..0f4e42792878 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ - if (skb_has_frag_list(head)) { - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - STRP_STATS_INCR(strp->stats.mem_fail); - desc->error = err; - return 0; - } + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.mem_fail); + desc->error = err; + return 0; } if (unlikely(skb_shinfo(head)->frag_list)) { -- cgit v1.2.3 From adfc12451123298bc583b9922f5182ae8cf70e59 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 16:23:39 -0700 Subject: net/tls: fix build without CONFIG_TLS_DEVICE [ Upstream commit 903f1a187776bb8d79b13618ec05b25f86318885 ] buildbot noticed that TLS_HW is not defined if CONFIG_TLS_DEVICE=n. Wrap the cleanup branch into an ifdef, tls_device_free_resources_tx() wouldn't be compiled either in this case. Fixes: 35b71a34ada6 ("net/tls: don't leak partially sent record in device mode") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 8f3df7da99d9..96dbac91ac6e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -298,8 +298,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); +#ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); +#endif } if (ctx->rx_conf == TLS_SW) { -- cgit v1.2.3 From 0e4b3800c0699f3ccdda30019fd7e7295a697c1b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 16 Apr 2019 16:15:56 +0300 Subject: net: bridge: fix netlink export of vlan_stats_per_port option [ Upstream commit 600bea7dba1a72874ae0cd9bc66bf2abfe43b49d ] Since the introduction of the vlan_stats_per_port option the netlink export of it has been broken since I made a typo and used the ifla attribute instead of the bridge option to retrieve its state. Sysfs export is fine, only netlink export has been affected. Fixes: 9163a0fc1f0c0 ("net: bridge: add support for per-port vlan stats") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9c07591b0232..7104cf13da84 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) || nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT, - br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT))) + br_opt_get(br, BROPT_VLAN_STATS_PER_PORT))) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -- cgit v1.2.3 From 58f682514cb54cef47fcfcd9e0b49c2c6213e7eb Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 21 Mar 2019 19:07:20 -0700 Subject: net/mlx5e: XDP, Avoid checksum complete when XDP prog is loaded [ Upstream commit 5d0bb3bac4b9f6c22280b04545626fdfd99edc6b ] XDP programs might change packets data contents which will make the reported skb checksum (checksum complete) invalid. When XDP programs are loaded/unloaded set/clear rx RQs MLX5E_RQ_STATE_NO_CSUM_COMPLETE flag. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index e6099f51d25f..3b9e5f0d0212 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1665,7 +1665,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) struct mlx5e_channel *c; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) return 0; for (i = 0; i < channels->num; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 93e50ccd44c3..3ea541f30ee0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -950,7 +950,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)) + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f86e4804e83e..4cbf15da8eb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -753,7 +753,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) goto csum_unnecessary; /* CQE csum doesn't cover padding octets in short ethernet -- cgit v1.2.3 From 6daad2e30e0b1fd37eee9aab7163243511c0629b Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Fri, 29 Mar 2019 12:50:37 +0000 Subject: net/mlx5e: Protect against non-uplink representor for encap [ Upstream commit 5e0060b1491b299b1706414e61ede0b02265680e ] TC encap offload is supported only for the physical uplink representor. Fail for non uplink representor. Fixes: 3e621b19b0bb ("net/mlx5e: Support TC encapsulation offloads with upper devices") Signed-off-by: Dmytro Linkin Reviewed-by: Eli Britstein Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index f3c7ab6faea5..b8521e2f64ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -39,6 +39,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (!(mlx5e_eswitch_rep(*out_dev) && + mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + return -EOPNOTSUPP; + return 0; } -- cgit v1.2.3 From af294d42d43e906b4634aca06262fa6cb9ada582 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 31 Aug 2018 14:29:16 +0300 Subject: net/mlx5e: Switch to Toeplitz RSS hash by default [ Upstream commit 7ee2ace9c544a0886e02b54b625e521df8692d20 ] Although XOR hash function can perform very well on some special use cases, to align with all drivers, mlx5 driver should use Toeplitz hash by default. Toeplitz is more stable for the general use case and it is more standard and reliable. On top of that, since XOR (MLX5_RX_HASH_FN_INVERTED_XOR8) gives only a repeated 8 bits pattern. When used for udp tunneling RSS source port manipulation it results in fixed source port, which will cause bad RSS spread. Fixes: 2be6967cdbc9 ("net/mlx5e: Support ETH_RSS_HASH_XOR") Signed-off-by: Konstantin Khlebnikov Reviewed-by: Tariq Toukan Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3ea541f30ee0..0cb19e4dd439 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4574,7 +4574,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, { enum mlx5e_traffic_types tt; - rss_params->hfunc = ETH_RSS_HASH_XOR; + rss_params->hfunc = ETH_RSS_HASH_TOP; netdev_rss_key_fill(rss_params->toeplitz_hash_key, sizeof(rss_params->toeplitz_hash_key)); mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, -- cgit v1.2.3 From 3ce8793e03cdf6191430955adf11be1cd7331e0f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 12 Mar 2019 00:24:52 -0700 Subject: net/mlx5e: Rx, Fixup skb checksum for packets with tail padding [ Upstream commit 0aa1d18615c163f92935b806dcaff9157645233a ] When an ethernet frame with ip payload is padded, the padding octets are not covered by the hardware checksum. Prior to the cited commit, skb checksum was forced to be CHECKSUM_NONE when padding is detected. After it, the kernel will try to trim the padding bytes and subtract their checksum from skb->csum. In this patch we fixup skb->csum for any ip packet with tail padding of any size, if any padding found. FCS case is just one special case of this general purpose patch, hence, it is removed. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), Cc: Eric Dumazet Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 79 ++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 6 ++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 ++ 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 4cbf15da8eb3..31a7b3d8e5d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -713,17 +713,6 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) rq->stats->ecn_mark += !!rc; } -static u32 mlx5e_get_fcs(const struct sk_buff *skb) -{ - const void *fcs_bytes; - u32 _fcs_bytes; - - fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, - ETH_FCS_LEN, &_fcs_bytes); - - return __get_unaligned_cpu32(fcs_bytes); -} - static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { void *ip_p = skb->data + network_depth; @@ -734,6 +723,68 @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) +#define MAX_PADDING 8 + +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); +} + +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) +{ + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; + + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); +} + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -782,10 +833,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); - if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_block_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb), - skb->len - ETH_FCS_LEN); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); stats->csum_complete++; return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index d3fe48ff9da9..4461b44acafc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -59,6 +59,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, @@ -151,6 +153,8 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; @@ -1192,6 +1196,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index fe91ec06e3c7..714303bf0797 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,6 +71,8 @@ struct mlx5e_sw_stats { u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; u64 rx_xdp_redirect; @@ -181,6 +183,8 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; -- cgit v1.2.3 From acf4d270942ad4d5e03e56e6c265594f1dc99d41 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 25 Mar 2019 22:10:59 -0700 Subject: net/mlx5e: Rx, Check ip headers sanity [ Upstream commit 0318a7b7fcad9765931146efa7ca3a034194737c ] In the two places is_last_ethertype_ip is being called, the caller will be looking inside the ip header, to be safe, add ip{4,6} header sanity check. And return true only on valid ip headers, i.e: the whole header is contained in the linear part of the skb. Note: Such situation is very rare and hard to reproduce, since mlx5e allocates a large enough headroom to contain the largest header one can imagine. Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets") Reported-by: Cong Wang Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 31a7b3d8e5d3..1c7ef919f546 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -693,7 +693,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, { *proto = ((struct ethhdr *)skb->data)->h_proto; *proto = __vlan_get_protocol(skb, *proto, network_depth); - return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; } static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) -- cgit v1.2.3 From abe4a1328a7838685aa551c1529f9d01f1fafab1 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 31 Mar 2019 12:53:03 +0000 Subject: Revert "net/mlx5e: Enable reporting checksum unnecessary also for L3 packets" [ Upstream commit 8c8811d46d00d119ffbe039a6e52a0b504df1c2c ] This reverts commit b820e6fb0978f9c2ac438c199d2bb2f35950e9c9. Prior the commit we are reverting, checksum unnecessary was only set when both the L3 OK and L4 OK bits are set on the CQE. This caused packets of IP protocols such as SCTP which are not dealt by the current HW L4 parser (hence the L4 OK bit is not set, but the L4 header type none bit is set) to go through the checksum none code, where currently we wrongly report checksum unnecessary for them, a regression. Fix this by a revert. Note that on our usual track we report checksum complete, so the revert isn't expected to have any notable performance impact. Also, when we are not on the checksum complete track, the L4 protocols for which we report checksum none are not high performance ones, we will still report checksum unnecessary for UDP/TCP. Fixes: b820e6fb0978 ("net/mlx5e: Enable reporting checksum unnecessary also for L3 packets") Signed-off-by: Or Gerlitz Reported-by: Avi Urman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1c7ef919f546..2cbda8abd8b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -848,8 +848,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, csum_unnecessary: if (likely((cqe->hds_ip_ext & CQE_L3_OK) && - ((cqe->hds_ip_ext & CQE_L4_OK) || - (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) { + (cqe->hds_ip_ext & CQE_L4_OK))) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; -- cgit v1.2.3 From 1d9005b96dcec04ca244cf229c5e4b84f02504a4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Mar 2019 22:09:05 -0700 Subject: net/mlx5: FPGA, tls, hold rcu read lock a bit longer [ Upstream commit 31634bf5dcc418b5b2cacd954394c0c4620db6a2 ] To avoid use-after-free, hold the rcu read lock until we are done copying flow data into the command buffer. Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Reported-by: Eric Dumazet Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 8de64e88c670..08aa7266c8c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -217,22 +217,22 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, void *cmd; int ret; - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); - - if (!flow) { - WARN_ONCE(1, "Received NULL pointer for handle\n"); - return -EINVAL; - } - buf = kzalloc(size, GFP_ATOMIC); if (!buf) return -ENOMEM; cmd = (buf + 1); + rcu_read_lock(); + flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); + if (unlikely(!flow)) { + rcu_read_unlock(); + WARN_ONCE(1, "Received NULL pointer for handle\n"); + kfree(buf); + return -EINVAL; + } mlx5_fpga_tls_flow_to_cmd(flow, cmd); + rcu_read_unlock(); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); -- cgit v1.2.3 From d3697f88a2711638cf2b93c5fd3f7e3adeeaf690 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 8 Apr 2019 17:59:50 -0700 Subject: net/tls: prevent bad memory access in tls_is_sk_tx_device_offloaded() [ Upstream commit b4f47f3848eb70986f75d06112af7b48b7f5f462 ] Unlike '&&' operator, the '&' does not have short-circuit evaluation semantics. IOW both sides of the operator always get evaluated. Fix the wrong operator in tls_is_sk_tx_device_offloaded(), which would lead to out-of-bounds access for for non-full sockets. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index 02418e72b8b2..8b3d10917d99 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -366,7 +366,7 @@ tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) { #ifdef CONFIG_SOCK_VALIDATE_XMIT - return sk_fullsock(sk) & + return sk_fullsock(sk) && (smp_load_acquire(&sk->sk_validate_xmit_skb) == &tls_validate_xmit_skb); #else -- cgit v1.2.3 From 6ad8c35a70ccead9118f4431320a228de4c6af66 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Mar 2019 01:05:41 -0700 Subject: net/mlx5: FPGA, tls, idr remove on flow delete [ Upstream commit df3a8344d404a810b4aadbf19b08c8232fbaa715 ] Flow is kfreed on mlx5_fpga_tls_del_flow but kept in the idr data structure, this is risky and can cause use-after-free, since the idr_remove is delayed until tls_send_teardown_cmd completion. Instead of delaying idr_remove, in this patch we do it on mlx5_fpga_tls_del_flow, before actually kfree(flow). Added synchronize_rcu before kfree(flow) Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 43 ++++++++-------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 08aa7266c8c0..22a2ef111514 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, return ret; } -static void mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) +static void *mlx5_fpga_tls_release_swid(struct idr *idr, + spinlock_t *idr_spinlock, u32 swid) { unsigned long flags; + void *ptr; spin_lock_irqsave(idr_spinlock, flags); - idr_remove(idr, swid); + ptr = idr_remove(idr, swid); spin_unlock_irqrestore(idr_spinlock, flags); + return ptr; } static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, @@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, kfree(buf); } -struct mlx5_teardown_stream_context { - struct mlx5_fpga_tls_command_context cmd; - u32 swid; -}; - static void mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, struct mlx5_fpga_tls_command_context *cmd, struct mlx5_fpga_dma_buf *resp) { - struct mlx5_teardown_stream_context *ctx = - container_of(cmd, struct mlx5_teardown_stream_context, cmd); - if (resp) { u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); @@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, mlx5_fpga_err(fdev, "Teardown stream failed with syndrome = %d", syndrome); - else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx)) - mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr, - &fdev->tls->tx_idr_spinlock, - ctx->swid); - else - mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr, - &fdev->tls->rx_idr_spinlock, - ctx->swid); } mlx5_fpga_tls_put_command_ctx(cmd); } @@ -253,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow, u32 swid, gfp_t flags) { - struct mlx5_teardown_stream_context *ctx; + struct mlx5_fpga_tls_command_context *ctx; struct mlx5_fpga_dma_buf *buf; void *cmd; @@ -261,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, if (!ctx) return; - buf = &ctx->cmd.buf; + buf = &ctx->buf; cmd = (ctx + 1); MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); MLX5_SET(tls_cmd, cmd, swid, swid); @@ -272,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, buf->sg[0].data = cmd; buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - ctx->swid = swid; - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, mlx5_fpga_tls_teardown_completion); } @@ -283,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, struct mlx5_fpga_tls *tls = mdev->fpga->tls; void *flow; - rcu_read_lock(); if (direction_sx) - flow = idr_find(&tls->tx_idr, swid); + flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, + swid); else - flow = idr_find(&tls->rx_idr, swid); - - rcu_read_unlock(); + flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, + swid); if (!flow) { mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", @@ -297,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, return; } + synchronize_rcu(); /* before kfree(flow) */ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); } -- cgit v1.2.3 From da86299fcccb7cd1073e927068fc2f43fe5b9f8f Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Sun, 14 Apr 2019 14:21:29 -0700 Subject: route: Avoid crash from dereferencing NULL rt->from [ Upstream commit 9c69a13205151c0d801de9f9d83a818e6e8f60ec ] When __ip6_rt_update_pmtu() is called, rt->from is RCU dereferenced, but is never checked for null - rt6_flush_exceptions() may have removed the entry. [ 1913.989004] RIP: 0010:ip6_rt_cache_alloc+0x13/0x170 [ 1914.209410] Call Trace: [ 1914.214798] [ 1914.219226] __ip6_rt_update_pmtu+0xb0/0x190 [ 1914.228649] ip6_tnl_xmit+0x2c2/0x970 [ip6_tunnel] [ 1914.239223] ? ip6_tnl_parse_tlv_enc_lim+0x32/0x1a0 [ip6_tunnel] [ 1914.252489] ? __gre6_xmit+0x148/0x530 [ip6_gre] [ 1914.262678] ip6gre_tunnel_xmit+0x17e/0x3c7 [ip6_gre] [ 1914.273831] dev_hard_start_xmit+0x8d/0x1f0 [ 1914.283061] sch_direct_xmit+0xfa/0x230 [ 1914.291521] __qdisc_run+0x154/0x4b0 [ 1914.299407] net_tx_action+0x10e/0x1f0 [ 1914.307678] __do_softirq+0xca/0x297 [ 1914.315567] irq_exit+0x96/0xa0 [ 1914.322494] smp_apic_timer_interrupt+0x68/0x130 [ 1914.332683] apic_timer_interrupt+0xf/0x20 [ 1914.341721] Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Jonathan Lemon Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0086acc16f3c..b6a97115a906 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2336,6 +2336,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_lock(); from = rcu_dereference(rt6->from); + if (!from) { + rcu_read_unlock(); + return; + } nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); -- cgit v1.2.3 From 51db0d068057040012ba9201ae0b8129fe0591f9 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Mon, 1 Apr 2019 19:36:33 -0700 Subject: nfp: flower: replace CFI with vlan present [ Upstream commit f7ee799a51ddbcc205ef615fe424fb5084e9e0aa ] Replace vlan CFI bit with a vlan present bit that indicates the presence of a vlan tag. Previously the driver incorrectly assumed that an vlan id of 0 is not matchable, therefore we indicate vlan presence with a vlan present bit. Fixes: 5571e8c9f241 ("nfp: extend flower matching capabilities") Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: Louis Peens Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 2 +- drivers/net/ethernet/netronome/nfp/flower/match.c | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 15f41cfef9f1..bf06458288f3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -26,7 +26,7 @@ #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) -#define NFP_FLOWER_MASK_VLAN_CFI BIT(12) +#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) #define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) #define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index cdf75595f627..571cc8ced33e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -26,14 +26,12 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *frame, FLOW_DISSECTOR_KEY_VLAN, target); /* Populate the tci field. */ - if (flow_vlan->vlan_id || flow_vlan->vlan_priority) { - tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - flow_vlan->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - flow_vlan->vlan_id) | - NFP_FLOWER_MASK_VLAN_CFI; - frame->tci = cpu_to_be16(tmp_tci); - } + tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + flow_vlan->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + flow_vlan->vlan_id); + frame->tci = cpu_to_be16(tmp_tci); } } -- cgit v1.2.3 From 116ac142d2c6cccf821ef351645158ef8ba9e05e Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Mon, 1 Apr 2019 19:36:34 -0700 Subject: nfp: flower: remove vlan CFI bit from push vlan action [ Upstream commit 42cd5484a22f1a1b947e21e2af65fa7dab09d017 ] We no longer set CFI when pushing vlan tags, therefore we remove the CFI bit from push vlan. Fixes: 1a1e586f54bf ("nfp: add basic action capabilities to flower offloads") Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: Louis Peens Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/netronome/nfp/flower/action.c | 3 +-- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 8d54b36afee8..2bbc5b8f92c2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -49,8 +49,7 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, tmp_push_vlan_tci = FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) | - FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) | - NFP_FL_PUSH_VLAN_CFI; + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)); push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index bf06458288f3..ab07d76b4186 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -82,7 +82,6 @@ #define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) #define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) -#define NFP_FL_PUSH_VLAN_CFI BIT(12) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) #define IPV6_FLOW_LABEL_MASK cpu_to_be32(0x000fffff) -- cgit v1.2.3 From 064290d0f6c7adc4fa19f88e33bbb9f2633984fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 4 Apr 2019 15:01:33 +0200 Subject: sch_cake: Use tc_skb_protocol() helper for getting packet protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b2100cc56fca8c51d28aa42a9f1fbcb2cf351996 ] We shouldn't be using skb->protocol directly as that will miss cases with hardware-accelerated VLAN tags. Use the helper instead to get the right protocol number. Reported-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cake.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 73940293700d..c55fd5c932d4 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1526,7 +1526,7 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { u8 dscp; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) -- cgit v1.2.3 From d58923dac952107bcfe0fe0ad59e80d3c952cc36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 4 Apr 2019 15:01:33 +0200 Subject: sch_cake: Make sure we can write the IP header before changing DSCP bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c87b4ecdbe8db27867a7b7f840291cd843406bd7 ] There is not actually any guarantee that the IP headers are valid before we access the DSCP bits of the packets. Fix this using the same approach taken in sch_dsmark. Reported-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cake.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index c55fd5c932d4..9a1b0a93534d 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1524,16 +1524,27 @@ static void cake_wash_diffserv(struct sk_buff *skb) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { + int wlen = skb_network_offset(skb); u8 dscp; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); return dscp; case htons(ETH_P_IPV6): + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; if (wash && dscp) ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); -- cgit v1.2.3 From a023c1a245a71d936c67d2c984e5789d604622e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 10:12:48 +0300 Subject: NFC: nci: Add some bounds checking in nci_hci_cmd_received() [ Upstream commit d7ee81ad09f072eab1681877fc71ec05f9c1ae92 ] This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory corruption when handling SHDLC I-Frame commands"). I'm not totally sure, but I think that commit description may have overstated the danger. I was under the impression that this data came from the firmware? If you can't trust your networking firmware, then you're already in trouble. Anyway, these days we add bounds checking where ever we can and we call it kernel hardening. Better safe than sorry. Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/hci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ddfc52ac1f9b..c0d323b58e73 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; + if (new_pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; -- cgit v1.2.3 From 5aa94a5b5641a3c2200be00c8bd9cc48ed5f6640 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 10:13:51 +0300 Subject: nfc: nci: Potential off by one in ->pipes[] array [ Upstream commit 6491d698396fd5da4941980a35ca7c162a672016 ] This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes") where we changed NFC_HCI_MAX_PIPES from 127 to 128. As the comment next to the define explains, the pipe identifier is 7 bits long. The highest possible pipe is 127, but the number of possible pipes is 128. As the code is now, then there is potential for an out of bounds array access: net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one? 'ndev->hci_dev->pipes[pipe]' '0-127 == 127' Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- include/net/nfc/nci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 87499b6b35d6..df5c69db68af 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -166,7 +166,7 @@ struct nci_conn_info { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ -#define NCI_HCI_MAX_PIPES 127 +#define NCI_HCI_MAX_PIPES 128 struct nci_hci_gate { u8 gate; -- cgit v1.2.3 From 2cf17769d6b543b262a3cd04b3fd17b2bd4e6487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 5 Apr 2019 15:01:59 +0200 Subject: sch_cake: Simplify logic in cake_select_tin() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4976e3c683f328bc6f2edef555a4ffee6524486f ] The logic in cake_select_tin() was getting a bit hairy, and it turns out we can simplify it quite a bit. This also allows us to get rid of one of the two diffserv parsing functions, which has the added benefit that already-zeroed DSCP fields won't get re-written. Suggested-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cake.c | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 9a1b0a93534d..7b5ce1343474 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1508,20 +1508,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) return idx + (tin << 16); } -static void cake_wash_diffserv(struct sk_buff *skb) -{ - switch (skb->protocol) { - case htons(ETH_P_IP): - ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); - break; - case htons(ETH_P_IPV6): - ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); - break; - default: - break; - } -} - static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { int wlen = skb_network_offset(skb); @@ -1564,25 +1550,27 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, { struct cake_sched_data *q = qdisc_priv(sch); u32 tin; + u8 dscp; - if (TC_H_MAJ(skb->priority) == sch->handle && - TC_H_MIN(skb->priority) > 0 && - TC_H_MIN(skb->priority) <= q->tin_cnt) { + /* Tin selection: Default to diffserv-based selection, allow overriding + * using firewall marks or skb->priority. + */ + dscp = cake_handle_diffserv(skb, + q->rate_flags & CAKE_FLAG_WASH); + + if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) + tin = 0; + + else if (TC_H_MAJ(skb->priority) == sch->handle && + TC_H_MIN(skb->priority) > 0 && + TC_H_MIN(skb->priority) <= q->tin_cnt) tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; - if (q->rate_flags & CAKE_FLAG_WASH) - cake_wash_diffserv(skb); - } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) { - /* extract the Diffserv Precedence field, if it exists */ - /* and clear DSCP bits if washing */ - tin = q->tin_index[cake_handle_diffserv(skb, - q->rate_flags & CAKE_FLAG_WASH)]; + else { + tin = q->tin_index[dscp]; + if (unlikely(tin >= q->tin_cnt)) tin = 0; - } else { - tin = 0; - if (q->rate_flags & CAKE_FLAG_WASH) - cake_wash_diffserv(skb); } return &q->tins[tin]; -- cgit v1.2.3 From ebac4d0adf68f8962bd82fcf483936edd6ec095b Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Fri, 29 Mar 2019 10:49:12 +0100 Subject: CIFS: keep FileInfo handle live during oplock break commit b98749cac4a695f084a5ff076f4510b23e353ecd upstream. In the oplock break handler, writing pending changes from pages puts the FileInfo handle. If the refcount reaches zero it closes the handle and waits for any oplock break handler to return, thus causing a deadlock. To prevent this situation: * We add a wait flag to cifsFileInfo_put() to decide whether we should wait for running/pending oplock break handlers * We keep an additionnal reference of the SMB FileInfo handle so that for the rest of the handler putting the handle won't close it. - The ref is bumped everytime we queue the handler via the cifs_queue_oplock_break() helper. - The ref is decremented at the end of the handler This bug was triggered by xfstest 464. Also important fix to address the various reports of oops in smb2_push_mandatory_locks Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/file.c | 30 +++++++++++++++++++++++++----- fs/cifs/misc.c | 25 +++++++++++++++++++++++-- fs/cifs/smb2misc.c | 6 +++--- 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6c934ab3722b..10ead04346ee 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1303,6 +1303,7 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) } struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); #define CIFS_CACHE_READ_FLG 1 @@ -1824,6 +1825,7 @@ GLOBAL_EXTERN spinlock_t gidsidlock; #endif /* CONFIG_CIFS_ACL */ void cifs_oplock_break(struct work_struct *work); +void cifs_queue_oplock_break(struct cifsFileInfo *cfile); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8d107587208f..7c05353b766c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -360,12 +360,30 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file) return cifs_file; } -/* - * Release a reference on the file private data. This may involve closing - * the filehandle out on the server. Must be called without holding - * tcon->open_file_lock and cifs_file->file_info_lock. +/** + * cifsFileInfo_put - release a reference of file priv data + * + * Always potentially wait for oplock handler. See _cifsFileInfo_put(). */ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) +{ + _cifsFileInfo_put(cifs_file, true); +} + +/** + * _cifsFileInfo_put - release a reference of file priv data + * + * This may involve closing the filehandle @cifs_file out on the + * server. Must be called without holding tcon->open_file_lock and + * cifs_file->file_info_lock. + * + * If @wait_for_oplock_handler is true and we are releasing the last + * reference, wait for any running oplock break handler of the file + * and cancel any pending one. If calling this function from the + * oplock break handler, you need to pass false. + * + */ +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) { struct inode *inode = d_inode(cifs_file->dentry); struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); @@ -414,7 +432,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) spin_unlock(&tcon->open_file_lock); - oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break); + oplock_break_cancelled = wait_oplock_handler ? + cancel_work_sync(&cifs_file->oplock_break) : false; if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; @@ -4480,6 +4499,7 @@ void cifs_oplock_break(struct work_struct *work) cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } + _cifsFileInfo_put(cfile, false /* do not wait for ourself */); cifs_done_oplock_break(cinode); } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index bee203055b30..1e1626a2cfc3 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -501,8 +501,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &pCifsInode->flags); - queue_work(cifsoplockd_wq, - &netfile->oplock_break); + cifs_queue_oplock_break(netfile); netfile->oplock_break_cancelled = false; spin_unlock(&tcon->open_file_lock); @@ -607,6 +606,28 @@ void cifs_put_writer(struct cifsInodeInfo *cinode) spin_unlock(&cinode->writers_lock); } +/** + * cifs_queue_oplock_break - queue the oplock break handler for cfile + * + * This function is called from the demultiplex thread when it + * receives an oplock break for @cfile. + * + * Assumes the tcon->open_file_lock is held. + * Assumes cfile->file_info_lock is NOT held. + */ +void cifs_queue_oplock_break(struct cifsFileInfo *cfile) +{ + /* + * Bump the handle refcount now while we hold the + * open_file_lock to enforce the validity of it for the oplock + * break handler. The matching put is done at the end of the + * handler. + */ + cifsFileInfo_get(cfile); + + queue_work(cifsoplockd_wq, &cfile->oplock_break); +} + void cifs_done_oplock_break(struct cifsInodeInfo *cinode) { clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 58700d2ba8cd..0a7ed2e3ad4f 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -555,7 +555,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); - queue_work(cifsoplockd_wq, &cfile->oplock_break); + cifs_queue_oplock_break(cfile); kfree(lw); return true; } @@ -719,8 +719,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); spin_unlock(&cfile->file_info_lock); - queue_work(cifsoplockd_wq, - &cfile->oplock_break); + + cifs_queue_oplock_break(cfile); spin_unlock(&tcon->open_file_lock); spin_unlock(&cifs_tcp_ses_lock); -- cgit v1.2.3 From 9582ba401ef08e92f328001a813835a464258e7d Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:30:38 +0800 Subject: cifs: Fix lease buffer length error commit b57a55e2200ede754e4dc9cce4ba9402544b9365 upstream. There is a KASAN slab-out-of-bounds: BUG: KASAN: slab-out-of-bounds in _copy_from_iter_full+0x783/0xaa0 Read of size 80 at addr ffff88810c35e180 by task mount.cifs/539 CPU: 1 PID: 539 Comm: mount.cifs Not tainted 4.19 #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0xdd/0x12a print_address_description+0xa7/0x540 kasan_report+0x1ff/0x550 check_memory_region+0x2f1/0x310 memcpy+0x2f/0x80 _copy_from_iter_full+0x783/0xaa0 tcp_sendmsg_locked+0x1840/0x4140 tcp_sendmsg+0x37/0x60 inet_sendmsg+0x18c/0x490 sock_sendmsg+0xae/0x130 smb_send_kvec+0x29c/0x520 __smb_send_rqst+0x3ef/0xc60 smb_send_rqst+0x25a/0x2e0 compound_send_recv+0x9e8/0x2af0 cifs_send_recv+0x24/0x30 SMB2_open+0x35e/0x1620 open_shroot+0x27b/0x490 smb2_open_op_close+0x4e1/0x590 smb2_query_path_info+0x2ac/0x650 cifs_get_inode_info+0x1058/0x28f0 cifs_root_iget+0x3bb/0xf80 cifs_smb3_do_mount+0xe00/0x14c0 cifs_do_mount+0x15/0x20 mount_fs+0x5e/0x290 vfs_kern_mount+0x88/0x460 do_mount+0x398/0x31e0 ksys_mount+0xc6/0x150 __x64_sys_mount+0xea/0x190 do_syscall_64+0x122/0x590 entry_SYSCALL_64_after_hwframe+0x44/0xa9 It can be reproduced by the following step: 1. samba configured with: server max protocol = SMB2_10 2. mount -o vers=default When parse the mount version parameter, the 'ops' and 'vals' was setted to smb30, if negotiate result is smb21, just update the 'ops' to smb21, but the 'vals' is still smb30. When add lease context, the iov_base is allocated with smb21 ops, but the iov_len is initiallited with the smb30. Because the iov_len is longer than iov_base, when send the message, copy array out of bounds. we need to keep the 'ops' and 'vals' consistent. Fixes: 9764c02fcbad ("SMB3: Add support for multidialect negotiate (SMB2.1 and later)") Fixes: d5c7076b772a ("smb3: add smb3.1.1 to default dialect list") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 068febe37fe4..a8c1303aa699 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -815,8 +815,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ ses->server->ops = &smb21_operations; - } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) + ses->server->vals = &smb21_values; + } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { ses->server->ops = &smb311_operations; + ses->server->vals = &smb311_values; + } } else if (le16_to_cpu(rsp->DialectRevision) != ses->server->vals->protocol_id) { /* if requested single dialect ensure returned dialect matched */ -- cgit v1.2.3 From e8ac406c749ea5d2a583854667a20d8e7fe38070 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:47:38 +0800 Subject: cifs: Fix use-after-free in SMB2_write commit 6a3eb3360667170988f8a6477f6686242061488a upstream. There is a KASAN use-after-free: BUG: KASAN: use-after-free in SMB2_write+0x1342/0x1580 Read of size 8 at addr ffff8880b6a8e450 by task ln/4196 Should not release the 'req' because it will use in the trace. Fixes: eccb4422cf97 ("smb3: Add ftrace tracepoints for improved SMB3 debugging") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable 4.18+ Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a8c1303aa699..fb068d139250 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3708,7 +3708,6 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); rsp = (struct smb2_write_rsp *)rsp_iov.iov_base; if (rc) { @@ -3726,6 +3725,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, io_parms->offset, *nbytes); } + cifs_small_buf_release(req); free_rsp_buf(resp_buftype, rsp); return rc; } -- cgit v1.2.3 From 76dbd554c2730ce0b324e54bac9d8a9056b1ffdc Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:47:39 +0800 Subject: cifs: Fix use-after-free in SMB2_read commit 088aaf17aa79300cab14dbee2569c58cfafd7d6e upstream. There is a KASAN use-after-free: BUG: KASAN: use-after-free in SMB2_read+0x1136/0x1190 Read of size 8 at addr ffff8880b4e45e50 by task ln/1009 Should not release the 'req' because it will use in the trace. Fixes: eccb4422cf97 ("smb3: Add ftrace tracepoints for improved SMB3 debugging") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable 4.18+ Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index fb068d139250..938e75cc3b66 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3390,8 +3390,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, rqst.rq_nvec = 1; rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); - rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; if (rc) { @@ -3410,6 +3408,8 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, io_parms->tcon->tid, ses->Suid, io_parms->offset, io_parms->length); + cifs_small_buf_release(req); + *nbytes = le32_to_cpu(rsp->DataLength); if ((*nbytes > CIFS_MAX_MSGSIZE) || (*nbytes > io_parms->length)) { -- cgit v1.2.3 From f6846161e203637e39d84814f5b2006eeeddba00 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 10 Apr 2019 07:47:22 +1000 Subject: cifs: fix handle leak in smb2_query_symlink() commit e6d0fb7b34f264f72c33053558a360a6a734905e upstream. If we enter smb2_query_symlink() for something that is not a symlink and where the SMB2_open() would succeed we would never end up closing this handle and would thus leak a handle on the server. Fix this by immediately calling SMB2_close() on successfull open. Signed-off-by: Ronnie Sahlberg CC: Stable Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ea56b1cdbdde..d5434ac0571b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2210,6 +2210,8 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov, &resp_buftype); + if (!rc) + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (!rc || !err_iov.iov_base) { rc = -ENOENT; goto free_path; -- cgit v1.2.3 From d12bcf87e6ff1a5f20aa4606c5062bb1859e3b7c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 9 Mar 2019 17:37:21 +0530 Subject: fs/dax: Deposit pagetable even when installing zero page commit 11cf9d863dcb583345723b0ed72173348761e9c0 upstream. Architectures like ppc64 use the deposited page table to store hardware page table slot information. Make sure we deposit a page table when using zero page at the pmd level for hash. Without this we hit Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc000000000082a74 Oops: Kernel access of bad area, sig: 11 [#1] .... NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0 LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0 Call Trace: hash_page_mm+0x43c/0x740 do_hash_page+0x2c/0x3c copy_from_iter_flushcache+0xa4/0x4a0 pmem_copy_from_iter+0x2c/0x50 [nd_pmem] dax_copy_from_iter+0x40/0x70 dax_iomap_actor+0x134/0x360 iomap_apply+0xfc/0x1b0 dax_iomap_rw+0xac/0x130 ext4_file_write_iter+0x254/0x460 [ext4] __vfs_write+0x120/0x1e0 vfs_write+0xd8/0x220 SyS_write+0x6c/0x110 system_call+0x3c/0x130 Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions") Cc: Reviewed-by: Jan Kara Signed-off-by: Aneesh Kumar K.V Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- fs/dax.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 05cca2214ae3..827ee143413e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "internal.h" #define CREATE_TRACE_POINTS @@ -1409,7 +1410,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, { struct address_space *mapping = vmf->vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; + struct vm_area_struct *vma = vmf->vma; struct inode *inode = mapping->host; + pgtable_t pgtable = NULL; struct page *zero_page; spinlock_t *ptl; pmd_t pmd_entry; @@ -1424,12 +1427,22 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, DAX_PMD | DAX_ZERO_PAGE, false); + if (arch_needs_pgtable_deposit()) { + pgtable = pte_alloc_one(vma->vm_mm); + if (!pgtable) + return VM_FAULT_OOM; + } + ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (!pmd_none(*(vmf->pmd))) { spin_unlock(ptl); goto fallback; } + if (pgtable) { + pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); + mm_inc_nr_ptes(vma->vm_mm); + } pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot); pmd_entry = pmd_mkhuge(pmd_entry); set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry); @@ -1438,6 +1451,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, return VM_FAULT_NOPAGE; fallback: + if (pgtable) + pte_free(vma->vm_mm, pgtable); trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry); return VM_FAULT_FALLBACK; } -- cgit v1.2.3 From 935fef82120fe9531263c609685e85e5b58448d6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:10:47 -0700 Subject: KVM: x86: Don't clear EFER during SMM transitions for 32-bit vCPU commit 8f4dc2e77cdfaf7e644ef29693fa229db29ee1de upstream. Neither AMD nor Intel CPUs have an EFER field in the legacy SMRAM save state area, i.e. don't save/restore EFER across SMM transitions. KVM somewhat models this, e.g. doesn't clear EFER on entry to SMM if the guest doesn't support long mode. But during RSM, KVM unconditionally clears EFER so that it can get back to pure 32-bit mode in order to start loading CRs with their actual non-SMM values. Clear EFER only when it will be written when loading the non-SMM state so as to preserve bits that can theoretically be set on 32-bit vCPUs, e.g. KVM always emulates EFER_SCE. And because CR4.PAE is cleared only to play nice with EFER, wrap that code in the long mode check as well. Note, this may result in a compiler warning about cr4 being consumed uninitialized. Re-read CR4 even though it's technically unnecessary, as doing so allows for more readable code and RSM emulation is not a performance critical path. Fixes: 660a5d517aaab ("KVM: x86: save/load state on SMM switch") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c338984c850d..81be2165821f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2575,15 +2575,13 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU * supports long mode. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); if (emulator_has_longmode(ctxt)) { struct desc_struct cs_desc; /* Zero CR4.PCIDE before CR0.PG. */ - if (cr4 & X86_CR4_PCIDE) { + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PCIDE) ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - cr4 &= ~X86_CR4_PCIDE; - } /* A 32-bit code segment is required to clear EFER.LMA. */ memset(&cs_desc, 0, sizeof(cs_desc)); @@ -2597,13 +2595,16 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (cr0 & X86_CR0_PE) ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ - if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); + if (emulator_has_longmode(ctxt)) { + /* Clear CR4.PAE before clearing EFER.LME. */ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PAE) + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - /* And finally go back to 32-bit mode. */ - efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + /* And finally go back to 32-bit mode. */ + efer = 0; + ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + } smbase = ctxt->ops->get_smbase(ctxt); -- cgit v1.2.3 From 739969f56e4ff486f585fe2edeb12583e7e2a46c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 3 Apr 2019 16:06:42 +0200 Subject: KVM: x86: svm: make sure NMI is injected after nmi_singlestep commit 99c221796a810055974b54c02e8f53297e48d146 upstream. I noticed that apic test from kvm-unit-tests always hangs on my EPYC 7401P, the hanging test nmi-after-sti is trying to deliver 30000 NMIs and tracing shows that we're sometimes able to deliver a few but never all. When we're trying to inject an NMI we may fail to do so immediately for various reasons, however, we still need to inject it so enable_nmi_window() arms nmi_singlestep mode. #DB occurs as expected, but we're not checking for pending NMIs before entering the guest and unless there's a different event to process, the NMI will never get delivered. Make KVM_REQ_EVENT request on the vCPU from db_interception() to make sure pending NMIs are checked and possibly injected. Signed-off-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a9b8e38d78ad..538fcadda350 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2687,6 +2687,7 @@ static int npf_interception(struct vcpu_svm *svm) static int db_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm->vcpu.run; + struct kvm_vcpu *vcpu = &svm->vcpu; if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && @@ -2697,6 +2698,8 @@ static int db_interception(struct vcpu_svm *svm) if (svm->nmi_singlestep) { disable_nmi_singlestep(svm); + /* Make sure we check for pending NMIs upon entry */ + kvm_make_request(KVM_REQ_EVENT, vcpu); } if (svm->vcpu.guest_debug & -- cgit v1.2.3 From 80c1486b7e209f7ea571e3c31f3d1312c02205e7 Mon Sep 17 00:00:00 2001 From: Leonard Pollak Date: Wed, 13 Feb 2019 11:19:52 +0100 Subject: Staging: iio: meter: fixed typo commit 0a8a29be499cbb67df79370aaf5109085509feb8 upstream. This patch fixes an obvious typo, which will cause erroneously returning the Peak Voltage instead of the Peak Current. Signed-off-by: Leonard Pollak Cc: Acked-by: Michael Hennerich Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/meter/ade7854.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/meter/ade7854.c b/drivers/staging/iio/meter/ade7854.c index 029c3bf42d4d..07774c000c5a 100644 --- a/drivers/staging/iio/meter/ade7854.c +++ b/drivers/staging/iio/meter/ade7854.c @@ -269,7 +269,7 @@ static IIO_DEV_ATTR_VPEAK(0644, static IIO_DEV_ATTR_IPEAK(0644, ade7854_read_32bit, ade7854_write_32bit, - ADE7854_VPEAK); + ADE7854_IPEAK); static IIO_DEV_ATTR_APHCAL(0644, ade7854_read_16bit, ade7854_write_16bit, -- cgit v1.2.3 From 25a91f7b9810c62538fe131521f1165d5a392bb7 Mon Sep 17 00:00:00 2001 From: Mircea Caprioru Date: Wed, 20 Feb 2019 13:08:20 +0200 Subject: staging: iio: ad7192: Fix ad7193 channel address commit 7ce0f216221856a17fc4934b39284678a5fef2e9 upstream. This patch fixes the differential channels addresses for the ad7193. Signed-off-by: Mircea Caprioru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/adc/ad7192.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c index acdbc07fd259..2fc8bc22b57b 100644 --- a/drivers/staging/iio/adc/ad7192.c +++ b/drivers/staging/iio/adc/ad7192.c @@ -109,10 +109,10 @@ #define AD7192_CH_AIN3 BIT(6) /* AIN3 - AINCOM */ #define AD7192_CH_AIN4 BIT(7) /* AIN4 - AINCOM */ -#define AD7193_CH_AIN1P_AIN2M 0x000 /* AIN1(+) - AIN2(-) */ -#define AD7193_CH_AIN3P_AIN4M 0x001 /* AIN3(+) - AIN4(-) */ -#define AD7193_CH_AIN5P_AIN6M 0x002 /* AIN5(+) - AIN6(-) */ -#define AD7193_CH_AIN7P_AIN8M 0x004 /* AIN7(+) - AIN8(-) */ +#define AD7193_CH_AIN1P_AIN2M 0x001 /* AIN1(+) - AIN2(-) */ +#define AD7193_CH_AIN3P_AIN4M 0x002 /* AIN3(+) - AIN4(-) */ +#define AD7193_CH_AIN5P_AIN6M 0x004 /* AIN5(+) - AIN6(-) */ +#define AD7193_CH_AIN7P_AIN8M 0x008 /* AIN7(+) - AIN8(-) */ #define AD7193_CH_TEMP 0x100 /* Temp senseor */ #define AD7193_CH_AIN2P_AIN2M 0x200 /* AIN2(+) - AIN2(-) */ #define AD7193_CH_AIN1 0x401 /* AIN1 - AINCOM */ -- cgit v1.2.3 From 9efe152f769c6195282a4c787f2b03670b77d921 Mon Sep 17 00:00:00 2001 From: Sergey Larin Date: Sat, 2 Mar 2019 19:54:55 +0300 Subject: iio: gyro: mpu3050: fix chip ID reading commit 409a51e0a4a5f908763191fae2c29008632eb712 upstream. According to the datasheet, the last bit of CHIP_ID register controls I2C bus, and the first one is unused. Handle this correctly. Note that there are chips out there that have a value such that the id check currently fails. Signed-off-by: Sergey Larin Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/mpu3050-core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index 77fac81a3adc..5ddebede31a6 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -29,7 +29,8 @@ #include "mpu3050.h" -#define MPU3050_CHIP_ID 0x69 +#define MPU3050_CHIP_ID 0x68 +#define MPU3050_CHIP_ID_MASK 0x7E /* * Register map: anything suffixed *_H is a big-endian high byte and always @@ -1176,8 +1177,9 @@ int mpu3050_common_probe(struct device *dev, goto err_power_down; } - if (val != MPU3050_CHIP_ID) { - dev_err(dev, "unsupported chip id %02x\n", (u8)val); + if ((val & MPU3050_CHIP_ID_MASK) != MPU3050_CHIP_ID) { + dev_err(dev, "unsupported chip id %02x\n", + (u8)(val & MPU3050_CHIP_ID_MASK)); ret = -ENODEV; goto err_power_down; } -- cgit v1.2.3 From 5527e1e580071f4b58a03212738a51d207c57eed Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 13 Feb 2019 08:41:47 +0100 Subject: iio/gyro/bmg160: Use millidegrees for temperature scale commit 40a7198a4a01037003c7ca714f0d048a61e729ac upstream. Standard unit for temperature is millidegrees Celcius, whereas this driver was reporting in degrees. Fix the scale factor in the driver. Signed-off-by: Mike Looijmans Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/bmg160_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 63ca31628a93..92c07ab826eb 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -582,11 +582,10 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: return bmg160_get_filter(data, val); case IIO_CHAN_INFO_SCALE: - *val = 0; switch (chan->type) { case IIO_TEMP: - *val2 = 500000; - return IIO_VAL_INT_PLUS_MICRO; + *val = 500; + return IIO_VAL_INT; case IIO_ANGL_VEL: { int i; @@ -594,6 +593,7 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, for (i = 0; i < ARRAY_SIZE(bmg160_scale_table); ++i) { if (bmg160_scale_table[i].dps_range == data->dps_range) { + *val = 0; *val2 = bmg160_scale_table[i].scale; return IIO_VAL_INT_PLUS_MICRO; } -- cgit v1.2.3 From fd3c6ad2a934831ccf61f1bd1b79f7044516b3e9 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 6 Mar 2019 08:31:47 +0100 Subject: iio:chemical:bme680: Fix, report temperature in millidegrees commit 9436f45dd53595e21566a8c6627411077dfdb776 upstream. The standard unit for temperature is millidegrees Celcius. Adapt the driver to report in millidegrees instead of degrees. Signed-off-by: Mike Looijmans Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor"); Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/chemical/bme680_core.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 70c1fe4366f4..fefe32b5b69d 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -583,8 +583,7 @@ static int bme680_gas_config(struct bme680_data *data) return ret; } -static int bme680_read_temp(struct bme680_data *data, - int *val, int *val2) +static int bme680_read_temp(struct bme680_data *data, int *val) { struct device *dev = regmap_get_device(data->regmap); int ret; @@ -617,10 +616,9 @@ static int bme680_read_temp(struct bme680_data *data, * compensate_press/compensate_humid to get compensated * pressure/humidity readings. */ - if (val && val2) { - *val = comp_temp; - *val2 = 100; - return IIO_VAL_FRACTIONAL; + if (val) { + *val = comp_temp * 10; /* Centidegrees to millidegrees */ + return IIO_VAL_INT; } return ret; @@ -635,7 +633,7 @@ static int bme680_read_press(struct bme680_data *data, s32 adc_press; /* Read and compensate temperature to get a reading of t_fine */ - ret = bme680_read_temp(data, NULL, NULL); + ret = bme680_read_temp(data, NULL); if (ret < 0) return ret; @@ -668,7 +666,7 @@ static int bme680_read_humid(struct bme680_data *data, u32 comp_humidity; /* Read and compensate temperature to get a reading of t_fine */ - ret = bme680_read_temp(data, NULL, NULL); + ret = bme680_read_temp(data, NULL); if (ret < 0) return ret; @@ -761,7 +759,7 @@ static int bme680_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_PROCESSED: switch (chan->type) { case IIO_TEMP: - return bme680_read_temp(data, val, val2); + return bme680_read_temp(data, val); case IIO_PRESSURE: return bme680_read_press(data, val, val2); case IIO_HUMIDITYRELATIVE: -- cgit v1.2.3 From b4dd709ada6d8727f385cba8e98220ae4fc2b7d7 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 6 Mar 2019 08:31:48 +0100 Subject: iio:chemical:bme680: Fix SPI read interface commit 73f3bc6da506711302bb67572440eb84b1ec4a2c upstream. The SPI interface implementation was completely broken. When using the SPI interface, there are only 7 address bits, the upper bit is controlled by a page select register. The core needs access to both ranges, so implement register read/write for both regions. The regmap paging functionality didn't agree with a register that needs to be read and modified, so I implemented a custom paging algorithm. This fixes that the device wouldn't even probe in SPI mode. The SPI interface then isn't different from I2C, merged them into the core, and the I2C/SPI named registers are no longer needed. Implemented register value caching for the registers to reduce the I2C/SPI data transfers considerably. The calibration set reads as all zeroes until some undefined point in time, and I couldn't determine what makes it valid. The datasheet mentions these registers but does not provide any hints on when they become valid, and they aren't even enumerated in the memory map. So check the calibration and retry reading it from the device after each measurement until it provides something valid. Despite the size this is suitable for a stable backport given that it seems the SPI support never worked. Signed-off-by: Mike Looijmans Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor"); Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/chemical/bme680.h | 6 +- drivers/iio/chemical/bme680_core.c | 38 ++++++++++++ drivers/iio/chemical/bme680_i2c.c | 21 ------- drivers/iio/chemical/bme680_spi.c | 115 +++++++++++++++++++++++++------------ 4 files changed, 118 insertions(+), 62 deletions(-) diff --git a/drivers/iio/chemical/bme680.h b/drivers/iio/chemical/bme680.h index 0ae89b87e2d6..4edc5d21cb9f 100644 --- a/drivers/iio/chemical/bme680.h +++ b/drivers/iio/chemical/bme680.h @@ -2,11 +2,9 @@ #ifndef BME680_H_ #define BME680_H_ -#define BME680_REG_CHIP_I2C_ID 0xD0 -#define BME680_REG_CHIP_SPI_ID 0x50 +#define BME680_REG_CHIP_ID 0xD0 #define BME680_CHIP_ID_VAL 0x61 -#define BME680_REG_SOFT_RESET_I2C 0xE0 -#define BME680_REG_SOFT_RESET_SPI 0x60 +#define BME680_REG_SOFT_RESET 0xE0 #define BME680_CMD_SOFTRESET 0xB6 #define BME680_REG_STATUS 0x73 #define BME680_SPI_MEM_PAGE_BIT BIT(4) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index fefe32b5b69d..ccde4c65ff93 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -63,9 +63,23 @@ struct bme680_data { s32 t_fine; }; +static const struct regmap_range bme680_volatile_ranges[] = { + regmap_reg_range(BME680_REG_MEAS_STAT_0, BME680_REG_GAS_R_LSB), + regmap_reg_range(BME680_REG_STATUS, BME680_REG_STATUS), + regmap_reg_range(BME680_T2_LSB_REG, BME680_GH3_REG), +}; + +static const struct regmap_access_table bme680_volatile_table = { + .yes_ranges = bme680_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(bme680_volatile_ranges), +}; + const struct regmap_config bme680_regmap_config = { .reg_bits = 8, .val_bits = 8, + .max_register = 0xef, + .volatile_table = &bme680_volatile_table, + .cache_type = REGCACHE_RBTREE, }; EXPORT_SYMBOL(bme680_regmap_config); @@ -316,6 +330,10 @@ static s16 bme680_compensate_temp(struct bme680_data *data, s64 var1, var2, var3; s16 calc_temp; + /* If the calibration is invalid, attempt to reload it */ + if (!calib->par_t2) + bme680_read_calib(data, calib); + var1 = (adc_temp >> 3) - (calib->par_t1 << 1); var2 = (var1 * calib->par_t2) >> 11; var3 = ((var1 >> 1) * (var1 >> 1)) >> 12; @@ -865,8 +883,28 @@ int bme680_core_probe(struct device *dev, struct regmap *regmap, { struct iio_dev *indio_dev; struct bme680_data *data; + unsigned int val; int ret; + ret = regmap_write(regmap, BME680_REG_SOFT_RESET, + BME680_CMD_SOFTRESET); + if (ret < 0) { + dev_err(dev, "Failed to reset chip\n"); + return ret; + } + + ret = regmap_read(regmap, BME680_REG_CHIP_ID, &val); + if (ret < 0) { + dev_err(dev, "Error reading chip ID\n"); + return ret; + } + + if (val != BME680_CHIP_ID_VAL) { + dev_err(dev, "Wrong chip ID, got %x expected %x\n", + val, BME680_CHIP_ID_VAL); + return -ENODEV; + } + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) return -ENOMEM; diff --git a/drivers/iio/chemical/bme680_i2c.c b/drivers/iio/chemical/bme680_i2c.c index 06d4be539d2e..cfc4449edf1b 100644 --- a/drivers/iio/chemical/bme680_i2c.c +++ b/drivers/iio/chemical/bme680_i2c.c @@ -23,8 +23,6 @@ static int bme680_i2c_probe(struct i2c_client *client, { struct regmap *regmap; const char *name = NULL; - unsigned int val; - int ret; regmap = devm_regmap_init_i2c(client, &bme680_regmap_config); if (IS_ERR(regmap)) { @@ -33,25 +31,6 @@ static int bme680_i2c_probe(struct i2c_client *client, return PTR_ERR(regmap); } - ret = regmap_write(regmap, BME680_REG_SOFT_RESET_I2C, - BME680_CMD_SOFTRESET); - if (ret < 0) { - dev_err(&client->dev, "Failed to reset chip\n"); - return ret; - } - - ret = regmap_read(regmap, BME680_REG_CHIP_I2C_ID, &val); - if (ret < 0) { - dev_err(&client->dev, "Error reading I2C chip ID\n"); - return ret; - } - - if (val != BME680_CHIP_ID_VAL) { - dev_err(&client->dev, "Wrong chip ID, got %x expected %x\n", - val, BME680_CHIP_ID_VAL); - return -ENODEV; - } - if (id) name = id->name; diff --git a/drivers/iio/chemical/bme680_spi.c b/drivers/iio/chemical/bme680_spi.c index c9fb05e8d0b9..881778e55d38 100644 --- a/drivers/iio/chemical/bme680_spi.c +++ b/drivers/iio/chemical/bme680_spi.c @@ -11,28 +11,93 @@ #include "bme680.h" +struct bme680_spi_bus_context { + struct spi_device *spi; + u8 current_page; +}; + +/* + * In SPI mode there are only 7 address bits, a "page" register determines + * which part of the 8-bit range is active. This function looks at the address + * and writes the page selection bit if needed + */ +static int bme680_regmap_spi_select_page( + struct bme680_spi_bus_context *ctx, u8 reg) +{ + struct spi_device *spi = ctx->spi; + int ret; + u8 buf[2]; + u8 page = (reg & 0x80) ? 0 : 1; /* Page "1" is low range */ + + if (page == ctx->current_page) + return 0; + + /* + * Data sheet claims we're only allowed to change bit 4, so we must do + * a read-modify-write on each and every page select + */ + buf[0] = BME680_REG_STATUS; + ret = spi_write_then_read(spi, buf, 1, buf + 1, 1); + if (ret < 0) { + dev_err(&spi->dev, "failed to set page %u\n", page); + return ret; + } + + buf[0] = BME680_REG_STATUS; + if (page) + buf[1] |= BME680_SPI_MEM_PAGE_BIT; + else + buf[1] &= ~BME680_SPI_MEM_PAGE_BIT; + + ret = spi_write(spi, buf, 2); + if (ret < 0) { + dev_err(&spi->dev, "failed to set page %u\n", page); + return ret; + } + + ctx->current_page = page; + + return 0; +} + static int bme680_regmap_spi_write(void *context, const void *data, size_t count) { - struct spi_device *spi = context; + struct bme680_spi_bus_context *ctx = context; + struct spi_device *spi = ctx->spi; + int ret; u8 buf[2]; memcpy(buf, data, 2); + + ret = bme680_regmap_spi_select_page(ctx, buf[0]); + if (ret) + return ret; + /* * The SPI register address (= full register address without bit 7) * and the write command (bit7 = RW = '0') */ buf[0] &= ~0x80; - return spi_write_then_read(spi, buf, 2, NULL, 0); + return spi_write(spi, buf, 2); } static int bme680_regmap_spi_read(void *context, const void *reg, size_t reg_size, void *val, size_t val_size) { - struct spi_device *spi = context; + struct bme680_spi_bus_context *ctx = context; + struct spi_device *spi = ctx->spi; + int ret; + u8 addr = *(const u8 *)reg; + + ret = bme680_regmap_spi_select_page(ctx, addr); + if (ret) + return ret; - return spi_write_then_read(spi, reg, reg_size, val, val_size); + addr |= 0x80; /* bit7 = RW = '1' */ + + return spi_write_then_read(spi, &addr, 1, val, val_size); } static struct regmap_bus bme680_regmap_bus = { @@ -45,8 +110,8 @@ static struct regmap_bus bme680_regmap_bus = { static int bme680_spi_probe(struct spi_device *spi) { const struct spi_device_id *id = spi_get_device_id(spi); + struct bme680_spi_bus_context *bus_context; struct regmap *regmap; - unsigned int val; int ret; spi->bits_per_word = 8; @@ -56,45 +121,21 @@ static int bme680_spi_probe(struct spi_device *spi) return ret; } + bus_context = devm_kzalloc(&spi->dev, sizeof(*bus_context), GFP_KERNEL); + if (!bus_context) + return -ENOMEM; + + bus_context->spi = spi; + bus_context->current_page = 0xff; /* Undefined on warm boot */ + regmap = devm_regmap_init(&spi->dev, &bme680_regmap_bus, - &spi->dev, &bme680_regmap_config); + bus_context, &bme680_regmap_config); if (IS_ERR(regmap)) { dev_err(&spi->dev, "Failed to register spi regmap %d\n", (int)PTR_ERR(regmap)); return PTR_ERR(regmap); } - ret = regmap_write(regmap, BME680_REG_SOFT_RESET_SPI, - BME680_CMD_SOFTRESET); - if (ret < 0) { - dev_err(&spi->dev, "Failed to reset chip\n"); - return ret; - } - - /* after power-on reset, Page 0(0x80-0xFF) of spi_mem_page is active */ - ret = regmap_read(regmap, BME680_REG_CHIP_SPI_ID, &val); - if (ret < 0) { - dev_err(&spi->dev, "Error reading SPI chip ID\n"); - return ret; - } - - if (val != BME680_CHIP_ID_VAL) { - dev_err(&spi->dev, "Wrong chip ID, got %x expected %x\n", - val, BME680_CHIP_ID_VAL); - return -ENODEV; - } - /* - * select Page 1 of spi_mem_page to enable access to - * to registers from address 0x00 to 0x7F. - */ - ret = regmap_write_bits(regmap, BME680_REG_STATUS, - BME680_SPI_MEM_PAGE_BIT, - BME680_SPI_MEM_PAGE_1_VAL); - if (ret < 0) { - dev_err(&spi->dev, "failed to set page 1 of spi_mem_page\n"); - return ret; - } - return bme680_core_probe(&spi->dev, regmap, id->name); } -- cgit v1.2.3 From f65207cfee337192eaee3c7c7647cd8180e28ab9 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Wed, 13 Mar 2019 12:40:02 +0100 Subject: iio: cros_ec: Fix the maths for gyro scale calculation commit 3d02d7082e5823598090530c3988a35f69689943 upstream. Calculation did not use IIO_DEGREE_TO_RAD and implemented a variant to avoid precision loss as we aim a nano value. The offset added to avoid rounding error, though, doesn't give us a close result to the expected value. E.g. For 1000dps, the result should be: (1000 * pi ) / 180 >> 15 ~= 0.000532632218 But with current calculation we get $ cat scale 0.000547890 Fix the calculation by just doing the maths involved for a nano value val * pi * 10e12 / (180 * 2^15) so we get a closer result. $ cat scale 0.000532632 Fixes: c14dca07a31d ("iio: cros_ec_sensors: add ChromeOS EC Contiguous Sensors driver") Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c index 89cb0066a6e0..8d76afb87d87 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c @@ -103,9 +103,10 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev, * Do not use IIO_DEGREE_TO_RAD to avoid precision * loss. Round to the nearest integer. */ - *val = div_s64(val64 * 314159 + 9000000ULL, 1000); - *val2 = 18000 << (CROS_EC_SENSOR_BITS - 1); - ret = IIO_VAL_FRACTIONAL; + *val = 0; + *val2 = div_s64(val64 * 3141592653ULL, + 180 << (CROS_EC_SENSOR_BITS - 1)); + ret = IIO_VAL_INT_PLUS_NANO; break; case MOTIONSENSE_TYPE_MAG: /* -- cgit v1.2.3 From 50bc2c022b6e218f5ce7edbc76ff1584c956af8c Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Tue, 19 Mar 2019 12:47:00 +0200 Subject: iio: ad_sigma_delta: select channel when reading register commit fccfb9ce70ed4ea7a145f77b86de62e38178517f upstream. The desired channel has to be selected in order to correctly fill the buffer with the corresponding data. The `ad_sd_write_reg()` already does this, but for the `ad_sd_read_reg_raw()` this was omitted. Fixes: af3008485ea03 ("iio:adc: Add common code for ADI Sigma Delta devices") Signed-off-by: Dragos Bogdan Signed-off-by: Alexandru Ardelean Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad_sigma_delta.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index ff5f2da2e1b1..54d9978b2740 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -121,6 +121,7 @@ static int ad_sd_read_reg_raw(struct ad_sigma_delta *sigma_delta, if (sigma_delta->info->has_registers) { data[0] = reg << sigma_delta->info->addr_shift; data[0] |= sigma_delta->info->read_mask; + data[0] |= sigma_delta->comm; spi_message_add_tail(&t[0], &m); } spi_message_add_tail(&t[1], &m); -- cgit v1.2.3 From 5c526f27861fbf002f8ce0f17fbbc9b6e23804a3 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dagenais Date: Wed, 6 Mar 2019 15:56:06 -0500 Subject: iio: dac: mcp4725: add missing powerdown bits in store eeprom commit 06003531502d06bc89d32528f6ec96bf978790f9 upstream. When issuing the write DAC register and write eeprom command, the two powerdown bits (PD0 and PD1) are assumed by the chip to be present in the bytes sent. Leaving them at 0 implies "powerdown disabled" which is a different state that the current one. By adding the current state of the powerdown in the i2c write, the chip will correctly power-on exactly like as it is at the moment of store_eeprom call. This is documented in MCP4725's datasheet, FIGURE 6-2: "Write Commands for DAC Input Register and EEPROM" and MCP4726's datasheet, FIGURE 6-3: "Write All Memory Command". Signed-off-by: Jean-Francois Dagenais Acked-by: Peter Meerwald-Stadler Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/dac/mcp4725.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c index 6d71fd905e29..c701a45469f6 100644 --- a/drivers/iio/dac/mcp4725.c +++ b/drivers/iio/dac/mcp4725.c @@ -92,6 +92,7 @@ static ssize_t mcp4725_store_eeprom(struct device *dev, inoutbuf[0] = 0x60; /* write EEPROM */ inoutbuf[0] |= data->ref_mode << 3; + inoutbuf[0] |= data->powerdown ? ((data->powerdown_mode + 1) << 1) : 0; inoutbuf[1] = data->dac_value >> 4; inoutbuf[2] = (data->dac_value & 0xf) << 4; -- cgit v1.2.3 From 2b70088e150909401d4f97cd6b8d0ab0af7db39a Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 20 Feb 2019 17:11:32 +0200 Subject: iio: Fix scan mask selection commit 20ea39ef9f2f911bd01c69519e7d69cfec79fde3 upstream. The trialmask is expected to have all bits set to 0 after allocation. Currently kmalloc_array() is used which does not zero the memory and so random bits are set. This results in random channels being enabled when they shouldn't. Replace kmalloc_array() with kcalloc() which has the same interface but zeros the memory. Note the fix is actually required earlier than the below fixes tag, but will require a manual backport due to move from kmalloc to kmalloc_array. Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Fixes commit 057ac1acdfc4 ("iio: Use kmalloc_array() in iio_scan_mask_set()"). Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/industrialio-buffer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index cd5bfe39591b..dadd921a4a30 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -320,9 +320,8 @@ static int iio_scan_mask_set(struct iio_dev *indio_dev, const unsigned long *mask; unsigned long *trialmask; - trialmask = kmalloc_array(BITS_TO_LONGS(indio_dev->masklength), - sizeof(*trialmask), - GFP_KERNEL); + trialmask = kcalloc(BITS_TO_LONGS(indio_dev->masklength), + sizeof(*trialmask), GFP_KERNEL); if (trialmask == NULL) return -ENOMEM; if (!indio_dev->masklength) { -- cgit v1.2.3 From 1f6b63285e2fc71d285a7d24550759f2ba27a778 Mon Sep 17 00:00:00 2001 From: Georg Ottinger Date: Wed, 30 Jan 2019 14:42:02 +0100 Subject: iio: adc: at91: disable adc channel interrupt in timeout case commit 09c6bdee51183a575bf7546890c8c137a75a2b44 upstream. Having a brief look at at91_adc_read_raw() it is obvious that in the case of a timeout the setting of AT91_ADC_CHDR and AT91_ADC_IDR registers is omitted. If 2 different channels are queried we can end up with a situation where two interrupts are enabled, but only one interrupt is cleared in the interrupt handler. Resulting in a interrupt loop and a system hang. Signed-off-by: Georg Ottinger Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 75d2f73582a3..596841a3c4db 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -704,23 +704,29 @@ static int at91_adc_read_raw(struct iio_dev *idev, ret = wait_event_interruptible_timeout(st->wq_data_avail, st->done, msecs_to_jiffies(1000)); - if (ret == 0) - ret = -ETIMEDOUT; - if (ret < 0) { - mutex_unlock(&st->lock); - return ret; - } - - *val = st->last_value; + /* Disable interrupts, regardless if adc conversion was + * successful or not + */ at91_adc_writel(st, AT91_ADC_CHDR, AT91_ADC_CH(chan->channel)); at91_adc_writel(st, AT91_ADC_IDR, BIT(chan->channel)); - st->last_value = 0; - st->done = false; + if (ret > 0) { + /* a valid conversion took place */ + *val = st->last_value; + st->last_value = 0; + st->done = false; + ret = IIO_VAL_INT; + } else if (ret == 0) { + /* conversion timeout */ + dev_err(&idev->dev, "ADC Channel %d timeout.\n", + chan->channel); + ret = -ETIMEDOUT; + } + mutex_unlock(&st->lock); - return IIO_VAL_INT; + return ret; case IIO_CHAN_INFO_SCALE: *val = st->vref_mv; -- cgit v1.2.3 From 3e13bb9782f537ddbb1e6f4dad8b76960c87ac55 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 25 Mar 2019 14:01:23 +0100 Subject: iio: core: fix a possible circular locking dependency commit 7f75591fc5a123929a29636834d1bcb8b5c9fee3 upstream. This fixes a possible circular locking dependency detected warning seen with: - CONFIG_PROVE_LOCKING=y - consumer/provider IIO devices (ex: "voltage-divider" consumer of "adc") When using the IIO consumer interface, e.g. iio_channel_get(), the consumer device will likely call iio_read_channel_raw() or similar that rely on 'info_exist_lock' mutex. typically: ... mutex_lock(&chan->indio_dev->info_exist_lock); if (chan->indio_dev->info == NULL) { ret = -ENODEV; goto err_unlock; } ret = do_some_ops() err_unlock: mutex_unlock(&chan->indio_dev->info_exist_lock); return ret; ... Same mutex is also hold in iio_device_unregister(). The following deadlock warning happens when: - the consumer device has called an API like iio_read_channel_raw() at least once. - the consumer driver is unregistered, removed (unbind from sysfs) ====================================================== WARNING: possible circular locking dependency detected 4.19.24 #577 Not tainted ------------------------------------------------------ sh/372 is trying to acquire lock: (kn->count#30){++++}, at: kernfs_remove_by_name_ns+0x3c/0x84 but task is already holding lock: (&dev->info_exist_lock){+.+.}, at: iio_device_unregister+0x18/0x60 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&dev->info_exist_lock){+.+.}: __mutex_lock+0x70/0xa3c mutex_lock_nested+0x1c/0x24 iio_read_channel_raw+0x1c/0x60 iio_read_channel_info+0xa8/0xb0 dev_attr_show+0x1c/0x48 sysfs_kf_seq_show+0x84/0xec seq_read+0x154/0x528 __vfs_read+0x2c/0x15c vfs_read+0x8c/0x110 ksys_read+0x4c/0xac ret_fast_syscall+0x0/0x28 0xbedefb60 -> #0 (kn->count#30){++++}: lock_acquire+0xd8/0x268 __kernfs_remove+0x288/0x374 kernfs_remove_by_name_ns+0x3c/0x84 remove_files+0x34/0x78 sysfs_remove_group+0x40/0x9c sysfs_remove_groups+0x24/0x34 device_remove_attrs+0x38/0x64 device_del+0x11c/0x360 cdev_device_del+0x14/0x2c iio_device_unregister+0x24/0x60 release_nodes+0x1bc/0x200 device_release_driver_internal+0x1a0/0x230 unbind_store+0x80/0x130 kernfs_fop_write+0x100/0x1e4 __vfs_write+0x2c/0x160 vfs_write+0xa4/0x17c ksys_write+0x4c/0xac ret_fast_syscall+0x0/0x28 0xbe906840 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->info_exist_lock); lock(kn->count#30); lock(&dev->info_exist_lock); lock(kn->count#30); *** DEADLOCK *** ... cdev_device_del() can be called without holding the lock. It should be safe as info_exist_lock prevents kernelspace consumers to use the exported routines during/after provider removal. cdev_device_del() is for userspace. Help to reproduce: See example: Documentation/devicetree/bindings/iio/afe/voltage-divider.txt sysv { compatible = "voltage-divider"; io-channels = <&adc 0>; output-ohms = <22>; full-ohms = <222>; }; First, go to iio:deviceX for the "voltage-divider", do one read: $ cd /sys/bus/iio/devices/iio:deviceX $ cat in_voltage0_raw Then, unbind the consumer driver. It triggers above deadlock warning. $ cd /sys/bus/platform/drivers/iio-rescale/ $ echo sysv > unbind Note I don't actually expect stable will pick this up all the way back into IIO being in staging, but if's probably valid that far back. Signed-off-by: Fabrice Gasnier Fixes: ac917a81117c ("staging:iio:core set the iio_dev.info pointer to null on unregister") Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/industrialio-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 4f5cd9f60870..5b65750ce775 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1738,10 +1738,10 @@ EXPORT_SYMBOL(__iio_device_register); **/ void iio_device_unregister(struct iio_dev *indio_dev) { - mutex_lock(&indio_dev->info_exist_lock); - cdev_device_del(&indio_dev->chrdev, &indio_dev->dev); + mutex_lock(&indio_dev->info_exist_lock); + iio_device_unregister_debugfs(indio_dev); iio_disable_all_buffers(indio_dev); -- cgit v1.2.3 From 68bbd7524a6fce4198e25ee481eefd6c0fcf5318 Mon Sep 17 00:00:00 2001 From: "he, bo" Date: Wed, 6 Mar 2019 10:32:20 +0800 Subject: io: accel: kxcjk1013: restore the range after resume. commit fe2d3df639a7940a125a33d6460529b9689c5406 upstream. On some laptops, kxcjk1013 is powered off when system enters S3. We need restore the range regiter during resume. Otherwise, the sensor doesn't work properly after S3. Signed-off-by: he, bo Signed-off-by: Chen, Hu Reviewed-by: Hans de Goede Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/kxcjk-1013.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 7096e577b23f..50f3ff386bea 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1437,6 +1437,8 @@ static int kxcjk1013_resume(struct device *dev) mutex_lock(&data->mutex); ret = kxcjk1013_set_mode(data, OPERATION); + if (ret == 0) + ret = kxcjk1013_set_range(data, data->range); mutex_unlock(&data->mutex); return ret; -- cgit v1.2.3 From dcff1b3c1a1d2b2ec40758e26aebff4453f13c3e Mon Sep 17 00:00:00 2001 From: Christian Gromm Date: Tue, 2 Apr 2019 13:39:57 +0200 Subject: staging: most: core: use device description as name commit 131ac62253dba79daf4a6d83ab12293d2b9863d3 upstream. This patch uses the device description to clearly identity a device attached to the bus. It is needed as the currently useed mdevX notation is not sufficiant in case more than one network interface controller is being used at the same time. Cc: stable@vger.kernel.org Signed-off-by: Christian Gromm Signed-off-by: Greg Kroah-Hartman --- drivers/staging/most/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/most/core.c b/drivers/staging/most/core.c index 18936cdb1083..956daf8c3bd2 100644 --- a/drivers/staging/most/core.c +++ b/drivers/staging/most/core.c @@ -1431,7 +1431,7 @@ int most_register_interface(struct most_interface *iface) INIT_LIST_HEAD(&iface->p->channel_list); iface->p->dev_id = id; - snprintf(iface->p->name, STRING_SIZE, "mdev%d", id); + strcpy(iface->p->name, iface->description); iface->dev.init_name = iface->p->name; iface->dev.bus = &mc.bus; iface->dev.parent = &mc.dev; -- cgit v1.2.3 From bd301102144722c6fad21f8744ba481d08ca6248 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:10:14 +0100 Subject: staging: comedi: vmk80xx: Fix use of uninitialized semaphore commit 08b7c2f9208f0e2a32159e4e7a4831b7adb10a3e upstream. If `vmk80xx_auto_attach()` returns an error, the core comedi module code will call `vmk80xx_detach()` to clean up. If `vmk80xx_auto_attach()` successfully allocated the comedi device private data, `vmk80xx_detach()` assumes that a `struct semaphore limit_sem` contained in the private data has been initialized and uses it. Unfortunately, there are a couple of places where `vmk80xx_auto_attach()` can return an error after allocating the device private data but before initializing the semaphore, so this assumption is invalid. Fix it by initializing the semaphore just after allocating the private data in `vmk80xx_auto_attach()` before any other errors can be returned. I believe this was the cause of the following syzbot crash report : usb 1-1: config 0 has no interface number 0 usb 1-1: New USB device found, idVendor=10cf, idProduct=8068, bcdDevice=e6.8d usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 usb 1-1: config 0 descriptor?? vmk80xx 1-1:0.117: driver 'vmk80xx' failed to auto-configure device. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.1.0-rc4-319354-g9a33b36 #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xe8/0x16e lib/dump_stack.c:113 assign_lock_key kernel/locking/lockdep.c:786 [inline] register_lock_class+0x11b8/0x1250 kernel/locking/lockdep.c:1095 __lock_acquire+0xfb/0x37c0 kernel/locking/lockdep.c:3582 lock_acquire+0x10d/0x2f0 kernel/locking/lockdep.c:4211 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x44/0x60 kernel/locking/spinlock.c:152 down+0x12/0x80 kernel/locking/semaphore.c:58 vmk80xx_detach+0x59/0x100 drivers/staging/comedi/drivers/vmk80xx.c:829 comedi_device_detach+0xed/0x800 drivers/staging/comedi/drivers.c:204 comedi_device_cleanup.part.0+0x68/0x140 drivers/staging/comedi/comedi_fops.c:156 comedi_device_cleanup drivers/staging/comedi/comedi_fops.c:187 [inline] comedi_free_board_dev.part.0+0x16/0x90 drivers/staging/comedi/comedi_fops.c:190 comedi_free_board_dev drivers/staging/comedi/comedi_fops.c:189 [inline] comedi_release_hardware_device+0x111/0x140 drivers/staging/comedi/comedi_fops.c:2880 comedi_auto_config.cold+0x124/0x1b0 drivers/staging/comedi/drivers.c:1068 usb_probe_interface+0x31d/0x820 drivers/usb/core/driver.c:361 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_set_configuration+0xdf7/0x1740 drivers/usb/core/message.c:2021 generic_probe+0xa2/0xda drivers/usb/core/generic.c:210 usb_probe_device+0xc0/0x150 drivers/usb/core/driver.c:266 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_new_device.cold+0x537/0xccf drivers/usb/core/hub.c:2534 hub_port_connect drivers/usb/core/hub.c:5089 [inline] hub_port_connect_change drivers/usb/core/hub.c:5204 [inline] port_event drivers/usb/core/hub.c:5350 [inline] hub_event+0x138e/0x3b00 drivers/usb/core/hub.c:5432 process_one_work+0x90f/0x1580 kernel/workqueue.c:2269 worker_thread+0x9b/0xe20 kernel/workqueue.c:2415 kthread+0x313/0x420 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Reported-by: syzbot+54c2f58f15fe6876b6ad@syzkaller.appspotmail.com Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/vmk80xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 6234b649d887..b035d662390b 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -800,6 +800,8 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, devpriv->model = board->model; + sema_init(&devpriv->limit_sem, 8); + ret = vmk80xx_find_usb_endpoints(dev); if (ret) return ret; @@ -808,8 +810,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, if (ret) return ret; - sema_init(&devpriv->limit_sem, 8); - usb_set_intfdata(intf, devpriv); if (devpriv->model == VMK8055_MODEL) -- cgit v1.2.3 From a47fd7121627d66d75f401f9a610685355996330 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:52:30 +0100 Subject: staging: comedi: vmk80xx: Fix possible double-free of ->usb_rx_buf commit 663d294b4768bfd89e529e069bffa544a830b5bf upstream. `vmk80xx_alloc_usb_buffers()` is called from `vmk80xx_auto_attach()` to allocate RX and TX buffers for USB transfers. It allocates `devpriv->usb_rx_buf` followed by `devpriv->usb_tx_buf`. If the allocation of `devpriv->usb_tx_buf` fails, it frees `devpriv->usb_rx_buf`, leaving the pointer set dangling, and returns an error. Later, `vmk80xx_detach()` will be called from the core comedi module code to clean up. `vmk80xx_detach()` also frees both `devpriv->usb_rx_buf` and `devpriv->usb_tx_buf`, but `devpriv->usb_rx_buf` may have already been freed, leading to a double-free error. Fix it by removing the call to `kfree(devpriv->usb_rx_buf)` from `vmk80xx_alloc_usb_buffers()`, relying on `vmk80xx_detach()` to free the memory. Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/vmk80xx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index b035d662390b..65dc6c51037e 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -682,10 +682,8 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) size = usb_endpoint_maxp(devpriv->ep_tx); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } -- cgit v1.2.3 From 16b235e81d9d7b502fa72e579cc7796828090111 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:43:01 +0100 Subject: staging: comedi: ni_usb6501: Fix use of uninitialized mutex commit 660cf4ce9d0f3497cc7456eaa6d74c8b71d6282c upstream. If `ni6501_auto_attach()` returns an error, the core comedi module code will call `ni6501_detach()` to clean up. If `ni6501_auto_attach()` successfully allocated the comedi device private data, `ni6501_detach()` assumes that a `struct mutex mut` contained in the private data has been initialized and uses it. Unfortunately, there are a couple of places where `ni6501_auto_attach()` can return an error after allocating the device private data but before initializing the mutex, so this assumption is invalid. Fix it by initializing the mutex just after allocating the private data in `ni6501_auto_attach()` before any other errors can be retturned. Also move the call to `usb_set_intfdata()` just to keep the code a bit neater (either position for the call is fine). I believe this was the cause of the following syzbot crash report : usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 usb 1-1: config 0 descriptor?? usb 1-1: string descriptor 0 read error: -71 comedi comedi0: Wrong number of endpoints ni6501 1-1:0.233: driver 'ni6501' failed to auto-configure device. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 585 Comm: kworker/0:3 Not tainted 5.1.0-rc4-319354-g9a33b36 #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xe8/0x16e lib/dump_stack.c:113 assign_lock_key kernel/locking/lockdep.c:786 [inline] register_lock_class+0x11b8/0x1250 kernel/locking/lockdep.c:1095 __lock_acquire+0xfb/0x37c0 kernel/locking/lockdep.c:3582 lock_acquire+0x10d/0x2f0 kernel/locking/lockdep.c:4211 __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0xfe/0x12b0 kernel/locking/mutex.c:1072 ni6501_detach+0x5b/0x110 drivers/staging/comedi/drivers/ni_usb6501.c:567 comedi_device_detach+0xed/0x800 drivers/staging/comedi/drivers.c:204 comedi_device_cleanup.part.0+0x68/0x140 drivers/staging/comedi/comedi_fops.c:156 comedi_device_cleanup drivers/staging/comedi/comedi_fops.c:187 [inline] comedi_free_board_dev.part.0+0x16/0x90 drivers/staging/comedi/comedi_fops.c:190 comedi_free_board_dev drivers/staging/comedi/comedi_fops.c:189 [inline] comedi_release_hardware_device+0x111/0x140 drivers/staging/comedi/comedi_fops.c:2880 comedi_auto_config.cold+0x124/0x1b0 drivers/staging/comedi/drivers.c:1068 usb_probe_interface+0x31d/0x820 drivers/usb/core/driver.c:361 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_set_configuration+0xdf7/0x1740 drivers/usb/core/message.c:2021 generic_probe+0xa2/0xda drivers/usb/core/generic.c:210 usb_probe_device+0xc0/0x150 drivers/usb/core/driver.c:266 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_new_device.cold+0x537/0xccf drivers/usb/core/hub.c:2534 hub_port_connect drivers/usb/core/hub.c:5089 [inline] hub_port_connect_change drivers/usb/core/hub.c:5204 [inline] port_event drivers/usb/core/hub.c:5350 [inline] hub_event+0x138e/0x3b00 drivers/usb/core/hub.c:5432 process_one_work+0x90f/0x1580 kernel/workqueue.c:2269 worker_thread+0x9b/0xe20 kernel/workqueue.c:2415 kthread+0x313/0x420 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Reported-by: syzbot+cf4f2b6c24aff0a3edf6@syzkaller.appspotmail.com Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_usb6501.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index 808ed92ed66f..ed5e42655821 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -518,6 +518,9 @@ static int ni6501_auto_attach(struct comedi_device *dev, if (!devpriv) return -ENOMEM; + mutex_init(&devpriv->mut); + usb_set_intfdata(intf, devpriv); + ret = ni6501_find_endpoints(dev); if (ret) return ret; @@ -526,9 +529,6 @@ static int ni6501_auto_attach(struct comedi_device *dev, if (ret) return ret; - mutex_init(&devpriv->mut); - usb_set_intfdata(intf, devpriv); - ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; -- cgit v1.2.3 From 8834139083cdb486cc520d2b5fbb21bdd0e534d0 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:43:02 +0100 Subject: staging: comedi: ni_usb6501: Fix possible double-free of ->usb_rx_buf commit af4b54a2e5ba18259ff9aac445bf546dd60d037e upstream. `ni6501_alloc_usb_buffers()` is called from `ni6501_auto_attach()` to allocate RX and TX buffers for USB transfers. It allocates `devpriv->usb_rx_buf` followed by `devpriv->usb_tx_buf`. If the allocation of `devpriv->usb_tx_buf` fails, it frees `devpriv->usb_rx_buf`, leaving the pointer set dangling, and returns an error. Later, `ni6501_detach()` will be called from the core comedi module code to clean up. `ni6501_detach()` also frees both `devpriv->usb_rx_buf` and `devpriv->usb_tx_buf`, but `devpriv->usb_rx_buf` may have already beed freed, leading to a double-free error. Fix it bu removing the call to `kfree(devpriv->usb_rx_buf)` from `ni6501_alloc_usb_buffers()`, relying on `ni6501_detach()` to free the memory. Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_usb6501.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index ed5e42655821..1bb1cb651349 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -463,10 +463,8 @@ static int ni6501_alloc_usb_buffers(struct comedi_device *dev) size = usb_endpoint_maxp(devpriv->ep_tx); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } -- cgit v1.2.3 From ffd87f87a9c17b705e14b227ba6afa3248f1d7e8 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 17 Apr 2019 16:10:32 +0800 Subject: ALSA: hda/realtek - add two more pin configuration sets to quirk table commit b26e36b7ef36a8a3a147b1609b2505f8a4ecf511 upstream. We have two Dell laptops which have the codec 10ec0236 and 10ec0256 respectively, the headset mic on them can't work, need to apply the quirk of ALC255_FIXUP_DELL1_MIC_NO_PRESENCE. So adding their pin configurations in the pin quirk table. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 84fae0df59e9..f061167062bc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7247,6 +7247,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60140}, {0x14, 0x90170150}, {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x02211020}), @@ -7357,6 +7359,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC, {0x14, 0x90170110}, {0x1b, 0x90a70130}, -- cgit v1.2.3 From 6ef122eb79569df65f7199c5026033250670ac85 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Apr 2019 17:06:33 +0200 Subject: ALSA: core: Fix card races between register and disconnect commit 2a3f7221acddfe1caa9ff09b3a8158c39b2fdeac upstream. There is a small race window in the card disconnection code that allows the registration of another card with the very same card id. This leads to a warning in procfs creation as caught by syzkaller. The problem is that we delete snd_cards and snd_cards_lock entries at the very beginning of the disconnection procedure. This makes the slot available to be assigned for another card object while the disconnection procedure is being processed. Then it becomes possible to issue a procfs registration with the existing file name although we check the conflict beforehand. The fix is simply to move the snd_cards and snd_cards_lock clearances at the end of the disconnection procedure. The references to these entries are merely either from the global proc files like /proc/asound/cards or from the card registration / disconnection, so it should be fine to shift at the very end. Reported-by: syzbot+48df349490c36f9f54ab@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/init.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/core/init.c b/sound/core/init.c index 4849c611c0fe..16b7cc7aa66b 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -407,14 +407,7 @@ int snd_card_disconnect(struct snd_card *card) card->shutdown = 1; spin_unlock(&card->files_lock); - /* phase 1: disable fops (user space) operations for ALSA API */ - mutex_lock(&snd_card_mutex); - snd_cards[card->number] = NULL; - clear_bit(card->number, snd_cards_lock); - mutex_unlock(&snd_card_mutex); - - /* phase 2: replace file->f_op with special dummy operations */ - + /* replace file->f_op with special dummy operations */ spin_lock(&card->files_lock); list_for_each_entry(mfile, &card->files_list, list) { /* it's critical part, use endless loop */ @@ -430,7 +423,7 @@ int snd_card_disconnect(struct snd_card *card) } spin_unlock(&card->files_lock); - /* phase 3: notify all connected devices about disconnection */ + /* notify all connected devices about disconnection */ /* at this point, they cannot respond to any calls except release() */ #if IS_ENABLED(CONFIG_SND_MIXER_OSS) @@ -446,6 +439,13 @@ int snd_card_disconnect(struct snd_card *card) device_del(&card->card_dev); card->registered = false; } + + /* disable fops (user space) operations for ALSA API */ + mutex_lock(&snd_card_mutex); + snd_cards[card->number] = NULL; + clear_bit(card->number, snd_cards_lock); + mutex_unlock(&snd_card_mutex); + #ifdef CONFIG_PM wake_up(&card->power_sleep); #endif -- cgit v1.2.3 From 95df599f95f398b0a34d081dadfdee3126e58163 Mon Sep 17 00:00:00 2001 From: KT Liao Date: Tue, 26 Mar 2019 17:28:32 -0700 Subject: Input: elan_i2c - add hardware ID for multiple Lenovo laptops commit 738c06d0e4562e0acf9f2c7438a22b2d5afc67aa upstream. There are many Lenovo laptops which need elan_i2c support, this patch adds relevant IDs to the Elan driver so that touchpads are recognized. Signed-off-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 628ef617bb2f..f9525d6f0bfe 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1339,21 +1339,46 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0600", 0 }, { "ELAN0601", 0 }, { "ELAN0602", 0 }, + { "ELAN0603", 0 }, + { "ELAN0604", 0 }, { "ELAN0605", 0 }, + { "ELAN0606", 0 }, + { "ELAN0607", 0 }, { "ELAN0608", 0 }, { "ELAN0609", 0 }, { "ELAN060B", 0 }, { "ELAN060C", 0 }, + { "ELAN060F", 0 }, + { "ELAN0610", 0 }, { "ELAN0611", 0 }, { "ELAN0612", 0 }, + { "ELAN0615", 0 }, + { "ELAN0616", 0 }, { "ELAN0617", 0 }, { "ELAN0618", 0 }, + { "ELAN0619", 0 }, + { "ELAN061A", 0 }, + { "ELAN061B", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN061E", 0 }, + { "ELAN061F", 0 }, { "ELAN0620", 0 }, { "ELAN0621", 0 }, { "ELAN0622", 0 }, + { "ELAN0623", 0 }, + { "ELAN0624", 0 }, + { "ELAN0625", 0 }, + { "ELAN0626", 0 }, + { "ELAN0627", 0 }, + { "ELAN0628", 0 }, + { "ELAN0629", 0 }, + { "ELAN062A", 0 }, + { "ELAN062B", 0 }, + { "ELAN062C", 0 }, + { "ELAN062D", 0 }, + { "ELAN0631", 0 }, + { "ELAN0632", 0 }, { "ELAN1000", 0 }, { } }; -- cgit v1.2.3 From 333a81c16e4f6aa87dce0ca8e1df0bbca787d472 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 29 Mar 2019 10:10:26 +0100 Subject: serial: sh-sci: Fix HSCIF RX sampling point adjustment commit 6b87784b53592a90d21576be8eff688b56d93cce upstream. The calculation of the sampling point has min() and max() exchanged. Fix this by using the clamp() helper instead. Fixes: 63ba1e00f178a448 ("serial: sh-sci: Support for HSCIF RX sampling point adjustment") Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulrich Hecht Reviewed-by: Wolfram Sang Acked-by: Dirk Behme Cc: stable Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 93bd90f1ff14..d440beb5cf83 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2504,7 +2504,7 @@ done: * last stop bit; we can increase the error * margin by shifting the sampling point. */ - int shift = min(-8, max(7, deviation / 2)); + int shift = clamp(deviation / 2, -8, 7); hssrr |= (shift << HSCIF_SRHP_SHIFT) & HSCIF_SRHP_MASK; -- cgit v1.2.3 From 5ae77c340c52b331e18af1a358ba7b2e3dcc3390 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 1 Apr 2019 13:25:10 +0200 Subject: serial: sh-sci: Fix HSCIF RX sampling point calculation commit ace965696da2611af759f0284e26342b7b6cec89 upstream. There are several issues with the formula used for calculating the deviation from the intended rate: 1. While min_err and last_stop are signed, srr and baud are unsigned. Hence the signed values are promoted to unsigned, which will lead to a bogus value of deviation if min_err is negative, 2. Srr is the register field value, which is one less than the actual sampling rate factor, 3. The divisions do not use rounding. Fix this by casting unsigned variables to int, adding one to srr, and using a single DIV_ROUND_CLOSEST(). Fixes: 63ba1e00f178a448 ("serial: sh-sci: Support for HSCIF RX sampling point adjustment") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mukesh Ojha Cc: stable Reviewed-by: Ulrich Hecht Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index d440beb5cf83..e9a8b79ba77e 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2497,7 +2497,9 @@ done: * center of the last stop bit in sampling clocks. */ int last_stop = bits * 2 - 1; - int deviation = min_err * srr * last_stop / 2 / baud; + int deviation = DIV_ROUND_CLOSEST(min_err * last_stop * + (int)(srr + 1), + 2 * (int)baud); if (abs(deviation) >= 2) { /* At least two sampling clocks off at the -- cgit v1.2.3 From c2116717884cf9ec6841630ea257d5c5c3a859c7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 4 Apr 2019 20:53:28 -0400 Subject: vt: fix cursor when clearing the screen commit b2ecf00631362a83744e5ec249947620db5e240c upstream. The patch a6dbe4427559 ("vt: perform safe console erase in the right order") introduced a bug. The conditional do_update_region() was replaced by a call to update_region() that does contain the conditional already, but with unwanted extra side effects such as restoring the cursor drawing. In order to reproduce the bug: - use framebuffer console with the AMDGPU driver - type "links" to start the console www browser - press 'q' and space to exit links Now the cursor will be permanently visible in the center of the screen. It will stay there until something overwrites it. The bug goes away if we change update_region() back to the conditional do_update_region(). [ nico: reworded changelog ] Signed-off-by: Mikulas Patocka Reviewed-by: Nicolas Pitre Cc: stable@vger.kernel.org Fixes: a6dbe4427559 ("vt: perform safe console erase in the right order") Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 9646ff63e77a..b6621a2e916d 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1518,7 +1518,8 @@ static void csi_J(struct vc_data *vc, int vpar) return; } scr_memsetw(start, vc->vc_video_erase_char, 2 * count); - update_region(vc, (unsigned long) start, count); + if (con_should_update(vc)) + do_update_region(vc, (unsigned long) start, count); vc->vc_need_wrap = 0; } -- cgit v1.2.3 From ad1deea5c4c93c75b8f90bb0cef9f94d3760ea84 Mon Sep 17 00:00:00 2001 From: Jaesoo Lee Date: Tue, 9 Apr 2019 17:02:22 -0700 Subject: scsi: core: set result when the command cannot be dispatched commit be549d49115422f846b6d96ee8fd7173a5f7ceb0 upstream. When SCSI blk-mq is enabled, there is a bug in handling errors in scsi_queue_rq. Specifically, the bug is not setting result field of scsi_request correctly when the dispatch of the command has been failed. Since the upper layer code including the sg_io ioctl expects to receive any error status from result field of scsi_request, the error is silently ignored and this could cause data corruptions for some applications. Fixes: d285203cf647 ("scsi: add support for a blk-mq based I/O path.") Cc: Signed-off-by: Jaesoo Lee Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 655ad26106e4..5c78710b713f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1763,8 +1763,12 @@ out_put_budget: ret = BLK_STS_DEV_RESOURCE; break; default: + if (unlikely(!scsi_device_online(sdev))) + scsi_req(req)->result = DID_NO_CONNECT << 16; + else + scsi_req(req)->result = DID_ERROR << 16; /* - * Make sure to release all allocated ressources when + * Make sure to release all allocated resources when * we hit an error, as we will never see this command * again. */ -- cgit v1.2.3 From e6200707e648c7d82c6dc45879d20502fc901d65 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 18 Apr 2019 03:40:12 -0700 Subject: Revert "scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO" commit 0228034d8e5915b98c33db35a98f5e909e848ae9 upstream. This patch clears FC_RP_STARTED flag during logoff, because of this re-login(flogi) didn't happen to the switch. This reverts commit 1550ec458e0cf1a40a170ab1f4c46e3f52860f65. Fixes: 1550ec458e0c ("scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO") Cc: # v4.18+ Signed-off-by: Saurav Kashyap Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libfc/fc_rport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index dfba4921b265..5bf61431434b 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -2162,7 +2162,6 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp) FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n", fc_rport_state(rdata)); - rdata->flags &= ~FC_RP_STARTED; fc_rport_enter_delete(rdata, RPORT_EV_STOP); mutex_unlock(&rdata->rp_mutex); kref_put(&rdata->kref, fc_rport_destroy); -- cgit v1.2.3 From 33a3fff44a0f926269d46eef3e5547bf54fe77e7 Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Mon, 8 Apr 2019 13:13:34 +0200 Subject: i3c: dw: Fix dw_i3c_master_disable controller by using correct mask commit 907621e94d49b85cd76f13110eceb940a182c69e upstream. The controller was being disabled incorrectly. The correct way is to clear the DEV_CTRL_ENABLE bit. Fix this by clearing this bit. Cc: Boris Brezillon Cc: Fixes: 1dd728f5d4d4 ("i3c: master: Add driver for Synopsys DesignWare IP") Signed-off-by: Vitor Soares Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/dw-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index bb03079fbade..ec385fbfef4c 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -300,7 +300,7 @@ to_dw_i3c_master(struct i3c_master_controller *master) static void dw_i3c_master_disable(struct dw_i3c_master *master) { - writel(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_ENABLE, + writel(readl(master->regs + DEVICE_CTRL) & ~DEV_CTRL_ENABLE, master->regs + DEVICE_CTRL); } -- cgit v1.2.3 From 922270b4f0182c0f00284881b2e4eb98a76d1232 Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Tue, 9 Apr 2019 18:59:59 +0200 Subject: i3c: Fix the verification of random PID commit 9752c37cc89f43675e70cf9acff23519fa84b48c upstream. The validation of random PID should be done by checking the boardinfo->pid instead of info.pid which is empty. Doing the change the info struture declaration is no longer necessary. Cc: Boris Brezillon Cc: Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Vitor Soares Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 2dc628d4f1ae..1412abcff010 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1980,7 +1980,6 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, { struct i3c_dev_boardinfo *boardinfo; struct device *dev = &master->dev; - struct i3c_device_info info = { }; enum i3c_addr_slot_status addrstatus; u32 init_dyn_addr = 0; @@ -2012,8 +2011,8 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, boardinfo->pid = ((u64)reg[1] << 32) | reg[2]; - if ((info.pid & GENMASK_ULL(63, 48)) || - I3C_PID_RND_LOWER_32BITS(info.pid)) + if ((boardinfo->pid & GENMASK_ULL(63, 48)) || + I3C_PID_RND_LOWER_32BITS(boardinfo->pid)) return -EINVAL; boardinfo->init_dyn_addr = init_dyn_addr; -- cgit v1.2.3 From e58a114d07465baef1e7476242a44cbb9d108825 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 20 Mar 2019 08:12:28 +0000 Subject: Revert "svm: Fix AVIC incomplete IPI emulation" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4a58038b9e420276157785afa0a0bbb4b9bc2265 upstream. This reverts commit bb218fbcfaaa3b115d4cd7a43c0ca164f3a96e57. As Oren Twaig pointed out the old discussion: https://patchwork.kernel.org/patch/8292231/ that the change coud potentially cause an extra IPI to be sent to the destination vcpu because the AVIC hardware already set the IRR bit before the incomplete IPI #VMEXIT with id=1 (target vcpu is not running). Since writting to ICR and ICR2 will also set the IRR. If something triggers the destination vcpu to get scheduled before the emulation finishes, then this could result in an additional IPI. Also, the issue mentioned in the commit bb218fbcfaaa was misdiagnosed. Cc: Radim Krčmář Cc: Paolo Bonzini Reported-by: Oren Twaig Signed-off-by: Suravee Suthikulpanit Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 538fcadda350..d5f601eb57e4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4515,14 +4515,25 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) kvm_lapic_reg_write(apic, APIC_ICR, icrl); break; case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { + int i; + struct kvm_vcpu *vcpu; + struct kvm *kvm = svm->vcpu.kvm; struct kvm_lapic *apic = svm->vcpu.arch.apic; /* - * Update ICR high and low, then emulate sending IPI, - * which is handled when writing APIC_ICR. + * At this point, we expect that the AVIC HW has already + * set the appropriate IRR bits on the valid target + * vcpus. So, we just need to kick the appropriate vcpu. */ - kvm_lapic_reg_write(apic, APIC_ICR2, icrh); - kvm_lapic_reg_write(apic, APIC_ICR, icrl); + kvm_for_each_vcpu(i, vcpu, kvm) { + bool m = kvm_apic_match_dest(vcpu, apic, + icrl & KVM_APIC_SHORT_MASK, + GET_APIC_DEST_FIELD(icrh), + icrl & KVM_APIC_DEST_MASK); + + if (m && !avic_vcpu_is_running(vcpu)) + kvm_vcpu_wake_up(vcpu); + } break; } case AVIC_IPI_FAILURE_INVALID_TARGET: -- cgit v1.2.3 From 1eb719f09f7e319e79f6abf2b9e8c0dcc1c477b5 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 18 Apr 2019 17:50:52 -0700 Subject: coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping commit 04f5866e41fb70690e28397487d8bd8eea7d712a upstream. The core dumping code has always run without holding the mmap_sem for writing, despite that is the only way to ensure that the entire vma layout will not change from under it. Only using some signal serialization on the processes belonging to the mm is not nearly enough. This was pointed out earlier. For example in Hugh's post from Jul 2017: https://lkml.kernel.org/r/alpine.LSU.2.11.1707191716030.2055@eggly.anvils "Not strictly relevant here, but a related note: I was very surprised to discover, only quite recently, how handle_mm_fault() may be called without down_read(mmap_sem) - when core dumping. That seems a misguided optimization to me, which would also be nice to correct" In particular because the growsdown and growsup can move the vm_start/vm_end the various loops the core dump does around the vma will not be consistent if page faults can happen concurrently. Pretty much all users calling mmget_not_zero()/get_task_mm() and then taking the mmap_sem had the potential to introduce unexpected side effects in the core dumping code. Adding mmap_sem for writing around the ->core_dump invocation is a viable long term fix, but it requires removing all copy user and page faults and to replace them with get_dump_page() for all binary formats which is not suitable as a short term fix. For the time being this solution manually covers the places that can confuse the core dump either by altering the vma layout or the vma flags while it runs. Once ->core_dump runs under mmap_sem for writing the function mmget_still_valid() can be dropped. Allowing mmap_sem protected sections to run in parallel with the coredump provides some minor parallelism advantage to the swapoff code (which seems to be safe enough by never mangling any vma field and can keep doing swapins in parallel to the core dumping) and to some other corner case. In order to facilitate the backporting I added "Fixes: 86039bd3b4e6" however the side effect of this same race condition in /proc/pid/mem should be reproducible since before 2.6.12-rc2 so I couldn't add any other "Fixes:" because there's no hash beyond the git genesis commit. Because find_extend_vma() is the only location outside of the process context that could modify the "mm" structures under mmap_sem for reading, by adding the mmget_still_valid() check to it, all other cases that take the mmap_sem for reading don't need the new check after mmget_not_zero()/get_task_mm(). The expand_stack() in page fault context also doesn't need the new check, because all tasks under core dumping are frozen. Link: http://lkml.kernel.org/r/20190325224949.11068-1-aarcange@redhat.com Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization") Signed-off-by: Andrea Arcangeli Reported-by: Jann Horn Suggested-by: Oleg Nesterov Acked-by: Peter Xu Reviewed-by: Mike Rapoport Reviewed-by: Oleg Nesterov Reviewed-by: Jann Horn Acked-by: Jason Gunthorpe Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_main.c | 3 +++ fs/proc/task_mmu.c | 18 ++++++++++++++++++ fs/userfaultfd.c | 9 +++++++++ include/linux/sched/mm.h | 21 +++++++++++++++++++++ mm/mmap.c | 7 ++++++- 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 5f366838b7ff..e2a4570a47e8 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -992,6 +992,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) * will only be one mm, so no big deal. */ down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto skip_mm; mutex_lock(&ufile->umap_lock); list_for_each_entry_safe (priv, next_priv, &ufile->umaps, list) { @@ -1006,6 +1008,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); } mutex_unlock(&ufile->umap_lock); + skip_mm: up_write(&mm->mmap_sem); mmput(mm); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 85b0ef890b28..91bd2ff0c62c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1141,6 +1141,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, count = -EINTR; goto out_mm; } + /* + * Avoid to modify vma->vm_flags + * without locked ops while the + * coredump reads the vm_flags. + */ + if (!mmget_still_valid(mm)) { + /* + * Silently return "count" + * like if get_task_mm() + * failed. FIXME: should this + * function have returned + * -ESRCH if get_task_mm() + * failed like if + * get_proc_task() fails? + */ + up_write(&mm->mmap_sem); + goto out_mm; + } for (vma = mm->mmap; vma; vma = vma->vm_next) { vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 89800fc7dc9d..f5de1e726356 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -629,6 +629,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); + /* no task can run (and in turn coredump) yet */ + VM_WARN_ON(!mmget_still_valid(mm)); for (vma = mm->mmap; vma; vma = vma->vm_next) if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; @@ -883,6 +885,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * taking the mmap_sem for writing. */ down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto skip_mm; prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { cond_resched(); @@ -905,6 +909,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } +skip_mm: up_write(&mm->mmap_sem); mmput(mm); wakeup: @@ -1333,6 +1338,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, goto out; down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto out_unlock; vma = find_vma_prev(mm, start, &prev); if (!vma) goto out_unlock; @@ -1520,6 +1527,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, goto out; down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto out_unlock; vma = find_vma_prev(mm, start, &prev); if (!vma) goto out_unlock; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 3bfa6a0cbba4..c1dbb737a36c 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,27 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +/* + * This has to be called after a get_task_mm()/mmget_not_zero() + * followed by taking the mmap_sem for writing before modifying the + * vmas or anything the coredump pretends not to change from under it. + * + * NOTE: find_extend_vma() called from GUP context is the only place + * that can modify the "mm" (notably the vm_start/end) under mmap_sem + * for reading and outside the context of the process, so it is also + * the only case that holds the mmap_sem for reading that must call + * this function. Generally if the mmap_sem is hold for reading + * there's no need of this check after get_task_mm()/mmget_not_zero(). + * + * This function can be obsoleted and the check can be removed, after + * the coredump code will hold the mmap_sem for writing before + * invoking the ->core_dump methods. + */ +static inline bool mmget_still_valid(struct mm_struct *mm) +{ + return likely(!mm->core_state); +} + /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. diff --git a/mm/mmap.c b/mm/mmap.c index fc1809b1bed6..da9236a5022e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -2526,7 +2527,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (!prev || expand_stack(prev, addr)) + /* don't alter vm_end if the coredump is running */ + if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); @@ -2552,6 +2554,9 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) return vma; if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; + /* don't alter vm_start if the coredump is running */ + if (!mmget_still_valid(mm)) + return NULL; start = vma->vm_start; if (expand_stack(vma, addr)) return NULL; -- cgit v1.2.3 From 2f6919fdc23bc27d7170f1b2d46ebfc658f60217 Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Fri, 12 Apr 2019 15:55:39 +0800 Subject: x86/kvm: move kvm_load/put_guest_xcr0 into atomic context commit 1811d979c71621aafc7b879477202d286f7e863b upstream. guest xcr0 could leak into host when MCE happens in guest mode. Because do_machine_check() could schedule out at a few places. For example: kvm_load_guest_xcr0 ... kvm_x86_ops->run(vcpu) { vmx_vcpu_run vmx_complete_atomic_exit kvm_machine_check do_machine_check do_memory_failure memory_failure lock_page In this case, host_xcr0 is 0x2ff, guest vcpu xcr0 is 0xff. After schedule out, host cpu has guest xcr0 loaded (0xff). In __switch_to { switch_fpu_finish copy_kernel_to_fpregs XRSTORS If any bit i in XSTATE_BV[i] == 1 and xcr0[i] == 0, XRSTORS will generate #GP (In this case, bit 9). Then ex_handler_fprestore kicks in and tries to reinitialize fpu by restoring init fpu state. Same story as last #GP, except we get DOUBLE FAULT this time. Cc: stable@vger.kernel.org Signed-off-by: WANG Chao Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 10 ++++------ arch/x86/kvm/x86.h | 2 ++ 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d5f601eb57e4..516c1de03d47 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5634,6 +5634,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; clgi(); + kvm_load_guest_xcr0(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -5779,6 +5780,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); + kvm_put_guest_xcr0(vcpu); stgi(); /* Any pending NMI will happen here */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a0a770816429..34499081022c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6548,6 +6548,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); + kvm_load_guest_xcr0(vcpu); + if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && vcpu->arch.pkru != vmx->host_pkru) @@ -6635,6 +6637,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } + kvm_put_guest_xcr0(vcpu); + vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7ee802a92bc8..2db58067bb59 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); -static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) { if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && !vcpu->guest_xcr0_loaded) { @@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 1; } } +EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); -static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) { if (vcpu->guest_xcr0_loaded) { if (vcpu->arch.xcr0 != host_xcr0) @@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 0; } } +EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { @@ -7856,8 +7858,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } - kvm_load_guest_xcr0(vcpu); - if (req_immediate_exit) { kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_x86_ops->request_immediate_exit(vcpu); @@ -7910,8 +7910,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_put_guest_xcr0(vcpu); - kvm_before_interrupt(vcpu); kvm_x86_ops->handle_external_intr(vcpu); kvm_after_interrupt(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 20ede17202bf..de3d46769ee3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) __this_cpu_write(current_vcpu, NULL); } +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); #endif -- cgit v1.2.3 From 46c4f2375638122a08b7ab8af5bc19bec74d28b4 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 3 Apr 2019 15:58:16 -0500 Subject: ipmi: fix sleep-in-atomic in free_user at cleanup SRCU user->release_barrier commit 3b9a907223d7f6b9d1dadea29436842ae9bcd76d upstream. free_user() could be called in atomic context. This patch pushed the free operation off into a workqueue. Example: BUG: sleeping function called from invalid context at kernel/workqueue.c:2856 in_atomic(): 1, irqs_disabled(): 0, pid: 177, name: ksoftirqd/27 CPU: 27 PID: 177 Comm: ksoftirqd/27 Not tainted 4.19.25-3 #1 Hardware name: AIC 1S-HV26-08/MB-DPSB04-06, BIOS IVYBV060 10/21/2015 Call Trace: dump_stack+0x5c/0x7b ___might_sleep+0xec/0x110 __flush_work+0x48/0x1f0 ? try_to_del_timer_sync+0x4d/0x80 _cleanup_srcu_struct+0x104/0x140 free_user+0x18/0x30 [ipmi_msghandler] ipmi_free_recv_msg+0x3a/0x50 [ipmi_msghandler] deliver_response+0xbd/0xd0 [ipmi_msghandler] deliver_local_response+0xe/0x30 [ipmi_msghandler] handle_one_recv_msg+0x163/0xc80 [ipmi_msghandler] ? dequeue_entity+0xa0/0x960 handle_new_recv_msgs+0x15c/0x1f0 [ipmi_msghandler] tasklet_action_common.isra.22+0x103/0x120 __do_softirq+0xf8/0x2d7 run_ksoftirqd+0x26/0x50 smpboot_thread_fn+0x11d/0x1e0 kthread+0x103/0x140 ? sort_range+0x20/0x20 ? kthread_destroy_worker+0x40/0x40 ret_from_fork+0x1f/0x40 Fixes: 77f8269606bf ("ipmi: fix use-after-free of user->release_barrier.rda") Reported-by: Konstantin Khlebnikov Signed-off-by: Corey Minyard Cc: stable@vger.kernel.org # 5.0 Cc: Yang Yingliang Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_msghandler.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c518659b4d9f..ff9dd9adf803 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -214,6 +214,9 @@ struct ipmi_user { /* Does this interface receive IPMI events? */ bool gets_events; + + /* Free must run in process context for RCU cleanup. */ + struct work_struct remove_work; }; static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index) @@ -1079,6 +1082,15 @@ static int intf_err_seq(struct ipmi_smi *intf, } +static void free_user_work(struct work_struct *work) +{ + struct ipmi_user *user = container_of(work, struct ipmi_user, + remove_work); + + cleanup_srcu_struct(&user->release_barrier); + kfree(user); +} + int ipmi_create_user(unsigned int if_num, const struct ipmi_user_hndl *handler, void *handler_data, @@ -1122,6 +1134,8 @@ int ipmi_create_user(unsigned int if_num, goto out_kfree; found: + INIT_WORK(&new_user->remove_work, free_user_work); + rv = init_srcu_struct(&new_user->release_barrier); if (rv) goto out_kfree; @@ -1184,8 +1198,9 @@ EXPORT_SYMBOL(ipmi_get_smi_info); static void free_user(struct kref *ref) { struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); - cleanup_srcu_struct(&user->release_barrier); - kfree(user); + + /* SRCU cleanup must happen in task context. */ + schedule_work(&user->remove_work); } static void _ipmi_destroy_user(struct ipmi_user *user) -- cgit v1.2.3 From 8223263d7c44251e984ba982e02fc2cbb6704d3d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Mar 2019 13:04:11 -0700 Subject: crypto: x86/poly1305 - fix overflow during partial reduction commit 678cce4019d746da6c680c48ba9e6d417803e127 upstream. The x86_64 implementation of Poly1305 produces the wrong result on some inputs because poly1305_4block_avx2() incorrectly assumes that when partially reducing the accumulator, the bits carried from limb 'd4' to limb 'h0' fit in a 32-bit integer. This is true for poly1305-generic which processes only one block at a time. However, it's not true for the AVX2 implementation, which processes 4 blocks at a time and therefore can produce intermediate limbs about 4x larger. Fix it by making the relevant calculations use 64-bit arithmetic rather than 32-bit. Note that most of the carries already used 64-bit arithmetic, but the d4 -> h0 carry was different for some reason. To be safe I also made the same change to the corresponding SSE2 code, though that only operates on 1 or 2 blocks at a time. I don't think it's really needed for poly1305_block_sse2(), but it doesn't hurt because it's already x86_64 code. It *might* be needed for poly1305_2block_sse2(), but overflows aren't easy to reproduce there. This bug was originally detected by my patches that improve testmgr to fuzz algorithms against their generic implementation. But also add a test vector which reproduces it directly (in the AVX2 case). Fixes: b1ccc8f4b631 ("crypto: poly1305 - Add a four block AVX2 variant for x86_64") Fixes: c70f4abef07a ("crypto: poly1305 - Add a SSE2 SIMD variant for x86_64") Cc: # v4.3+ Cc: Martin Willi Cc: Jason A. Donenfeld Signed-off-by: Eric Biggers Reviewed-by: Martin Willi Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/poly1305-avx2-x86_64.S | 14 +++++++---- arch/x86/crypto/poly1305-sse2-x86_64.S | 22 ++++++++++------- crypto/testmgr.h | 44 +++++++++++++++++++++++++++++++++- 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index 3b6e70d085da..8457cdd47f75 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2) vpaddq t2,t1,t1 vmovq t1x,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index c88c670cb5fc..5851c7418fb7 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx @@ -520,6 +520,12 @@ ENTRY(poly1305_2block_sse2) paddq t2,t1 movq t1,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -558,16 +564,16 @@ ENTRY(poly1305_2block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/crypto/testmgr.h b/crypto/testmgr.h index ca8e8ebef309..db496aa360a3 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -5706,7 +5706,49 @@ static const struct hash_testvec poly1305_tv_template[] = { .psize = 80, .digest = "\x13\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00", - }, + }, { /* Regression test for overflow in AVX2 implementation */ + .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff", + .psize = 300, + .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8" + "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1", + } }; /* NHPoly1305 test vectors from https://github.com/google/adiantum */ -- cgit v1.2.3 From 508b773175c71c16f81bbaf6bc9e9be059ad545c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 2 Apr 2019 09:26:52 +0200 Subject: drm/ttm: fix out-of-bounds read in ttm_put_pages() v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a66477b0efe511d98dde3e4aaeb189790e6f0a39 upstream. When ttm_put_pages() tries to figure out whether it's dealing with transparent hugepages, it just reads past the bounds of the pages array without a check. v2: simplify the test if enough pages are left in the array (Christian). Signed-off-by: Jann Horn Signed-off-by: Christian König Fixes: 5c42c64f7d54 ("drm/ttm: fix the fix for huge compound pages") Cc: stable@vger.kernel.org Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index f841accc2c00..f77c81db161b 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -730,7 +730,8 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, } #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (!(flags & TTM_PAGE_FLAG_DMA32)) { + if (!(flags & TTM_PAGE_FLAG_DMA32) && + (npages - i) >= HPAGE_PMD_NR) { for (j = 0; j < HPAGE_PMD_NR; ++j) if (p++ != pages[i + j]) break; @@ -759,7 +760,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, unsigned max_size, n2free; spin_lock_irqsave(&huge->lock, irq_flags); - while (i < npages) { + while ((npages - i) >= HPAGE_PMD_NR) { struct page *p = pages[i]; unsigned j; -- cgit v1.2.3 From f89f9d9636f04158d31cb7cce73467a844daf0d7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Apr 2019 00:21:21 -0700 Subject: arm64: futex: Restore oldval initialization to work around buggy compilers commit ff8acf929014b7f87315588e0daf8597c8aa9d1c upstream. Commit 045afc24124d ("arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value") removed oldval's zero initialization in arch_futex_atomic_op_inuser because it is not necessary. Unfortunately, Android's arm64 GCC 4.9.4 [1] does not agree: ../kernel/futex.c: In function 'do_futex': ../kernel/futex.c:1658:17: warning: 'oldval' may be used uninitialized in this function [-Wmaybe-uninitialized] return oldval == cmparg; ^ In file included from ../kernel/futex.c:73:0: ../arch/arm64/include/asm/futex.h:53:6: note: 'oldval' was declared here int oldval, ret, tmp; ^ GCC fails to follow that when ret is non-zero, futex_atomic_op_inuser returns right away, avoiding the uninitialized use that it claims. Restoring the zero initialization works around this issue. [1]: https://android.googlesource.com/platform/prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/ Cc: stable@vger.kernel.org Fixes: 045afc24124d ("arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Nathan Chancellor Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index e1d95f08f8e1..c7e1a7837706 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -50,7 +50,7 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval, ret, tmp; + int oldval = 0, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); -- cgit v1.2.3 From fbe6f067a3adf64e749c27832f7f6f7a6e07b988 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:49:52 +0900 Subject: x86/kprobes: Verify stack frame on kretprobe commit 3ff9c075cc767b3060bdac12da72fc94dd7da1b8 upstream. Verify the stack frame pointer on kretprobe trampoline handler, If the stack frame pointer does not match, it skips the wrong entry and tries to find correct one. This can happen if user puts the kretprobe on the function which can be used in the path of ftrace user-function call. Such functions should not be probed, so this adds a warning message that reports which function should be blacklisted. Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/155094059185.6137.15527904013362842072.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 26 ++++++++++++++++++++++++++ include/linux/kprobes.h | 1 + 2 files changed, 27 insertions(+) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4ba75afba527..69b6400d1ce2 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -569,6 +569,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) unsigned long *sara = stack_addr(regs); ri->ret_addr = (kprobe_opcode_t *) *sara; + ri->fp = sara; /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; @@ -759,15 +760,21 @@ static __used void *trampoline_handler(struct pt_regs *regs) unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; + void *frame_pointer; + bool skipped = false; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; + /* On x86-64, we use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; #else regs->cs = __KERNEL_CS | get_kernel_rpl(); regs->gs = 0; + /* On x86-32, we use pt_regs->flags for return address holder. */ + frame_pointer = ®s->flags; #endif regs->ip = trampoline_address; regs->orig_ax = ~0UL; @@ -789,8 +796,25 @@ static __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + /* + * Return probes must be pushed on this hash list correct + * order (same as return order) so that it can be poped + * correctly. However, if we find it is pushed it incorrect + * order, this means we find a function which should not be + * probed, because the wrong order entry is pushed on the + * path of processing other kretprobe itself. + */ + if (ri->fp != frame_pointer) { + if (!skipped) + pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n"); + skipped = true; + continue; + } orig_ret_address = (unsigned long)ri->ret_addr; + if (skipped) + pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n", + ri->rp->kp.addr); if (orig_ret_address != trampoline_address) /* @@ -808,6 +832,8 @@ static __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + if (ri->fp != frame_pointer) + continue; orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e07e91daaacc..72ff78c33033 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -173,6 +173,7 @@ struct kretprobe_instance { struct kretprobe *rp; kprobe_opcode_t *ret_addr; struct task_struct *task; + void *fp; char data[0]; }; -- cgit v1.2.3 From 7b91f26c45b65ca77ad0e4ac1b5832c75f274ac6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:50:20 +0900 Subject: kprobes: Mark ftrace mcount handler functions nokprobe commit fabe38ab6b2bd9418350284c63825f13b8a6abba upstream. Mark ftrace mcount handler functions nokprobe since probing on these functions with kretprobe pushes return address incorrectly on kretprobe shadow stack. Reported-by: Francis Deslauriers Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Acked-by: Steven Rostedt (VMware) Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/155094062044.6137.6419622920568680640.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index aac7847c0214..f546ae5102e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -6216,7 +6217,7 @@ void ftrace_reset_array_ops(struct trace_array *tr) tr->ops->func = ftrace_stub; } -static inline void +static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) { @@ -6276,11 +6277,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, { __ftrace_ops_list_func(ip, parent_ip, NULL, regs); } +NOKPROBE_SYMBOL(ftrace_ops_list_func); #else static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } +NOKPROBE_SYMBOL(ftrace_ops_no_ops); #endif /* @@ -6307,6 +6310,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, preempt_enable_notrace(); trace_clear_recursion(bit); } +NOKPROBE_SYMBOL(ftrace_ops_assist_func); /** * ftrace_ops_get_func - get the function a trampoline should call -- cgit v1.2.3 From ca61e51567a7bb1de157ad0964b35e1673296f59 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:50:49 +0900 Subject: x86/kprobes: Avoid kretprobe recursion bug commit b191fa96ea6dc00d331dcc28c1f7db5e075693a0 upstream. Avoid kretprobe recursion loop bg by setting a dummy kprobes to current_kprobe per-CPU variable. This bug has been introduced with the asm-coded trampoline code, since previously it used another kprobe for hooking the function return placeholder (which only has a nop) and trampoline handler was called from that kprobe. This revives the old lost kprobe again. With this fix, we don't see deadlock anymore. And you can see that all inner-called kretprobe are skipped. event_1 235 0 event_2 19375 19612 The 1st column is recorded count and the 2nd is missed count. Above shows (event_1 rec) + (event_2 rec) ~= (event_2 missed) (some difference are here because the counter is racy) Reported-by: Andrea Righi Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: c9becf58d935 ("[PATCH] kretprobe: kretprobe-booster") Link: http://lkml.kernel.org/r/155094064889.6137.972160690963039.stgit@devbox Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 69b6400d1ce2..f4b954ff5b89 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -749,11 +749,16 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); +static struct kprobe kretprobe_kprobe = { + .addr = (void *)kretprobe_trampoline, +}; + /* * Called from kretprobe_trampoline */ static __used void *trampoline_handler(struct pt_regs *regs) { + struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -763,6 +768,17 @@ static __used void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; + preempt_disable(); + + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never run in kprobe handler, kprobe must not + * be running at this point. + */ + kcb = get_kprobe_ctlblk(); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ @@ -838,10 +854,9 @@ static __used void *trampoline_handler(struct pt_regs *regs) orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); } recycle_rp_inst(ri, &empty_rp); @@ -857,6 +872,9 @@ static __used void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); -- cgit v1.2.3 From 44aa331f99b4ee0cc0713c907113109b6936e8ce Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 15 Apr 2019 15:01:25 +0900 Subject: kprobes: Fix error check when reusing optimized probes commit 5f843ed415581cfad4ef8fefe31c138a8346ca8a upstream. The following commit introduced a bug in one of our error paths: 819319fc9346 ("kprobes: Return error if we fail to reuse kprobe instead of BUG_ON()") it missed to handle the return value of kprobe_optready() as error-value. In reality, the kprobe_optready() returns a bool result, so "true" case must be passed instead of 0. This causes some errors on kprobe boot-time selftests on ARM: [ ] Beginning kprobe tests... [ ] Probe ARM code [ ] kprobe [ ] kretprobe [ ] ARM instruction simulation [ ] Check decoding tables [ ] Run test cases [ ] FAIL: test_case_handler not run [ ] FAIL: Test andge r10, r11, r14, asr r7 [ ] FAIL: Scenario 11 ... [ ] FAIL: Scenario 7 [ ] Total instruction simulation tests=1631, pass=1433 fail=198 [ ] kprobe tests failed This can happen if an optimized probe is unregistered and next kprobe is registered on same address until the previous probe is not reclaimed. If this happens, a hidden aggregated probe may be kept in memory, and no new kprobe can probe same address. Also, in that case register_kprobe() will return "1" instead of minus error value, which can mislead caller logic. Signed-off-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Naveen N . Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v5.0+ Fixes: 819319fc9346 ("kprobes: Return error if we fail to reuse kprobe instead of BUG_ON()") Link: http://lkml.kernel.org/r/155530808559.32517.539898325433642204.stgit@devnote2 Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/kprobes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f4ddfdd2d07e..de78d1b998f8 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -709,7 +709,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) static int reuse_unused_kprobe(struct kprobe *ap) { struct optimized_kprobe *op; - int ret; /* * Unused kprobe MUST be on the way of delayed unoptimizing (means @@ -720,9 +719,8 @@ static int reuse_unused_kprobe(struct kprobe *ap) /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; /* Optimize it again (remove from op->list) */ - ret = kprobe_optready(ap); - if (ret) - return ret; + if (!kprobe_optready(ap)) + return -EINVAL; optimize_kprobe(ap); return 0; -- cgit v1.2.3 From 9a0748c5518877db31c30e986edf976eca141ab6 Mon Sep 17 00:00:00 2001 From: Vijayakumar Durai Date: Wed, 27 Mar 2019 11:03:17 +0100 Subject: rt2x00: do not increment sequence number while re-transmitting commit 746ba11f170603bf1eaade817553a6c2e9135bbe upstream. Currently rt2x00 devices retransmit the management frames with incremented sequence number if hardware is assigning the sequence. This is HW bug fixed already for non-QOS data frames, but it should be fixed for management frames except beacon. Without fix retransmitted frames have wrong SN: AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1648, FN=0, Flags=........C Frame is not being retransmitted 1648 1 AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1649, FN=0, Flags=....R...C Frame is being retransmitted 1649 1 AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1650, FN=0, Flags=....R...C Frame is being retransmitted 1650 1 With the fix SN stays correctly the same: 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=........C 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=....R...C 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=....R...C Cc: stable@vger.kernel.org Signed-off-by: Vijayakumar Durai [sgruszka: simplify code, change comments and changelog] Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 1 - drivers/net/wireless/ralink/rt2x00/rt2x00mac.c | 10 ---------- drivers/net/wireless/ralink/rt2x00/rt2x00queue.c | 15 +++++++++------ 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 4b1744e9fb78..50b92ca92bd7 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -673,7 +673,6 @@ enum rt2x00_state_flags { CONFIG_CHANNEL_HT40, CONFIG_POWERSAVING, CONFIG_HT_DISABLED, - CONFIG_QOS_DISABLED, CONFIG_MONITORING, /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 2825560e2424..e8462f25d252 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -642,18 +642,8 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, rt2x00dev->intf_associated--; rt2x00leds_led_assoc(rt2x00dev, !!rt2x00dev->intf_associated); - - clear_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); } - /* - * Check for access point which do not support 802.11e . We have to - * generate data frames sequence number in S/W for such AP, because - * of H/W bug. - */ - if (changes & BSS_CHANGED_QOS && !bss_conf->qos) - set_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); - /* * When the erp information has changed, we should perform * additional configuration steps. For all other changes we are done. diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c index 92ddc19e7bf7..4834b4eb0206 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c @@ -201,15 +201,18 @@ static void rt2x00queue_create_tx_descriptor_seq(struct rt2x00_dev *rt2x00dev, if (!rt2x00_has_cap_flag(rt2x00dev, REQUIRE_SW_SEQNO)) { /* * rt2800 has a H/W (or F/W) bug, device incorrectly increase - * seqno on retransmited data (non-QOS) frames. To workaround - * the problem let's generate seqno in software if QOS is - * disabled. + * seqno on retransmitted data (non-QOS) and management frames. + * To workaround the problem let's generate seqno in software. + * Except for beacons which are transmitted periodically by H/W + * hence hardware has to assign seqno for them. */ - if (test_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags)) - __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); - else + if (ieee80211_is_beacon(hdr->frame_control)) { + __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); /* H/W will generate sequence number */ return; + } + + __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); } /* -- cgit v1.2.3 From b1db090fb04676a1c51accf3df76f9105a6e17e0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 1 Mar 2019 14:48:37 +0100 Subject: mac80211: do not call driver wake_tx_queue op during reconfig commit 4856bfd230985e43e84c26473c91028ff0a533bd upstream. There are several scenarios in which mac80211 can call drv_wake_tx_queue after ieee80211_restart_hw has been called and has not yet completed. Driver private structs are considered uninitialized until mac80211 has uploaded the vifs, stations and keys again, so using private tx queue data during that time is not safe. The driver can also not rely on drv_reconfig_complete to figure out when it is safe to accept drv_wake_tx_queue calls again, because it is only called after all tx queues are woken again. To fix this, bail out early in drv_wake_tx_queue if local->in_reconfig is set. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/driver-ops.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 3e0d5922a440..a9c1d6e3cdae 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1166,6 +1166,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + if (local->in_reconfig) + return; + if (!check_sdata_in_driver(sdata)) return; -- cgit v1.2.3 From 5b05d7d6fd5b0a15fd143137561b53ed9f80b4d1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Mar 2019 15:51:45 -0800 Subject: s390/mem_detect: Use IS_ENABLED(CONFIG_BLK_DEV_INITRD) commit 2d4ea4b95cae3133de6b18ec5d5a42ee824fa0ef upstream. IS_ENABLED should generally use CONFIG_ prefaced symbols and it doesn't appear as if there is a BLK_DEV_INITRD define. Cc: # 4.20 Signed-off-by: Joe Perches Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/boot/mem_detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 4cb771ba13fa..5d316fe40480 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -25,7 +25,7 @@ static void *mem_detect_alloc_extended(void) { unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64)); - if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && INITRD_START < offset + ENTRIES_EXTENDED_MAX) offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64)); -- cgit v1.2.3 From 22cc6e1b4fc0ac31b8a18189a06c6630e0e1af76 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Apr 2019 14:54:40 -0500 Subject: drm/amdgpu/gmc9: fix VM_L2_CNTL3 programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1925e7d3d4677e681cc2e878c2bdbeaee988c8e2 upstream. Got accidently dropped when 2+1 level support was added. Fixes: 6a42fd6fbf534096 ("drm/amdgpu: implement 2+1 PD support for Raven v3") Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index d0d966d6080a..1696644ec022 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -182,6 +182,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); } + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); -- cgit v1.2.3 From 35f288b72f64451461d38f46499061ed46878e37 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 21 Mar 2019 21:15:22 +0000 Subject: perf/x86/amd: Add event map for AMD Family 17h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3fe3331bb285700ab2253dbb07f8e478fcea2f1b upstream. Family 17h differs from prior families by: - Does not support an L2 cache miss event - It has re-enumerated PMC counters for: - L2 cache references - front & back end stalled cycles So we add a new amd_f17h_perfmon_event_map[] so that the generic perf event names will resolve to the correct h/w events on family 17h and above processors. Reference sections 2.1.13.3.3 (stalls) and 2.1.13.3.6 (L2): https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf Signed-off-by: Kim Phillips Cc: # v4.9+ Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Janakarajan Natarajan Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Pu Wen Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: e40ed1542dd7 ("perf/x86: Add perf support for AMD family-17h processors") [ Improved the formatting a bit. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/core.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 0ecfac84ba91..d45f3fbd232e 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -117,22 +117,39 @@ static __initconst const u64 amd_hw_cache_event_ids }; /* - * AMD Performance Monitor K7 and later. + * AMD Performance Monitor K7 and later, up to and including Family 16h: */ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, - [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ +}; + +/* + * AMD Performance Monitor Family 17h and later: + */ +static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; static u64 amd_pmu_event_map(int hw_event) { + if (boot_cpu_data.x86 >= 0x17) + return amd_f17h_perfmon_event_map[hw_event]; + return amd_perfmon_event_map[hw_event]; } -- cgit v1.2.3 From 5e34d62b59ddcba0de3835f44406a9d5bfc0a8fc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:43 -0700 Subject: x86/cpu/bugs: Use __initconst for 'const' init data commit 1de7edbb59c8f1b46071f66c5c97b8a59569eb51 upstream. Some of the recently added const tables use __initdata which causes section attribute conflicts. Use __initconst instead. Fixes: fa1202ef2243 ("x86/speculation: Add command line control") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190330004743.29541-9-andi@firstfloor.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01874d54f4fd..482383c2b184 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -275,7 +275,7 @@ static const struct { const char *option; enum spectre_v2_user_cmd cmd; bool secure; -} v2_user_options[] __initdata = { +} v2_user_options[] __initconst = { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, @@ -419,7 +419,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] __initdata = { +} mitigation_options[] __initconst = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -658,7 +658,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] __initdata = { +} ssb_mitigation_options[] __initconst = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -- cgit v1.2.3 From 19867049334b6cb5eb5d2c84c84b2a863cea886f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:44:58 -0700 Subject: perf/x86: Fix incorrect PEBS_REGS commit 9d5dcc93a6ddfc78124f006ccd3637ce070ef2fc upstream. PEBS_REGS used as mask for the supported registers for large PEBS. However, the mask cannot filter the sample_regs_user/sample_regs_intr correctly. (1ULL << PERF_REG_X86_*) should be used to replace PERF_REG_X86_*, which is only the index. Rename PEBS_REGS to PEBS_GP_REGS, because the mask is only for general purpose registers. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Fixes: 2fe1bc1f501d ("perf/x86: Enable free running PEBS for REGS_USER/INTR") Link: https://lkml.kernel.org/r/20190402194509.2832-2-kan.liang@linux.intel.com [ Renamed it to PEBS_GP_REGS - as 'GPRS' is used elsewhere ;-) ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/perf_event.h | 38 +++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2480feb07df3..470d7daa915d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3130,7 +3130,7 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) flags &= ~PERF_SAMPLE_TIME; if (!event->attr.exclude_kernel) flags &= ~PERF_SAMPLE_REGS_USER; - if (event->attr.sample_regs_user & ~PEBS_REGS) + if (event->attr.sample_regs_user & ~PEBS_GP_REGS) flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); return flags; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index acd72e669c04..b68ab65454ff 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -96,25 +96,25 @@ struct amd_nb { PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ PERF_SAMPLE_PERIOD) -#define PEBS_REGS \ - (PERF_REG_X86_AX | \ - PERF_REG_X86_BX | \ - PERF_REG_X86_CX | \ - PERF_REG_X86_DX | \ - PERF_REG_X86_DI | \ - PERF_REG_X86_SI | \ - PERF_REG_X86_SP | \ - PERF_REG_X86_BP | \ - PERF_REG_X86_IP | \ - PERF_REG_X86_FLAGS | \ - PERF_REG_X86_R8 | \ - PERF_REG_X86_R9 | \ - PERF_REG_X86_R10 | \ - PERF_REG_X86_R11 | \ - PERF_REG_X86_R12 | \ - PERF_REG_X86_R13 | \ - PERF_REG_X86_R14 | \ - PERF_REG_X86_R15) +#define PEBS_GP_REGS \ + ((1ULL << PERF_REG_X86_AX) | \ + (1ULL << PERF_REG_X86_BX) | \ + (1ULL << PERF_REG_X86_CX) | \ + (1ULL << PERF_REG_X86_DX) | \ + (1ULL << PERF_REG_X86_DI) | \ + (1ULL << PERF_REG_X86_SI) | \ + (1ULL << PERF_REG_X86_SP) | \ + (1ULL << PERF_REG_X86_BP) | \ + (1ULL << PERF_REG_X86_IP) | \ + (1ULL << PERF_REG_X86_FLAGS) | \ + (1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) /* * Per register state. -- cgit v1.2.3 From 205c53cbe553c9e5a9fe93f63e398da7e59124b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 19:51:06 +0200 Subject: x86/speculation: Prevent deadlock on ssb_state::lock commit 2f5fb19341883bb6e37da351bc3700489d8506a7 upstream. Mikhail reported a lockdep splat related to the AMD specific ssb_state lock: CPU0 CPU1 lock(&st->lock); local_irq_disable(); lock(&(&sighand->siglock)->rlock); lock(&st->lock); lock(&(&sighand->siglock)->rlock); *** DEADLOCK *** The connection between sighand->siglock and st->lock comes through seccomp, which takes st->lock while holding sighand->siglock. Make sure interrupts are disabled when __speculation_ctrl_update() is invoked via prctl() -> speculation_ctrl_update(). Add a lockdep assert to catch future offenders. Fixes: 1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD") Reported-by: Mikhail Gavrilov Signed-off-by: Thomas Gleixner Tested-by: Mikhail Gavrilov Cc: Thomas Lendacky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1904141948200.4917@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 90ae0ca51083..9db049f06f2f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -414,6 +414,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, u64 msr = x86_spec_ctrl_base; bool updmsr = false; + lockdep_assert_irqs_disabled(); + /* * If TIF_SSBD is different, select the proper mitigation * method. Note that if SSBD mitigation is disabled or permanentely @@ -465,10 +467,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) void speculation_ctrl_update(unsigned long tif) { + unsigned long flags; + /* Forced update. Make sure all relevant TIF flags are different */ - preempt_disable(); + local_irq_save(flags); __speculation_ctrl_update(~tif, tif); - preempt_enable(); + local_irq_restore(flags); } /* Called from seccomp/prctl update */ -- cgit v1.2.3 From 451fd88f62b9e1a684d6e6503425ee0610d755a0 Mon Sep 17 00:00:00 2001 From: Chang-An Chen Date: Fri, 29 Mar 2019 10:59:09 +0800 Subject: timers/sched_clock: Prevent generic sched_clock wrap caused by tick_freeze() commit 3f2552f7e9c5abef2775c53f7af66532f8bf65bc upstream. tick_freeze() introduced by suspend-to-idle in commit 124cf9117c5f ("PM / sleep: Make it possible to quiesce timers during suspend-to-idle") uses timekeeping_suspend() instead of syscore_suspend() during suspend-to-idle. As a consequence generic sched_clock will keep going because sched_clock_suspend() and sched_clock_resume() are not invoked during suspend-to-idle which can result in a generic sched_clock wrap. On a ARM system with suspend-to-idle enabled, sched_clock is registered as "56 bits at 13MHz, resolution 76ns, wraps every 4398046511101ns", which means the real wrapping duration is 8796093022202ns. [ 134.551779] suspend-to-idle suspend (timekeeping_suspend()) [ 1204.912239] suspend-to-idle resume (timekeeping_resume()) ...... [ 1206.912239] suspend-to-idle suspend (timekeeping_suspend()) [ 5880.502807] suspend-to-idle resume (timekeeping_resume()) ...... [ 6000.403724] suspend-to-idle suspend (timekeeping_suspend()) [ 8035.753167] suspend-to-idle resume (timekeeping_resume()) ...... [ 8795.786684] (2)[321:charger_thread]...... [ 8795.788387] (2)[321:charger_thread]...... [ 0.057226] (0)[0:swapper/0]...... [ 0.061447] (2)[0:swapper/2]...... sched_clock was not stopped during suspend-to-idle, and sched_clock_poll hrtimer was not expired because timekeeping_suspend() was invoked during suspend-to-idle. It makes sched_clock wrap at kernel time 8796s. To prevent this, invoke sched_clock_suspend() and sched_clock_resume() in tick_freeze() together with timekeeping_suspend() and timekeeping_resume(). Fixes: 124cf9117c5f (PM / sleep: Make it possible to quiesce timers during suspend-to-idle) Signed-off-by: Chang-An Chen Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: Matthias Brugger Cc: John Stultz Cc: Kees Cook Cc: Corey Minyard Cc: Cc: Cc: Stanley Chu Cc: Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1553828349-8914-1-git-send-email-chang-an.chen@mediatek.com Signed-off-by: Greg Kroah-Hartman --- kernel/time/sched_clock.c | 4 ++-- kernel/time/tick-common.c | 2 ++ kernel/time/timekeeping.h | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 094b82ca95e5..930113b9799a 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -272,7 +272,7 @@ static u64 notrace suspended_sched_clock_read(void) return cd.read_data[seq & 1].epoch_cyc; } -static int sched_clock_suspend(void) +int sched_clock_suspend(void) { struct clock_read_data *rd = &cd.read_data[0]; @@ -283,7 +283,7 @@ static int sched_clock_suspend(void) return 0; } -static void sched_clock_resume(void) +void sched_clock_resume(void) { struct clock_read_data *rd = &cd.read_data[0]; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 529143b4c8d2..df401463a191 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -487,6 +487,7 @@ void tick_freeze(void) trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), true); system_state = SYSTEM_SUSPEND; + sched_clock_suspend(); timekeeping_suspend(); } else { tick_suspend_local(); @@ -510,6 +511,7 @@ void tick_unfreeze(void) if (tick_freeze_depth == num_online_cpus()) { timekeeping_resume(); + sched_clock_resume(); system_state = SYSTEM_RUNNING; trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 7a9b4eb7a1d5..141ab3ab0354 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -14,6 +14,13 @@ extern u64 timekeeping_max_deferment(void); extern void timekeeping_warp_clock(void); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); +#ifdef CONFIG_GENERIC_SCHED_CLOCK +extern int sched_clock_suspend(void); +extern void sched_clock_resume(void); +#else +static inline int sched_clock_suspend(void) { return 0; } +static inline void sched_clock_resume(void) { } +#endif extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -- cgit v1.2.3 From 2cc8e8c230e8cb047e94abaa59dd722e4c900792 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 22 Apr 2019 16:08:10 -0700 Subject: nfit/ars: Remove ars_start_flags commit 317a992ab9266b86b774b9f6b0f87eb4f59879a1 upstream. The ars_start_flags property of 'struct acpi_nfit_desc' is no longer used since ARS_REQ_SHORT and ARS_REQ_LONG were added. Reviewed-by: Toshi Kani Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/acpi/nfit/core.c | 10 +++++----- drivers/acpi/nfit/nfit.h | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index f75f8f870ce3..1a48c92eaed5 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2660,11 +2660,11 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc) struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; - memset(&ars_start, 0, sizeof(ars_start)); - ars_start.address = ars_status->restart_address; - ars_start.length = ars_status->restart_length; - ars_start.type = ars_status->type; - ars_start.flags = acpi_desc->ars_start_flags; + ars_start = (struct nd_cmd_ars_start) { + .address = ars_status->restart_address, + .length = ars_status->restart_length, + .type = ars_status->type, + }; rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, sizeof(ars_start), &cmd_rc); if (rc < 0) diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 33691aecfcee..871fb3de3b30 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -223,7 +223,6 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; - u8 ars_start_flags; struct nd_cmd_ars_status *ars_status; struct nfit_spa *scrub_spa; struct delayed_work dwork; -- cgit v1.2.3 From b49a9157c3ec867af424fde0c475eb2904492daa Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 22 Apr 2019 16:08:16 -0700 Subject: nfit/ars: Introduce scrub_flags commit e34b8252a3d2893ca55c82dbfcdaa302fa03d400 upstream. In preparation for introducing new flags to gate whether ARS results are stale, or poll the completion state, convert the existing flags to an unsigned long with enumerated values. This conversion allows the flags to be atomically updated outside of ->init_mutex. Reviewed-by: Toshi Kani Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/acpi/nfit/core.c | 30 +++++++++++++++++------------- drivers/acpi/nfit/nfit.h | 8 ++++++-- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 1a48c92eaed5..64308e669250 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1319,19 +1319,23 @@ static ssize_t scrub_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; ssize_t rc = -ENXIO; + bool busy; device_lock(dev); nd_desc = dev_get_drvdata(dev); - if (nd_desc) { - struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); - - mutex_lock(&acpi_desc->init_mutex); - rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, - acpi_desc->scrub_busy - && !acpi_desc->cancel ? "+\n" : "\n"); - mutex_unlock(&acpi_desc->init_mutex); + if (!nd_desc) { + device_unlock(dev); + return rc; } + acpi_desc = to_acpi_desc(nd_desc); + + mutex_lock(&acpi_desc->init_mutex); + busy = test_bit(ARS_BUSY, &acpi_desc->scrub_flags) + && !test_bit(ARS_CANCEL, &acpi_desc->scrub_flags); + rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, busy ? "+\n" : "\n"); + mutex_unlock(&acpi_desc->init_mutex); device_unlock(dev); return rc; } @@ -3081,7 +3085,7 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc, lockdep_assert_held(&acpi_desc->init_mutex); - if (acpi_desc->cancel) + if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags)) return 0; if (query_rc == -EBUSY) { @@ -3155,7 +3159,7 @@ static void __sched_ars(struct acpi_nfit_desc *acpi_desc, unsigned int tmo) { lockdep_assert_held(&acpi_desc->init_mutex); - acpi_desc->scrub_busy = 1; + set_bit(ARS_BUSY, &acpi_desc->scrub_flags); /* note this should only be set from within the workqueue */ if (tmo) acpi_desc->scrub_tmo = tmo; @@ -3171,7 +3175,7 @@ static void notify_ars_done(struct acpi_nfit_desc *acpi_desc) { lockdep_assert_held(&acpi_desc->init_mutex); - acpi_desc->scrub_busy = 0; + clear_bit(ARS_BUSY, &acpi_desc->scrub_flags); acpi_desc->scrub_count++; if (acpi_desc->scrub_count_state) sysfs_notify_dirent(acpi_desc->scrub_count_state); @@ -3460,7 +3464,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa; mutex_lock(&acpi_desc->init_mutex); - if (acpi_desc->cancel) { + if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags)) { mutex_unlock(&acpi_desc->init_mutex); return 0; } @@ -3539,7 +3543,7 @@ void acpi_nfit_shutdown(void *data) mutex_unlock(&acpi_desc_lock); mutex_lock(&acpi_desc->init_mutex); - acpi_desc->cancel = 1; + set_bit(ARS_CANCEL, &acpi_desc->scrub_flags); cancel_delayed_work_sync(&acpi_desc->dwork); mutex_unlock(&acpi_desc->init_mutex); diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 871fb3de3b30..897ce10192a0 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -210,6 +210,11 @@ struct nfit_mem { int family; }; +enum scrub_flags { + ARS_BUSY, + ARS_CANCEL, +}; + struct acpi_nfit_desc { struct nvdimm_bus_descriptor nd_desc; struct acpi_table_header acpi_header; @@ -231,8 +236,7 @@ struct acpi_nfit_desc { unsigned int max_ars; unsigned int scrub_count; unsigned int scrub_mode; - unsigned int scrub_busy:1; - unsigned int cancel:1; + unsigned long scrub_flags; unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; unsigned long bus_nfit_cmd_force_en; -- cgit v1.2.3 From 33dcadf02c1fa08b2e247be4b0a9e5ffd3c931df Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 22 Apr 2019 16:08:21 -0700 Subject: nfit/ars: Allow root to busy-poll the ARS state machine commit 5479b2757f26fe9908fc341d105b2097fe820b6f upstream. The ARS implementation implements exponential back-off on the poll interval to prevent high-frequency access to the DIMM / platform interface. Depending on when the ARS completes the poll interval may exceed the completion event by minutes. Allow root to reset the timeout each time it probes the status. A one-second timeout is still enforced, but root can otherwise can control the poll interval. Fixes: bc6ba8085842 ("nfit, address-range-scrub: rework and simplify ARS...") Cc: Reported-by: Erwin Tsaur Reviewed-by: Toshi Kani Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/acpi/nfit/core.c | 8 ++++++++ drivers/acpi/nfit/nfit.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 64308e669250..61d3fd65fbe9 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1335,6 +1335,13 @@ static ssize_t scrub_show(struct device *dev, busy = test_bit(ARS_BUSY, &acpi_desc->scrub_flags) && !test_bit(ARS_CANCEL, &acpi_desc->scrub_flags); rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, busy ? "+\n" : "\n"); + /* Allow an admin to poll the busy state at a higher rate */ + if (busy && capable(CAP_SYS_RAWIO) && !test_and_set_bit(ARS_POLL, + &acpi_desc->scrub_flags)) { + acpi_desc->scrub_tmo = 1; + mod_delayed_work(nfit_wq, &acpi_desc->dwork, HZ); + } + mutex_unlock(&acpi_desc->init_mutex); device_unlock(dev); return rc; @@ -3196,6 +3203,7 @@ static void acpi_nfit_scrub(struct work_struct *work) else notify_ars_done(acpi_desc); memset(acpi_desc->ars_status, 0, acpi_desc->max_ars); + clear_bit(ARS_POLL, &acpi_desc->scrub_flags); mutex_unlock(&acpi_desc->init_mutex); } diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 897ce10192a0..d14bad687fb8 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -213,6 +213,7 @@ struct nfit_mem { enum scrub_flags { ARS_BUSY, ARS_CANCEL, + ARS_POLL, }; struct acpi_nfit_desc { -- cgit v1.2.3 From 4a96e63f8f7dcfc3aaae2414e7fec63aee6ab5db Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 22 Apr 2019 16:08:26 -0700 Subject: nfit/ars: Avoid stale ARS results commit 78153dd45e7e0596ba32b15d02bda08e1513111e upstream. Gate ARS result consumption on whether the OS issued start-ARS since the previous consumption. The BIOS may only clear its result buffers after a successful start-ARS. Fixes: 0caeef63e6d2 ("libnvdimm: Add a poison list and export badblocks") Cc: Reported-by: Krzysztof Rusocki Reported-by: Vishal Verma Reviewed-by: Toshi Kani Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/acpi/nfit/core.c | 17 ++++++++++++++++- drivers/acpi/nfit/nfit.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 61d3fd65fbe9..4be4dc3e8aa6 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2661,7 +2661,10 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, if (rc < 0) return rc; - return cmd_rc; + if (cmd_rc < 0) + return cmd_rc; + set_bit(ARS_VALID, &acpi_desc->scrub_flags); + return 0; } static int ars_continue(struct acpi_nfit_desc *acpi_desc) @@ -2754,6 +2757,17 @@ static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc) */ if (ars_status->out_length < 44) return 0; + + /* + * Ignore potentially stale results that are only refreshed + * after a start-ARS event. + */ + if (!test_and_clear_bit(ARS_VALID, &acpi_desc->scrub_flags)) { + dev_dbg(acpi_desc->dev, "skip %d stale records\n", + ars_status->num_records); + return 0; + } + for (i = 0; i < ars_status->num_records; i++) { /* only process full records */ if (ars_status->out_length @@ -3238,6 +3252,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) struct nfit_spa *nfit_spa; int rc; + set_bit(ARS_VALID, &acpi_desc->scrub_flags); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { switch (nfit_spa_type(nfit_spa->spa)) { case NFIT_SPA_VOLATILE: diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index d14bad687fb8..0cbe5009eb2c 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -213,6 +213,7 @@ struct nfit_mem { enum scrub_flags { ARS_BUSY, ARS_CANCEL, + ARS_VALID, ARS_POLL, }; -- cgit v1.2.3 From 03c1d8f8afd8e59b67566d39416db8b0bc66668f Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 8 Feb 2019 18:30:59 +0200 Subject: tpm/tpm_i2c_atmel: Return -E2BIG when the transfer is incomplete [ Upstream commit 442601e87a4769a8daba4976ec3afa5222ca211d ] Return -E2BIG when the transfer is incomplete. The upper layer does not retry, so not doing that is incorrect behaviour. Cc: stable@vger.kernel.org Fixes: a2871c62e186 ("tpm: Add support for Atmel I2C TPMs") Signed-off-by: Jarkko Sakkinen Reviewed-by: Stefan Berger Reviewed-by: Jerry Snitselaar Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm_i2c_atmel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index 32a8e27c5382..cc4e642d3180 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -69,6 +69,10 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) if (status < 0) return status; + /* The upper layer does not support incomplete sends. */ + if (status != len) + return -E2BIG; + return 0; } -- cgit v1.2.3 From 18636692a1b49f0c0ebf623ad29b65514886c59c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 23 Apr 2019 16:05:18 +0300 Subject: tpm: Fix the type of the return value in calc_tpm2_event_size() commit b9d0a85d6b2e76630cfd4c475ee3af4109bfd87a upstream calc_tpm2_event_size() has an invalid signature because it returns a 'size_t' where as its signature says that it returns 'int'. Cc: Fixes: 4d23cc323cdb ("tpm: add securityfs support for TPM 2.0 firmware event log") Suggested-by: Jarkko Sakkinen Signed-off-by: Yue Haibing Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris Signed-off-by: Sasha Levin --- drivers/char/tpm/eventlog/tpm2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c index 1b8fa9de2cac..41b9f6c92da7 100644 --- a/drivers/char/tpm/eventlog/tpm2.c +++ b/drivers/char/tpm/eventlog/tpm2.c @@ -37,8 +37,8 @@ * * Returns size of the event. If it is an invalid event, returns 0. */ -static int calc_tpm2_event_size(struct tcg_pcr_event2 *event, - struct tcg_pcr_event *event_header) +static size_t calc_tpm2_event_size(struct tcg_pcr_event2 *event, + struct tcg_pcr_event *event_header) { struct tcg_efi_specid_event *efispecid; struct tcg_event_field *event_field; -- cgit v1.2.3 From fb9693679feb5e3fa9c983ec35dfa068a9ad83af Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 23 Apr 2019 12:04:26 -0700 Subject: Revert "kbuild: use -Oz instead of -Os when using clang" commit a75bb4eb9e565b9f5115e2e8c07377ce32cbe69a upstream. The clang option -Oz enables *aggressive* optimization for size, which doesn't necessarily result in smaller images, but can have negative impact on performance. Switch back to the less aggressive -Os. This reverts commit 6748cb3c299de1ffbe56733647b01dbcc398c419. Suggested-by: Peter Zijlstra Signed-off-by: Matthias Kaehlcke Reviewed-by: Nick Desaulniers Signed-off-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Signed-off-by: Sasha Levin --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ef192ca04330..807ae0e3ff6e 100644 --- a/Makefile +++ b/Makefile @@ -678,8 +678,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -KBUILD_CFLAGS += $(call cc-option,-Oz,-Os) -KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) +KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,) else ifdef CONFIG_PROFILE_ALL_BRANCHES KBUILD_CFLAGS += -O2 $(call cc-disable-warning,maybe-uninitialized,) -- cgit v1.2.3 From 6799f32fff632c0659d4385edcf8f4ce883bc5fc Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Tue, 23 Apr 2019 19:51:06 -0400 Subject: sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup [ Upstream commit 2e8e19226398db8265a8e675fcc0118b9e80c9e8 ] With extremely short cfs_period_us setting on a parent task group with a large number of children the for loop in sched_cfs_period_timer() can run until the watchdog fires. There is no guarantee that the call to hrtimer_forward_now() will ever return 0. The large number of children can make do_sched_cfs_period_timer() take longer than the period. NMI watchdog: Watchdog detected hard LOCKUP on cpu 24 RIP: 0010:tg_nop+0x0/0x10 walk_tg_tree_from+0x29/0xb0 unthrottle_cfs_rq+0xe0/0x1a0 distribute_cfs_runtime+0xd3/0xf0 sched_cfs_period_timer+0xcb/0x160 ? sched_cfs_slack_timer+0xd0/0xd0 __hrtimer_run_queues+0xfb/0x270 hrtimer_interrupt+0x122/0x270 smp_apic_timer_interrupt+0x6a/0x140 apic_timer_interrupt+0xf/0x20 To prevent this we add protection to the loop that detects when the loop has run too many times and scales the period and quota up, proportionally, so that the timer can complete before then next period expires. This preserves the relative runtime quota while preventing the hard lockup. A warning is issued reporting this state and the new values. Signed-off-by: Phil Auld Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Anton Blanchard Cc: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190319130005.25492-1-pauld@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5e61a1a99e38..eeb605656d59 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4859,12 +4859,15 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } +extern const u64 max_cfs_quota_period; + static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) { struct cfs_bandwidth *cfs_b = container_of(timer, struct cfs_bandwidth, period_timer); int overrun; int idle = 0; + int count = 0; raw_spin_lock(&cfs_b->lock); for (;;) { @@ -4872,6 +4875,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) if (!overrun) break; + if (++count > 3) { + u64 new, old = ktime_to_ns(cfs_b->period); + + new = (old * 147) / 128; /* ~115% */ + new = min(new, max_cfs_quota_period); + + cfs_b->period = ns_to_ktime(new); + + /* since max is 1s, this is limited to 1e9^2, which fits in u64 */ + cfs_b->quota *= new; + cfs_b->quota = div64_u64(cfs_b->quota, old); + + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(new, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + + /* reset count so we don't come right back in here */ + count = 0; + } + idle = do_sched_cfs_period_timer(cfs_b, overrun); } if (idle) -- cgit v1.2.3 From edc94cb2c13b06d9f72eda5dd96fecbe3973ad96 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 27 Mar 2019 11:32:38 -0700 Subject: tpm: fix an invalid condition in tpm_common_poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7110629263469b4664d00b38ef80a656eddf3637 ] The poll condition should only check response_length, because reads should only be issued if there is data to read. The response_read flag only prevents double writes. The problem was that the write set the response_read to false, enqued a tpm job, and returned. Then application called poll which checked the response_read flag and returned EPOLLIN. Then the application called read, but got nothing. After all that the async_work kicked in. Added also mutex_lock around the poll check to prevent other possible race conditions. Fixes: 9488585b21bef0df12 ("tpm: add support for partial reads") Reported-by: Mantas Mikulėnas Tested-by: Mantas Mikulėnas Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm-dev-common.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index 5eecad233ea1..744b0237300a 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -203,12 +203,19 @@ __poll_t tpm_common_poll(struct file *file, poll_table *wait) __poll_t mask = 0; poll_wait(file, &priv->async_wait, wait); + mutex_lock(&priv->buffer_mutex); - if (!priv->response_read || priv->response_length) + /* + * The response_length indicates if there is still response + * (or part of it) to be consumed. Partial reads decrease it + * by the number of bytes read, and write resets it the zero. + */ + if (priv->response_length) mask = EPOLLIN | EPOLLRDNORM; else mask = EPOLLOUT | EPOLLWRNORM; + mutex_unlock(&priv->buffer_mutex); return mask; } -- cgit v1.2.3 From 74b4ef5df5902466a9cb832c27bcead743cda822 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 5 Apr 2019 13:42:56 +0200 Subject: mt76x02: avoid status_list.lock and sta->rate_ctrl_lock dependency commit bafdf85dfa59374f927ff597bc8c259193afda30 upstream. Move ieee80211_tx_status_ext() outside of status_list lock section in order to avoid locking dependency and possible deadlock reposed by LOCKDEP in below warning. Also do mt76_tx_status_lock() just before it's needed. [ 440.224832] WARNING: possible circular locking dependency detected [ 440.224833] 5.1.0-rc2+ #22 Not tainted [ 440.224834] ------------------------------------------------------ [ 440.224835] kworker/u16:28/2362 is trying to acquire lock: [ 440.224836] 0000000089b8cacf (&(&q->lock)->rlock#2){+.-.}, at: mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.224842] but task is already holding lock: [ 440.224842] 000000002cfedc59 (&(&sta->lock)->rlock){+.-.}, at: ieee80211_stop_tx_ba_cb+0x32/0x1f0 [mac80211] [ 440.224863] which lock already depends on the new lock. [ 440.224863] the existing dependency chain (in reverse order) is: [ 440.224864] -> #3 (&(&sta->lock)->rlock){+.-.}: [ 440.224869] _raw_spin_lock_bh+0x34/0x40 [ 440.224880] ieee80211_start_tx_ba_session+0xe4/0x3d0 [mac80211] [ 440.224894] minstrel_ht_get_rate+0x45c/0x510 [mac80211] [ 440.224906] rate_control_get_rate+0xc1/0x140 [mac80211] [ 440.224918] ieee80211_tx_h_rate_ctrl+0x195/0x3c0 [mac80211] [ 440.224930] ieee80211_xmit_fast+0x26d/0xa50 [mac80211] [ 440.224942] __ieee80211_subif_start_xmit+0xfc/0x310 [mac80211] [ 440.224954] ieee80211_subif_start_xmit+0x38/0x390 [mac80211] [ 440.224956] dev_hard_start_xmit+0xb8/0x300 [ 440.224957] __dev_queue_xmit+0x7d4/0xbb0 [ 440.224968] ip6_finish_output2+0x246/0x860 [ipv6] [ 440.224978] mld_sendpack+0x1bd/0x360 [ipv6] [ 440.224987] mld_ifc_timer_expire+0x1a4/0x2f0 [ipv6] [ 440.224989] call_timer_fn+0x89/0x2a0 [ 440.224990] run_timer_softirq+0x1bd/0x4d0 [ 440.224992] __do_softirq+0xdb/0x47c [ 440.224994] irq_exit+0xfa/0x100 [ 440.224996] smp_apic_timer_interrupt+0x9a/0x220 [ 440.224997] apic_timer_interrupt+0xf/0x20 [ 440.224999] cpuidle_enter_state+0xc1/0x470 [ 440.225000] do_idle+0x21a/0x260 [ 440.225001] cpu_startup_entry+0x19/0x20 [ 440.225004] start_secondary+0x135/0x170 [ 440.225006] secondary_startup_64+0xa4/0xb0 [ 440.225007] -> #2 (&(&sta->rate_ctrl_lock)->rlock){+.-.}: [ 440.225009] _raw_spin_lock_bh+0x34/0x40 [ 440.225022] rate_control_tx_status+0x4f/0xb0 [mac80211] [ 440.225031] ieee80211_tx_status_ext+0x142/0x1a0 [mac80211] [ 440.225035] mt76x02_send_tx_status+0x2e4/0x340 [mt76x02_lib] [ 440.225037] mt76x02_tx_status_data+0x31/0x40 [mt76x02_lib] [ 440.225040] mt76u_tx_status_data+0x51/0xa0 [mt76_usb] [ 440.225042] process_one_work+0x237/0x5d0 [ 440.225043] worker_thread+0x3c/0x390 [ 440.225045] kthread+0x11d/0x140 [ 440.225046] ret_from_fork+0x3a/0x50 [ 440.225047] -> #1 (&(&list->lock)->rlock#8){+.-.}: [ 440.225049] _raw_spin_lock_bh+0x34/0x40 [ 440.225052] mt76_tx_status_skb_add+0x51/0x100 [mt76] [ 440.225054] mt76x02u_tx_prepare_skb+0xbd/0x116 [mt76x02_usb] [ 440.225056] mt76u_tx_queue_skb+0x5f/0x180 [mt76_usb] [ 440.225058] mt76_tx+0x93/0x190 [mt76] [ 440.225070] ieee80211_tx_frags+0x148/0x210 [mac80211] [ 440.225081] __ieee80211_tx+0x75/0x1b0 [mac80211] [ 440.225092] ieee80211_tx+0xde/0x110 [mac80211] [ 440.225105] __ieee80211_tx_skb_tid_band+0x72/0x90 [mac80211] [ 440.225122] ieee80211_send_auth+0x1f3/0x360 [mac80211] [ 440.225141] ieee80211_auth.cold.40+0x6c/0x100 [mac80211] [ 440.225156] ieee80211_mgd_auth.cold.50+0x132/0x15f [mac80211] [ 440.225171] cfg80211_mlme_auth+0x149/0x360 [cfg80211] [ 440.225181] nl80211_authenticate+0x273/0x2e0 [cfg80211] [ 440.225183] genl_family_rcv_msg+0x196/0x3a0 [ 440.225184] genl_rcv_msg+0x47/0x8e [ 440.225185] netlink_rcv_skb+0x3a/0xf0 [ 440.225187] genl_rcv+0x24/0x40 [ 440.225188] netlink_unicast+0x16d/0x210 [ 440.225189] netlink_sendmsg+0x204/0x3b0 [ 440.225191] sock_sendmsg+0x36/0x40 [ 440.225193] ___sys_sendmsg+0x259/0x2b0 [ 440.225194] __sys_sendmsg+0x47/0x80 [ 440.225196] do_syscall_64+0x60/0x1f0 [ 440.225197] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 440.225198] -> #0 (&(&q->lock)->rlock#2){+.-.}: [ 440.225200] lock_acquire+0xb9/0x1a0 [ 440.225202] _raw_spin_lock_bh+0x34/0x40 [ 440.225204] mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225215] ieee80211_agg_start_txq+0xe8/0x2b0 [mac80211] [ 440.225225] ieee80211_stop_tx_ba_cb+0xb8/0x1f0 [mac80211] [ 440.225235] ieee80211_ba_session_work+0x1c1/0x2f0 [mac80211] [ 440.225236] process_one_work+0x237/0x5d0 [ 440.225237] worker_thread+0x3c/0x390 [ 440.225239] kthread+0x11d/0x140 [ 440.225240] ret_from_fork+0x3a/0x50 [ 440.225240] other info that might help us debug this: [ 440.225241] Chain exists of: &(&q->lock)->rlock#2 --> &(&sta->rate_ctrl_lock)->rlock --> &(&sta->lock)->rlock [ 440.225243] Possible unsafe locking scenario: [ 440.225244] CPU0 CPU1 [ 440.225244] ---- ---- [ 440.225245] lock(&(&sta->lock)->rlock); [ 440.225245] lock(&(&sta->rate_ctrl_lock)->rlock); [ 440.225246] lock(&(&sta->lock)->rlock); [ 440.225247] lock(&(&q->lock)->rlock#2); [ 440.225248] *** DEADLOCK *** [ 440.225249] 5 locks held by kworker/u16:28/2362: [ 440.225250] #0: 0000000048fcd291 ((wq_completion)phy0){+.+.}, at: process_one_work+0x1b5/0x5d0 [ 440.225252] #1: 00000000f1c6828f ((work_completion)(&sta->ampdu_mlme.work)){+.+.}, at: process_one_work+0x1b5/0x5d0 [ 440.225254] #2: 00000000433d2b2c (&sta->ampdu_mlme.mtx){+.+.}, at: ieee80211_ba_session_work+0x5c/0x2f0 [mac80211] [ 440.225265] #3: 000000002cfedc59 (&(&sta->lock)->rlock){+.-.}, at: ieee80211_stop_tx_ba_cb+0x32/0x1f0 [mac80211] [ 440.225276] #4: 000000009d7b9a44 (rcu_read_lock){....}, at: ieee80211_agg_start_txq+0x33/0x2b0 [mac80211] [ 440.225286] stack backtrace: [ 440.225288] CPU: 2 PID: 2362 Comm: kworker/u16:28 Not tainted 5.1.0-rc2+ #22 [ 440.225289] Hardware name: LENOVO 20KGS23S0P/20KGS23S0P, BIOS N23ET55W (1.30 ) 08/31/2018 [ 440.225300] Workqueue: phy0 ieee80211_ba_session_work [mac80211] [ 440.225301] Call Trace: [ 440.225304] dump_stack+0x85/0xc0 [ 440.225306] print_circular_bug.isra.38.cold.58+0x15c/0x195 [ 440.225307] check_prev_add.constprop.48+0x5f0/0xc00 [ 440.225309] ? check_prev_add.constprop.48+0x39d/0xc00 [ 440.225311] ? __lock_acquire+0x41d/0x1100 [ 440.225312] __lock_acquire+0xd98/0x1100 [ 440.225313] ? __lock_acquire+0x41d/0x1100 [ 440.225315] lock_acquire+0xb9/0x1a0 [ 440.225317] ? mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225319] _raw_spin_lock_bh+0x34/0x40 [ 440.225321] ? mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225323] mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225334] ieee80211_agg_start_txq+0xe8/0x2b0 [mac80211] [ 440.225344] ieee80211_stop_tx_ba_cb+0xb8/0x1f0 [mac80211] [ 440.225354] ieee80211_ba_session_work+0x1c1/0x2f0 [mac80211] [ 440.225356] process_one_work+0x237/0x5d0 [ 440.225358] worker_thread+0x3c/0x390 [ 440.225359] ? wq_calc_node_cpumask+0x70/0x70 [ 440.225360] kthread+0x11d/0x140 [ 440.225362] ? kthread_create_on_node+0x40/0x40 [ 440.225363] ret_from_fork+0x3a/0x50 Cc: stable@vger.kernel.org Fixes: 88046b2c9f6d ("mt76: add support for reporting tx status with skb") Signed-off-by: Stanislaw Gruszka Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 7c9dfa54fee8..9678322aca60 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -421,7 +421,6 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, return; rcu_read_lock(); - mt76_tx_status_lock(mdev, &list); if (stat->wcid < ARRAY_SIZE(dev->mt76.wcid)) wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]); @@ -434,6 +433,8 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, drv_priv); } + mt76_tx_status_lock(mdev, &list); + if (wcid) { if (stat->pktid) status.skb = mt76_tx_status_skb_get(mdev, wcid, @@ -453,7 +454,9 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, if (*update == 0 && stat_val == stat_cache && stat->wcid == msta->status.wcid && msta->n_frames < 32) { msta->n_frames++; - goto out; + mt76_tx_status_unlock(mdev, &list); + rcu_read_unlock(); + return; } mt76x02_mac_fill_tx_status(dev, status.info, &msta->status, @@ -469,11 +472,10 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, if (status.skb) mt76_tx_status_skb_done(mdev, status.skb, &list); - else - ieee80211_tx_status_ext(mt76_hw(dev), &status); - -out: mt76_tx_status_unlock(mdev, &list); + + if (!status.skb) + ieee80211_tx_status_ext(mt76_hw(dev), &status); rcu_read_unlock(); } -- cgit v1.2.3 From 65fce15dbd8726c9f13c7b0b240f430afdca6a2c Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 19 Mar 2019 02:36:59 +0100 Subject: device_cgroup: fix RCU imbalance in error case commit 0fcc4c8c044e117ac126ab6df4138ea9a67fa2a9 upstream. When dev_exception_add() returns an error (due to a failed memory allocation), make sure that we move the RCU preemption count back to where it was before we were called. We dropped the RCU read lock inside the loop body, so we can't just "break". sparse complains about this, too: $ make -s C=2 security/device_cgroup.o ./include/linux/rcupdate.h:647:9: warning: context imbalance in 'propagate_exception' - unexpected unlock Fixes: d591fb56618f ("device_cgroup: simplify cgroup tree walk in propagate_exception()") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Acked-by: Michal Hocko Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- security/device_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index cd97929fac66..dc28914fa72e 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -560,7 +560,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root, devcg->behavior == DEVCG_DEFAULT_ALLOW) { rc = dev_exception_add(devcg, ex); if (rc) - break; + return rc; } else { /* * in the other possible cases: -- cgit v1.2.3 From c138ed72186accb0dac95066118374cf6c0807ae Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 29 Mar 2019 11:13:38 +0200 Subject: perf/ring_buffer: Fix AUX record suppression commit 339bc4183596e1f68c2c98a03b87aa124107c317 upstream. The following commit: 1627314fb54a33e ("perf: Suppress AUX/OVERWRITE records") has an unintended side-effect of also suppressing all AUX records with no flags and non-zero size, so all the regular records in the full trace mode. This breaks some use cases for people. Fix this by restoring "regular" AUX records. Reported-by: Ben Gainey Tested-by: Ben Gainey Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 1627314fb54a33e ("perf: Suppress AUX/OVERWRITE records") Link: https://lkml.kernel.org/r/20190329091338.29999-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/ring_buffer.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 878c62ec0190..dbd7656b4f73 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -456,24 +456,21 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) rb->aux_head += size; } - if (size || handle->aux_flags) { - /* - * Only send RECORD_AUX if we have something useful to communicate - * - * Note: the OVERWRITE records by themselves are not considered - * useful, as they don't communicate any *new* information, - * aside from the short-lived offset, that becomes history at - * the next event sched-in and therefore isn't useful. - * The userspace that needs to copy out AUX data in overwrite - * mode should know to use user_page::aux_head for the actual - * offset. So, from now on we don't output AUX records that - * have *only* OVERWRITE flag set. - */ - - if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE) - perf_event_aux_event(handle->event, aux_head, size, - handle->aux_flags); - } + /* + * Only send RECORD_AUX if we have something useful to communicate + * + * Note: the OVERWRITE records by themselves are not considered + * useful, as they don't communicate any *new* information, + * aside from the short-lived offset, that becomes history at + * the next event sched-in and therefore isn't useful. + * The userspace that needs to copy out AUX data in overwrite + * mode should know to use user_page::aux_head for the actual + * offset. So, from now on we don't output AUX records that + * have *only* OVERWRITE flag set. + */ + if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)) + perf_event_aux_event(handle->event, aux_head, size, + handle->aux_flags); rb->user_page->aux_head = rb->aux_head; if (rb_need_aux_wakeup(rb)) -- cgit v1.2.3 From 4ae522890cc19038c84769d20f9ffdfc2d58a36b Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 18 Apr 2019 17:50:16 -0700 Subject: mm/memory_hotplug: do not unlock after failing to take the device_hotplug_lock commit 37803841c92d7b327147e0b1be3436423189e1cf upstream. When adding memory by probing a memory block in the sysfs interface, there is an obvious issue where we will unlock the device_hotplug_lock when we failed to takes it. That issue was introduced in 8df1d0e4a265 ("mm/memory_hotplug: make add_memory() take the device_hotplug_lock"). We should drop out in time when failing to take the device_hotplug_lock. Link: http://lkml.kernel.org/r/1554696437-9593-1-git-send-email-zhongjiang@huawei.com Fixes: 8df1d0e4a265 ("mm/memory_hotplug: make add_memory() take the device_hotplug_lock") Signed-off-by: zhong jiang Reported-by: Yang yingliang Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 048cbf7d5233..23125f276ff1 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -505,7 +505,7 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr, ret = lock_device_hotplug_sysfs(); if (ret) - goto out; + return ret; nid = memory_add_physaddr_to_nid(phys_addr); ret = __add_memory(nid, phys_addr, -- cgit v1.2.3 From c3d0cf332bcde20591186a72d2de915c5bc0cc68 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 18 Apr 2019 17:50:20 -0700 Subject: mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n commit e8277b3b52240ec1caad8e6df278863e4bf42eac upstream. Commit 58bc4c34d249 ("mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly") depends on skipping vmstat entries with empty name introduced in 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat") but reverted in b29940c1abd7 ("mm: rename and change semantics of nr_indirectly_reclaimable_bytes"). So skipping no longer works and /proc/vmstat has misformatted lines " 0". This patch simply shows debug counters "nr_tlb_remote_*" for UP. Link: http://lkml.kernel.org/r/155481488468.467.4295519102880913454.stgit@buzz Fixes: 58bc4c34d249 ("mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly") Signed-off-by: Konstantin Khlebnikov Acked-by: Vlastimil Babka Cc: Roman Gushchin Cc: Jann Horn Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 83b30edc2f7f..f807f2e3b4cb 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1274,13 +1274,8 @@ const char * const vmstat_text[] = { #endif #endif /* CONFIG_MEMORY_BALLOON */ #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#else - "", /* nr_tlb_remote_flush */ - "", /* nr_tlb_remote_flush_received */ -#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", #endif /* CONFIG_DEBUG_TLBFLUSH */ -- cgit v1.2.3 From b2c65593ea626d28f1d20ed86281f8a8fdec2738 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Apr 2019 15:25:00 +0200 Subject: ALSA: info: Fix racy addition/deletion of nodes commit 8c2f870890fd28e023b0fcf49dcee333f2c8bad7 upstream. The ALSA proc helper manages the child nodes in a linked list, but its addition and deletion is done without any lock. This leads to a corruption if they are operated concurrently. Usually this isn't a problem because the proc entries are added sequentially in the driver probe procedure itself. But the card registrations are done often asynchronously, and the crash could be actually reproduced with syzkaller. This patch papers over it by protecting the link addition and deletion with the parent's mutex. There is "access" mutex that is used for the file access, and this can be reused for this purpose as well. Reported-by: syzbot+48df349490c36f9f54ab@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/info.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/core/info.c b/sound/core/info.c index fe502bc5e6d2..679136fba730 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -722,8 +722,11 @@ snd_info_create_entry(const char *name, struct snd_info_entry *parent) INIT_LIST_HEAD(&entry->children); INIT_LIST_HEAD(&entry->list); entry->parent = parent; - if (parent) + if (parent) { + mutex_lock(&parent->access); list_add_tail(&entry->list, &parent->children); + mutex_unlock(&parent->access); + } return entry; } @@ -805,7 +808,12 @@ void snd_info_free_entry(struct snd_info_entry * entry) list_for_each_entry_safe(p, n, &entry->children, list) snd_info_free_entry(p); - list_del(&entry->list); + p = entry->parent; + if (p) { + mutex_lock(&p->access); + list_del(&entry->list); + mutex_unlock(&p->access); + } kfree(entry->name); if (entry->private_free) entry->private_free(entry); -- cgit v1.2.3 From 822482bff7c53312f508c8e94b424823f66d67f8 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 18 Mar 2019 02:32:36 +0100 Subject: percpu: stop printing kernel addresses commit 00206a69ee32f03e6f40837684dcbe475ea02266 upstream. Since commit ad67b74d2469d9b8 ("printk: hash addresses printed with %p"), at boot "____ptrval____" is printed instead of actual addresses: percpu: Embedded 38 pages/cpu @(____ptrval____) s124376 r0 d31272 u524288 Instead of changing the print to "%px", and leaking kernel addresses, just remove the print completely, cfr. e.g. commit 071929dbdd865f77 ("arm64: Stop printing the virtual memory layout"). Signed-off-by: Matteo Croce Signed-off-by: Dennis Zhou Signed-off-by: Greg Kroah-Hartman --- mm/percpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index db86282fd024..59bd6a51954c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2531,8 +2531,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, ai->groups[group].base_offset = areas[group] - base; } - pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", - PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, + pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n", + PFN_DOWN(size_sum), ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); rc = pcpu_setup_first_chunk(ai, base); @@ -2653,8 +2653,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size, } /* we're ready, commit */ - pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n", - unit_pages, psize_str, vm.addr, ai->static_size, + pr_info("%d %s pages/cpu s%zu r%zu d%zu\n", + unit_pages, psize_str, ai->static_size, ai->reserved_size, ai->dyn_size); rc = pcpu_setup_first_chunk(ai, vm.addr); -- cgit v1.2.3 From c735a988a9caa19b752565203b8c8924202a8555 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Apr 2019 18:39:38 -0700 Subject: kernel/sysctl.c: fix out-of-bounds access when setting file-max commit 9002b21465fa4d829edfc94a5a441005cffaa972 upstream. Commit 32a5ad9c2285 ("sysctl: handle overflow for file-max") hooked up min/max values for the file-max sysctl parameter via the .extra1 and .extra2 fields in the corresponding struct ctl_table entry. Unfortunately, the minimum value points at the global 'zero' variable, which is an int. This results in a KASAN splat when accessed as a long by proc_doulongvec_minmax on 64-bit architectures: | BUG: KASAN: global-out-of-bounds in __do_proc_doulongvec_minmax+0x5d8/0x6a0 | Read of size 8 at addr ffff2000133d1c20 by task systemd/1 | | CPU: 0 PID: 1 Comm: systemd Not tainted 5.1.0-rc3-00012-g40b114779944 #2 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x228 | show_stack+0x14/0x20 | dump_stack+0xe8/0x124 | print_address_description+0x60/0x258 | kasan_report+0x140/0x1a0 | __asan_report_load8_noabort+0x18/0x20 | __do_proc_doulongvec_minmax+0x5d8/0x6a0 | proc_doulongvec_minmax+0x4c/0x78 | proc_sys_call_handler.isra.19+0x144/0x1d8 | proc_sys_write+0x34/0x58 | __vfs_write+0x54/0xe8 | vfs_write+0x124/0x3c0 | ksys_write+0xbc/0x168 | __arm64_sys_write+0x68/0x98 | el0_svc_common+0x100/0x258 | el0_svc_handler+0x48/0xc0 | el0_svc+0x8/0xc | | The buggy address belongs to the variable: | zero+0x0/0x40 | | Memory state around the buggy address: | ffff2000133d1b00: 00 00 00 00 00 00 00 00 fa fa fa fa 04 fa fa fa | ffff2000133d1b80: fa fa fa fa 04 fa fa fa fa fa fa fa 04 fa fa fa | >ffff2000133d1c00: fa fa fa fa 04 fa fa fa fa fa fa fa 00 00 00 00 | ^ | ffff2000133d1c80: fa fa fa fa 00 fa fa fa fa fa fa fa 00 00 00 00 | ffff2000133d1d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fix the splat by introducing a unsigned long 'zero_ul' and using that instead. Link: http://lkml.kernel.org/r/20190403153409.17307-1-will.deacon@arm.com Fixes: 32a5ad9c2285 ("sysctl: handle overflow for file-max") Signed-off-by: Will Deacon Acked-by: Christian Brauner Cc: Kees Cook Cc: Alexey Dobriyan Cc: Matteo Croce Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 28ec71d914c7..f50f1471c119 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -126,6 +126,7 @@ static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; +static unsigned long zero_ul; static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; static int one_hundred = 100; @@ -1723,7 +1724,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, + .extra1 = &zero_ul, .extra2 = &long_max, }, { -- cgit v1.2.3 From d3da1f09fff27b3ae5908119dab312b9baec09e0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 27 Apr 2019 09:37:45 +0200 Subject: Linux 5.0.10 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 807ae0e3ff6e..b282c4143b21 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 9 +SUBLEVEL = 10 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3 From 5b31245480624a4fb8de558d2565b765d1d23811 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Mar 2019 15:30:03 +0100 Subject: netfilter: nf_tables: bogus EBUSY when deleting set after flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 273fe3f1006ea5ebc63d6729e43e8e45e32b256a ] Set deletion after flush coming in the same batch results in EBUSY. Add set use counter to track the number of references to this set from rules. We cannot rely on the list of bindings for this since such list is still populated from the preparation phase. Reported-by: Václav Zindulka Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nf_tables_api.c | 28 +++++++++++++++++++++++++++- net/netfilter/nft_dynset.c | 13 +++++++++---- net/netfilter/nft_lookup.c | 13 +++++++++---- net/netfilter/nft_objref.c | 13 +++++++++---- 5 files changed, 60 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0612439909dc..9e0b9ecb43db 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -382,6 +382,7 @@ void nft_unregister_set(struct nft_set_type *type); * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size + * @use: number of rules references to this set * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal * @timeout: default timeout value in jiffies @@ -407,6 +408,7 @@ struct nft_set { u32 dtype; u32 objtype; u32 size; + u32 use; atomic_t nelems; u32 ndeact; u64 timeout; @@ -467,6 +469,10 @@ struct nft_set_binding { u32 flags; }; +enum nft_trans_phase; +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index acb124ce92ec..e2aac80f9b7b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3624,6 +3624,9 @@ err1: static void nft_set_destroy(struct nft_set *set) { + if (WARN_ON(set->use > 0)) + return; + set->ops->destroy(set); module_put(to_set_type(set->ops)->owner); kfree(set->name); @@ -3664,7 +3667,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(set); } - if (!list_empty(&set->bindings) || + if (set->use || (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) { NL_SET_BAD_ATTR(extack, attr); return -EBUSY; @@ -3694,6 +3697,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; + if (set->use == UINT_MAX) + return -EOVERFLOW; + if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -3721,6 +3727,7 @@ bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); + set->use++; return 0; } @@ -3740,6 +3747,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase) +{ + switch (phase) { + case NFT_TRANS_PREPARE: + set->use--; + return; + case NFT_TRANS_ABORT: + case NFT_TRANS_RELEASE: + set->use--; + /* fall through */ + default: + nf_tables_unbind_set(ctx, set, binding, + phase == NFT_TRANS_COMMIT); + } +} +EXPORT_SYMBOL_GPL(nf_tables_deactivate_set); + void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index f1172f99752b..eb7f9a5f2aeb 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -241,11 +241,15 @@ static void nft_dynset_deactivate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_dynset_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_dynset *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -293,6 +297,7 @@ static const struct nft_expr_ops nft_dynset_ops = { .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, + .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 14496da5141d..161c3451a747 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -127,11 +127,15 @@ static void nft_lookup_deactivate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_lookup_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_lookup *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -222,6 +226,7 @@ static const struct nft_expr_ops nft_lookup_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, + .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index ae178e914486..d8737c115257 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -161,11 +161,15 @@ static void nft_objref_map_deactivate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_objref_map_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -182,6 +186,7 @@ static const struct nft_expr_ops nft_objref_map_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, + .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, -- cgit v1.2.3 From 0ccd99339030d617498d801d828594898c6f5bf6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2019 10:50:20 +0100 Subject: netfilter: nf_tables: bogus EBUSY in helper removal from transaction [ Upstream commit 8ffcd32f64633926163cdd07a7d295c500a947d1 ] Proper use counter updates when activating and deactivating the object, otherwise, this hits bogus EBUSY error. Fixes: cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate and destroy phase") Reported-by: Laura Garcia Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_objref.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index d8737c115257..bf92a40dd1b2 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -64,21 +64,34 @@ nla_put_failure: return -1; } -static void nft_objref_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_objref_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_object *obj = nft_objref_priv(expr); + if (phase == NFT_TRANS_COMMIT) + return; + obj->use--; } +static void nft_objref_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_object *obj = nft_objref_priv(expr); + + obj->use++; +} + static struct nft_expr_type nft_objref_type; static const struct nft_expr_ops nft_objref_ops = { .type = &nft_objref_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)), .eval = nft_objref_eval, .init = nft_objref_init, - .destroy = nft_objref_destroy, + .activate = nft_objref_activate, + .deactivate = nft_objref_deactivate, .dump = nft_objref_dump, }; -- cgit v1.2.3 From 92d4af2766a1645763c15c738223bb0f9254329c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 1 Mar 2019 10:09:55 +0200 Subject: intel_th: gth: Fix an off-by-one in output unassigning [ Upstream commit 91d3f8a629849968dc91d6ce54f2d46abf4feb7f ] Commit 9ed3f22223c3 ("intel_th: Don't reference unassigned outputs") fixes a NULL dereference for all masters except the last one ("256+"), which keeps the stale pointer after the output driver had been unassigned. Fix the off-by-one. Signed-off-by: Alexander Shishkin Fixes: 9ed3f22223c3 ("intel_th: Don't reference unassigned outputs") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/intel_th/gth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c index cc287cf6eb29..edc52d75e6bd 100644 --- a/drivers/hwtracing/intel_th/gth.c +++ b/drivers/hwtracing/intel_th/gth.c @@ -616,7 +616,7 @@ static void intel_th_gth_unassign(struct intel_th_device *thdev, othdev->output.port = -1; othdev->output.active = false; gth->output[port].output = NULL; - for (master = 0; master < TH_CONFIGURABLE_MASTERS; master++) + for (master = 0; master <= TH_CONFIGURABLE_MASTERS; master++) if (gth->master[master] == port) gth->master[master] = -1; spin_unlock(>h->gth_lock); -- cgit v1.2.3 From b6f3aa978866902ba23b3b38437cc41a8eced565 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 4 Apr 2019 12:20:05 +0000 Subject: powerpc/vdso32: fix CLOCK_MONOTONIC on PPC64 [ Upstream commit dd9a994fc68d196a052b73747e3366c57d14a09e ] Commit b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038") changed the type of wtom_clock_sec to s64 on PPC64. Therefore, VDSO32 needs to read it with a 4 bytes shift in order to retrieve the lower part of it. Fixes: b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038") Reported-by: Christian Zigotzky Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/vdso32/gettimeofday.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 1e0bc5955a40..afd516b572f8 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -98,7 +98,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) * can be used, r7 contains NSEC_PER_SEC. */ - lwz r5,WTOM_CLOCK_SEC(r9) + lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) lwz r6,WTOM_CLOCK_NSEC(r9) /* We now have our offset in r5,r6. We create a fake dependency -- cgit v1.2.3 From 685550ad1192cd848439034c9e371322d6160b6e Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 3 Apr 2019 15:31:49 +0800 Subject: ALSA: hda/realtek - Move to ACT_INIT state [ Upstream commit 8983eb602af511fc5822f5ff4a82074c68816fd9 ] It will be lose Mic JD state when Chrome OS boot and headset was plugged. Just Implement of reset combo jack JD verb for ACT_PRE_PROBE state. Intel test result was also failed. It test passed until changed the initial state to ACT_INIT. Mic JD will show every time. This patch also changed the model name as 'alc-chrome-book' for application of Chrome OS. Fixes: 10f5b1b85ed1 ("ALSA: hda/realtek - Fixed Headset Mic JD not stable") Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f061167062bc..a9f69c3a3e0b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5490,7 +5490,7 @@ static void alc_headset_btn_callback(struct hda_codec *codec, jack->jack->button_state = report; } -static void alc295_fixup_chromebook(struct hda_codec *codec, +static void alc_fixup_headset_jack(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -5500,16 +5500,6 @@ static void alc295_fixup_chromebook(struct hda_codec *codec, alc_headset_btn_callback); snd_hda_jack_add_kctl(codec, 0x55, "Headset Jack", false, SND_JACK_HEADSET, alc_headset_btn_keymap); - switch (codec->core.vendor_id) { - case 0x10ec0295: - alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */ - alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); - break; - case 0x10ec0236: - alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ - alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); - break; - } break; case HDA_FIXUP_ACT_INIT: switch (codec->core.vendor_id) { @@ -5530,6 +5520,25 @@ static void alc295_fixup_chromebook(struct hda_codec *codec, } } +static void alc295_fixup_chromebook(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + switch (action) { + case HDA_FIXUP_ACT_INIT: + switch (codec->core.vendor_id) { + case 0x10ec0295: + alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */ + alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); + break; + case 0x10ec0236: + alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ + alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); + break; + } + break; + } +} + static void alc_fixup_disable_mic_vref(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -5684,6 +5693,7 @@ enum { ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE, ALC255_FIXUP_ACER_HEADSET_MIC, ALC295_FIXUP_CHROME_BOOK, + ALC225_FIXUP_HEADSET_JACK, ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE, ALC225_FIXUP_WYSE_AUTO_MUTE, ALC225_FIXUP_WYSE_DISABLE_MIC_VREF, @@ -6645,6 +6655,12 @@ static const struct hda_fixup alc269_fixups[] = { [ALC295_FIXUP_CHROME_BOOK] = { .type = HDA_FIXUP_FUNC, .v.func = alc295_fixup_chromebook, + .chained = true, + .chain_id = ALC225_FIXUP_HEADSET_JACK + }, + [ALC225_FIXUP_HEADSET_JACK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_jack, }, [ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, @@ -7143,7 +7159,8 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC255_FIXUP_DUMMY_LINEOUT_VERB, .name = "alc255-dummy-lineout"}, {.id = ALC255_FIXUP_DELL_HEADSET_MIC, .name = "alc255-dell-headset"}, {.id = ALC295_FIXUP_HP_X360, .name = "alc295-hp-x360"}, - {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-sense-combo"}, + {.id = ALC225_FIXUP_HEADSET_JACK, .name = "alc-headset-jack"}, + {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-chrome-book"}, {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"}, {} }; -- cgit v1.2.3 From 41e09d7eab076a3368449470eb14de195bf5de29 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 25 Apr 2019 22:24:05 -0700 Subject: fs/proc/proc_sysctl.c: Fix a NULL pointer dereference [ Upstream commit 89189557b47b35683a27c80ee78aef18248eefb4 ] Syzkaller report this: sysctl could not get directory: /net//bridge -12 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 7027 Comm: syz-executor.0 Tainted: G C 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__write_once_size include/linux/compiler.h:220 [inline] RIP: 0010:__rb_change_child include/linux/rbtree_augmented.h:144 [inline] RIP: 0010:__rb_erase_augmented include/linux/rbtree_augmented.h:186 [inline] RIP: 0010:rb_erase+0x5f4/0x19f0 lib/rbtree.c:459 Code: 00 0f 85 60 13 00 00 48 89 1a 48 83 c4 18 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 f2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 75 0c 00 00 4d 85 ed 4c 89 2e 74 ce 4c 89 ea 48 RSP: 0018:ffff8881bb507778 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: ffff8881f224b5b8 RCX: ffffffff818f3f6a RDX: 000000000000000a RSI: 0000000000000050 RDI: ffff8881f224b568 RBP: 0000000000000000 R08: ffffed10376a0ef4 R09: ffffed10376a0ef4 R10: 0000000000000001 R11: ffffed10376a0ef4 R12: ffff8881f224b558 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f3e7ce13700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd60fbe9398 CR3: 00000001cb55c001 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: erase_entry fs/proc/proc_sysctl.c:178 [inline] erase_header+0xe3/0x160 fs/proc/proc_sysctl.c:207 start_unregistering fs/proc/proc_sysctl.c:331 [inline] drop_sysctl_table+0x558/0x880 fs/proc/proc_sysctl.c:1631 get_subdir fs/proc/proc_sysctl.c:1022 [inline] __register_sysctl_table+0xd65/0x1090 fs/proc/proc_sysctl.c:1335 br_netfilter_init+0x68/0x1000 [br_netfilter] do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Modules linked in: br_netfilter(+) backlight comedi(C) hid_sensor_hub max3100 ti_ads8688 udc_core fddi snd_mona leds_gpio rc_streamzap mtd pata_netcell nf_log_common rc_winfast udp_tunnel snd_usbmidi_lib snd_usb_toneport snd_usb_line6 snd_rawmidi snd_seq_device snd_hwdep videobuf2_v4l2 videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops rc_gadmei_rm008z 8250_of smm665 hid_tmff hid_saitek hwmon_vid rc_ati_tv_wonder_hd_600 rc_core pata_pdc202xx_old dn_rtmsg as3722 ad714x_i2c ad714x snd_soc_cs4265 hid_kensington panel_ilitek_ili9322 drm drm_panel_orientation_quirks ipack cdc_phonet usbcore phonet hid_jabra hid extcon_arizona can_dev industrialio_triggered_buffer kfifo_buf industrialio adm1031 i2c_mux_ltc4306 i2c_mux ipmi_msghandler mlxsw_core snd_soc_cs35l34 snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer ac97_bus snd_compress snd soundcore gpio_da9055 uio ecdh_generic mdio_thunder of_mdio fixed_phy libphy mdio_cavium iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel ide_pci_generic piix aes_x86_64 crypto_simd cryptd ide_core glue_helper input_leds psmouse intel_agp intel_gtt serio_raw ata_generic i2c_piix4 agpgart pata_acpi parport_pc parport floppy rtc_cmos sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: br_netfilter] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 68741688d5fbfe85 ]--- commit 23da9588037e ("fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links") forgot to handle start_unregistering() case, while header->parent is NULL, it calls erase_header() and as seen in the above syzkaller call trace, accessing &header->parent->root will trigger a NULL pointer dereference. As that commit explained, there is also no need to call start_unregistering() if header->parent is NULL. Link: http://lkml.kernel.org/r/20190409153622.28112-1-yuehaibing@huawei.com Fixes: 23da9588037e ("fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links") Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between sysctl_table_sets") Signed-off-by: YueHaibing Reported-by: Hulk Robot Reviewed-by: Kees Cook Cc: Luis Chamberlain Cc: Alexey Dobriyan Cc: Al Viro Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/proc/proc_sysctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d65390727541..7325baa8f9d4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - if (parent) + if (parent) { put_links(header); - start_unregistering(header); + start_unregistering(header); + } + if (!--header->count) kfree_rcu(header, rcu); -- cgit v1.2.3 From 34fb6f5eb2cd341a0807b7a2ff0f0a3ebfa45925 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 10 Apr 2019 10:38:33 +0200 Subject: block, bfq: fix use after free in bfq_bfqq_expire [ Upstream commit eed47d19d9362bdd958e4ab56af480b9dbf6b2b6 ] The function bfq_bfqq_expire() invokes the function __bfq_bfqq_expire(), and the latter may free the in-service bfq-queue. If this happens, then no other instruction of bfq_bfqq_expire() must be executed, or a use-after-free will occur. Basing on the assumption that __bfq_bfqq_expire() invokes bfq_put_queue() on the in-service bfq-queue exactly once, the queue is assumed to be freed if its refcounter is equal to one right before invoking __bfq_bfqq_expire(). But, since commit 9dee8b3b057e ("block, bfq: fix queue removal from weights tree") this assumption is false. __bfq_bfqq_expire() may also invoke bfq_weights_tree_remove() and, since commit 9dee8b3b057e ("block, bfq: fix queue removal from weights tree"), also the latter function may invoke bfq_put_queue(). So __bfq_bfqq_expire() may invoke bfq_put_queue() twice, and this is the actual case where the in-service queue may happen to be freed. To address this issue, this commit moves the check on the refcounter of the queue right around the last bfq_put_queue() that may be invoked on the queue. Fixes: 9dee8b3b057e ("block, bfq: fix queue removal from weights tree") Reported-by: Dmitrii Tcvetkov Reported-by: Douglas Anderson Tested-by: Dmitrii Tcvetkov Tested-by: Douglas Anderson Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 15 +++++++-------- block/bfq-iosched.h | 2 +- block/bfq-wf2q.c | 17 +++++++++++++++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e5ed28629271..72510c470001 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2804,7 +2804,7 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) bfq_remove_request(q, rq); } -static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) { /* * If this bfqq is shared between multiple processes, check @@ -2837,9 +2837,11 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* * All in-service entities must have been properly deactivated * or requeued before executing the next function, which - * resets all in-service entites as no more in service. + * resets all in-service entities as no more in service. This + * may cause bfqq to be freed. If this happens, the next + * function returns true. */ - __bfq_bfqd_reset_in_service(bfqd); + return __bfq_bfqd_reset_in_service(bfqd); } /** @@ -3244,7 +3246,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, bool slow; unsigned long delta = 0; struct bfq_entity *entity = &bfqq->entity; - int ref; /* * Check whether the process is slow (see bfq_bfqq_is_slow). @@ -3313,10 +3314,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, * reason. */ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); - ref = bfqq->ref; - __bfq_bfqq_expire(bfqd, bfqq); - - if (ref == 1) /* bfqq is gone, no more actions on it */ + if (__bfq_bfqq_expire(bfqd, bfqq)) + /* bfqq is gone, no more actions on it */ return; bfqq->injected_service = 0; diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 746bd570b85a..ca98c98a8179 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -993,7 +993,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree); bool next_queue_may_preempt(struct bfq_data *bfqd); struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd); -void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); +bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool ins_into_idle_tree, bool expiration); void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 4aab1a8191f0..8077bf71d2ac 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1599,7 +1599,8 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) return bfqq; } -void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) +/* returns true if the in-service queue gets freed */ +bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) { struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue; struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity; @@ -1623,8 +1624,20 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) * service tree either, then release the service reference to * the queue it represents (taken with bfq_get_entity). */ - if (!in_serv_entity->on_st) + if (!in_serv_entity->on_st) { + /* + * If no process is referencing in_serv_bfqq any + * longer, then the service reference may be the only + * reference to the queue. If this is the case, then + * bfqq gets freed here. + */ + int ref = in_serv_bfqq->ref; bfq_put_queue(in_serv_bfqq); + if (ref == 1) + return true; + } + + return false; } void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -- cgit v1.2.3 From 62cf691cdf74c40fb194143e295204f339194b11 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 23 Apr 2019 16:39:45 +1000 Subject: cifs: fix memory leak in SMB2_read [ Upstream commit 05fd5c2c61732152a6bddc318aae62d7e436629b ] Commit 088aaf17aa79300cab14dbee2569c58cfafd7d6e introduced a leak where if SMB2_read() returned an error we would return without freeing the request buffer. Cc: Stable Signed-off-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 938e75cc3b66..85a3c051e622 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3402,6 +3402,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, rc); } free_rsp_buf(resp_buftype, rsp_iov.iov_base); + cifs_small_buf_release(req); return rc == -ENODATA ? 0 : rc; } else trace_smb3_read_done(xid, req->PersistentFileId, -- cgit v1.2.3 From e6302b845f2dc3c3298f77b6bd2b754e35837a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Wed, 10 Apr 2019 15:37:47 -0400 Subject: cifs: fix page reference leak with readv/writev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13f5938d8264b5501368523c4513ff26608a33e8 upstream. CIFS can leak pages reference gotten through GUP (get_user_pages*() through iov_iter_get_pages()). This happen if cifs_send_async_read() or cifs_write_from_iter() calls fail from within __cifs_readv() and __cifs_writev() respectively. This patch move page unreference to cifs_aio_ctx_release() which will happens on all code paths this is all simpler to follow for correctness. Signed-off-by: Jérôme Glisse Cc: Steve French Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Cc: Linus Torvalds Cc: Stable Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 15 +-------------- fs/cifs/misc.c | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7c05353b766c..7c3f9d00586e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2796,7 +2796,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; struct dentry *dentry = ctx->cfile->dentry; - unsigned int i; int rc; tcon = tlink_tcon(ctx->cfile->tlink); @@ -2860,10 +2859,6 @@ restart_loop: kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - if (!ctx->direct_io) - for (i = 0; i < ctx->npages; i++) - put_page(ctx->bv[i].bv_page); - cifs_stats_bytes_written(tcon, ctx->total_len); set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); @@ -3472,7 +3467,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx) struct iov_iter *to = &ctx->iter; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - unsigned int i; int rc; tcon = tlink_tcon(ctx->cfile->tlink); @@ -3556,15 +3550,8 @@ again: kref_put(&rdata->refcount, cifs_uncached_readdata_release); } - if (!ctx->direct_io) { - for (i = 0; i < ctx->npages; i++) { - if (ctx->should_dirty) - set_page_dirty(ctx->bv[i].bv_page); - put_page(ctx->bv[i].bv_page); - } - + if (!ctx->direct_io) ctx->total_len = ctx->len - iov_iter_count(to); - } cifs_stats_bytes_read(tcon, ctx->total_len); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1e1626a2cfc3..0dc6f08020ac 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -789,6 +789,11 @@ cifs_aio_ctx_alloc(void) { struct cifs_aio_ctx *ctx; + /* + * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io + * to false so that we know when we have to unreference pages within + * cifs_aio_ctx_release() + */ ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); if (!ctx) return NULL; @@ -807,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount) struct cifs_aio_ctx, refcount); cifsFileInfo_put(ctx->cfile); - kvfree(ctx->bv); + + /* + * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly + * which means that iov_iter_get_pages() was a success and thus that + * we have taken reference on pages. + */ + if (ctx->bv) { + unsigned i; + + for (i = 0; i < ctx->npages; i++) { + if (ctx->should_dirty) + set_page_dirty(ctx->bv[i].bv_page); + put_page(ctx->bv[i].bv_page); + } + kvfree(ctx->bv); + } + kfree(ctx); } -- cgit v1.2.3 From 90b70b3ed31b4dcdc0cce09931c20002d2d6c2d5 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Tue, 16 Apr 2019 08:37:27 -0500 Subject: cifs: do not attempt cifs operation on smb2+ rename error commit 652727bbe1b17993636346716ae5867627793647 upstream. A path-based rename returning EBUSY will incorrectly try opening the file with a cifs (NT Create AndX) operation on an smb2+ mount, which causes the server to force a session close. If the mount is smb2+, skip the fallback. Signed-off-by: Frank Sorenson Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 53fdb5df0d2e..538fd7d807e4 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, if (rc == 0 || rc != -EBUSY) goto do_rename_exit; + /* Don't fall back to using SMB on SMB 2+ mount */ + if (server->vals->protocol_id != 0) + goto do_rename_exit; + /* open-file renames don't work across directories */ if (to_dentry->d_parent != from_dentry->d_parent) goto do_rename_exit; -- cgit v1.2.3 From 68ab802fb8ccc6cbd953aa631bba7d98bc491996 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 19 Apr 2019 21:22:59 -0500 Subject: tracing: Fix a memory leak by early error exit in trace_pid_write() commit 91862cc7867bba4ee5c8fcf0ca2f1d30427b6129 upstream. In trace_pid_write(), the buffer for trace parser is allocated through kmalloc() in trace_parser_get_init(). Later on, after the buffer is used, it is then freed through kfree() in trace_parser_put(). However, it is possible that trace_pid_write() is terminated due to unexpected errors, e.g., ENOMEM. In that case, the allocated buffer will not be freed, which is a memory leak bug. To fix this issue, free the allocated buffer when an error is encountered. Link: http://lkml.kernel.org/r/1555726979-15633-1-git-send-email-wang6495@umn.edu Fixes: f4d34a87e9c10 ("tracing: Use pid bitmap instead of a pid array for set_event_pid") Cc: stable@vger.kernel.org Signed-off-by: Wenwen Wang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 89158aa93fa6..df7a16e45241 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * not modified. */ pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); - if (!pid_list) + if (!pid_list) { + trace_parser_put(&parser); return -ENOMEM; + } pid_list->pid_max = READ_ONCE(pid_max); @@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); if (!pid_list->pids) { + trace_parser_put(&parser); kfree(pid_list); return -ENOMEM; } -- cgit v1.2.3 From 8659a04c77e2ca52ce2af42f7c50b49e690ffb19 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 4 Apr 2019 23:59:25 +0200 Subject: tracing: Fix buffer_ref pipe ops commit b987222654f84f7b4ca95b3a55eca784cb30235b upstream. This fixes multiple issues in buffer_pipe_buf_ops: - The ->steal() handler must not return zero unless the pipe buffer has the only reference to the page. But generic_pipe_buf_steal() assumes that every reference to the pipe is tracked by the page's refcount, which isn't true for these buffers - buffer_pipe_buf_get(), which duplicates a buffer, doesn't touch the page's refcount. Fix it by using generic_pipe_buf_nosteal(), which refuses every attempted theft. It should be easy to actually support ->steal, but the only current users of pipe_buf_steal() are the virtio console and FUSE, and they also only use it as an optimization. So it's probably not worth the effort. - The ->get() and ->release() handlers can be invoked concurrently on pipe buffers backed by the same struct buffer_ref. Make them safe against concurrency by using refcount_t. - The pointers stored in ->private were only zeroed out when the last reference to the buffer_ref was dropped. As far as I know, this shouldn't be necessary anyway, but if we do it, let's always do it. Link: http://lkml.kernel.org/r/20190404215925.253531-1-jannh@google.com Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Al Viro Cc: stable@vger.kernel.org Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") Signed-off-by: Jann Horn Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- fs/splice.c | 4 ++-- include/linux/pipe_fs_i.h | 1 + kernel/trace/trace.c | 28 ++++++++++++++-------------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 90c29675d573..7da7d5437472 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -333,8 +333,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = { .get = generic_pipe_buf_get, }; -static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { return 1; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3ecd7ea212ae..66ee63cd5968 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -181,6 +181,7 @@ void free_pipe_info(struct pipe_inode_info *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index df7a16e45241..d07fc2836786 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6823,19 +6823,23 @@ struct buffer_ref { struct ring_buffer *buffer; void *page; int cpu; - int ref; + refcount_t refcount; }; +static void buffer_ref_release(struct buffer_ref *ref) +{ + if (!refcount_dec_and_test(&ref->refcount)) + return; + ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); + kfree(ref); +} + static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); buf->private = 0; } @@ -6844,7 +6848,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - ref->ref++; + refcount_inc(&ref->refcount); } /* Pipe buffer operations for a buffer. */ @@ -6852,7 +6856,7 @@ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .can_merge = 0, .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, - .steal = generic_pipe_buf_steal, + .steal = generic_pipe_buf_nosteal, .get = buffer_pipe_buf_get, }; @@ -6865,11 +6869,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) struct buffer_ref *ref = (struct buffer_ref *)spd->partial[i].private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); spd->partial[i].private = 0; } @@ -6924,7 +6924,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - ref->ref = 1; + refcount_set(&ref->refcount, 1); ref->buffer = iter->trace_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { -- cgit v1.2.3 From 6506cdd4205b20e17dc0191fb00e02c48a146840 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Apr 2019 14:35:19 +0800 Subject: crypto: xts - Fix atomic sleep when walking skcipher commit 44427c0fbc09b448b22410978a4ef6ee37599d25 upstream. When we perform a walk in the completion function, we need to ensure that it is atomic. Reported-by: syzbot+6f72c20560060c98b566@syzkaller.appspotmail.com Fixes: 78105c7e769b ("crypto: xts - Drop use of auxiliary buffer") Cc: Signed-off-by: Herbert Xu Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/xts.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/xts.c b/crypto/xts.c index 847f54f76789..2f948328cabb 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -137,8 +137,12 @@ static void crypt_done(struct crypto_async_request *areq, int err) { struct skcipher_request *req = areq->data; - if (!err) + if (!err) { + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = xor_tweak_post(req); + } skcipher_request_complete(req, err); } -- cgit v1.2.3 From 0bb4e85bbd34c65af2f0abebde0e6cab0f0b6504 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Apr 2019 14:37:34 +0800 Subject: crypto: lrw - Fix atomic sleep when walking skcipher commit b257b48cd5830c5b1d0c347eb281f9c28056f881 upstream. When we perform a walk in the completion function, we need to ensure that it is atomic. Fixes: ac3c8f36c31d ("crypto: lrw - Do not use auxiliary buffer") Cc: Signed-off-by: Herbert Xu Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/lrw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index 0430ccd08728..08a0e458bc3e 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -212,8 +212,12 @@ static void crypt_done(struct crypto_async_request *areq, int err) { struct skcipher_request *req = areq->data; - if (!err) + if (!err) { + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = xor_tweak_post(req); + } skcipher_request_complete(req, err); } -- cgit v1.2.3 From e6093c0212d03c1e50f37ce579a26388b7442e3e Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 10 Apr 2019 15:47:54 +0800 Subject: gpio: eic: sprd: Fix incorrect irq type setting for the sync EIC commit 102bbe34b31c9159e714432afd64458f6f3876d7 upstream. When setting sync EIC as IRQ_TYPE_EDGE_BOTH type, we missed to set the SPRD_EIC_SYNC_INTMODE register to 0, which means detecting edge signals. Thus this patch fixes the issue. Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Cc: Signed-off-by: Baolin Wang Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-eic-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index e41223c05f6e..6cf2e2ce4093 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); irq_set_handler_locked(data, handle_edge_irq); break; -- cgit v1.2.3 From f4ab3de10c84613735d55c5634927b8d38b777c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Thu, 25 Apr 2019 22:23:41 -0700 Subject: zram: pass down the bvec we need to read into in the work struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e153abc0739ff77bd89c9ba1688cdb963464af97 upstream. When scheduling work item to read page we need to pass down the proper bvec struct which points to the page to read into. Before this patch it uses a randomly initialized bvec (only if PAGE_SIZE != 4096) which is wrong. Note that without this patch on arch/kernel where PAGE_SIZE != 4096 userspace could read random memory through a zram block device (thought userspace probably would have no control on the address being read). Link: http://lkml.kernel.org/r/20190408183219.26377-1-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: Andrew Morton Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Nitin Gupta Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 684854d3b0ad..7e57f8f012c3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -774,18 +774,18 @@ struct zram_work { struct zram *zram; unsigned long entry; struct bio *bio; + struct bio_vec bvec; }; #if PAGE_SIZE != 4096 static void zram_sync_read(struct work_struct *work) { - struct bio_vec bvec; struct zram_work *zw = container_of(work, struct zram_work, work); struct zram *zram = zw->zram; unsigned long entry = zw->entry; struct bio *bio = zw->bio; - read_from_bdev_async(zram, &bvec, entry, bio); + read_from_bdev_async(zram, &zw->bvec, entry, bio); } /* @@ -798,6 +798,7 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, { struct zram_work work; + work.bvec = *bvec; work.zram = zram; work.entry = entry; work.bio = bio; -- cgit v1.2.3 From 928962e99694c00808834ea608c2db17ec66aab4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 25 Apr 2019 22:23:44 -0700 Subject: lib/Kconfig.debug: fix build error without CONFIG_BLOCK commit ae3d6a323347940f0548bbb4b17f0bb2e9164169 upstream. If CONFIG_TEST_KMOD is set to M, while CONFIG_BLOCK is not set, XFS and BTRFS can not be compiled successly. Link: http://lkml.kernel.org/r/20190410075434.35220-1-yuehaibing@huawei.com Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: YueHaibing Reported-by: Hulk Robot Reviewed-by: Kees Cook Cc: Masahiro Yamada Cc: Petr Mladek Cc: Andy Shevchenko Cc: Matthew Wilcox Cc: Joe Lawrence Cc: Robin Murphy Cc: Luis Chamberlain Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d4df5b24d75e..350d5328014f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1952,6 +1952,7 @@ config TEST_KMOD depends on m depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS depends on NETDEVICES && NET_CORE && INET # for TUN + depends on BLOCK select TEST_LKM select XFS_FS select TUN -- cgit v1.2.3 From 0f73358dff40fc8e919b059d10531ee4605fc9bf Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Tue, 9 Apr 2019 16:53:55 +0200 Subject: MIPS: scall64-o32: Fix indirect syscall number load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e upstream. Commit 4c21b8fd8f14 (MIPS: seccomp: Handle indirect system calls (o32)) added indirect syscall detection for O32 processes running on MIPS64, but it did not work correctly for big endian kernel/processes. The reason is that the syscall number is loaded from ARG1 using the lw instruction while this is a 64-bit value, so zero is loaded instead of the syscall number. Fix the code by using the ld instruction instead. When running a 32-bit processes on a 64 bit CPU, the values are properly sign-extended, so it ensures the value passed to syscall_trace_enter is correct. Recent systemd versions with seccomp enabled whitelist the getpid syscall for their internal processes (e.g. systemd-journald), but call it through syscall(SYS_getpid). This fix therefore allows O32 big endian systems with a 64-bit kernel to run recent systemd versions. Signed-off-by: Aurelien Jarno Cc: # v3.15+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/scall64-o32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f158c5894a9a..feb2653490df 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -125,7 +125,7 @@ trace_a_syscall: subu t1, v0, __NR_O32_Linux move a1, v0 bnez t1, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ + ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ .set pop 1: jal syscall_trace_enter -- cgit v1.2.3 From 655b464eeaa82821846d0b68d69550ef1d83b4e6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Apr 2019 22:03:18 +0200 Subject: trace: Fix preempt_enable_no_resched() abuse commit d6097c9e4454adf1f8f2c9547c2fa6060d55d952 upstream. Unless the very next line is schedule(), or implies it, one must not use preempt_enable_no_resched(). It can cause a preemption to go missing and thereby cause arbitrary delays, breaking the PREEMPT=y invariant. Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net Cc: Waiman Long Cc: Linus Torvalds Cc: Ingo Molnar Cc: Will Deacon Cc: Thomas Gleixner Cc: the arch/x86 maintainers Cc: Davidlohr Bueso Cc: Tim Chen Cc: huang ying Cc: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: stable@vger.kernel.org Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b49affb4666b..4463ae28bf1a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -776,7 +776,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) preempt_disable_notrace(); time = rb_time_stamp(buffer); - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); return time; } -- cgit v1.2.3 From e5f06bf8f46ce22033bc4354ae05c916be87f501 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 25 Apr 2019 22:23:51 -0700 Subject: mm: do not boost watermarks to avoid fragmentation for the DISCONTIG memory model commit 24512228b7a3f412b5a51f189df302616b021c33 upstream. Mikulas Patocka reported that commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") "broke" memory management on parisc. The machine is not NUMA but the DISCONTIG model creates three pgdats even though it's a UMA machine for the following ranges 0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB 1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB 2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB Mikulas reported: With the patch 1c30844d2, the kernel will incorrectly reclaim the first zone when it fills up, ignoring the fact that there are two completely free zones. Basiscally, it limits cache size to 1GiB. For example, if I run: # dd if=/dev/sda of=/dev/null bs=1M count=2048 - with the proper kernel, there should be "Buffers - 2GiB" when this command finishes. With the patch 1c30844d2, buffers will consume just 1GiB or slightly more, because the kernel was incorrectly reclaiming them. The page allocator and reclaim makes assumptions that pgdats really represent NUMA nodes and zones represent ranges and makes decisions on that basis. Watermark boosting for small pgdats leads to unexpected results even though this would have behaved reasonably on SPARSEMEM. DISCONTIG is essentially deprecated and even parisc plans to move to SPARSEMEM so there is no need to be fancy, this patch simply disables watermark boosting by default on DISCONTIGMEM. Link: http://lkml.kernel.org/r/20190419094335.GJ18914@techsingularity.net Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") Signed-off-by: Mel Gorman Reported-by: Mikulas Patocka Tested-by: Mikulas Patocka Acked-by: Vlastimil Babka Cc: James Bottomley Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/vm.txt | 16 ++++++++-------- mm/page_alloc.c | 13 +++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 187ce4f599a2..e4dfaf0d6e87 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -866,14 +866,14 @@ The intent is that compaction has less work to do in the future and to increase the success rate of future high-order allocations such as SLUB allocations, THP and hugetlbfs pages. -To make it sensible with respect to the watermark_scale_factor parameter, -the unit is in fractions of 10,000. The default value of 15,000 means -that up to 150% of the high watermark will be reclaimed in the event of -a pageblock being mixed due to fragmentation. The level of reclaim is -determined by the number of fragmentation events that occurred in the -recent past. If this value is smaller than a pageblock then a pageblocks -worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor -of 0 will disable the feature. +To make it sensible with respect to the watermark_scale_factor +parameter, the unit is in fractions of 10,000. The default value of +15,000 on !DISCONTIGMEM configurations means that up to 150% of the high +watermark will be reclaimed in the event of a pageblock being mixed due +to fragmentation. The level of reclaim is determined by the number of +fragmentation events that occurred in the recent past. If this value is +smaller than a pageblock then a pageblocks worth of pages will be reclaimed +(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature. ============================================================= diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 20dd3283bb1b..318ef6ccdb3b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -266,7 +266,20 @@ compound_page_dtor * const compound_page_dtors[] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +#ifdef CONFIG_DISCONTIGMEM +/* + * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges + * are not on separate NUMA nodes. Functionally this works but with + * watermark_boost_factor, it can reclaim prematurely as the ranges can be + * quite small. By default, do not boost watermarks on discontigmem as in + * many cases very high-order allocations like THP are likely to be + * unsupported and the premature reclaim offsets the advantage of long-term + * fragmentation avoidance. + */ +int watermark_boost_factor __read_mostly; +#else int watermark_boost_factor __read_mostly = 15000; +#endif int watermark_scale_factor = 10; static unsigned long nr_kernel_pages __initdata; -- cgit v1.2.3 From e7e378ed703ee91b5a10b5f3d93b36a3659ab9bc Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 17 Apr 2019 21:29:29 -0700 Subject: arm64: mm: Ensure tail of unaligned initrd is reserved commit d4d18e3ec6091843f607e8929a56723e28f393a6 upstream. In the event that the start address of the initrd is not aligned, but has an aligned size, the base + size will not cover the entire initrd image and there is a chance that the kernel will corrupt the tail of the image. By aligning the end of the initrd to a page boundary and then subtracting the adjusted start address the memblock reservation will cover all pages that contains the initrd. Fixes: c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size") Cc: stable@vger.kernel.org Acked-by: Will Deacon Signed-off-by: Bjorn Andersson Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7205a9085b4d..c9411774555d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -406,7 +406,7 @@ void __init arm64_memblock_init(void) * Otherwise, this is a no-op */ u64 base = phys_initrd_start & PAGE_MASK; - u64 size = PAGE_ALIGN(phys_initrd_size); + u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base; /* * We can only add back the initrd memory if we don't end up -- cgit v1.2.3 From f4dba6bf743e44fd74223c40be29b4fb06ea2fad Mon Sep 17 00:00:00 2001 From: Josh Collier Date: Mon, 15 Apr 2019 11:34:22 -0700 Subject: IB/rdmavt: Fix frwr memory registration commit 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa upstream. Current implementation was not properly handling frwr memory registrations. This was uncovered by commit 27f26cec761das ("xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR)") in which xprtrdma, which is used for NFS over RDMA, started failing as it was the first ULP to modify the ib_mr iova resulting in the NFS server getting REMOTE ACCESS ERROR when attempting to perform RDMA Writes to the client. The fix is to properly capture the true iova, offset, and length in the call to ib_map_mr_sg, and then update the iova when processing the IB_WR_REG_MEM on the send queue. Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg") Cc: stable@vger.kernel.org Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl Signed-off-by: Josh Collier Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rdmavt/mr.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 49c9541050d4..5819c9d6ffdc 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -611,11 +611,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) if (unlikely(mapped_segs == mr->mr.max_segs)) return -ENOMEM; - if (mr->mr.length == 0) { - mr->mr.user_base = addr; - mr->mr.iova = addr; - } - m = mapped_segs / RVT_SEGSZ; n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; @@ -633,17 +628,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) * @sg_nents: number of entries in sg * @sg_offset: offset in bytes into sg * + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. + * * Return: number of sg elements mapped to the memory region */ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct rvt_mr *mr = to_imr(ibmr); + int ret; mr->mr.length = 0; mr->mr.page_shift = PAGE_SHIFT; - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, - rvt_set_page); + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); + mr->mr.user_base = ibmr->iova; + mr->mr.iova = ibmr->iova; + mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; + mr->mr.length = (size_t)ibmr->length; + return ret; } /** @@ -674,6 +676,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, ibmr->rkey = key; mr->mr.lkey = key; mr->mr.access_flags = access; + mr->mr.iova = ibmr->iova; atomic_set(&mr->mr.lkey_invalid, 0); return 0; -- cgit v1.2.3 From 92d6731e6a6f28a0b3e59cc034e448b4280bbc23 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 16 Apr 2019 14:07:25 +0300 Subject: RDMA/mlx5: Do not allow the user to write to the clock page commit c660133c339f9ab684fdf568c0d51b9ae5e86002 upstream. The intent of this VMA was to be read-only from user space, but the VM_MAYWRITE masking was missed, so mprotect could make it writable. Cc: stable@vger.kernel.org Fixes: 5c99eaecb1fc ("IB/mlx5: Mmap the HCA's clock info to user-space") Signed-off-by: Jason Gunthorpe Reviewed-by: Haggai Eran Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94fe253d4956..6e9ce03c5855 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1982,6 +1982,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (vma->vm_flags & VM_WRITE) return -EPERM; + vma->vm_flags &= ~VM_MAYWRITE; if (!dev->mdev->clock_info_page) return -EOPNOTSUPP; @@ -2147,6 +2148,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (vma->vm_flags & VM_WRITE) return -EPERM; + vma->vm_flags &= ~VM_MAYWRITE; /* Don't expose to user-space information it shouldn't have */ if (PAGE_SIZE > 4096) -- cgit v1.2.3 From 52c44c4e81b31974284fde64deb02c4e1a2bd66c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 16 Apr 2019 14:07:26 +0300 Subject: RDMA/mlx5: Use rdma_user_map_io for mapping BAR pages commit d5e560d3f72382ac4e3bfe4e0f0420e6a220b039 upstream. Since mlx5 supports device disassociate it must use this API for all BAR page mmaps, otherwise the pages can remain mapped after the device is unplugged causing a system crash. Cc: stable@vger.kernel.org Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory") Signed-off-by: Jason Gunthorpe Reviewed-by: Haggai Eran Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6e9ce03c5855..497181f5ba09 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2154,14 +2154,12 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (PAGE_SIZE > 4096) return -EOPNOTSUPP; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pfn = (dev->mdev->iseg_base + offsetof(struct mlx5_init_seg, internal_timer_h)) >> PAGE_SHIFT; - if (io_remap_pfn_range(vma, vma->vm_start, pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - break; + return rdma_user_mmap_io(&context->ibucontext, vma, pfn, + PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot)); case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); -- cgit v1.2.3 From c3e4c555ac868a0e4f2515f38d9f4188d9585c75 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 16 Apr 2019 14:07:28 +0300 Subject: RDMA/ucontext: Fix regression with disassociate commit 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 upstream. When this code was consolidated the intention was that the VMA would become backed by anonymous zero pages after the zap_vma_pte - however this very subtly relied on setting the vm_ops = NULL and clearing the VM_SHARED bits to transform the VMA into an anonymous VMA. Since the vm_ops was removed this broke. Now userspace gets a SIGBUS if it touches the vma after disassociation. Instead of converting the VMA to anonymous provide a fault handler that puts a zero'd page into the VMA when user-space touches it after disassociation. Cc: stable@vger.kernel.org Suggested-by: Andrea Arcangeli Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_main.c | 52 +++++++++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ea0bc6885517..32cc8fe7902f 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -160,6 +160,7 @@ struct ib_uverbs_file { struct mutex umap_lock; struct list_head umaps; + struct page *disassociate_page; struct idr idr; /* spinlock protects write access to idr */ diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e2a4570a47e8..857eff31b433 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref) kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); put_device(&file->device->dev); + + if (file->disassociate_page) + __free_pages(file->disassociate_page, 0); kfree(file); } @@ -876,9 +879,50 @@ static void rdma_umap_close(struct vm_area_struct *vma) kfree(priv); } +/* + * Once the zap_vma_ptes has been called touches to the VMA will come here and + * we return a dummy writable zero page for all the pfns. + */ +static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) +{ + struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; + struct rdma_umap_priv *priv = vmf->vma->vm_private_data; + vm_fault_t ret = 0; + + if (!priv) + return VM_FAULT_SIGBUS; + + /* Read only pages can just use the system zero page. */ + if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { + vmf->page = ZERO_PAGE(vmf->vm_start); + get_page(vmf->page); + return 0; + } + + mutex_lock(&ufile->umap_lock); + if (!ufile->disassociate_page) + ufile->disassociate_page = + alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0); + + if (ufile->disassociate_page) { + /* + * This VMA is forced to always be shared so this doesn't have + * to worry about COW. + */ + vmf->page = ufile->disassociate_page; + get_page(vmf->page); + } else { + ret = VM_FAULT_SIGBUS; + } + mutex_unlock(&ufile->umap_lock); + + return ret; +} + static const struct vm_operations_struct rdma_umap_ops = { .open = rdma_umap_open, .close = rdma_umap_close, + .fault = rdma_umap_fault, }; static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, @@ -888,6 +932,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv; + if (!(vma->vm_flags & VM_SHARED)) + return ERR_PTR(-EINVAL); + if (vma->vm_end - vma->vm_start != size) return ERR_PTR(-EINVAL); @@ -991,7 +1038,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) * at a time to get the lock ordering right. Typically there * will only be one mm, so no big deal. */ - down_write(&mm->mmap_sem); + down_read(&mm->mmap_sem); if (!mmget_still_valid(mm)) goto skip_mm; mutex_lock(&ufile->umap_lock); @@ -1005,11 +1052,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); } mutex_unlock(&ufile->umap_lock); skip_mm: - up_write(&mm->mmap_sem); + up_read(&mm->mmap_sem); mmput(mm); } } -- cgit v1.2.3 From e548c9702d46c13f33455fa8c3a19ba4b5794b8e Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Sat, 20 Apr 2019 16:34:16 +0800 Subject: sched/numa: Fix a possible divide-by-zero commit a860fa7b96e1a1c974556327aa1aee852d434c21 upstream. sched_clock_cpu() may not be consistent between CPUs. If a task migrates to another CPU, then se.exec_start is set to that CPU's rq_clock_task() by update_stats_curr_start(). Specifically, the new value might be before the old value due to clock skew. So then if in numa_get_avg_runtime() the expression: 'now - p->last_task_numa_placement' ends up as -1, then the divider '*period + 1' in task_numa_placement() is 0 and things go bang. Similar to update_curr(), check if time goes backwards to avoid this. [ peterz: Wrote new changelog. ] [ mingo: Tweaked the code comment. ] Signed-off-by: Xie XiuQi Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: cj.chengjian@huawei.com Cc: Link: http://lkml.kernel.org/r/20190425080016.GX11158@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index eeb605656d59..be55a64748ba 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1994,6 +1994,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) if (p->last_task_numa_placement) { delta = runtime - p->last_sum_exec_runtime; *period = now - p->last_task_numa_placement; + + /* Avoid time going backwards, prevent potential divide error: */ + if (unlikely((s64)*period < 0)) + *period = 0; } else { delta = p->se.avg.load_sum; *period = LOAD_AVG_MAX; -- cgit v1.2.3 From bcd9cbffb98f862bd56389ca205ccf42cfc9cf31 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 15 Apr 2019 12:00:42 -0400 Subject: ceph: only use d_name directly when parent is locked commit 1bcb344086f3ecf8d6705f6d708441baa823beb3 upstream. Ben reported tripping the BUG_ON in create_request_message during some performance testing. Analysis of the vmcore showed that the length of the r_dentry->d_name string changed after we allocated the buffer, but before we encoded it. build_dentry_path returns pointers to d_name in the common case of non-snapped dentries, but this optimization isn't safe unless the parent directory is locked. When it isn't, have the code make a copy of the d_name while holding the d_lock. Cc: stable@vger.kernel.org Reported-by: Ben England Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 61 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 163fc74bf221..b1bacff14021 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1958,10 +1958,39 @@ retry: return path; } +/* Duplicate the dentry->d_name.name safely */ +static int clone_dentry_name(struct dentry *dentry, const char **ppath, + int *ppathlen) +{ + u32 len; + char *name; + +retry: + len = READ_ONCE(dentry->d_name.len); + name = kmalloc(len + 1, GFP_NOFS); + if (!name) + return -ENOMEM; + + spin_lock(&dentry->d_lock); + if (dentry->d_name.len != len) { + spin_unlock(&dentry->d_lock); + kfree(name); + goto retry; + } + memcpy(name, dentry->d_name.name, len); + spin_unlock(&dentry->d_lock); + + name[len] = '\0'; + *ppath = name; + *ppathlen = len; + return 0; +} + static int build_dentry_path(struct dentry *dentry, struct inode *dir, const char **ppath, int *ppathlen, u64 *pino, - int *pfreepath) + bool *pfreepath, bool parent_locked) { + int ret; char *path; rcu_read_lock(); @@ -1970,8 +1999,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, if (dir && ceph_snap(dir) == CEPH_NOSNAP) { *pino = ceph_ino(dir); rcu_read_unlock(); - *ppath = dentry->d_name.name; - *ppathlen = dentry->d_name.len; + if (parent_locked) { + *ppath = dentry->d_name.name; + *ppathlen = dentry->d_name.len; + } else { + ret = clone_dentry_name(dentry, ppath, ppathlen); + if (ret) + return ret; + *pfreepath = true; + } return 0; } rcu_read_unlock(); @@ -1979,13 +2015,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; - *pfreepath = 1; + *pfreepath = true; return 0; } static int build_inode_path(struct inode *inode, const char **ppath, int *ppathlen, u64 *pino, - int *pfreepath) + bool *pfreepath) { struct dentry *dentry; char *path; @@ -2001,7 +2037,7 @@ static int build_inode_path(struct inode *inode, if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; - *pfreepath = 1; + *pfreepath = true; return 0; } @@ -2012,7 +2048,7 @@ static int build_inode_path(struct inode *inode, static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, struct inode *rdiri, const char *rpath, u64 rino, const char **ppath, int *pathlen, - u64 *ino, int *freepath) + u64 *ino, bool *freepath, bool parent_locked) { int r = 0; @@ -2022,7 +2058,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, ceph_snap(rinode)); } else if (rdentry) { r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, - freepath); + freepath, parent_locked); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); } else if (rpath || rino) { @@ -2048,7 +2084,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, const char *path2 = NULL; u64 ino1 = 0, ino2 = 0; int pathlen1 = 0, pathlen2 = 0; - int freepath1 = 0, freepath2 = 0; + bool freepath1 = false, freepath2 = false; int len; u16 releases; void *p, *end; @@ -2056,16 +2092,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ret = set_request_path_attr(req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, - &path1, &pathlen1, &ino1, &freepath1); + &path1, &pathlen1, &ino1, &freepath1, + test_bit(CEPH_MDS_R_PARENT_LOCKED, + &req->r_req_flags)); if (ret < 0) { msg = ERR_PTR(ret); goto out; } + /* If r_old_dentry is set, then assume that its parent is locked */ ret = set_request_path_attr(NULL, req->r_old_dentry, req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, - &path2, &pathlen2, &ino2, &freepath2); + &path2, &pathlen2, &ino2, &freepath2, true); if (ret < 0) { msg = ERR_PTR(ret); goto out_free1; -- cgit v1.2.3 From d9061ef0ab17b4230da913f5923fb8cb006fac1e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 17 Apr 2019 12:58:28 -0400 Subject: ceph: ensure d_name stability in ceph_dentry_hash() commit 76a495d666e5043ffc315695f8241f5e94a98849 upstream. Take the d_lock here to ensure that d_name doesn't change. Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/dir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 82928cea0209..7f3f64ba464f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1470,6 +1470,7 @@ void ceph_dentry_lru_del(struct dentry *dn) unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) { struct ceph_inode_info *dci = ceph_inode(dir); + unsigned hash; switch (dci->i_dir_layout.dl_dir_hash) { case 0: /* for backward compat */ @@ -1477,8 +1478,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) return dn->d_name.hash; default: - return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, + spin_lock(&dn->d_lock); + hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash, dn->d_name.name, dn->d_name.len); + spin_unlock(&dn->d_lock); + return hash; } } -- cgit v1.2.3 From 870588487197dda790bf52a0100ba80a3269f9ef Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 18 Apr 2019 11:24:57 +0800 Subject: ceph: fix ci->i_head_snapc leak commit 37659182bff1eeaaeadcfc8f853c6d2b6dbc3f47 upstream. We missed two places that i_wrbuffer_ref_head, i_wr_ref, i_dirty_caps and i_flushing_caps may change. When they are all zeros, we should free i_head_snapc. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/38224 Reported-and-tested-by: Luis Henriques Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 9 +++++++++ fs/ceph/snap.c | 7 ++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b1bacff14021..5cec784e30f6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1286,6 +1286,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; } + + if (drop && + ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; + } } spin_unlock(&ci->i_ceph_lock); while (!list_empty(&to_remove)) { diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index f74193da0e09..1f46b02f7314 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -568,7 +568,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) old_snapc = NULL; update_snapc: - if (ci->i_head_snapc) { + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ci->i_head_snapc = NULL; + } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); dout(" new snapc is %p\n", new_snapc); } -- cgit v1.2.3 From 5e4a20e631a711553bf381bcd0725a9e3ba0f139 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2019 08:54:37 -0700 Subject: nfsd: Don't release the callback slot unless it was actually held commit e6abc8caa6deb14be2a206253f7e1c5e37e9515b upstream. If there are multiple callbacks queued, waiting for the callback slot when the callback gets shut down, then they all currently end up acting as if they hold the slot, and call nfsd4_cb_sequence_done() resulting in interesting side-effects. In addition, the 'retry_nowait' path in nfsd4_cb_sequence_done() causes a loop back to nfsd4_cb_prepare() without first freeing the slot, which causes a deadlock when nfsd41_cb_get_slot() gets called a second time. This patch therefore adds a boolean to track whether or not the callback did pick up the slot, so that it can do the right thing in these 2 cases. Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4callback.c | 8 +++++++- fs/nfsd/state.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c74e4538d0eb..258f741d6a21 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1023,8 +1023,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) cb->cb_seq_status = 1; cb->cb_status = 0; if (minorversion) { - if (!nfsd41_cb_get_slot(clp, task)) + if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task)) return; + cb->cb_holds_slot = true; } rpc_call_start(task); } @@ -1051,6 +1052,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback return true; } + if (!cb->cb_holds_slot) + goto need_restart; + switch (cb->cb_seq_status) { case 0: /* @@ -1089,6 +1093,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback cb->cb_seq_status); } + cb->cb_holds_slot = false; clear_bit(0, &clp->cl_cb_slot_busy); rpc_wake_up_next(&clp->cl_cb_waitq); dprintk("%s: freed slot, new seqid=%d\n", __func__, @@ -1296,6 +1301,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_seq_status = 1; cb->cb_status = 0; cb->cb_need_restart = false; + cb->cb_holds_slot = false; } void nfsd4_run_cb(struct nfsd4_callback *cb) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 396c76755b03..9d6cb246c6c5 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -70,6 +70,7 @@ struct nfsd4_callback { int cb_seq_status; int cb_status; bool cb_need_restart; + bool cb_holds_slot; }; struct nfsd4_callback_ops { -- cgit v1.2.3 From 6569ae328aa3e5b58a3b722538480bd5e766a869 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 22 Apr 2019 12:34:23 -0400 Subject: nfsd: wake waiters blocked on file_lock before deleting it commit 6aaafc43a4ecc5bc8a3f6a2811d5eddc996a97f3 upstream. After a blocked nfsd file_lock request is deleted, knfsd will send a callback to the client and then free the request. Commit 16306a61d3b7 ("fs/locks: always delete_block after waiting.") changed it such that locks_delete_block is always called on a request after it is awoken, but that patch missed fixing up blocked nfsd request handling. Call locks_delete_block on the block to wake up any locks still blocked on the nfsd lock request before freeing it. Some of its callers already do this however, so just remove those calls. URL: https://bugzilla.kernel.org/show_bug.cgi?id=203363 Fixes: 16306a61d3b7 ("fs/locks: always delete_block after waiting.") Reported-by: Slawomir Pryczek Cc: Neil Brown Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6a45fb00c5fc..e87e15df2044 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -265,6 +265,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, static void free_blocked_lock(struct nfsd4_blocked_lock *nbl) { + locks_delete_block(&nbl->nbl_lock); locks_release_private(&nbl->nbl_lock); kfree(nbl); } @@ -293,7 +294,6 @@ remove_blocked_locks(struct nfs4_lockowner *lo) nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); - locks_delete_block(&nbl->nbl_lock); free_blocked_lock(nbl); } } @@ -4863,7 +4863,6 @@ nfs4_laundromat(struct nfsd_net *nn) nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); - locks_delete_block(&nbl->nbl_lock); free_blocked_lock(nbl); } out: -- cgit v1.2.3 From 6d29f7c720c670e02bcf01a2600a6cdbdb414489 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 22 Apr 2019 12:34:24 -0400 Subject: nfsd: wake blocked file lock waiters before sending callback commit f456458e4d25a8962d0946891617c76cc3ff5fb9 upstream. When a blocked NFS lock is "awoken" we send a callback to the server and then wake any hosts waiting on it. If a client attempts to get a lock and then drops off the net, we could end up waiting for a long time until we end up waking locks blocked on that request. So, wake any other waiting lock requests before sending the callback. Do this by calling locks_delete_block in a new "prepare" phase for CB_NOTIFY_LOCK callbacks. URL: https://bugzilla.kernel.org/show_bug.cgi?id=203363 Fixes: 16306a61d3b7 ("fs/locks: always delete_block after waiting.") Reported-by: Slawomir Pryczek Cc: Neil Brown Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e87e15df2044..f056b1d3fecd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -298,6 +298,14 @@ remove_blocked_locks(struct nfs4_lockowner *lo) } } +static void +nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb) +{ + struct nfsd4_blocked_lock *nbl = container_of(cb, + struct nfsd4_blocked_lock, nbl_cb); + locks_delete_block(&nbl->nbl_lock); +} + static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { @@ -325,6 +333,7 @@ nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb) } static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { + .prepare = nfsd4_cb_notify_lock_prepare, .done = nfsd4_cb_notify_lock_done, .release = nfsd4_cb_notify_lock_release, }; -- cgit v1.2.3 From b2eeeb49027ed58f04b5ca5f805319dc418d398a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 5 Apr 2019 11:34:40 +1100 Subject: sunrpc: don't mark uninitialised items as VALID. commit d58431eacb226222430940134d97bfd72f292fcd upstream. A recent commit added a call to cache_fresh_locked() when an expired item was found. The call sets the CACHE_VALID flag, so it is important that the item actually is valid. There are two ways it could be valid: 1/ If ->update has been called to fill in relevant content 2/ if CACHE_NEGATIVE is set, to say that content doesn't exist. An expired item that is waiting for an update will be neither. Setting CACHE_VALID will mean that a subsequent call to cache_put() will be likely to dereference uninitialised pointers. So we must make sure the item is valid, and we already have code to do that in try_to_negate_entry(). This takes the hash lock and so cannot be used directly, so take out the two lines that we need and use them. Now cache_fresh_locked() is certain to be called only on a valid item. Cc: stable@kernel.org # 2.6.35 Fixes: 4ecd55ea0742 ("sunrpc: fix cache_head leak due to queued request") Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 12bb23b8e0c5..261131dfa1f1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static inline int cache_is_valid(struct cache_head *h); static void cache_fresh_locked(struct cache_head *head, time_t expiry, struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, @@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init_rcu(&tmp->cache_list); detail->entries --; + if (cache_is_valid(tmp) == -EAGAIN) + set_bit(CACHE_NEGATIVE, &tmp->flags); cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; -- cgit v1.2.3 From c704bba6dd3620faab4625f93e31a075a84aa179 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Wed, 24 Apr 2019 22:50:33 +0800 Subject: perf/x86/intel: Update KBL Package C-state events to also include PC8/PC9/PC10 counters commit 82c99f7a81f28f8c1be5f701c8377d14c4075b10 upstream. Kaby Lake (and Coffee Lake) has PC8/PC9/PC10 residency counters. This patch updates the list of Kaby/Coffee Lake PMU event counters from the snb_cstates[] list of events to the hswult_cstates[] list of events, which keeps all previously supported events and also adds the PKG_C8, PKG_C9 and PKG_C10 residency counters. This allows user space tools to profile them through the perf interface. Signed-off-by: Harry Pan Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: gs0622@gmail.com Link: http://lkml.kernel.org/r/20190424145033.1924-1-harry.pan@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/cstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index d2e780705c5a..56194c571299 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -76,15 +76,15 @@ * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT,CNL + * Available model: HSW ULT,KBL,CNL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT,CNL + * Available model: HSW ULT,KBL,CNL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT,GLM,CNL + * Available model: HSW ULT,KBL,GLM,CNL * Scope: Package (physical package) * */ @@ -572,8 +572,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates), X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates), -- cgit v1.2.3 From 4898e9b9979528dd51407c5600e4688dc30228f2 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 26 Apr 2019 17:22:01 -0700 Subject: Input: synaptics-rmi4 - write config register values to the right offset commit 3a349763cf11e63534b8f2d302f2d0c790566497 upstream. Currently any changed config register values don't take effect, as the function to write them back is called with the wrong register offset. Fixes: ff8f83708b3e (Input: synaptics-rmi4 - add support for 2D sensors and F11) Signed-off-by: Lucas Stach Reviewed-by: Philipp Zabel Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index df64d6aed4f7..93901ebd122a 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -1230,7 +1230,7 @@ static int rmi_f11_initialize(struct rmi_function *fn) } rc = f11_write_control_regs(fn, &f11->sens_query, - &f11->dev_controls, fn->fd.query_base_addr); + &f11->dev_controls, fn->fd.control_base_addr); if (rc) dev_warn(&fn->dev, "Failed to write control registers\n"); -- cgit v1.2.3 From 16df9424ffaa5bdbeeebd6427606d3b8cc176be1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 3 Apr 2019 12:36:21 -0600 Subject: vfio/type1: Limit DMA mappings per container commit 492855939bdb59c6f947b0b5b44af9ad82b7e38c upstream. Memory backed DMA mappings are accounted against a user's locked memory limit, including multiple mappings of the same memory. This accounting bounds the number of such mappings that a user can create. However, DMA mappings that are not backed by memory, such as DMA mappings of device MMIO via mmaps, do not make use of page pinning and therefore do not count against the user's locked memory limit. These mappings still consume memory, but the memory is not well associated to the process for the purpose of oom killing a task. To add bounding on this use case, we introduce a limit to the total number of concurrent DMA mappings that a user is allowed to create. This limit is exposed as a tunable module option where the default value of 64K is expected to be well in excess of any reasonable use case (a large virtual machine configuration would typically only make use of tens of concurrent mappings). This fixes CVE-2019-3882. Reviewed-by: Eric Auger Tested-by: Eric Auger Reviewed-by: Peter Xu Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_type1.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 73652e21efec..d0f731c9920a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -58,12 +58,18 @@ module_param_named(disable_hugepages, MODULE_PARM_DESC(disable_hugepages, "Disable VFIO IOMMU support for IOMMU hugepages."); +static unsigned int dma_entry_limit __read_mostly = U16_MAX; +module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644); +MODULE_PARM_DESC(dma_entry_limit, + "Maximum number of user DMA mappings per container (65535)."); + struct vfio_iommu { struct list_head domain_list; struct vfio_domain *external_domain; /* domain for external user */ struct mutex lock; struct rb_root dma_list; struct blocking_notifier_head notifier; + unsigned int dma_avail; bool v2; bool nesting; }; @@ -836,6 +842,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) vfio_unlink_dma(iommu, dma); put_task_struct(dma->task); kfree(dma); + iommu->dma_avail++; } static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) @@ -1081,12 +1088,18 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, goto out_unlock; } + if (!iommu->dma_avail) { + ret = -ENOSPC; + goto out_unlock; + } + dma = kzalloc(sizeof(*dma), GFP_KERNEL); if (!dma) { ret = -ENOMEM; goto out_unlock; } + iommu->dma_avail--; dma->iova = iova; dma->vaddr = vaddr; dma->prot = prot; @@ -1583,6 +1596,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) INIT_LIST_HEAD(&iommu->domain_list); iommu->dma_list = RB_ROOT; + iommu->dma_avail = dma_entry_limit; mutex_init(&iommu->lock); BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); -- cgit v1.2.3 From c46e14be004a1516600012f05d6e23b096c06f31 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Fri, 12 Apr 2019 07:29:13 +0200 Subject: dmaengine: sh: rcar-dmac: With cyclic DMA residue 0 is valid commit 907bd68a2edc491849e2fdcfe52c4596627bca94 upstream. Having a cyclic DMA, a residue 0 is not an indication of a completed DMA. In case of cyclic DMA make sure that dma_set_residue() is called and with this a residue of 0 is forwarded correctly to the caller. Fixes: 3544d2878817 ("dmaengine: rcar-dmac: use result of updated get_residue in tx_status") Signed-off-by: Dirk Behme Signed-off-by: Achim Dahlhoff Signed-off-by: Hiroyuki Yokoyama Signed-off-by: Yao Lihua Reviewed-by: Yoshihiro Shimoda Reviewed-by: Laurent Pinchart Cc: # v4.8+ Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/sh/rcar-dmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 2b4f25698169..54810ffd95e2 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1368,6 +1368,7 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, enum dma_status status; unsigned long flags; unsigned int residue; + bool cyclic; status = dma_cookie_status(chan, cookie, txstate); if (status == DMA_COMPLETE || !txstate) @@ -1375,10 +1376,11 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan, spin_lock_irqsave(&rchan->lock, flags); residue = rcar_dmac_chan_get_residue(rchan, cookie); + cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false; spin_unlock_irqrestore(&rchan->lock, flags); /* if there's no residue, the cookie is complete */ - if (!residue) + if (!residue && !cyclic) return DMA_COMPLETE; dma_set_residue(txstate, residue); -- cgit v1.2.3 From 9476c3ad5ee21b57dbfbf4f7fe6a79f672eba078 Mon Sep 17 00:00:00 2001 From: Achim Dahlhoff Date: Fri, 12 Apr 2019 07:29:14 +0200 Subject: dmaengine: sh: rcar-dmac: Fix glitch in dmaengine_tx_status commit 6e7da74775348d96e2d7efaf3f91410e18c481ef upstream. The tx_status poll in the rcar_dmac driver reads the status register which indicates which chunk is busy (DMACHCRB). Afterwards the point inside the chunk is read from DMATCRB. It is possible that the chunk has changed between the two reads. The result is a non-monotonous increase of the residue. Fix this by introducing a 'safe read' logic. Fixes: 73a47bd0da66 ("dmaengine: rcar-dmac: use TCRB instead of TCR for residue") Signed-off-by: Achim Dahlhoff Signed-off-by: Dirk Behme Reviewed-by: Yoshihiro Shimoda Cc: # v4.16+ Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/sh/rcar-dmac.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 54810ffd95e2..e2a5398f89b5 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1282,6 +1282,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, enum dma_status status; unsigned int residue = 0; unsigned int dptr = 0; + unsigned int chcrb; + unsigned int tcrb; + unsigned int i; if (!desc) return 0; @@ -1329,6 +1332,24 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, return 0; } + /* + * We need to read two registers. + * Make sure the control register does not skip to next chunk + * while reading the counter. + * Trying it 3 times should be enough: Initial read, retry, retry + * for the paranoid. + */ + for (i = 0; i < 3; i++) { + chcrb = rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK; + tcrb = rcar_dmac_chan_read(chan, RCAR_DMATCRB); + /* Still the same? */ + if (chcrb == (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & + RCAR_DMACHCRB_DPTR_MASK)) + break; + } + WARN_ONCE(i >= 3, "residue might be not continuous!"); + /* * In descriptor mode the descriptor running pointer is not maintained * by the interrupt handler, find the running descriptor from the @@ -1336,8 +1357,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, * mode just use the running descriptor pointer. */ if (desc->hwdescs.use) { - dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & - RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT; + dptr = chcrb >> RCAR_DMACHCRB_DPTR_SHIFT; if (dptr == 0) dptr = desc->nchunks; dptr--; @@ -1355,7 +1375,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, } /* Add the residue for the current chunk. */ - residue += rcar_dmac_chan_read(chan, RCAR_DMATCRB) << desc->xfer_shift; + residue += tcrb << desc->xfer_shift; return residue; } -- cgit v1.2.3 From aa5f016ba785b0c73018f9aa62ad67b5170a9d3e Mon Sep 17 00:00:00 2001 From: Shun-Chih Yu Date: Thu, 25 Apr 2019 11:53:50 +0800 Subject: dmaengine: mediatek-cqdma: fix wrong register usage in mtk_cqdma_start commit 5bb5c3a3ac102158b799bf5eda871223aa5e9c25 upstream. This patch fixes wrong register usage in the mtk_cqdma_start. The destination register should be MTK_CQDMA_DST2 instead. Fixes: b1f01e48df5a ("dmaengine: mediatek: Add MediaTek Command-Queue DMA controller for MT6765 SoC") Signed-off-by: Shun-Chih Yu Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/mediatek/mtk-cqdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index 131f3974740d..814853842e29 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -253,7 +253,7 @@ static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc, #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT mtk_dma_set(pc, MTK_CQDMA_DST2, cvd->dest >> MTK_CQDMA_ADDR2_SHFIT); #else - mtk_dma_set(pc, MTK_CQDMA_SRC2, 0); + mtk_dma_set(pc, MTK_CQDMA_DST2, 0); #endif /* setup the length */ -- cgit v1.2.3 From b9a5bc4bb079b00cb81ab8a408195332faed331e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Apr 2019 22:34:18 +0100 Subject: ARM: 8857/1: efi: enable CP15 DMB instructions before cleaning the cache commit e17b1af96b2afc38e684aa2f1033387e2ed10029 upstream. The EFI stub is entered with the caches and MMU enabled by the firmware, and once the stub is ready to hand over to the decompressor, we clean and disable the caches. The cache clean routines use CP15 barrier instructions, which can be disabled via SCTLR. Normally, when using the provided cache handling routines to enable the caches and MMU, this bit is enabled as well. However, but since we entered the stub with the caches already enabled, this routine is not executed before we call the cache clean routines, resulting in undefined instruction exceptions if the firmware never enabled this bit. So set the bit explicitly in the EFI entry code, but do so in a way that guarantees that the resulting code can still run on v6 cores as well (which are guaranteed to have CP15 barriers enabled) Cc: # v4.9+ Acked-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/compressed/head.S | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 6c7ccb428c07..7135820f76d4 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1438,7 +1438,21 @@ ENTRY(efi_stub_entry) @ Preserve return value of efi_entry() in r4 mov r4, r0 - bl cache_clean_flush + + @ our cache maintenance code relies on CP15 barrier instructions + @ but since we arrived here with the MMU and caches configured + @ by UEFI, we must check that the CP15BEN bit is set in SCTLR. + @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in + @ the enable path will be executed on v7+ only. + mrc p15, 0, r1, c1, c0, 0 @ read SCTLR + tst r1, #(1 << 5) @ CP15BEN bit set? + bne 0f + orr r1, r1, #(1 << 5) @ CP15 barrier instructions + mcr p15, 0, r1, c1, c0, 0 @ write SCTLR + ARM( .inst 0xf57ff06f @ v7+ isb ) + THUMB( isb ) + +0: bl cache_clean_flush bl cache_off @ Set parameters for booting zImage according to boot protocol -- cgit v1.2.3 From aaceebbb71c58fd9086525ed8fdaf4510a8e5907 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Apr 2019 23:59:02 +1000 Subject: powerpc/mm/radix: Make Radix require HUGETLB_PAGE commit 8adddf349fda0d3de2f6bb41ddf838cbf36a8ad2 upstream. Joel reported weird crashes using skiroot_defconfig, in his case we jumped into an NX page: kernel tried to execute exec-protected page (c000000002bff4f0) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch Faulting instruction address: 0xc000000002bff4f0 Looking at the disassembly, we had simply branched to that address: c000000000c001bc 49fff335 bl c000000002bff4f0 But that didn't match the original kernel image: c000000000c001bc 4bfff335 bl c000000000bff4f0 When STRICT_KERNEL_RWX is enabled, and we're using the radix MMU, we call radix__change_memory_range() late in boot to change page protections. We do that both to mark rodata read only and also to mark init text no-execute. That involves walking the kernel page tables, and clearing _PAGE_WRITE or _PAGE_EXEC respectively. With radix we may use hugepages for the linear mapping, so the code in radix__change_memory_range() uses eg. pmd_huge() to test if it has found a huge mapping, and if so it stops the page table walk and changes the PMD permissions. However if the kernel is built without HUGETLBFS support, pmd_huge() is just a #define that always returns 0. That causes the code in radix__change_memory_range() to incorrectly interpret the PMD value as a pointer to a PTE page rather than as a PTE at the PMD level. We can see this using `dv` in xmon which also uses pmd_huge(): 0:mon> dv c000000000000000 pgd @ 0xc000000001740000 pgdp @ 0xc000000001740000 = 0x80000000ffffb009 pudp @ 0xc0000000ffffb000 = 0x80000000ffffa009 pmdp @ 0xc0000000ffffa000 = 0xc00000000000018f <- this is a PTE ptep @ 0xc000000000000100 = 0xa64bb17da64ab07d <- kernel text The end result is we treat the value at 0xc000000000000100 as a PTE and clear _PAGE_WRITE or _PAGE_EXEC, potentially corrupting the code at that address. In Joel's specific case we cleared the sign bit in the offset of the branch, causing a backward branch to turn into a forward branch which caused us to branch into a non-executable page. However the exact nature of the crash depends on kernel version, compiler version, and other factors. We need to fix radix__change_memory_range() to not use accessors that depend on HUGETLBFS, but we also have radix memory hotplug code that uses pmd_huge() etc that will also need fixing. So for now just disallow the broken combination of Radix with HUGETLBFS disabled. The only defconfig we have that is affected is skiroot_defconfig, so turn on HUGETLBFS there so that it still gets Radix. Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Joel Stanley Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/configs/skiroot_defconfig | 1 + arch/powerpc/platforms/Kconfig.cputype | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index cfdd08897a06..e2b0c5f15c7b 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -260,6 +260,7 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y +CONFIG_HUGETLBFS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_NLS=y diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 8c7464c3f27f..2782188a5ba1 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -318,7 +318,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" - depends on PPC_BOOK3S_64 + depends on PPC_BOOK3S_64 && HUGETLB_PAGE select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA default y help -- cgit v1.2.3 From 0b029ce3f4ee15d4e5625b6266dc1551ae1cda2d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 1 Mar 2019 13:56:11 +0100 Subject: drm/vc4: Fix memory leak during gpu reset. commit d08106796a78a4273e39e1bbdf538dc4334b2635 upstream. __drm_atomic_helper_crtc_destroy_state does not free memory, it only cleans it up. Fix this by calling the functions own destroy function. Fixes: 6d6e50039187 ("drm/vc4: Allocate the right amount of space for boot-time CRTC state.") Cc: Eric Anholt Cc: # v4.6+ Reviewed-by: Eric Anholt Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190301125627.7285-2-maarten.lankhorst@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 3ce136ba8791..cd226e63d557 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -999,7 +999,7 @@ static void vc4_crtc_reset(struct drm_crtc *crtc) { if (crtc->state) - __drm_atomic_helper_crtc_destroy_state(crtc->state); + vc4_crtc_destroy_state(crtc->state); crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); if (crtc->state) -- cgit v1.2.3 From 6dca846c920d690b1164fc2e33ff599462c4cea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Apr 2019 11:49:17 +0200 Subject: drm/ttm: fix re-init of global structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bd4264112f93045704731850c5e4d85db981cd85 upstream. When a driver unloads without unloading TTM we don't correctly clear the global structures leading to errors on re-init. Next step should probably be to remove the global structures and kobjs all together, but this is tricky since we need to maintain backward compatibility. Signed-off-by: Christian König Reviewed-by: Karol Herbst Tested-by: Karol Herbst CC: stable@vger.kernel.org # 5.0.x Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 10 +++++----- drivers/gpu/drm/ttm/ttm_memory.c | 5 +++-- include/drm/ttm/ttm_bo_driver.h | 1 - 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 0ec08394e17a..996cadd83f24 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -49,9 +49,8 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj); * ttm_global_mutex - protecting the global BO state */ DEFINE_MUTEX(ttm_global_mutex); -struct ttm_bo_global ttm_bo_glob = { - .use_count = 0 -}; +unsigned ttm_bo_glob_use_count; +struct ttm_bo_global ttm_bo_glob; static struct attribute ttm_bo_count = { .name = "bo_count", @@ -1535,12 +1534,13 @@ static void ttm_bo_global_release(void) struct ttm_bo_global *glob = &ttm_bo_glob; mutex_lock(&ttm_global_mutex); - if (--glob->use_count > 0) + if (--ttm_bo_glob_use_count > 0) goto out; kobject_del(&glob->kobj); kobject_put(&glob->kobj); ttm_mem_global_release(&ttm_mem_glob); + memset(glob, 0, sizeof(*glob)); out: mutex_unlock(&ttm_global_mutex); } @@ -1552,7 +1552,7 @@ static int ttm_bo_global_init(void) unsigned i; mutex_lock(&ttm_global_mutex); - if (++glob->use_count > 1) + if (++ttm_bo_glob_use_count > 1) goto out; ret = ttm_mem_global_init(&ttm_mem_glob); diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index f1567c353b54..9a0909decb36 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -461,8 +461,8 @@ out_no_zone: void ttm_mem_global_release(struct ttm_mem_global *glob) { - unsigned int i; struct ttm_mem_zone *zone; + unsigned int i; /* let the page allocator first stop the shrink work. */ ttm_page_alloc_fini(); @@ -475,9 +475,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob) zone = glob->zones[i]; kobject_del(&zone->kobj); kobject_put(&zone->kobj); - } + } kobject_del(&glob->kobj); kobject_put(&glob->kobj); + memset(glob, 0, sizeof(*glob)); } static void ttm_check_swapping(struct ttm_mem_global *glob) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 1021106438b2..c80e5833b1d6 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -411,7 +411,6 @@ extern struct ttm_bo_global { /** * Protected by ttm_global_mutex. */ - unsigned int use_count; struct list_head device_list; /** -- cgit v1.2.3 From 3e869fd348565c3bab7a6c61cc4b66c3594e6db2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Apr 2019 10:47:56 +1000 Subject: Revert "drm/i915/fbdev: Actually configure untiled displays" commit 9fa246256e09dc30820524401cdbeeaadee94025 upstream. This reverts commit d179b88deb3bf6fed4991a31fd6f0f2cad21fab5. This commit is documented to break userspace X.org modesetting driver in certain configurations. The X.org modesetting userspace driver is broken. No fixes are available yet. In order for this patch to be applied it either needs a config option or a workaround developed. This has been reported a few times, saying it's a userspace problem is clearly against the regression rules. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109806 Signed-off-by: Dave Airlie Cc: # v3.19+ Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_fbdev.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 4ee16b264dbe..7f365ac0b549 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -336,8 +336,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); - unsigned long conn_configured, conn_seq; int i, j; bool *save_enabled; bool fallback = true, ret = true; @@ -355,9 +355,10 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, drm_modeset_backoff(&ctx); memcpy(save_enabled, enabled, count); - conn_seq = GENMASK(count - 1, 0); + mask = GENMASK(count - 1, 0); conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -370,8 +371,7 @@ retry: if (conn_configured & BIT(i)) continue; - /* First pass, only consider tiled connectors */ - if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -475,10 +475,8 @@ retry: conn_configured |= BIT(i); } - if (conn_configured != conn_seq) { /* repeat until no more are found */ - conn_seq = conn_configured; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the -- cgit v1.2.3 From 6ec39721f131f6c939fb3063109644d97ac2745b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 24 Apr 2019 17:06:29 +0200 Subject: drm/vc4: Fix compilation error reported by kbuild test bot commit 462ce5d963f18b71c63f6b7730a35a2ee5273540 upstream. A pointer to crtc was missing, resulting in the following build error: drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse: sparse: incorrect type in argument 1 (different base types) drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse: expected struct drm_crtc *crtc drivers/gpu/drm/vc4/vc4_crtc.c:1045:44: sparse: got struct drm_crtc_state *state drivers/gpu/drm/vc4/vc4_crtc.c:1045:39: sparse: sparse: not enough arguments for function vc4_crtc_destroy_state Signed-off-by: Maarten Lankhorst Reported-by: kbuild test robot Cc: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/2b6ed5e6-81b0-4276-8860-870b54ca3262@linux.intel.com Fixes: d08106796a78 ("drm/vc4: Fix memory leak during gpu reset.") Cc: # v4.6+ Acked-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index cd226e63d557..2ae4ece0dcea 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -999,7 +999,7 @@ static void vc4_crtc_reset(struct drm_crtc *crtc) { if (crtc->state) - vc4_crtc_destroy_state(crtc->state); + vc4_crtc_destroy_state(crtc, crtc->state); crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); if (crtc->state) -- cgit v1.2.3 From 30d57be1f6eb0670c69c4912c33b28b6522b366b Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sat, 12 Jan 2019 03:54:24 +0800 Subject: USB: Add new USB LPM helpers commit 7529b2574a7aaf902f1f8159fbc2a7caa74be559 upstream. Use new helpers to make LPM enabling/disabling more clear. This is a preparation to subsequent patch. Signed-off-by: Kai-Heng Feng Cc: stable # after much soaking Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 12 +++++++++++- drivers/usb/core/hub.c | 12 ++++++------ drivers/usb/core/message.c | 2 +- drivers/usb/core/sysfs.c | 5 ++++- drivers/usb/core/usb.h | 10 ++++++++-- 5 files changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 53564386ed57..c276ffc5561f 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1896,7 +1896,7 @@ int usb_runtime_idle(struct device *dev) return -EBUSY; } -int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) +static int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); int ret = -EPERM; @@ -1913,6 +1913,16 @@ int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) return ret; } +int usb_enable_usb2_hardware_lpm(struct usb_device *udev) +{ + return usb_set_usb2_hardware_lpm(udev, 1); +} + +int usb_disable_usb2_hardware_lpm(struct usb_device *udev) +{ + return usb_set_usb2_hardware_lpm(udev, 0); +} + #endif /* CONFIG_PM */ struct bus_type usb_bus_type = { diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1d1e61e980f3..ca38ff8124a7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3221,7 +3221,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) /* disable USB2 hardware LPM */ if (udev->usb2_hw_lpm_enabled == 1) - usb_set_usb2_hardware_lpm(udev, 0); + usb_disable_usb2_hardware_lpm(udev); if (usb_disable_ltm(udev)) { dev_err(&udev->dev, "Failed to disable LTM before suspend\n"); @@ -3260,7 +3260,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) err_ltm: /* Try to enable USB2 hardware LPM again */ if (udev->usb2_hw_lpm_capable == 1) - usb_set_usb2_hardware_lpm(udev, 1); + usb_enable_usb2_hardware_lpm(udev); if (udev->do_remote_wakeup) (void) usb_disable_remote_wakeup(udev); @@ -3544,7 +3544,7 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) } else { /* Try to enable USB2 hardware LPM */ if (udev->usb2_hw_lpm_capable == 1) - usb_set_usb2_hardware_lpm(udev, 1); + usb_enable_usb2_hardware_lpm(udev); /* Try to enable USB3 LTM */ usb_enable_ltm(udev); @@ -4435,7 +4435,7 @@ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev) if ((udev->bos->ext_cap->bmAttributes & cpu_to_le32(USB_BESL_SUPPORT)) || connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) { udev->usb2_hw_lpm_allowed = 1; - usb_set_usb2_hardware_lpm(udev, 1); + usb_enable_usb2_hardware_lpm(udev); } } @@ -5650,7 +5650,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) * It will be re-enabled by the enumeration process. */ if (udev->usb2_hw_lpm_enabled == 1) - usb_set_usb2_hardware_lpm(udev, 0); + usb_disable_usb2_hardware_lpm(udev); /* Disable LPM while we reset the device and reinstall the alt settings. * Device-initiated LPM, and system exit latency settings are cleared @@ -5753,7 +5753,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) done: /* Now that the alt settings are re-installed, enable LTM and LPM. */ - usb_set_usb2_hardware_lpm(udev, 1); + usb_enable_usb2_hardware_lpm(udev); usb_unlocked_enable_lpm(udev); usb_enable_ltm(udev); usb_release_bos_descriptor(udev); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index bfa5eda0cc26..35951aea7f50 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1244,7 +1244,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) } if (dev->usb2_hw_lpm_enabled == 1) - usb_set_usb2_hardware_lpm(dev, 0); + usb_disable_usb2_hardware_lpm(dev); usb_unlocked_disable_lpm(dev); usb_disable_ltm(dev); diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index ea18284dfa9a..7e88fdfe3cf5 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -528,7 +528,10 @@ static ssize_t usb2_hardware_lpm_store(struct device *dev, if (!ret) { udev->usb2_hw_lpm_allowed = value; - ret = usb_set_usb2_hardware_lpm(udev, value); + if (value) + ret = usb_enable_usb2_hardware_lpm(udev); + else + ret = usb_disable_usb2_hardware_lpm(udev); } usb_unlock_device(udev); diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 546a2219454b..d95a5358f73d 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -92,7 +92,8 @@ extern int usb_remote_wakeup(struct usb_device *dev); extern int usb_runtime_suspend(struct device *dev); extern int usb_runtime_resume(struct device *dev); extern int usb_runtime_idle(struct device *dev); -extern int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable); +extern int usb_enable_usb2_hardware_lpm(struct usb_device *udev); +extern int usb_disable_usb2_hardware_lpm(struct usb_device *udev); #else @@ -112,7 +113,12 @@ static inline int usb_autoresume_device(struct usb_device *udev) return 0; } -static inline int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) +static inline int usb_enable_usb2_hardware_lpm(struct usb_device *udev) +{ + return 0; +} + +static inline int usb_disable_usb2_hardware_lpm(struct usb_device *udev) { return 0; } -- cgit v1.2.3 From 2cd705091e361e14c583689c146f8fd9c43fb74f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sat, 12 Jan 2019 03:54:25 +0800 Subject: USB: Consolidate LPM checks to avoid enabling LPM twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d7a6c0ce8d26412903c7981503bad9e1cc7c45d2 upstream. USB Bluetooth controller QCA ROME (0cf3:e007) sometimes stops working after S3: [ 165.110742] Bluetooth: hci0: using NVM file: qca/nvm_usb_00000302.bin [ 168.432065] Bluetooth: hci0: Failed to send body at 4 of 1953 (-110) After some experiments, I found that disabling LPM can workaround the issue. On some platforms, the USB power is cut during S3, so the driver uses reset-resume to resume the device. During port resume, LPM gets enabled twice, by usb_reset_and_verify_device() and usb_port_resume(). Consolidate all checks into new LPM helpers to make sure LPM only gets enabled once. Fixes: de68bab4fa96 ("usb: Don't enable USB 2.0 Link PM by default.”) Signed-off-by: Kai-Heng Feng Cc: stable # after much soaking Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 11 ++++++++--- drivers/usb/core/hub.c | 12 ++++-------- drivers/usb/core/message.c | 3 +-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index c276ffc5561f..8987cec9549d 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1901,9 +1901,6 @@ static int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) struct usb_hcd *hcd = bus_to_hcd(udev->bus); int ret = -EPERM; - if (enable && !udev->usb2_hw_lpm_allowed) - return 0; - if (hcd->driver->set_usb2_hw_lpm) { ret = hcd->driver->set_usb2_hw_lpm(hcd, udev, enable); if (!ret) @@ -1915,11 +1912,19 @@ static int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) int usb_enable_usb2_hardware_lpm(struct usb_device *udev) { + if (!udev->usb2_hw_lpm_capable || + !udev->usb2_hw_lpm_allowed || + udev->usb2_hw_lpm_enabled) + return 0; + return usb_set_usb2_hardware_lpm(udev, 1); } int usb_disable_usb2_hardware_lpm(struct usb_device *udev) { + if (!udev->usb2_hw_lpm_enabled) + return 0; + return usb_set_usb2_hardware_lpm(udev, 0); } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index ca38ff8124a7..55c87be5764c 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3220,8 +3220,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) } /* disable USB2 hardware LPM */ - if (udev->usb2_hw_lpm_enabled == 1) - usb_disable_usb2_hardware_lpm(udev); + usb_disable_usb2_hardware_lpm(udev); if (usb_disable_ltm(udev)) { dev_err(&udev->dev, "Failed to disable LTM before suspend\n"); @@ -3259,8 +3258,7 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) usb_enable_ltm(udev); err_ltm: /* Try to enable USB2 hardware LPM again */ - if (udev->usb2_hw_lpm_capable == 1) - usb_enable_usb2_hardware_lpm(udev); + usb_enable_usb2_hardware_lpm(udev); if (udev->do_remote_wakeup) (void) usb_disable_remote_wakeup(udev); @@ -3543,8 +3541,7 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) hub_port_logical_disconnect(hub, port1); } else { /* Try to enable USB2 hardware LPM */ - if (udev->usb2_hw_lpm_capable == 1) - usb_enable_usb2_hardware_lpm(udev); + usb_enable_usb2_hardware_lpm(udev); /* Try to enable USB3 LTM */ usb_enable_ltm(udev); @@ -5649,8 +5646,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) /* Disable USB2 hardware LPM. * It will be re-enabled by the enumeration process. */ - if (udev->usb2_hw_lpm_enabled == 1) - usb_disable_usb2_hardware_lpm(udev); + usb_disable_usb2_hardware_lpm(udev); /* Disable LPM while we reset the device and reinstall the alt settings. * Device-initiated LPM, and system exit latency settings are cleared diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 35951aea7f50..4f33eb632a88 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1243,8 +1243,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) dev->actconfig->interface[i] = NULL; } - if (dev->usb2_hw_lpm_enabled == 1) - usb_disable_usb2_hardware_lpm(dev); + usb_disable_usb2_hardware_lpm(dev); usb_unlocked_disable_lpm(dev); usb_disable_ltm(dev); -- cgit v1.2.3 From acaec7f6e25d8db2d79937780f5c48f7e43c0e4f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Feb 2019 11:17:34 -0500 Subject: ext4: fix some error pointer dereferences commit 7159a986b4202343f6cca3bb8079ecace5816fd6 upstream. We can't pass error pointers to brelse(). Fixes: fb265c9cb49e ("ext4: add ext4_sb_bread() to disambiguate ENOMEM cases") Signed-off-by: Dan Carpenter Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 86ed9c686249..dc82e7757f67 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -829,6 +829,7 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); if (IS_ERR(bh)) { ret = PTR_ERR(bh); + bh = NULL; goto out; } @@ -2903,6 +2904,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, if (error == -EIO) EXT4_ERROR_INODE(inode, "block %llu read error", EXT4_I(inode)->i_file_acl); + bh = NULL; goto cleanup; } error = ext4_xattr_check_block(inode, bh); @@ -3059,6 +3061,7 @@ ext4_xattr_block_cache_find(struct inode *inode, if (IS_ERR(bh)) { if (PTR_ERR(bh) == -ENOMEM) return NULL; + bh = NULL; EXT4_ERROR_INODE(inode, "block %lu read error", (unsigned long)ce->e_value); } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { -- cgit v1.2.3 From 2501b17f4f72e774e66b846edada376ff1a73697 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 22 Feb 2019 22:10:19 +0800 Subject: loop: do not print warn message if partition scan is successful commit 40853d6fc619a6fd3d3177c3973a2eac9b598a80 upstream. Do not print warn message when the partition scan returns 0. Fixes: d57f3374ba48 ("loop: Move special partition reread handling in loop_clr_fd()") Signed-off-by: Dongli Zhang Reviewed-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 9a8d83bc1e75..fc7aefd42ae0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1111,8 +1111,9 @@ out_unlock: err = __blkdev_reread_part(bdev); else err = blkdev_reread_part(bdev); - pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", - __func__, lo_number, err); + if (err) + pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", + __func__, lo_number, err); /* Device is gone, no point in returning error */ err = 0; } -- cgit v1.2.3 From ba965c2c980872cf2dbbf61136fd6eeedc839c47 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:10 +0800 Subject: tipc: handle the err returned from cmd header function commit 2ac695d1d602ce00b12170242f58c3d3a8e36d04 upstream. Syzbot found a crash: BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872 Call Trace: tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872 __tipc_nl_compat_dumpit+0x59e/0xda0 net/tipc/netlink_compat.c:215 tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:280 tipc_nl_compat_handle net/tipc/netlink_compat.c:1226 [inline] tipc_nl_compat_recv+0x1b5f/0x2750 net/tipc/netlink_compat.c:1265 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] Uninit was created at: __alloc_skb+0x309/0xa20 net/core/skbuff.c:208 alloc_skb include/linux/skbuff.h:1012 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] It was supposed to be fixed on commit 974cb0e3e7c9 ("tipc: fix uninit-value in tipc_nl_compat_name_table_dump") by checking TLV_GET_DATA_LEN(msg->req) in cmd->header()/tipc_nl_compat_name_table_dump_header(), which is called ahead of tipc_nl_compat_name_table_dump(). However, tipc_nl_compat_dumpit() doesn't handle the error returned from cmd header function. It means even when the check added in that fix fails, it won't stop calling tipc_nl_compat_name_table_dump(), and the issue will be triggered again. So this patch is to add the process for the err returned from cmd header function in tipc_nl_compat_dumpit(). Reported-by: syzbot+3ce8520484b0d4e260a5@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4ad3586da8f0..c8b1c06e0ea4 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, if (msg->rep_type) tipc_tlv_init(msg->rep, msg->rep_type); - if (cmd->header) - (*cmd->header)(msg); + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { -- cgit v1.2.3 From abcbf3078130bf3151bca846f0b710e83a6623c4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 25 Apr 2019 16:13:58 -0700 Subject: slip: make slhc_free() silently accept an error pointer commit baf76f0c58aec435a3a864075b8f6d8ee5d1f17e upstream. This way, slhc_free() accepts what slhc_init() returns, whether that is an error or not. In particular, the pattern in sl_alloc_bufs() is slcomp = slhc_init(16, 16); ... slhc_free(slcomp); for the error handling path, and rather than complicate that code, just make it ok to always free what was returned by the init function. That's what the code used to do before commit 4ab42d78e37a ("ppp, slip: Validate VJ compression slot parameters completely") when slhc_init() just returned NULL for the error case, with no actual indication of the details of the error. Reported-by: syzbot+45474c076a4927533d2e@syzkaller.appspotmail.com Fixes: 4ab42d78e37a ("ppp, slip: Validate VJ compression slot parameters completely") Acked-by: Ben Hutchings Cc: David Miller Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/net/slip/slhc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index f4e93f5fc204..ea90db3c7705 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -153,7 +153,7 @@ out_fail: void slhc_free(struct slcompress *comp) { - if ( comp == NULLSLCOMPR ) + if ( IS_ERR_OR_NULL(comp) ) return; if ( comp->tstate != NULLSLSTATE ) -- cgit v1.2.3 From 8fe1600c91f9ba41990c35a0f817bd58d8a3c1d1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 23 Jan 2019 09:44:12 +0900 Subject: workqueue: Try to catch flush_work() without INIT_WORK(). commit 4d43d395fed124631ca02356c711facb90185175 upstream. syzbot found a flush_work() caller who forgot to call INIT_WORK() because that work_struct was allocated by kzalloc() [1]. But the message INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. by lock_map_acquire() is failing to tell that INIT_WORK() is missing. Since flush_work() without INIT_WORK() is a bug, and INIT_WORK() should set ->func field to non-zero, let's warn if ->func field is zero. [1] https://syzkaller.appspot.com/bug?id=a5954455fcfa51c29ca2ab55b203076337e1c770 Signed-off-by: Tetsuo Handa Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fc5d23d752a5..e94d2b6bee7f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2931,6 +2931,9 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) if (WARN_ON(!wq_online)) return false; + if (WARN_ON(!work->func)) + return false; + if (!from_cancel) { lock_map_acquire(&work->lockdep_map); lock_map_release(&work->lockdep_map); -- cgit v1.2.3 From bbb19ca082ce27ce60ca65be016a951806ea947c Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 14 Feb 2019 15:22:57 -0800 Subject: binder: fix handling of misaligned binder object commit 26528be6720bb40bc8844e97ee73a37e530e9c5e upstream. Fixes crash found by syzbot: kernel BUG at drivers/android/binder_alloc.c:LINE! (2) Reported-and-tested-by: syzbot+55de1eb4975dec156d8f@syzkaller.appspotmail.com Signed-off-by: Todd Kjos Reviewed-by: Joel Fernandes (Google) Cc: stable # 5.0, 4.19, 4.14 Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 022cd80e80cc..a6e556bf62df 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -959,14 +959,13 @@ enum lru_status binder_alloc_free_page(struct list_head *item, index = page - alloc->pages; page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE; + + mm = alloc->vma_vm_mm; + if (!mmget_not_zero(mm)) + goto err_mmget; + if (!down_write_trylock(&mm->mmap_sem)) + goto err_down_write_mmap_sem_failed; vma = binder_alloc_get_vma(alloc); - if (vma) { - if (!mmget_not_zero(alloc->vma_vm_mm)) - goto err_mmget; - mm = alloc->vma_vm_mm; - if (!down_write_trylock(&mm->mmap_sem)) - goto err_down_write_mmap_sem_failed; - } list_lru_isolate(lru, item); spin_unlock(lock); @@ -979,10 +978,9 @@ enum lru_status binder_alloc_free_page(struct list_head *item, PAGE_SIZE); trace_binder_unmap_user_end(alloc, index); - - up_write(&mm->mmap_sem); - mmput(mm); } + up_write(&mm->mmap_sem); + mmput(mm); trace_binder_unmap_kernel_start(alloc, index); -- cgit v1.2.3 From 16de5d9b8f6be7a5fcb4a7dccd915e955c38e731 Mon Sep 17 00:00:00 2001 From: luca abeni Date: Mon, 25 Mar 2019 14:15:30 +0100 Subject: sched/deadline: Correctly handle active 0-lag timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1b02cd6a2d7f3e2a6a5262887d2cb2912083e42f upstream. syzbot reported the following warning: [ ] WARNING: CPU: 4 PID: 17089 at kernel/sched/deadline.c:255 task_non_contending+0xae0/0x1950 line 255 of deadline.c is: WARN_ON(hrtimer_active(&dl_se->inactive_timer)); in task_non_contending(). Unfortunately, in some cases (for example, a deadline task continuosly blocking and waking immediately) it can happen that a task blocks (and task_non_contending() is called) while the 0-lag timer is still active. In this case, the safest thing to do is to immediately decrease the running bandwidth of the task, without trying to re-arm the 0-lag timer. Signed-off-by: luca abeni Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: chengjian (D) Link: https://lkml.kernel.org/r/20190325131530.34706-1-luca.abeni@santannapisa.it Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fb8b7b5d745d..451b1f9e80a6 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -252,7 +252,6 @@ static void task_non_contending(struct task_struct *p) if (dl_entity_is_special(dl_se)) return; - WARN_ON(hrtimer_active(&dl_se->inactive_timer)); WARN_ON(dl_se->dl_non_contending); zerolag_time = dl_se->deadline - @@ -269,7 +268,7 @@ static void task_non_contending(struct task_struct *p) * If the "0-lag time" already passed, decrease the active * utilization now, instead of starting a timer */ - if (zerolag_time < 0) { + if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) { if (dl_task(p)) sub_running_bw(dl_se, dl_rq); if (!dl_task(p) || p->state == TASK_DEAD) { -- cgit v1.2.3 From b5db8ea57de55585027304c1ec30d5542a9360ef Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Mar 2019 08:56:22 +0100 Subject: mac80211_hwsim: calculate if_combination.max_interfaces commit 45fcef8b727b6f171bc5443e8153181a367d7a15 upstream. If we just set this to 2048, and have multiple limits you can select from, the total number might run over and cause a warning in cfg80211. This doesn't make sense, so we just calculate the total max_interfaces now. Reported-by: syzbot+8f91bd563bbff230d0ee@syzkaller.appspotmail.com Fixes: 99e3a44bac37 ("mac80211_hwsim: allow setting iftype support") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mac80211_hwsim.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6359053bd0c7..862fd2b92d12 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2642,7 +2642,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, enum nl80211_band band; const struct ieee80211_ops *ops = &mac80211_hwsim_ops; struct net *net; - int idx; + int idx, i; int n_limits = 0; if (WARN_ON(param->channels > 1 && !param->use_chanctx)) @@ -2766,12 +2766,23 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, goto failed_hw; } + data->if_combination.max_interfaces = 0; + for (i = 0; i < n_limits; i++) + data->if_combination.max_interfaces += + data->if_limits[i].max; + data->if_combination.n_limits = n_limits; - data->if_combination.max_interfaces = 2048; data->if_combination.limits = data->if_limits; - hw->wiphy->iface_combinations = &data->if_combination; - hw->wiphy->n_iface_combinations = 1; + /* + * If we actually were asked to support combinations, + * advertise them - if there's only a single thing like + * only IBSS then don't advertise it as combinations. + */ + if (data->if_combination.max_interfaces > 1) { + hw->wiphy->iface_combinations = &data->if_combination; + hw->wiphy->n_iface_combinations = 1; + } if (param->ciphers) { memcpy(data->ciphers, param->ciphers, -- cgit v1.2.3 From becfa96eaecd76a46e9b164d79141b5baa315535 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 30 Mar 2019 10:21:07 +0900 Subject: NFS: Forbid setting AF_INET6 to "struct sockaddr_in"->sin_family. commit 7c2bd9a39845bfb6d72ddb55ce737650271f6f96 upstream. syzbot is reporting uninitialized value at rpc_sockaddr2uaddr() [1]. This is because syzbot is setting AF_INET6 to "struct sockaddr_in"->sin_family (which is embedded into user-visible "struct nfs_mount_data" structure) despite nfs23_validate_mount_data() cannot pass sizeof(struct sockaddr_in6) bytes of AF_INET6 address to rpc_sockaddr2uaddr(). Since "struct nfs_mount_data" structure is user-visible, we can't change "struct nfs_mount_data" to use "struct sockaddr_storage". Therefore, assuming that everybody is using AF_INET family when passing address via "struct nfs_mount_data"->addr, reject if its sin_family is not AF_INET. [1] https://syzkaller.appspot.com/bug?id=599993614e7cbbf66bc2656a919ab2a95fb5d75c Reported-by: syzbot Signed-off-by: Tetsuo Handa Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0570391eaa16..15c025c1a305 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2041,7 +2041,8 @@ static int nfs23_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); args->nfs_server.port = ntohs(data->addr.sin_port); - if (!nfs_verify_server_address(sap)) + if (sap->sa_family != AF_INET || + !nfs_verify_server_address(sap)) goto out_no_address; if (!(data->flags & NFS_MOUNT_TCP)) -- cgit v1.2.3 From 0d41789ad8205743e06dacd85fc0bd95a3d8948c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 15 Apr 2019 00:43:00 +0200 Subject: netfilter: ebtables: CONFIG_COMPAT: drop a bogus WARN_ON commit 7caa56f006e9d712b44f27b32520c66420d5cbc6 upstream. It means userspace gave us a ruleset where there is some other data after the ebtables target but before the beginning of the next rule. Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support") Reported-by: syzbot+659574e7bcc7f7eb4df7@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/bridge/netfilter/ebtables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f77888ec93f1..0bb4d712b80c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) + /* rule should have no remaining data after target */ + if (type == EBT_COMPAT_TARGET && size_left) return -EINVAL; match32 = (struct compat_ebt_entry_mwt *) buf; -- cgit v1.2.3 From a92526e257a5d752a37f0d148fc6182a517b5ce4 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 21 Mar 2019 22:42:23 +0800 Subject: fm10k: Fix a potential NULL pointer dereference commit 01ca667133d019edc9f0a1f70a272447c84ec41f upstream. Syzkaller report this: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 0 PID: 4378 Comm: syz-executor.0 Tainted: G C 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__lock_acquire+0x95b/0x3200 kernel/locking/lockdep.c:3573 Code: 00 0f 85 28 1e 00 00 48 81 c4 08 01 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 4c 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 cc 24 00 00 49 81 7d 00 e0 de 03 a6 41 bc 00 00 RSP: 0018:ffff8881e3c07a40 EFLAGS: 00010002 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000010 RSI: 0000000000000000 RDI: 0000000000000080 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8881e3c07d98 R11: ffff8881c7f21f80 R12: 0000000000000001 R13: 0000000000000080 R14: 0000000000000000 R15: 0000000000000001 FS: 00007fce2252e700(0000) GS:ffff8881f2400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffc7eb0228 CR3: 00000001e5bea002 CR4: 00000000007606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: lock_acquire+0xff/0x2c0 kernel/locking/lockdep.c:4211 __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0xdf/0x1050 kernel/locking/mutex.c:1072 drain_workqueue+0x24/0x3f0 kernel/workqueue.c:2934 destroy_workqueue+0x23/0x630 kernel/workqueue.c:4319 __do_sys_delete_module kernel/module.c:1018 [inline] __se_sys_delete_module kernel/module.c:961 [inline] __x64_sys_delete_module+0x30c/0x480 kernel/module.c:961 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fce2252dc58 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000140 RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fce2252e6bc R13: 00000000004bcca9 R14: 00000000006f6b48 R15: 00000000ffffffff If alloc_workqueue fails, it should return -ENOMEM, otherwise may trigger this NULL pointer dereference while unloading drivers. Reported-by: Hulk Robot Fixes: 0a38c17a21a0 ("fm10k: Remove create_workqueue") Signed-off-by: Yue Haibing Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 6fd15a734324..58f02c85f2fe 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -41,6 +41,8 @@ static int __init fm10k_init_module(void) /* create driver workqueue */ fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, fm10k_driver_name); + if (!fm10k_workqueue) + return -ENOMEM; fm10k_dbg_init(); -- cgit v1.2.3 From 4ee29a10021fdfaf02d22b82c1f7fcc605d365f3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:08 +0800 Subject: tipc: check bearer name with right length in tipc_nl_compat_bearer_enable commit 6f07e5f06c8712acc423485f657799fc8e11e56c upstream. Syzbot reported the following crash: BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:961 memchr+0xce/0x110 lib/string.c:961 string_is_valid net/tipc/netlink_compat.c:176 [inline] tipc_nl_compat_bearer_enable+0x2c4/0x910 net/tipc/netlink_compat.c:401 __tipc_nl_compat_doit net/tipc/netlink_compat.c:321 [inline] tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:354 tipc_nl_compat_handle net/tipc/netlink_compat.c:1162 [inline] tipc_nl_compat_recv+0x1ae7/0x2750 net/tipc/netlink_compat.c:1265 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] Uninit was created at: __alloc_skb+0x309/0xa20 net/core/skbuff.c:208 alloc_skb include/linux/skbuff.h:1012 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] It was triggered when the bearer name size < TIPC_MAX_BEARER_NAME, it would check with a wrong len/TLV_GET_DATA_LEN(msg->req), which also includes priority and disc_domain length. This patch is to fix it by checking it with a right length: 'TLV_GET_DATA_LEN(msg->req) - offsetof(struct tipc_bearer_config, name)'. Reported-by: syzbot+8b707430713eb46e1e45@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index c8b1c06e0ea4..1d7b13ada19b 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -403,7 +403,12 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(b->name, len)) return -EINVAL; -- cgit v1.2.3 From d0cf85f49505796b28e03926b0ce0e17d34461f5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:09 +0800 Subject: tipc: check link name with right length in tipc_nl_compat_link_set commit 8c63bf9ab4be8b83bd8c34aacfd2f1d2c8901c8a upstream. A similar issue as fixed by Patch "tipc: check bearer name with right length in tipc_nl_compat_bearer_enable" was also found by syzbot in tipc_nl_compat_link_set(). The length to check with should be 'TLV_GET_DATA_LEN(msg->req) - offsetof(struct tipc_link_config, name)'. Reported-by: syzbot+de00a87b8644a582ae79@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/netlink_compat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1d7b13ada19b..340a6e7c43a7 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -777,7 +777,12 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(lc->name, len)) return -EINVAL; -- cgit v1.2.3 From d0d184559cb42063a6fe960f0a9376d2aaabd8e6 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 9 Apr 2019 19:53:55 +0800 Subject: net: netrom: Fix error cleanup path of nr_proto_init commit d3706566ae3d92677b932dd156157fd6c72534b1 upstream. Syzkaller report this: BUG: unable to handle kernel paging request at fffffbfff830524b PGD 237fe8067 P4D 237fe8067 PUD 237e64067 PMD 1c9716067 PTE 0 Oops: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__list_add_valid+0x21/0xe0 lib/list_debug.c:23 Code: 8b 0c 24 e9 17 fd ff ff 90 55 48 89 fd 48 8d 7a 08 53 48 89 d3 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 48 83 ec 08 <80> 3c 02 00 0f 85 8b 00 00 00 48 8b 53 08 48 39 f2 75 35 48 89 f2 RSP: 0018:ffff8881ea2278d0 EFLAGS: 00010282 RAX: dffffc0000000000 RBX: ffffffffc1829250 RCX: 1ffff1103d444ef4 RDX: 1ffffffff830524b RSI: ffffffff85659300 RDI: ffffffffc1829258 RBP: ffffffffc1879250 R08: fffffbfff0acb269 R09: fffffbfff0acb269 R10: ffff8881ea2278f0 R11: fffffbfff0acb268 R12: ffffffffc1829250 R13: dffffc0000000000 R14: 0000000000000008 R15: ffffffffc187c830 FS: 00007fe0361df700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffbfff830524b CR3: 00000001eb39a001 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __list_add include/linux/list.h:60 [inline] list_add include/linux/list.h:79 [inline] proto_register+0x444/0x8f0 net/core/sock.c:3375 nr_proto_init+0x73/0x4b3 [netrom] ? 0xffffffffc1628000 ? 0xffffffffc1628000 do_one_initcall+0xbc/0x47d init/main.c:887 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe0361dec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00007fe0361dec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0361df6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: netrom(+) ax25 fcrypt pcbc af_alg arizona_ldo1 v4l2_common videodev media v4l2_dv_timings hdlc ide_cd_mod snd_soc_sigmadsp_regmap snd_soc_sigmadsp intel_spi_platform intel_spi mtd spi_nor snd_usbmidi_lib usbcore lcd ti_ads7950 hi6421_regulator snd_soc_kbl_rt5663_max98927 snd_soc_hdac_hdmi snd_hda_ext_core snd_hda_core snd_soc_rt5663 snd_soc_core snd_pcm_dmaengine snd_compress snd_soc_rl6231 mac80211 rtc_rc5t583 spi_slave_time leds_pwm hid_gt683r hid industrialio_triggered_buffer kfifo_buf industrialio ir_kbd_i2c rc_core led_class_flash dwc_xlgmac snd_ymfpci gameport snd_mpu401_uart snd_rawmidi snd_ac97_codec snd_pcm ac97_bus snd_opl3_lib snd_timer snd_seq_device snd_hwdep snd soundcore iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ide_pci_generic piix aesni_intel aes_x86_64 crypto_simd cryptd glue_helper ide_core psmouse input_leds i2c_piix4 serio_raw intel_agp intel_gtt ata_generic agpgart pata_acpi parport_pc rtc_cmos parport floppy sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: rxrpc] Dumping ftrace buffer: (ftrace buffer empty) CR2: fffffbfff830524b ---[ end trace 039ab24b305c4b19 ]--- If nr_proto_init failed, it may forget to call proto_unregister, tiggering this issue.This patch rearrange code of nr_proto_init to avoid such issues. Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/netrom.h | 2 +- net/netrom/af_netrom.c | 76 ++++++++++++++++++++++++++++++------------ net/netrom/nr_loopback.c | 2 +- net/netrom/nr_route.c | 2 +- net/netrom/sysctl_net_netrom.c | 5 ++- 5 files changed, 61 insertions(+), 26 deletions(-) diff --git a/include/net/netrom.h b/include/net/netrom.h index 5a0714ff500f..80f15b1c1a48 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -266,7 +266,7 @@ void nr_stop_idletimer(struct sock *); int nr_t1timer_running(struct sock *); /* sysctl_net_netrom.c */ -void nr_register_sysctl(void); +int nr_register_sysctl(void); void nr_unregister_sysctl(void); #endif diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1d3144d19903..71ffd1a6dc7c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1392,18 +1392,22 @@ static int __init nr_proto_init(void) int i; int rc = proto_register(&nr_proto, 0); - if (rc != 0) - goto out; + if (rc) + return rc; if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n"); - return -1; + pr_err("NET/ROM: %s - nr_ndevs parameter too large\n", + __func__); + rc = -EINVAL; + goto unregister_proto; } dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL); - if (dev_nr == NULL) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); - return -1; + if (!dev_nr) { + pr_err("NET/ROM: %s - unable to allocate device array\n", + __func__); + rc = -ENOMEM; + goto unregister_proto; } for (i = 0; i < nr_ndevs; i++) { @@ -1413,13 +1417,13 @@ static int __init nr_proto_init(void) sprintf(name, "nr%d", i); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); if (!dev) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); + rc = -ENOMEM; goto fail; } dev->base_addr = i; - if (register_netdev(dev)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n"); + rc = register_netdev(dev); + if (rc) { free_netdev(dev); goto fail; } @@ -1427,36 +1431,64 @@ static int __init nr_proto_init(void) dev_nr[i] = dev; } - if (sock_register(&nr_family_ops)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n"); + rc = sock_register(&nr_family_ops); + if (rc) goto fail; - } - register_netdevice_notifier(&nr_dev_notifier); + rc = register_netdevice_notifier(&nr_dev_notifier); + if (rc) + goto out_sock; ax25_register_pid(&nr_pid); ax25_linkfail_register(&nr_linkfail_notifier); #ifdef CONFIG_SYSCTL - nr_register_sysctl(); + rc = nr_register_sysctl(); + if (rc) + goto out_sysctl; #endif nr_loopback_init(); - proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops); - proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops); - proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops); -out: - return rc; + rc = -ENOMEM; + if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops)) + goto proc_remove1; + if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net, + &nr_neigh_seqops)) + goto proc_remove2; + if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net, + &nr_node_seqops)) + goto proc_remove3; + + return 0; + +proc_remove3: + remove_proc_entry("nr_neigh", init_net.proc_net); +proc_remove2: + remove_proc_entry("nr", init_net.proc_net); +proc_remove1: + + nr_loopback_clear(); + nr_rt_free(); + +#ifdef CONFIG_SYSCTL + nr_unregister_sysctl(); +out_sysctl: +#endif + ax25_linkfail_release(&nr_linkfail_notifier); + ax25_protocol_release(AX25_P_NETROM); + unregister_netdevice_notifier(&nr_dev_notifier); +out_sock: + sock_unregister(PF_NETROM); fail: while (--i >= 0) { unregister_netdev(dev_nr[i]); free_netdev(dev_nr[i]); } kfree(dev_nr); +unregister_proto: proto_unregister(&nr_proto); - rc = -1; - goto out; + return rc; } module_init(nr_proto_init); diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 215ad22a9647..93d13f019981 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused) } } -void __exit nr_loopback_clear(void) +void nr_loopback_clear(void) { del_timer_sync(&loopback_timer); skb_queue_purge(&loopback_queue); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6485f593e2f0..b76aa668a94b 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = { /* * Free all memory associated with the nodes and routes lists. */ -void __exit nr_rt_free(void) +void nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index ba1c368b3f18..771011b84270 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = { { } }; -void __init nr_register_sysctl(void) +int __init nr_register_sysctl(void) { nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table); + if (!nr_table_header) + return -ENOMEM; + return 0; } void nr_unregister_sysctl(void) -- cgit v1.2.3 From 4f2c074416346c2c1241ce573772f1354619cda7 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:51:52 +0900 Subject: net/rds: Check address length before reading address family commit dd3ac9a684358b8c1d5c432ca8322aaf5e4f28ee upstream. syzbot is reporting uninitialized value at rds_connect() [1] and rds_bind() [2]. This is because syzbot is passing ulen == 0 whereas these functions expect that it is safe to access sockaddr->family field in order to determine minimal address length for validation. [1] https://syzkaller.appspot.com/bug?id=f4e61c010416c1e6f0fa3ffe247561b60a50ad71 [2] https://syzkaller.appspot.com/bug?id=a4bf9e41b7e055c3823fdcd83e8c58ca7270e38f Reported-by: syzbot Reported-by: syzbot Signed-off-by: Tetsuo Handa Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/af_rds.c | 3 +++ net/rds/bind.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 65387e1e6964..cd7e01ea8144 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -506,6 +506,9 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; + lock_sock(sk); switch (uaddr->sa_family) { diff --git a/net/rds/bind.c b/net/rds/bind.c index 17c9d9f0c848..0f4398e7f2a7 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* We allow an RDS socket to be bound to either IPv4 or IPv6 * address. */ + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; if (uaddr->sa_family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; -- cgit v1.2.3 From e2f0a96eeb501b0bfb667e4d2f2a4cccf09914b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Apr 2019 09:44:11 -0700 Subject: rxrpc: fix race condition in rxrpc_input_packet() commit 032be5f19a94de51093851757089133dcc1e92aa upstream. After commit 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook"), rxrpc_input_packet() is directly called from lockless UDP receive path, under rcu_read_lock() protection. It must therefore use RCU rules : - udp_sk->sk_user_data can be cleared at any point in this function. rcu_dereference_sk_user_data() is what we need here. - Also, since sk_user_data might have been set in rxrpc_open_socket() we must observe a proper RCU grace period before kfree(local) in rxrpc_lookup_local() v4: @local can be NULL in xrpc_lookup_local() as reported by kbuild test robot and Julia Lawall , thanks ! v3,v2 : addressed David Howells feedback, thanks ! syzbot reported : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 19236 Comm: syz-executor703 Not tainted 5.1.0-rc6 #79 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__lock_acquire+0xbef/0x3fb0 kernel/locking/lockdep.c:3573 Code: 00 0f 85 a5 1f 00 00 48 81 c4 10 01 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 4a 21 00 00 49 81 7d 00 20 54 9c 89 0f 84 cf f4 RSP: 0018:ffff88809d7aef58 EFLAGS: 00010002 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000026 RSI: 0000000000000000 RDI: 0000000000000001 RBP: ffff88809d7af090 R08: 0000000000000001 R09: 0000000000000001 R10: ffffed1015d05bc7 R11: ffff888089428600 R12: 0000000000000000 R13: 0000000000000130 R14: 0000000000000001 R15: 0000000000000001 FS: 00007f059044d700(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004b6040 CR3: 00000000955ca000 CR4: 00000000001406f0 Call Trace: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:4211 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:152 skb_queue_tail+0x26/0x150 net/core/skbuff.c:2972 rxrpc_reject_packet net/rxrpc/input.c:1126 [inline] rxrpc_input_packet+0x4a0/0x5536 net/rxrpc/input.c:1414 udp_queue_rcv_one_skb+0xaf2/0x1780 net/ipv4/udp.c:2011 udp_queue_rcv_skb+0x128/0x730 net/ipv4/udp.c:2085 udp_unicast_rcv_skb.isra.0+0xb9/0x360 net/ipv4/udp.c:2245 __udp4_lib_rcv+0x701/0x2ca0 net/ipv4/udp.c:2301 udp_rcv+0x22/0x30 net/ipv4/udp.c:2482 ip_protocol_deliver_rcu+0x60/0x8f0 net/ipv4/ip_input.c:208 ip_local_deliver_finish+0x23b/0x390 net/ipv4/ip_input.c:234 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_local_deliver+0x1e9/0x520 net/ipv4/ip_input.c:255 dst_input include/net/dst.h:450 [inline] ip_rcv_finish+0x1e1/0x300 net/ipv4/ip_input.c:413 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_rcv+0xe8/0x3f0 net/ipv4/ip_input.c:523 __netif_receive_skb_one_core+0x115/0x1a0 net/core/dev.c:4987 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5099 netif_receive_skb_internal+0x117/0x660 net/core/dev.c:5202 napi_frags_finish net/core/dev.c:5769 [inline] napi_gro_frags+0xade/0xd10 net/core/dev.c:5843 tun_get_user+0x2f24/0x3fb0 drivers/net/tun.c:1981 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2027 call_write_iter include/linux/fs.h:1866 [inline] do_iter_readv_writev+0x5e1/0x8e0 fs/read_write.c:681 do_iter_write fs/read_write.c:957 [inline] do_iter_write+0x184/0x610 fs/read_write.c:938 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1002 do_writev+0x15e/0x370 fs/read_write.c:1037 __do_sys_writev fs/read_write.c:1110 [inline] __se_sys_writev fs/read_write.c:1107 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1107 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/input.c | 12 ++++++++---- net/rxrpc/local_object.c | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9128aa0e40aa..b4ffb81223ad 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1155,19 +1155,19 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * handle data received on the local endpoint * - may be called in interrupt context * - * The socket is locked by the caller and this prevents the socket from being - * shut down and the local endpoint from going away, thus sk_user_data will not - * be cleared until this function returns. + * [!] Note that as this is called from the encap_rcv hook, the socket is not + * held locked by the caller and nothing prevents sk_user_data on the UDP from + * being cleared in the middle of processing this function. * * Called with the RCU read lock held from the IP layer via UDP. */ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { + struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); struct rxrpc_connection *conn; struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; unsigned int channel; @@ -1175,6 +1175,10 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) _enter("%p", udp_sk); + if (unlikely(!local)) { + kfree_skb(skb); + return 0; + } if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 0906e51d3cfb..10317dbdab5f 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -304,7 +304,8 @@ nomem: ret = -ENOMEM; sock_error: mutex_unlock(&rxnet->local_mutex); - kfree(local); + if (local) + call_rcu(&local->rcu, rxrpc_local_rcu); _leave(" = %d", ret); return ERR_PTR(ret); -- cgit v1.2.3 From 199f34c1355b2b277081320d2c5965bb98d08d85 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 6 Mar 2019 20:22:54 -0500 Subject: pin iocb through aio. commit b53119f13a04879c3bf502828d99d13726639ead upstream. aio_poll() is not the only case that needs file pinned; worse, while aio_read()/aio_write() can live without pinning iocb itself, the proof is rather brittle and can easily break on later changes. Signed-off-by: Linus Torvalds Signed-off-by: Al Viro Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 3d9669d011b9..363d7d7c8bff 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1022,6 +1022,9 @@ static bool get_reqs_available(struct kioctx *ctx) /* aio_get_req * Allocate a slot for an aio request. * Returns NULL if no requests are free. + * + * The refcount is initialized to 2 - one for the async op completion, + * one for the synchronous code that does this. */ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) { @@ -1034,7 +1037,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) percpu_ref_get(&ctx->reqs); req->ki_ctx = ctx; INIT_LIST_HEAD(&req->ki_list); - refcount_set(&req->ki_refcnt, 0); + refcount_set(&req->ki_refcnt, 2); req->ki_eventfd = NULL; return req; } @@ -1067,15 +1070,18 @@ out: return ret; } +static inline void iocb_destroy(struct aio_kiocb *iocb) +{ + if (iocb->ki_filp) + fput(iocb->ki_filp); + percpu_ref_put(&iocb->ki_ctx->reqs); + kmem_cache_free(kiocb_cachep, iocb); +} + static inline void iocb_put(struct aio_kiocb *iocb) { - if (refcount_read(&iocb->ki_refcnt) == 0 || - refcount_dec_and_test(&iocb->ki_refcnt)) { - if (iocb->ki_filp) - fput(iocb->ki_filp); - percpu_ref_put(&iocb->ki_ctx->reqs); - kmem_cache_free(kiocb_cachep, iocb); - } + if (refcount_dec_and_test(&iocb->ki_refcnt)) + iocb_destroy(iocb); } static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, @@ -1749,9 +1755,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) INIT_LIST_HEAD(&req->wait.entry); init_waitqueue_func_entry(&req->wait, aio_poll_wake); - /* one for removal from waitqueue, one for this function */ - refcount_set(&aiocb->ki_refcnt, 2); - mask = vfs_poll(req->file, &apt.pt) & req->events; if (unlikely(!req->head)) { /* we did not manage to set up a waitqueue, done */ @@ -1782,7 +1785,6 @@ out: if (mask) aio_poll_complete(aiocb, mask); - iocb_put(aiocb); return 0; } @@ -1873,18 +1875,21 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, break; } + /* Done with the synchronous reference */ + iocb_put(req); + /* * If ret is 0, we'd either done aio_complete() ourselves or have * arranged for that to be done asynchronously. Anything non-zero * means that we need to destroy req ourselves. */ - if (ret) - goto out_put_req; - return 0; + if (!ret) + return 0; + out_put_req: if (req->ki_eventfd) eventfd_ctx_put(req->ki_eventfd); - iocb_put(req); + iocb_destroy(req); out_put_reqs_available: put_reqs_available(ctx, 1); return ret; -- cgit v1.2.3 From 636fa71ed5f7d7442c263aa3111de776d235f881 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Mar 2019 19:00:36 -0400 Subject: aio: fold lookup_kiocb() into its sole caller commit 833f4154ed560232120bc475935ee1d6a20e159f upstream. Signed-off-by: Al Viro Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 363d7d7c8bff..ae60c29b8a98 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2002,24 +2002,6 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, } #endif -/* lookup_kiocb - * Finds a given iocb for cancellation. - */ -static struct aio_kiocb * -lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb) -{ - struct aio_kiocb *kiocb; - - assert_spin_locked(&ctx->ctx_lock); - - /* TODO: use a hash or array, this sucks. */ - list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { - if (kiocb->ki_user_iocb == iocb) - return kiocb; - } - return NULL; -} - /* sys_io_cancel: * Attempts to cancel an iocb previously passed to io_submit. If * the operation is successfully cancelled, the resulting event is @@ -2048,10 +2030,13 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, return -EINVAL; spin_lock_irq(&ctx->ctx_lock); - kiocb = lookup_kiocb(ctx, iocb); - if (kiocb) { - ret = kiocb->ki_cancel(&kiocb->rw); - list_del_init(&kiocb->ki_list); + /* TODO: use a hash or array, this sucks. */ + list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { + if (kiocb->ki_user_iocb == iocb) { + ret = kiocb->ki_cancel(&kiocb->rw); + list_del_init(&kiocb->ki_list); + break; + } } spin_unlock_irq(&ctx->ctx_lock); -- cgit v1.2.3 From a8a538aed4f9bd89862be15e207bfef506a4ba36 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Mar 2019 19:43:45 -0500 Subject: aio: keep io_event in aio_kiocb commit a9339b7855094ba11a97e8822ae038135e879e79 upstream. We want to separate forming the resulting io_event from putting it into the ring buffer. Signed-off-by: Al Viro Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index ae60c29b8a98..387b224217b5 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -204,8 +204,7 @@ struct aio_kiocb { struct kioctx *ki_ctx; kiocb_cancel_fn *ki_cancel; - struct iocb __user *ki_user_iocb; /* user's aiocb */ - __u64 ki_user_data; /* user's data for completion */ + struct io_event ki_res; struct list_head ki_list; /* the aio core uses this * for cancellation */ @@ -1084,15 +1083,6 @@ static inline void iocb_put(struct aio_kiocb *iocb) iocb_destroy(iocb); } -static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, - long res, long res2) -{ - ev->obj = (u64)(unsigned long)iocb->ki_user_iocb; - ev->data = iocb->ki_user_data; - ev->res = res; - ev->res2 = res2; -} - /* aio_complete * Called when the io request on the given iocb is complete. */ @@ -1104,6 +1094,8 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) unsigned tail, pos, head; unsigned long flags; + iocb->ki_res.res = res; + iocb->ki_res.res2 = res2; /* * Add a completion event to the ring buffer. Must be done holding * ctx->completion_lock to prevent other code from messing with the tail @@ -1120,14 +1112,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; - aio_fill_event(event, iocb, res, res2); + *event = iocb->ki_res; kunmap_atomic(ev_page); flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); - pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", - ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data, - res, res2); + pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb, + (void __user *)(unsigned long)iocb->ki_res.obj, + iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2); /* after flagging the request as done, we * must never even look at it again @@ -1844,8 +1836,10 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, goto out_put_req; } - req->ki_user_iocb = user_iocb; - req->ki_user_data = iocb->aio_data; + req->ki_res.obj = (u64)(unsigned long)user_iocb; + req->ki_res.data = iocb->aio_data; + req->ki_res.res = 0; + req->ki_res.res2 = 0; switch (iocb->aio_lio_opcode) { case IOCB_CMD_PREAD: @@ -2019,6 +2013,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct aio_kiocb *kiocb; int ret = -EINVAL; u32 key; + u64 obj = (u64)(unsigned long)iocb; if (unlikely(get_user(key, &iocb->aio_key))) return -EFAULT; @@ -2032,7 +2027,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, spin_lock_irq(&ctx->ctx_lock); /* TODO: use a hash or array, this sucks. */ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { - if (kiocb->ki_user_iocb == iocb) { + if (kiocb->ki_res.obj == obj) { ret = kiocb->ki_cancel(&kiocb->rw); list_del_init(&kiocb->ki_list); break; -- cgit v1.2.3 From f6408361ded74c1f031fa14da39865d963e966c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Mar 2019 19:49:55 -0500 Subject: aio: store event at final iocb_put() commit 2bb874c0d873d13bd9b9b9c6d7b7c4edab18c8b4 upstream. Instead of having aio_complete() set ->ki_res.{res,res2}, do that explicitly in its callers, drop the reference (as aio_complete() used to do) and delay the rest until the final iocb_put(). Signed-off-by: Al Viro Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 387b224217b5..82bf5dffb272 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1077,16 +1077,10 @@ static inline void iocb_destroy(struct aio_kiocb *iocb) kmem_cache_free(kiocb_cachep, iocb); } -static inline void iocb_put(struct aio_kiocb *iocb) -{ - if (refcount_dec_and_test(&iocb->ki_refcnt)) - iocb_destroy(iocb); -} - /* aio_complete * Called when the io request on the given iocb is complete. */ -static void aio_complete(struct aio_kiocb *iocb, long res, long res2) +static void aio_complete(struct aio_kiocb *iocb) { struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; @@ -1094,8 +1088,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) unsigned tail, pos, head; unsigned long flags; - iocb->ki_res.res = res; - iocb->ki_res.res2 = res2; /* * Add a completion event to the ring buffer. Must be done holding * ctx->completion_lock to prevent other code from messing with the tail @@ -1161,7 +1153,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - iocb_put(iocb); +} + +static inline void iocb_put(struct aio_kiocb *iocb) +{ + if (refcount_dec_and_test(&iocb->ki_refcnt)) { + aio_complete(iocb); + iocb_destroy(iocb); + } } /* aio_read_events_ring @@ -1435,7 +1434,9 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) file_end_write(kiocb->ki_filp); } - aio_complete(iocb, res, res2); + iocb->ki_res.res = res; + iocb->ki_res.res2 = res2; + iocb_put(iocb); } static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) @@ -1583,11 +1584,10 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb, static void aio_fsync_work(struct work_struct *work) { - struct fsync_iocb *req = container_of(work, struct fsync_iocb, work); - int ret; + struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); - ret = vfs_fsync(req->file, req->datasync); - aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); + iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); + iocb_put(iocb); } static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, @@ -1608,7 +1608,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) { - aio_complete(iocb, mangle_poll(mask), 0); + iocb->ki_res.res = mangle_poll(mask); + iocb_put(iocb); } static void aio_poll_complete_work(struct work_struct *work) -- cgit v1.2.3 From b6dd51f041faa9e239d40782e58abb57eaef124c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Mar 2019 21:45:41 -0500 Subject: Fix aio_poll() races commit af5c72b1fc7a00aa484e90b0c4e0eeb582545634 upstream. aio_poll() has to cope with several unpleasant problems: * requests that might stay around indefinitely need to be made visible for io_cancel(2); that must not be done to a request already completed, though. * in cases when ->poll() has placed us on a waitqueue, wakeup might have happened (and request completed) before ->poll() returns. * worse, in some early wakeup cases request might end up re-added into the queue later - we can't treat "woken up and currently not in the queue" as "it's not going to stick around indefinitely" * ... moreover, ->poll() might have decided not to put it on any queues to start with, and that needs to be distinguished from the previous case * ->poll() might have tried to put us on more than one queue. Only the first will succeed for aio poll, so we might end up missing wakeups. OTOH, we might very well notice that only after the wakeup hits and request gets completed (all before ->poll() gets around to the second poll_wait()). In that case it's too late to decide that we have an error. req->woken was an attempt to deal with that. Unfortunately, it was broken. What we need to keep track of is not that wakeup has happened - the thing might come back after that. It's that async reference is already gone and won't come back, so we can't (and needn't) put the request on the list of cancellables. The easiest case is "request hadn't been put on any waitqueues"; we can tell by seeing NULL apt.head, and in that case there won't be anything async. We should either complete the request ourselves (if vfs_poll() reports anything of interest) or return an error. In all other cases we get exclusion with wakeups by grabbing the queue lock. If request is currently on queue and we have something interesting from vfs_poll(), we can steal it and complete the request ourselves. If it's on queue and vfs_poll() has not reported anything interesting, we either put it on the cancellable list, or, if we know that it hadn't been put on all queues ->poll() wanted it on, we steal it and return an error. If it's _not_ on queue, it's either been already dealt with (in which case we do nothing), or there's aio_poll_complete_work() about to be executed. In that case we either put it on the cancellable list, or, if we know it hadn't been put on all queues ->poll() wanted it on, simulate what cancel would've done. It's a lot more convoluted than I'd like it to be. Single-consumer APIs suck, and unfortunately aio is not an exception... Signed-off-by: Al Viro Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 90 +++++++++++++++++++++++++++++----------------------------------- 1 file changed, 40 insertions(+), 50 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 82bf5dffb272..efa13410e04e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -181,7 +181,7 @@ struct poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool woken; + bool done; bool cancelled; struct wait_queue_entry wait; struct work_struct work; @@ -1606,12 +1606,6 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, return 0; } -static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) -{ - iocb->ki_res.res = mangle_poll(mask); - iocb_put(iocb); -} - static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1637,9 +1631,11 @@ static void aio_poll_complete_work(struct work_struct *work) return; } list_del_init(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + req->done = true; spin_unlock_irq(&ctx->ctx_lock); - aio_poll_complete(iocb, mask); + iocb_put(iocb); } /* assumes we are called with irqs disabled */ @@ -1667,31 +1663,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, __poll_t mask = key_to_poll(key); unsigned long flags; - req->woken = true; - /* for instances that support it check for an event match first: */ - if (mask) { - if (!(mask & req->events)) - return 0; + if (mask && !(mask & req->events)) + return 0; + list_del_init(&req->wait.entry); + + if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { /* * Try to complete the iocb inline if we can. Use * irqsave/irqrestore because not all filesystems (e.g. fuse) * call this function with IRQs disabled and because IRQs * have to be disabled before ctx_lock is obtained. */ - if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { - list_del(&iocb->ki_list); - spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); - - list_del_init(&req->wait.entry); - aio_poll_complete(iocb, mask); - return 1; - } + list_del(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + req->done = true; + spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); + iocb_put(iocb); + } else { + schedule_work(&req->work); } - - list_del_init(&req->wait.entry); - schedule_work(&req->work); return 1; } @@ -1723,6 +1715,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) struct kioctx *ctx = aiocb->ki_ctx; struct poll_iocb *req = &aiocb->poll; struct aio_poll_table apt; + bool cancel = false; __poll_t mask; /* reject any unknown events outside the normal event mask. */ @@ -1736,7 +1729,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; - req->woken = false; + req->done = false; req->cancelled = false; apt.pt._qproc = aio_poll_queue_proc; @@ -1749,36 +1742,33 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) init_waitqueue_func_entry(&req->wait, aio_poll_wake); mask = vfs_poll(req->file, &apt.pt) & req->events; - if (unlikely(!req->head)) { - /* we did not manage to set up a waitqueue, done */ - goto out; - } - spin_lock_irq(&ctx->ctx_lock); - spin_lock(&req->head->lock); - if (req->woken) { - /* wake_up context handles the rest */ - mask = 0; + if (likely(req->head)) { + spin_lock(&req->head->lock); + if (unlikely(list_empty(&req->wait.entry))) { + if (apt.error) + cancel = true; + apt.error = 0; + mask = 0; + } + if (mask || apt.error) { + list_del_init(&req->wait.entry); + } else if (cancel) { + WRITE_ONCE(req->cancelled, true); + } else if (!req->done) { /* actually waiting for an event */ + list_add_tail(&aiocb->ki_list, &ctx->active_reqs); + aiocb->ki_cancel = aio_poll_cancel; + } + spin_unlock(&req->head->lock); + } + if (mask) { /* no async, we'd stolen it */ + aiocb->ki_res.res = mangle_poll(mask); apt.error = 0; - } else if (mask || apt.error) { - /* if we get an error or a mask we are done */ - WARN_ON_ONCE(list_empty(&req->wait.entry)); - list_del_init(&req->wait.entry); - } else { - /* actually waiting for an event */ - list_add_tail(&aiocb->ki_list, &ctx->active_reqs); - aiocb->ki_cancel = aio_poll_cancel; } - spin_unlock(&req->head->lock); spin_unlock_irq(&ctx->ctx_lock); - -out: - if (unlikely(apt.error)) - return apt.error; - if (mask) - aio_poll_complete(aiocb, mask); - return 0; + iocb_put(aiocb); + return apt.error; } static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, -- cgit v1.2.3 From 6bd5637a3ebb7f7bbaa4425c1d277c758b61d8f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 21 Feb 2019 23:19:41 +0100 Subject: x86, retpolines: Raise limit for generating indirect calls from switch-case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ce02ef06fcf7a399a6276adb83f37373d10cbbe1 upstream. From networking side, there are numerous attempts to get rid of indirect calls in fast-path wherever feasible in order to avoid the cost of retpolines, for example, just to name a few: * 283c16a2dfd3 ("indirect call wrappers: helpers to speed-up indirect calls of builtin") * aaa5d90b395a ("net: use indirect call wrappers at GRO network layer") * 028e0a476684 ("net: use indirect call wrappers at GRO transport layer") * 356da6d0cde3 ("dma-mapping: bypass indirect calls for dma-direct") * 09772d92cd5a ("bpf: avoid retpoline for lookup/update/delete calls on maps") * 10870dd89e95 ("netfilter: nf_tables: add direct calls for all builtin expressions") [...] Recent work on XDP from Björn and Magnus additionally found that manually transforming the XDP return code switch statement with more than 5 cases into if-else combination would result in a considerable speedup in XDP layer due to avoidance of indirect calls in CONFIG_RETPOLINE enabled builds. On i40e driver with XDP prog attached, a 20-26% speedup has been observed [0]. Aside from XDP, there are many other places later in the networking stack's critical path with similar switch-case processing. Rather than fixing every XDP-enabled driver and locations in stack by hand, it would be good to instead raise the limit where gcc would emit expensive indirect calls from the switch under retpolines and stick with the default as-is in case of !retpoline configured kernels. This would also have the advantage that for archs where this is not necessary, we let compiler select the underlying target optimization for these constructs and avoid potential slow-downs by if-else hand-rewrite. In case of gcc, this setting is controlled by case-values-threshold which has an architecture global default that selects 4 or 5 (latter if target does not have a case insn that compares the bounds) where some arch back ends like arm64 or s390 override it with their own target hooks, for example, in gcc commit db7a90aa0de5 ("S/390: Disable prediction of indirect branches") the threshold pretty much disables jump tables by limit of 20 under retpoline builds. Comparing gcc's and clang's default code generation on x86-64 under O2 level with retpoline build results in the following outcome for 5 switch cases: * gcc with -mindirect-branch=thunk-inline -mindirect-branch-register: # gdb -batch -ex 'disassemble dispatch' ./c-switch Dump of assembler code for function dispatch: 0x0000000000400be0 <+0>: cmp $0x4,%edi 0x0000000000400be3 <+3>: ja 0x400c35 0x0000000000400be5 <+5>: lea 0x915f8(%rip),%rdx # 0x4921e4 0x0000000000400bec <+12>: mov %edi,%edi 0x0000000000400bee <+14>: movslq (%rdx,%rdi,4),%rax 0x0000000000400bf2 <+18>: add %rdx,%rax 0x0000000000400bf5 <+21>: callq 0x400c01 0x0000000000400bfa <+26>: pause 0x0000000000400bfc <+28>: lfence 0x0000000000400bff <+31>: jmp 0x400bfa 0x0000000000400c01 <+33>: mov %rax,(%rsp) 0x0000000000400c05 <+37>: retq 0x0000000000400c06 <+38>: nopw %cs:0x0(%rax,%rax,1) 0x0000000000400c10 <+48>: jmpq 0x400c90 0x0000000000400c15 <+53>: nopl (%rax) 0x0000000000400c18 <+56>: jmpq 0x400c70 0x0000000000400c1d <+61>: nopl (%rax) 0x0000000000400c20 <+64>: jmpq 0x400c50 0x0000000000400c25 <+69>: nopl (%rax) 0x0000000000400c28 <+72>: jmpq 0x400c40 0x0000000000400c2d <+77>: nopl (%rax) 0x0000000000400c30 <+80>: jmpq 0x400cb0 0x0000000000400c35 <+85>: push %rax 0x0000000000400c36 <+86>: callq 0x40dd80 End of assembler dump. * clang with -mretpoline emitting search tree: # gdb -batch -ex 'disassemble dispatch' ./c-switch Dump of assembler code for function dispatch: 0x0000000000400b30 <+0>: cmp $0x1,%edi 0x0000000000400b33 <+3>: jle 0x400b44 0x0000000000400b35 <+5>: cmp $0x2,%edi 0x0000000000400b38 <+8>: je 0x400b4d 0x0000000000400b3a <+10>: cmp $0x3,%edi 0x0000000000400b3d <+13>: jne 0x400b52 0x0000000000400b3f <+15>: jmpq 0x400c50 0x0000000000400b44 <+20>: test %edi,%edi 0x0000000000400b46 <+22>: jne 0x400b5c 0x0000000000400b48 <+24>: jmpq 0x400c20 0x0000000000400b4d <+29>: jmpq 0x400c40 0x0000000000400b52 <+34>: cmp $0x4,%edi 0x0000000000400b55 <+37>: jne 0x400b66 0x0000000000400b57 <+39>: jmpq 0x400c60 0x0000000000400b5c <+44>: cmp $0x1,%edi 0x0000000000400b5f <+47>: jne 0x400b66 0x0000000000400b61 <+49>: jmpq 0x400c30 0x0000000000400b66 <+54>: push %rax 0x0000000000400b67 <+55>: callq 0x40dd20 End of assembler dump. For sake of comparison, clang without -mretpoline: # gdb -batch -ex 'disassemble dispatch' ./c-switch Dump of assembler code for function dispatch: 0x0000000000400b30 <+0>: cmp $0x4,%edi 0x0000000000400b33 <+3>: ja 0x400b57 0x0000000000400b35 <+5>: mov %edi,%eax 0x0000000000400b37 <+7>: jmpq *0x492148(,%rax,8) 0x0000000000400b3e <+14>: jmpq 0x400bf0 0x0000000000400b43 <+19>: jmpq 0x400c30 0x0000000000400b48 <+24>: jmpq 0x400c10 0x0000000000400b4d <+29>: jmpq 0x400c20 0x0000000000400b52 <+34>: jmpq 0x400c00 0x0000000000400b57 <+39>: push %rax 0x0000000000400b58 <+40>: callq 0x40dcf0 End of assembler dump. Raising the cases to a high number (e.g. 100) will still result in similar code generation pattern with clang and gcc as above, in other words clang generally turns off jump table emission by having an extra expansion pass under retpoline build to turn indirectbr instructions from their IR into switch instructions as a built-in -mno-jump-table lowering of a switch (in this case, even if IR input already contained an indirect branch). For gcc, adding --param=case-values-threshold=20 as in similar fashion as s390 in order to raise the limit for x86 retpoline enabled builds results in a small vmlinux size increase of only 0.13% (before=18,027,528 after=18,051,192). For clang this option is ignored due to i) not being needed as mentioned and ii) not having above cmdline parameter. Non-retpoline-enabled builds with gcc continue to use the default case-values-threshold setting, so nothing changes here. [0] https://lore.kernel.org/netdev/20190129095754.9390-1-bjorn.topel@gmail.com/ and "The Path to DPDK Speeds for AF_XDP", LPC 2018, networking track: - http://vger.kernel.org/lpc_net2018_talks/lpc18_pres_af_xdp_perf-v3.pdf - http://vger.kernel.org/lpc_net2018_talks/lpc18_paper_af_xdp_perf-v2.pdf Signed-off-by: Daniel Borkmann Signed-off-by: Thomas Gleixner Acked-by: Jesper Dangaard Brouer Acked-by: Björn Töpel Acked-by: Linus Torvalds Cc: netdev@vger.kernel.org Cc: David S. Miller Cc: Magnus Karlsson Cc: Alexei Starovoitov Cc: Peter Zijlstra Cc: David Woodhouse Cc: Andy Lutomirski Cc: Borislav Petkov Link: https://lkml.kernel.org/r/20190221221941.29358-1-daniel@iogearbox.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9c5a67d1b9c1..f55420a67164 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -217,6 +217,11 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) + # Additionally, avoid generating expensive indirect jumps which + # are subject to retpolines for small number of switch cases. + # clang turns off jump table generation by default when under + # retpoline builds, however, gcc does not for x86. + KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20) endif archscripts: scripts_basic -- cgit v1.2.3 From dee438745d7425da3cb1c453250ae8113742bb16 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Mar 2019 14:56:20 +0100 Subject: x86/retpolines: Disable switch jump tables when retpolines are enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a9d57ef15cbe327fe54416dd194ee0ea66ae53a4 upstream. Commit ce02ef06fcf7 ("x86, retpolines: Raise limit for generating indirect calls from switch-case") raised the limit under retpolines to 20 switch cases where gcc would only then start to emit jump tables, and therefore effectively disabling the emission of slow indirect calls in this area. After this has been brought to attention to gcc folks [0], Martin Liska has then fixed gcc to align with clang by avoiding to generate switch jump tables entirely under retpolines. This is taking effect in gcc starting from stable version 8.4.0. Given kernel supports compilation with older versions of gcc where the fix is not being available or backported anymore, we need to keep the extra KBUILD_CFLAGS around for some time and generally set the -fno-jump-tables to align with what more recent gcc is doing automatically today. More than 20 switch cases are not expected to be fast-path critical, but it would still be good to align with gcc behavior for versions < 8.4.0 in order to have consistency across supported gcc versions. vmlinux size is slightly growing by 0.27% for older gcc. This flag is only set to work around affected gcc, no change for clang. [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86952 Suggested-by: Martin Liska Signed-off-by: Daniel Borkmann Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Linus Torvalds Cc: Jesper Dangaard Brouer Cc: Björn Töpel Cc: Magnus Karlsson Cc: Alexei Starovoitov Cc: H.J. Lu Cc: Alexei Starovoitov Cc: David S. Miller Link: https://lkml.kernel.org/r/20190325135620.14882-1-daniel@iogearbox.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index f55420a67164..c0c7291d4ccf 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -220,8 +220,12 @@ ifdef CONFIG_RETPOLINE # Additionally, avoid generating expensive indirect jumps which # are subject to retpolines for small number of switch cases. # clang turns off jump table generation by default when under - # retpoline builds, however, gcc does not for x86. - KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20) + # retpoline builds, however, gcc does not for x86. This has + # only been fixed starting from gcc stable version 8.4.0 and + # onwards, but not for older ones. See gcc bug #86952. + ifndef CONFIG_CC_IS_CLANG + KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables) + endif endif archscripts: scripts_basic -- cgit v1.2.3 From 7f909f520742c9862e94e013c789216e2f7cfe28 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Apr 2019 09:48:53 -0700 Subject: rdma: fix build errors on s390 and MIPS due to bad ZERO_PAGE use commit 6a5c5d26c4c6c3cc486fef0bf04ff9551132611b upstream. The parameter to ZERO_PAGE() was wrong, but since all architectures except for MIPS and s390 ignore it, it wasn't noticed until 0-day reported the build error. Fixes: 67f269b37f9b ("RDMA/ucontext: Fix regression with disassociate") Cc: stable@vger.kernel.org Cc: Andrea Arcangeli Cc: Leon Romanovsky Cc: Jason Gunthorpe Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 857eff31b433..27ca4022ca70 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -894,7 +894,7 @@ static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) /* Read only pages can just use the system zero page. */ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { - vmf->page = ZERO_PAGE(vmf->vm_start); + vmf->page = ZERO_PAGE(vmf->address); get_page(vmf->page); return 0; } -- cgit v1.2.3 From 617d24036564ed8513ba44be20c6ceb6fea2b690 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Apr 2019 08:04:05 -0700 Subject: ipv4: add sanity checks in ipv4_link_failure() [ Upstream commit 20ff83f10f113c88d0bb74589389b05250994c16 ] Before calling __ip_options_compile(), we need to ensure the network header is a an IPv4 one, and that it is already pulled in skb->head. RAW sockets going through a tunnel can end up calling ipv4_link_failure() with total garbage in the skb, or arbitrary lengthes. syzbot report : BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:355 [inline] BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0x294/0x1120 net/ipv4/ip_options.c:123 Write of size 69 at addr ffff888096abf068 by task syz-executor.4/9204 CPU: 0 PID: 9204 Comm: syz-executor.4 Not tainted 5.1.0-rc5+ #77 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 memcpy+0x38/0x50 mm/kasan/common.c:133 memcpy include/linux/string.h:355 [inline] __ip_options_echo+0x294/0x1120 net/ipv4/ip_options.c:123 __icmp_send+0x725/0x1400 net/ipv4/icmp.c:695 ipv4_link_failure+0x29f/0x550 net/ipv4/route.c:1204 dst_link_failure include/net/dst.h:427 [inline] vti6_xmit net/ipv6/ip6_vti.c:514 [inline] vti6_tnl_xmit+0x10d4/0x1c0c net/ipv6/ip6_vti.c:553 __netdev_start_xmit include/linux/netdevice.h:4414 [inline] netdev_start_xmit include/linux/netdevice.h:4423 [inline] xmit_one net/core/dev.c:3292 [inline] dev_hard_start_xmit+0x1b2/0x980 net/core/dev.c:3308 __dev_queue_xmit+0x271d/0x3060 net/core/dev.c:3878 dev_queue_xmit+0x18/0x20 net/core/dev.c:3911 neigh_direct_output+0x16/0x20 net/core/neighbour.c:1527 neigh_output include/net/neighbour.h:508 [inline] ip_finish_output2+0x949/0x1740 net/ipv4/ip_output.c:229 ip_finish_output+0x73c/0xd50 net/ipv4/ip_output.c:317 NF_HOOK_COND include/linux/netfilter.h:278 [inline] ip_output+0x21f/0x670 net/ipv4/ip_output.c:405 dst_output include/net/dst.h:444 [inline] NF_HOOK include/linux/netfilter.h:289 [inline] raw_send_hdrinc net/ipv4/raw.c:432 [inline] raw_sendmsg+0x1d2b/0x2f20 net/ipv4/raw.c:663 inet_sendmsg+0x147/0x5d0 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:661 sock_write_iter+0x27c/0x3e0 net/socket.c:988 call_write_iter include/linux/fs.h:1866 [inline] new_sync_write+0x4c7/0x760 fs/read_write.c:474 __vfs_write+0xe4/0x110 fs/read_write.c:487 vfs_write+0x20c/0x580 fs/read_write.c:549 ksys_write+0x14f/0x2d0 fs/read_write.c:599 __do_sys_write fs/read_write.c:611 [inline] __se_sys_write fs/read_write.c:608 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:608 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458c29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f293b44bc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000458c29 RDX: 0000000000000014 RSI: 00000000200002c0 RDI: 0000000000000003 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f293b44c6d4 R13: 00000000004c8623 R14: 00000000004ded68 R15: 00000000ffffffff The buggy address belongs to the page: page:ffffea00025aafc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x1fffc0000000000() raw: 01fffc0000000000 0000000000000000 ffffffff025a0101 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888096abef80: 00 00 00 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 f2 ffff888096abf000: f2 f2 f2 f2 00 00 00 00 00 00 00 00 00 00 00 00 >ffff888096abf080: 00 00 f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 ^ ffff888096abf100: 00 00 00 00 f1 f1 f1 f1 00 00 f3 f3 00 00 00 00 ffff888096abf180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Signed-off-by: Eric Dumazet Cc: Stephen Suryaputra Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 25d9bef27d03..3c89ca325947 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1183,25 +1183,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return dst; } -static void ipv4_link_failure(struct sk_buff *skb) +static void ipv4_send_dest_unreach(struct sk_buff *skb) { struct ip_options opt; - struct rtable *rt; int res; /* Recompile ip options since IPCB may not be valid anymore. + * Also check we have a reasonable ipv4 header. */ - memset(&opt, 0, sizeof(opt)); - opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || + ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) + return; - rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); - rcu_read_unlock(); + memset(&opt, 0, sizeof(opt)); + if (ip_hdr(skb)->ihl > 5) { + if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) + return; + opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); - if (res) - return; + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + if (res) + return; + } __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); +} + +static void ipv4_link_failure(struct sk_buff *skb) +{ + struct rtable *rt; + + ipv4_send_dest_unreach(skb); rt = skb_rtable(skb); if (rt) -- cgit v1.2.3 From d69b7c8c8ab5d55b1cc52fa6ee54ba198a1f4f6c Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Tue, 16 Apr 2019 09:47:24 +0800 Subject: ipv4: set the tcp_min_rtt_wlen range from 0 to one day [ Upstream commit 19fad20d15a6494f47f85d869f00b11343ee5c78 ] There is a UBSAN report as below: UBSAN: Undefined behaviour in net/ipv4/tcp_input.c:2877:56 signed integer overflow: 2147483647 * 1000 cannot be represented in type 'int' CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.1.0-rc4-00058-g582549e #1 Call Trace: dump_stack+0x8c/0xba ubsan_epilogue+0x11/0x60 handle_overflow+0x12d/0x170 ? ttwu_do_wakeup+0x21/0x320 __ubsan_handle_mul_overflow+0x12/0x20 tcp_ack_update_rtt+0x76c/0x780 tcp_clean_rtx_queue+0x499/0x14d0 tcp_ack+0x69e/0x1240 ? __wake_up_sync_key+0x2c/0x50 ? update_group_capacity+0x50/0x680 tcp_rcv_established+0x4e2/0xe10 tcp_v4_do_rcv+0x22b/0x420 tcp_v4_rcv+0xfe8/0x1190 ip_protocol_deliver_rcu+0x36/0x180 ip_local_deliver+0x15b/0x1a0 ip_rcv+0xac/0xd0 __netif_receive_skb_one_core+0x7f/0xb0 __netif_receive_skb+0x33/0xc0 netif_receive_skb_internal+0x84/0x1c0 napi_gro_receive+0x2a0/0x300 receive_buf+0x3d4/0x2350 ? detach_buf_split+0x159/0x390 virtnet_poll+0x198/0x840 ? reweight_entity+0x243/0x4b0 net_rx_action+0x25c/0x770 __do_softirq+0x19b/0x66d irq_exit+0x1eb/0x230 do_IRQ+0x7a/0x150 common_interrupt+0xf/0xf It can be reproduced by: echo 2147483647 > /proc/sys/net/ipv4/tcp_min_rtt_wlen Fixes: f672258391b42 ("tcp: track min RTT using windowed min-filter") Signed-off-by: ZhangXiaoxu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- Documentation/networking/ip-sysctl.txt | 1 + net/ipv4/sysctl_net_ipv4.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index acdfb5d2bcaa..e2142fe40cda 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -422,6 +422,7 @@ tcp_min_rtt_wlen - INTEGER minimum RTT when it is moved to a longer path (e.g., due to traffic engineering). A longer window makes the filter more resistant to RTT inflations such as transient congestion. The unit is seconds. + Possible values: 0 - 86400 (1 day) Default: 300 tcp_moderate_rcvbuf - BOOLEAN diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ba0fc4b18465..eeb4041fa5f9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; static u32 u32_max_div_HZ = UINT_MAX / HZ; +static int one_day_secs = 24 * 3600; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one_day_secs }, { .procname = "tcp_autocorking", -- cgit v1.2.3 From f37782f2d708e07d23745eed25d517074c3e6877 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 18 Apr 2019 07:14:16 +0000 Subject: mlxsw: spectrum: Fix autoneg status in ethtool [ Upstream commit 151f0dddbbfe4c35c9c5b64873115aafd436af9d ] If link is down and autoneg is set to on/off, the status in ethtool does not change. The reason is when the link is down the function returns with zero before changing autoneg value. Move the checking of link state (up/down) to be performed after setting autoneg value, in order to be sure that autoneg will change in any case. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cbdee5164be7..9183c84da72c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2667,11 +2667,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, if (err) return err; + mlxsw_sp_port->link.autoneg = autoneg; + if (!netif_running(dev)) return 0; - mlxsw_sp_port->link.autoneg = autoneg; - mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); -- cgit v1.2.3 From 283882b045adb3f58aa89a62f5be551f1111de8e Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Thu, 11 Apr 2019 10:41:03 +0300 Subject: net/mlx5e: ethtool, Remove unsupported SFP EEPROM high pages query [ Upstream commit ace329f4ab3ba434be2adf618073c752d083b524 ] Querying EEPROM high pages data for SFP module is currently not supported by our driver and yet queried, resulting in invalid FW queries. Set the EEPROM ethtool data length to 256 for SFP module will limit the reading for page 0 only and prevent invalid FW queries. Fixes: bb64143eee8c ("net/mlx5e: Add ethtool support for dump module EEPROM") Signed-off-by: Erez Alfasi Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3b9e5f0d0212..253496c4a3db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1470,7 +1470,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, break; case MLX5_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; break; default: netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 2b82f35f4c35..efce1fa37f6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -404,10 +404,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; i2c_addr = MLX5_I2C_ADDR_LOW; - if (offset >= MLX5_EEPROM_PAGE_LENGTH) { - i2c_addr = MLX5_I2C_ADDR_HIGH; - offset -= MLX5_EEPROM_PAGE_LENGTH; - } MLX5_SET(mcia_reg, in, l, 0); MLX5_SET(mcia_reg, in, module, module_num); -- cgit v1.2.3 From b225e518109bff2b0cb507ba9b8535e2ed78062d Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 24 Apr 2019 02:56:42 -0400 Subject: net: rds: exchange of 8K and 1M pool [ Upstream commit 4b9fc7146249a6e0e3175d0acc033fdcd2bfcb17 ] Before the commit 490ea5967b0d ("RDS: IB: move FMR code to its own file"), when the dirty_count is greater than 9/10 of max_items of 8K pool, 1M pool is used, Vice versa. After the commit 490ea5967b0d ("RDS: IB: move FMR code to its own file"), the above is removed. When we make the following tests. Server: rds-stress -r 1.1.1.16 -D 1M Client: rds-stress -r 1.1.1.14 -s 1.1.1.16 -D 1M The following will appear. " connecting to 1.1.1.16:4000 negotiated options, tasks will start in 2 seconds Starting up..header from 1.1.1.166:4001 to id 4001 bogus .. tsks tx/s rx/s tx+rx K/s mbi K/s mbo K/s tx us/c rtt us cpu % 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 ... " So this exchange between 8K and 1M pool is added back. Fixes: commit 490ea5967b0d ("RDS: IB: move FMR code to its own file") Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/ib_fmr.c | 11 +++++++++++ net/rds/ib_rdma.c | 3 --- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index e0f70c4051b6..01e764f8f224 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) else pool = rds_ibdev->mr_1m_pool; + if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) + queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); + + /* Switch pools if one of the pool is reaching upper limit */ + if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { + if (pool->pool_type == RDS_IB_MR_8K_POOL) + pool = rds_ibdev->mr_1m_pool; + else + pool = rds_ibdev->mr_8k_pool; + } + ibmr = rds_ib_try_reuse_ibmr(pool); if (ibmr) return ibmr; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 63c8d107adcf..d664e9ade74d 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -454,9 +454,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) struct rds_ib_mr *ibmr = NULL; int iter = 0; - if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10) - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); - while (1) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) -- cgit v1.2.3 From 8de6f5e5b90d16e19a69e70e2cc2ea723813d8ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Apr 2019 05:35:00 -0700 Subject: net/rose: fix unbound loop in rose_loopback_timer() [ Upstream commit 0453c682459583910d611a96de928f4442205493 ] This patch adds a limit on the number of skbs that fuzzers can queue into loopback_queue. 1000 packets for rose loopback seems more than enough. Then, since we now have multiple cpus in most linux hosts, we also need to limit the number of skbs rose_loopback_timer() can dequeue at each round. rose_loopback_queue() can be drop-monitor friendly, calling consume_skb() or kfree_skb() appropriately. Finally, use mod_timer() instead of del_timer() + add_timer() syzbot report was : rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-...!: (10499 ticks this GP) idle=536/1/0x4000000000000002 softirq=103291/103291 fqs=34 rcu: (t=10500 jiffies g=140321 q=323) rcu: rcu_preempt kthread starved for 10426 jiffies! g140321 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=1 rcu: RCU grace-period kthread stack dump: rcu_preempt I29168 10 2 0x80000000 Call Trace: context_switch kernel/sched/core.c:2877 [inline] __schedule+0x813/0x1cc0 kernel/sched/core.c:3518 schedule+0x92/0x180 kernel/sched/core.c:3562 schedule_timeout+0x4db/0xfd0 kernel/time/timer.c:1803 rcu_gp_fqs_loop kernel/rcu/tree.c:1971 [inline] rcu_gp_kthread+0x962/0x17b0 kernel/rcu/tree.c:2128 kthread+0x357/0x430 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 NMI backtrace for cpu 0 CPU: 0 PID: 7632 Comm: kworker/0:4 Not tainted 5.1.0-rc5+ #172 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events iterate_cleanup_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 nmi_cpu_backtrace.cold+0x63/0xa4 lib/nmi_backtrace.c:101 nmi_trigger_cpumask_backtrace+0x1be/0x236 lib/nmi_backtrace.c:62 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline] rcu_dump_cpu_stacks+0x183/0x1cf kernel/rcu/tree.c:1223 print_cpu_stall kernel/rcu/tree.c:1360 [inline] check_cpu_stall kernel/rcu/tree.c:1434 [inline] rcu_pending kernel/rcu/tree.c:3103 [inline] rcu_sched_clock_irq.cold+0x500/0xa4a kernel/rcu/tree.c:2544 update_process_times+0x32/0x80 kernel/time/timer.c:1635 tick_sched_handle+0xa2/0x190 kernel/time/tick-sched.c:161 tick_sched_timer+0x47/0x130 kernel/time/tick-sched.c:1271 __run_hrtimer kernel/time/hrtimer.c:1389 [inline] __hrtimer_run_queues+0x33e/0xde0 kernel/time/hrtimer.c:1451 hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1035 [inline] smp_apic_timer_interrupt+0x120/0x570 arch/x86/kernel/apic/apic.c:1060 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50 kernel/kcov.c:95 Code: 89 25 b4 6e ec 08 41 bc f4 ff ff ff e8 cd 5d ea ff 48 c7 05 9e 6e ec 08 00 00 00 00 e9 a4 e9 ff ff 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 48 8b 75 08 65 48 8b 04 25 00 ee 01 00 65 8b 15 c8 60 RSP: 0018:ffff8880ae807ce0 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 RAX: ffff88806fd40640 RBX: dffffc0000000000 RCX: ffffffff863fbc56 RDX: 0000000000000100 RSI: ffffffff863fbc1d RDI: ffff88808cf94228 RBP: ffff8880ae807d10 R08: ffff88806fd40640 R09: ffffed1015d00f8b R10: ffffed1015d00f8a R11: 0000000000000003 R12: ffff88808cf941c0 R13: 00000000fffff034 R14: ffff8882166cd840 R15: 0000000000000000 rose_loopback_timer+0x30d/0x3f0 net/rose/rose_loopback.c:91 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1027 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rose/rose_loopback.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 7af4f99c4a93..094a6621f8e8 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -16,6 +16,7 @@ #include static struct sk_buff_head loopback_queue; +#define ROSE_LOOPBACK_LIMIT 1000 static struct timer_list loopback_timer; static void rose_set_loopback_timer(void); @@ -35,29 +36,27 @@ static int rose_loopback_running(void) int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) { - struct sk_buff *skbn; + struct sk_buff *skbn = NULL; - skbn = skb_clone(skb, GFP_ATOMIC); + if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT) + skbn = skb_clone(skb, GFP_ATOMIC); - kfree_skb(skb); - - if (skbn != NULL) { + if (skbn) { + consume_skb(skb); skb_queue_tail(&loopback_queue, skbn); if (!rose_loopback_running()) rose_set_loopback_timer(); + } else { + kfree_skb(skb); } return 1; } - static void rose_set_loopback_timer(void) { - del_timer(&loopback_timer); - - loopback_timer.expires = jiffies + 10; - add_timer(&loopback_timer); + mod_timer(&loopback_timer, jiffies + 10); } static void rose_loopback_timer(struct timer_list *unused) @@ -68,8 +67,12 @@ static void rose_loopback_timer(struct timer_list *unused) struct sock *sk; unsigned short frametype; unsigned int lci_i, lci_o; + int count; - while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) { + skb = skb_dequeue(&loopback_queue); + if (!skb) + return; if (skb->len < ROSE_MIN_LEN) { kfree_skb(skb); continue; @@ -106,6 +109,8 @@ static void rose_loopback_timer(struct timer_list *unused) kfree_skb(skb); } } + if (!skb_queue_empty(&loopback_queue)) + mod_timer(&loopback_timer, jiffies + 1); } void __exit rose_loopback_clear(void) -- cgit v1.2.3 From e39643c12d3fbbe0c26f07c74f6471a4155b1cdc Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 22 Apr 2019 15:15:32 +0530 Subject: net: stmmac: move stmmac_check_ether_addr() to driver probe [ Upstream commit b561af36b1841088552464cdc3f6371d92f17710 ] stmmac_check_ether_addr() checks the MAC address and assigns one in driver open(). In many cases when we create slave netdevice, the dev addr is inherited from master but the master dev addr maybe NULL at that time, so move this call to driver probe so that address is always valid. Signed-off-by: Xiaofei Shen Tested-by: Xiaofei Shen Signed-off-by: Sneh Shah Signed-off-by: Vinod Koul Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 019ab99e65bb..1d8d6f2ddfd6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2590,8 +2590,6 @@ static int stmmac_open(struct net_device *dev) u32 chan; int ret; - stmmac_check_ether_addr(priv); - if (priv->hw->pcs != STMMAC_PCS_RGMII && priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) { @@ -4265,6 +4263,8 @@ int stmmac_dvr_probe(struct device *device, if (ret) goto error_hw_init; + stmmac_check_ether_addr(priv); + /* Configure real RX and TX queues */ netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use); netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use); -- cgit v1.2.3 From 3fd3574106ded2c8692f63032dddcb4a2cfbae64 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Apr 2019 10:51:19 -0700 Subject: net/tls: fix refcount adjustment in fallback [ Upstream commit 9188d5ca454fd665145904267e726e9e8d122f5c ] Unlike atomic_add(), refcount_add() does not deal well with a negative argument. TLS fallback code reallocates the skb and is very likely to shrink the truesize, leading to: [ 189.513254] WARNING: CPU: 5 PID: 0 at lib/refcount.c:81 refcount_add_not_zero_checked+0x15c/0x180 Call Trace: refcount_add_checked+0x6/0x40 tls_enc_skb+0xb93/0x13e0 [tls] Once wmem_allocated count saturates the application can no longer send data on the socket. This is similar to Eric's fixes for GSO, TCP: commit 7ec318feeed1 ("tcp: gso: avoid refcount_t warning from tcp_gso_segment()") and UDP: commit 575b65bc5bff ("udp: avoid refcount_t saturation in __udp_gso_segment()"). Unlike the GSO case, for TLS fallback it's likely that the skb has shrunk, so the "likely" annotation is the other way around (likely branch being "sub"). Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_device_fallback.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 450a6dbc5a88..ef8934fd8698 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -193,6 +193,9 @@ static void update_chksum(struct sk_buff *skb, int headln) static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) { + struct sock *sk = skb->sk; + int delta; + skb_copy_header(nskb, skb); skb_put(nskb, skb->len); @@ -200,11 +203,15 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) update_chksum(nskb, headln); nskb->destructor = skb->destructor; - nskb->sk = skb->sk; + nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; - refcount_add(nskb->truesize - skb->truesize, - &nskb->sk->sk_wmem_alloc); + + delta = nskb->truesize - skb->truesize; + if (likely(delta < 0)) + WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); + else if (delta) + refcount_add(delta, &sk->sk_wmem_alloc); } /* This function may be called after the user socket is already -- cgit v1.2.3 From dc2b8b627802abcec26bc4bc715fded368849a19 Mon Sep 17 00:00:00 2001 From: Su Bao Cheng Date: Thu, 18 Apr 2019 11:14:56 +0200 Subject: stmmac: pci: Adjust IOT2000 matching [ Upstream commit e0c1d14a1a3211dccf0540a6703ffbd5d2a75bdb ] Since there are more IOT2040 variants with identical hardware but different asset tags, the asset tag matching should be adjusted to support them. For the board name "SIMATIC IOT2000", currently there are 2 types of hardware, IOT2020 and IOT2040. The IOT2020 is identified by its unique asset tag. Match on it first. If we then match on the board name only, we will catch all IOT2040 variants. In the future there will be no other devices with the "SIMATIC IOT2000" DMI board name but different hardware. Signed-off-by: Su Bao Cheng Reviewed-by: Jan Kiszka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index d819e8eaba12..cc1e887e47b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -159,6 +159,12 @@ static const struct dmi_system_id quark_pci_dmi[] = { }, .driver_data = (void *)&galileo_stmmac_dmi_data, }, + /* + * There are 2 types of SIMATIC IOT2000: IOT20202 and IOT2040. + * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which + * has only one pci network device while other asset tags are + * for IOT2040 which has two. + */ { .matches = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"), @@ -170,8 +176,6 @@ static const struct dmi_system_id quark_pci_dmi[] = { { .matches = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"), - DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG, - "6ES7647-0AA00-1YA2"), }, .driver_data = (void *)&iot2040_stmmac_dmi_data, }, -- cgit v1.2.3 From af31849700e4a1d8c65689cd9b23e6621bc387f2 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 19 Apr 2019 14:31:00 +0800 Subject: team: fix possible recursive locking when add slaves [ Upstream commit 925b0c841e066b488cc3a60272472b2c56300704 ] If we add a bond device which is already the master of the team interface, we will hold the team->lock in team_add_slave() first and then request the lock in team_set_mac_address() again. The functions are called like: - team_add_slave() - team_port_add() - team_port_enter() - team_modeop_port_enter() - __set_port_dev_addr() - dev_set_mac_address() - bond_set_mac_address() - dev_set_mac_address() - team_set_mac_address Although team_upper_dev_link() would check the upper devices but it is called too late. Fix it by adding a checking before processing the slave. v2: Do not split the string in netdev_err() Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Acked-by: Jiri Pirko Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 1283632091d5..7dcda9364009 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1157,6 +1157,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return -EINVAL; } + if (netdev_has_upper_dev(dev, port_dev)) { + NL_SET_ERR_MSG(extack, "Device is already an upper device of the team interface"); + netdev_err(dev, "Device %s is already an upper device of the team interface\n", + portname); + return -EBUSY; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); -- cgit v1.2.3 From 19f6d3f49b28db54afa89d32b50523805d5ff454 Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Tue, 23 Apr 2019 09:01:41 +0300 Subject: net: socionext: replace napi_alloc_frag with the netdev variant on init [ Upstream commit ffbf9870dcf1342592a1a26f4cf70bda39046134 ] The netdev variant is usable on any context since it disables interrupts. The napi variant of the call should only be used within softirq context. Replace napi_alloc_frag on driver init with the correct netdev_alloc_frag call Changes since v1: - Adjusted commit message Acked-by: Ard Biesheuvel Acked-by: Jassi Brar Fixes: 4acb20b46214 ("net: socionext: different approach on DMA") Signed-off-by: Ilias Apalodimas Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/socionext/netsec.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index a18149720aa2..cba5881b2746 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -673,7 +673,8 @@ static void netsec_process_tx(struct netsec_priv *priv) } static void *netsec_alloc_rx_data(struct netsec_priv *priv, - dma_addr_t *dma_handle, u16 *desc_len) + dma_addr_t *dma_handle, u16 *desc_len, + bool napi) { size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); size_t payload_len = NETSEC_RX_BUF_SZ; @@ -682,7 +683,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv, total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD); - buf = napi_alloc_frag(total_len); + buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len); if (!buf) return NULL; @@ -765,7 +766,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) /* allocate a fresh buffer and map it to the hardware. * This will eventually replace the old buffer in the hardware */ - buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len); + buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len, + true); if (unlikely(!buf_addr)) break; @@ -1069,7 +1071,8 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv) void *buf; u16 len; - buf = netsec_alloc_rx_data(priv, &dma_handle, &len); + buf = netsec_alloc_rx_data(priv, &dma_handle, &len, + false); if (!buf) { netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); goto err_out; -- cgit v1.2.3 From 72f71005903d92b5bbfe1add54089d9e8cfd7639 Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Wed, 24 Apr 2019 01:43:32 +0000 Subject: net/ncsi: handle overflow when incrementing mac address [ Upstream commit 1c5c12ee308aacf635c8819cd4baa3bd58f8a8b7 ] Previously BMC's MAC address is calculated by simply adding 1 to the last byte of network controller's MAC address, and it produces incorrect result when network controller's MAC address ends with 0xFF. The problem can be fixed by calling eth_addr_inc() function to increment MAC address; besides, the MAC address is also validated before assigning to BMC. Fixes: cb10c7c0dfd9 ("net/ncsi: Add NCSI Broadcom OEM command") Signed-off-by: Tao Ren Acked-by: Jakub Kicinski Acked-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/etherdevice.h | 12 ++++++++++++ net/ncsi/ncsi-rsp.c | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2c0af7b00715..c94ab8b53a23 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -447,6 +447,18 @@ static inline void eth_addr_dec(u8 *addr) u64_to_ether_addr(u, addr); } +/** + * eth_addr_inc() - Increment the given MAC address. + * @addr: Pointer to a six-byte array containing Ethernet address to increment. + */ +static inline void eth_addr_inc(u8 *addr) +{ + u64 u = ether_addr_to_u64(addr); + + u++; + u64_to_ether_addr(u, addr); +} + /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index dc07fcc7938e..802db01e3075 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -667,7 +668,10 @@ static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr) ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN); /* Increase mac address by 1 for BMC's address */ - saddr.sa_data[ETH_ALEN - 1]++; + eth_addr_inc((u8 *)saddr.sa_data); + if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + return -ENXIO; + ret = ops->ndo_set_mac_address(ndev, &saddr); if (ret < 0) netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); -- cgit v1.2.3 From a3964a683ca1111b4127874b1448da08ebd867c0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2019 07:14:14 +0000 Subject: mlxsw: pci: Reincrease PCI reset timeout [ Upstream commit 1ab3030193d25878b3b1409060e1e0a879800c95 ] During driver initialization the driver sends a reset to the device and waits for the firmware to signal that it is ready to continue. Commit d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout") increased the timeout to 13 seconds due to longer PHY calibration in Spectrum-2 compared to Spectrum-1. Recently it became apparent that this timeout is too short and therefore this patch increases it again to a safer limit that will be reduced in the future. Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") Fixes: d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index ffee38e36ce8..8648ca171254 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -27,7 +27,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000 +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 20000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF -- cgit v1.2.3 From a7b71fc253e8fc2585e04800928651bc9164ae77 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 18 Apr 2019 07:14:13 +0000 Subject: mlxsw: spectrum: Put MC TCs into DWRR mode [ Upstream commit f476b3f809fa02f47af6333ed63715058c3fc348 ] Both Spectrum-1 and Spectrum-2 chips are currently configured such that pairs of TC n (which is used for UC traffic) and TC n+8 (which is used for MC traffic) are feeding into the same subgroup. Strict prioritization is configured between the two TCs, and by enabling MC-aware mode on the switch, the lower-numbered (UC) TCs are favored over the higher-numbered (MC) TCs. On Spectrum-2 however, there is an issue in configuration of the MC-aware mode. As a result, MC traffic is prioritized over UC traffic. To work around the issue, configure the MC TCs with DWRR mode (while keeping the UC TCs in strict mode). With this patch, the multicast-unicast arbitration results in the same behavior on both Spectrum-1 and Spectrum-2 chips. Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9183c84da72c..ce49504e1f9c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2961,7 +2961,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) err = mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HIERARCY_TC, i + 8, i, - false, 0); + true, 100); if (err) return err; } -- cgit v1.2.3 From f9444b6aaa011c03a76d3486a18e270deb5f3501 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 8 Apr 2019 15:12:45 +0300 Subject: net/mlx5e: Fix the max MTU check in case of XDP [ Upstream commit d460c2718906252a2a69bc6f89b537071f792e6e ] MLX5E_XDP_MAX_MTU was calculated incorrectly. It didn't account for NET_IP_ALIGN and MLX5E_HW2SW_MTU, and it also misused MLX5_SKB_FRAG_SZ. This commit fixes the calculations and adds a brief explanation for the formula used. Fixes: a26a5bdf3ee2d ("net/mlx5e: Restrict the combination of large MTU and XDP") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 20 ++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 03b2a9f9c589..6536ad6ee371 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -33,6 +33,26 @@ #include #include "en/xdp.h" +int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +{ + int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + + /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). + * The condition checked in mlx5e_rx_is_linear_skb is: + * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) + * (Note that hw_mtu == sw_mtu + hard_mtu.) + * What is returned from this function is: + * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) + * After assigning sw_mtu := max_mtu, the left side of (1) turns to + * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, + * because both PAGE_SIZE and S are already aligned. Any number greater + * than max_mtu would make the left side of (1) greater than PAGE_SIZE, + * so max_mtu is the maximum MTU allowed. + */ + + return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); +} + static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, struct xdp_buff *xdp) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index ee27a7c8cd87..553956cadc8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -34,13 +34,12 @@ #include "en.h" -#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \ - MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM))) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) #define MLX5E_XDP_TX_EMPTY_DS_COUNT \ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len); bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0cb19e4dd439..2d269acdbc8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3816,7 +3816,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (params->xdp_prog && !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, MLX5E_XDP_MAX_MTU); + new_mtu, mlx5e_xdp_max_mtu(params)); err = -EINVAL; goto out; } @@ -4280,7 +4280,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", - new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU); + new_channels.params.sw_mtu, + mlx5e_xdp_max_mtu(&new_channels.params)); return -EINVAL; } -- cgit v1.2.3 From c01e42fdccf5880b3ecb6bd8a8eaa8b24d17526a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 15 Mar 2019 16:41:43 +0200 Subject: net/mlx5e: Fix use-after-free after xdp_return_frame [ Upstream commit 12fc512f5741443a03adde2ead20724da8ad550a ] xdp_return_frame releases the frame. It leads to releasing the page, so it's not allowed to access xdpi.xdpf->len after that, because xdpi.xdpf is at xdp->data_hard_start after convert_to_xdp_frame. This patch moves the memory access to precede the return of the frame. Fixes: 58b99ee3e3ebe ("net/mlx5e: Add support for XDP_REDIRECT in device-out side") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 6536ad6ee371..cad34d6f5f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -324,9 +324,9 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) mlx5e_xdpi_fifo_pop(xdpi_fifo); if (is_redirect) { - xdp_return_frame(xdpi.xdpf); dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); } else { /* Recycle RX page */ mlx5e_page_release(rq, &xdpi.di, true); @@ -365,9 +365,9 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) mlx5e_xdpi_fifo_pop(xdpi_fifo); if (is_redirect) { - xdp_return_frame(xdpi.xdpf); dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); } else { /* Recycle RX page */ mlx5e_page_release(rq, &xdpi.di, false); -- cgit v1.2.3 From 2adb99038ae421985daa37379521c863462859f9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Apr 2019 16:51:38 -0700 Subject: net/tls: avoid potential deadlock in tls_set_device_offload_rx() [ Upstream commit 62ef81d5632634d5e310ed25b9b940b2b6612b46 ] If device supports offload, but offload fails tls_set_device_offload_rx() will call tls_sw_free_resources_rx() which (unhelpfully) releases and reacquires the socket lock. For a small fix release and reacquire the device_offload_lock. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4b5ff3d44912..b81b201304f7 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -884,7 +884,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_netdev; free_sw_resources: + up_read(&device_offload_lock); tls_sw_free_resources_rx(sk); + down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; release_netdev: -- cgit v1.2.3 From bcf0c1f4d56426ee7d784d9a0c958bceaa5ed24d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Apr 2019 16:52:19 -0700 Subject: net/tls: don't leak IV and record seq when offload fails [ Upstream commit 12c7686111326148b4b5db189130522a4ad1be4a ] When device refuses the offload in tls_set_device_offload_rx() it calls tls_sw_free_resources_rx() to clean up software context state. Unfortunately, tls_sw_free_resources_rx() does not free all the state tls_set_sw_offload() allocated - it leaks IV and sequence number buffers. All other code paths which lead to tls_sw_release_resources_rx() (which tls_sw_free_resources_rx() calls) free those right before the call. Avoid the leak by moving freeing of iv and rec_seq into tls_sw_release_resources_rx(). Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_device.c | 2 -- net/tls/tls_main.c | 5 +---- net/tls/tls_sw.c | 3 +++ 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index b81b201304f7..5f1d937c4be9 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -921,8 +921,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk) } out: up_read(&device_offload_lock); - kfree(tls_ctx->rx.rec_seq); - kfree(tls_ctx->rx.iv); tls_sw_release_resources_rx(sk); } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 96dbac91ac6e..ce5dd79365a7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -304,11 +304,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) #endif } - if (ctx->rx_conf == TLS_SW) { - kfree(ctx->rx.rec_seq); - kfree(ctx->rx.iv); + if (ctx->rx_conf == TLS_SW) tls_sw_free_resources_rx(sk); - } #ifdef CONFIG_TLS_DEVICE if (ctx->rx_conf == TLS_HW) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d2d4f7c0d4be..839a0a0b5dfa 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1830,6 +1830,9 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + kfree(tls_ctx->rx.rec_seq); + kfree(tls_ctx->rx.iv); + if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; -- cgit v1.2.3 From d5a2675b207d3b3629edb3e1588ccc4f8dfb5040 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2019 10:02:58 +0200 Subject: Linux 5.0.11 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b282c4143b21..c3daaefa979c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 10 +SUBLEVEL = 11 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3 From 635e82b9bdc8a321a0ac80765304f873049bb814 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sun, 24 Feb 2019 21:55:28 -0300 Subject: selinux: use kernel linux/socket.h for genheaders and mdp commit dfbd199a7cfe3e3cd8531e1353cdbd7175bfbc5e upstream. When compiling genheaders and mdp from a newer host kernel, the following error happens: In file included from scripts/selinux/genheaders/genheaders.c:18: ./security/selinux/include/classmap.h:238:2: error: #error New address family defined, please update secclass_map. #error New address family defined, please update secclass_map. ^~~~~ make[3]: *** [scripts/Makefile.host:107: scripts/selinux/genheaders/genheaders] Error 1 make[2]: *** [scripts/Makefile.build:599: scripts/selinux/genheaders] Error 2 make[1]: *** [scripts/Makefile.build:599: scripts/selinux] Error 2 make[1]: *** Waiting for unfinished jobs.... Instead of relying on the host definition, include linux/socket.h in classmap.h to have PF_MAX. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara Acked-by: Stephen Smalley [PM: manually merge in mdp.c, subject line tweaks] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- scripts/selinux/genheaders/genheaders.c | 1 - scripts/selinux/mdp/mdp.c | 1 - security/selinux/include/classmap.h | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c index 1ceedea847dd..544ca126a8a8 100644 --- a/scripts/selinux/genheaders/genheaders.c +++ b/scripts/selinux/genheaders/genheaders.c @@ -9,7 +9,6 @@ #include #include #include -#include struct security_class_mapping { const char *name; diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index 073fe7537f6c..6d51b74bc679 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -32,7 +32,6 @@ #include #include #include -#include static void usage(char *name) { diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index bd5fe0d3204a..201f7e588a29 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include #define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \ "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append", "map" -- cgit v1.2.3 From 742c556944e1ff299e3a2e305145c16876b0ed96 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 30 Apr 2019 11:18:21 +0200 Subject: Revert "ACPICA: Clear status of GPEs before enabling them" commit 2c2a2fb1e2a9256714338875bede6b7cbd4b9542 upstream. Revert commit c8b1917c8987 ("ACPICA: Clear status of GPEs before enabling them") that causes problems with Thunderbolt controllers to occur if a dock device is connected at init time (the xhci_hcd and thunderbolt modules crash which prevents peripherals connected through them from working). Commit c8b1917c8987 effectively causes commit ecc1165b8b74 ("ACPICA: Dispatch active GPEs at init time") to get undone, so the problem addressed by commit ecc1165b8b74 appears again as a result of it. Fixes: c8b1917c8987 ("ACPICA: Clear status of GPEs before enabling them") Link: https://lore.kernel.org/lkml/s5hy33siofw.wl-tiwai@suse.de/T/#u Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1132943 Reported-by: Michael Hirmke Reported-by: Takashi Iwai Cc: 4.17+ # 4.17+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/evgpe.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index 4424997ecf30..e10fec99a182 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -81,12 +81,8 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) ACPI_FUNCTION_TRACE(ev_enable_gpe); - /* Clear the GPE status */ - status = acpi_hw_clear_gpe(gpe_event_info); - if (ACPI_FAILURE(status)) - return_ACPI_STATUS(status); - /* Enable the requested GPE */ + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE); return_ACPI_STATUS(status); } -- cgit v1.2.3 From cc37cb69fbdc69e6bf3c6d74f4e0a56fafd92e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Mar 2019 16:49:02 +0200 Subject: drm/i915: Do not enable FEC without DSC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5aae7832d1b4ec614996ea0f4fafc4d9855ec0b0 upstream. Currently we enable FEC even when DSC is no used. While that is theoretically valid supposedly there isn't much of a benefit from this. But more importantly we do not account for the FEC link bandwidth overhead (2.4%) in the non-DSC link bandwidth computations. So the code may think we have enough bandwidth when we in fact do not. Cc: stable@vger.kernel.org Cc: Anusha Srivatsa Cc: Manasi Navare Fixes: 240999cf339f ("i915/dp/fec: Add fec_enable to the crtc state.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190326144903.6617-1-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare (cherry picked from commit 6fd3134ae3551d4802a04669c0f39f2f5c56f77d) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index dcd1df5322e8..21c6016ccba5 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1871,6 +1871,9 @@ static bool intel_dp_dsc_compute_config(struct intel_dp *intel_dp, u8 dsc_max_bpc; int pipe_bpp; + pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && + intel_dp_supports_fec(intel_dp, pipe_config); + if (!intel_dp_supports_dsc(intel_dp, pipe_config)) return false; @@ -2097,9 +2100,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; - pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && - intel_dp_supports_fec(intel_dp, pipe_config); - if (!intel_dp_compute_link_config(encoder, pipe_config, conn_state)) return false; -- cgit v1.2.3 From 258fc3baeb4b2da15391735fd806facf4a91b585 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:06:20 -0700 Subject: mm: make page ref count overflow check tighter and more explicit commit f958d7b528b1b40c44cfda5eabe2d82760d868c3 upstream. We have a VM_BUG_ON() to check that the page reference count doesn't underflow (or get close to overflow) by checking the sign of the count. That's all fine, but we actually want to allow people to use a "get page ref unless it's already very high" helper function, and we want that one to use the sign of the page ref (without triggering this VM_BUG_ON). Change the VM_BUG_ON to only check for small underflows (or _very_ close to overflowing), and ignore overflows which have strayed into negative territory. Acked-by: Matthew Wilcox Cc: Jann Horn Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 80bb6408fe73..541d99b86aea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -965,6 +965,10 @@ static inline bool is_pci_p2pdma_page(const struct page *page) } #endif /* CONFIG_DEV_PAGEMAP_OPS */ +/* 127: arbitrary random number, small enough to assemble well */ +#define page_ref_zero_or_close_to_overflow(page) \ + ((unsigned int) page_ref_count(page) + 127u <= 127u) + static inline void get_page(struct page *page) { page = compound_head(page); @@ -972,7 +976,7 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_refcount. */ - VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page); + VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); page_ref_inc(page); } -- cgit v1.2.3 From bdc8dfd6587378458e248e36ab79a129f24ba8e9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:14:59 -0700 Subject: mm: add 'try_get_page()' helper function commit 88b1a17dfc3ed7728316478fae0f5ad508f50397 upstream. This is the same as the traditional 'get_page()' function, but instead of unconditionally incrementing the reference count of the page, it only does so if the count was "safe". It returns whether the reference count was incremented (and is marked __must_check, since the caller obviously has to be aware of it). Also like 'get_page()', you can't use this function unless you already had a reference to the page. The intent is that you can use this exactly like get_page(), but in situations where you want to limit the maximum reference count. The code currently does an unconditional WARN_ON_ONCE() if we ever hit the reference count issues (either zero or negative), as a notification that the conditional non-increment actually happened. NOTE! The count access for the "safety" check is inherently racy, but that doesn't matter since the buffer we use is basically half the range of the reference count (ie we look at the sign of the count). Acked-by: Matthew Wilcox Cc: Jann Horn Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 541d99b86aea..7000ddd807e0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -980,6 +980,15 @@ static inline void get_page(struct page *page) page_ref_inc(page); } +static inline __must_check bool try_get_page(struct page *page) +{ + page = compound_head(page); + if (WARN_ON_ONCE(page_ref_count(page) <= 0)) + return false; + page_ref_inc(page); + return true; +} + static inline void put_page(struct page *page) { page = compound_head(page); -- cgit v1.2.3 From ad73e3a199066ad9bf48ea1334ef312e5aa078f4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:49:19 -0700 Subject: mm: prevent get_user_pages() from overflowing page refcount commit 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 upstream. If the page refcount wraps around past zero, it will be freed while there are still four billion references to it. One of the possible avenues for an attacker to try to make this happen is by doing direct IO on a page multiple times. This patch makes get_user_pages() refuse to take a new page reference if there are already more than two billion references to the page. Reported-by: Jann Horn Acked-by: Matthew Wilcox Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/gup.c | 48 ++++++++++++++++++++++++++++++++++++------------ mm/hugetlb.c | 13 +++++++++++++ 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 75029649baca..81e0bdefa2cc 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -157,8 +157,12 @@ retry: goto retry; } - if (flags & FOLL_GET) - get_page(page); + if (flags & FOLL_GET) { + if (unlikely(!try_get_page(page))) { + page = ERR_PTR(-ENOMEM); + goto out; + } + } if (flags & FOLL_TOUCH) { if ((flags & FOLL_WRITE) && !pte_dirty(pte) && !PageDirty(page)) @@ -295,7 +299,10 @@ retry_locked: if (pmd_trans_unstable(pmd)) ret = -EBUSY; } else { - get_page(page); + if (unlikely(!try_get_page(page))) { + spin_unlock(ptl); + return ERR_PTR(-ENOMEM); + } spin_unlock(ptl); lock_page(page); ret = split_huge_page(page); @@ -497,7 +504,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, if (is_device_public_page(*page)) goto unmap; } - get_page(*page); + if (unlikely(!try_get_page(*page))) { + ret = -ENOMEM; + goto unmap; + } out: ret = 0; unmap: @@ -1393,6 +1403,20 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) } } +/* + * Return the compund head page with ref appropriately incremented, + * or NULL if that failed. + */ +static inline struct page *try_get_compound_head(struct page *page, int refs) +{ + struct page *head = compound_head(page); + if (WARN_ON_ONCE(page_ref_count(head) < 0)) + return NULL; + if (unlikely(!page_cache_add_speculative(head, refs))) + return NULL; + return head; +} + #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) @@ -1427,9 +1451,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - head = compound_head(page); - if (!page_cache_get_speculative(head)) + head = try_get_compound_head(page, 1); + if (!head) goto pte_unmap; if (unlikely(pte_val(pte) != pte_val(*ptep))) { @@ -1568,8 +1592,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pmd_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pmd_page(orig), refs); + if (!head) { *nr -= refs; return 0; } @@ -1606,8 +1630,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pud_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pud_page(orig), refs); + if (!head) { *nr -= refs; return 0; } @@ -1643,8 +1667,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pgd_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pgd_page(orig), refs); + if (!head) { *nr -= refs; return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8dfdffc34a99..c220315dc533 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4298,6 +4298,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT; page = pte_page(huge_ptep_get(pte)); + + /* + * Instead of doing 'try_get_page()' below in the same_page + * loop, just check the count once here. + */ + if (unlikely(page_count(page) <= 0)) { + if (pages) { + spin_unlock(ptl); + remainder = 0; + err = -ENOMEM; + break; + } + } same_page: if (pages) { pages[i] = mem_map_offset(page, pfn_offset); -- cgit v1.2.3 From 27f651142eec95373c7ed6e10a3929fab1733c1a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 5 Apr 2019 14:02:10 -0700 Subject: fs: prevent page refcount overflow in pipe_buf_get commit 15fab63e1e57be9fdb5eec1bbc5916e9825e9acb upstream. Change pipe_buf_get() to return a bool indicating whether it succeeded in raising the refcount of the page (if the thing in the pipe is a page). This removes another mechanism for overflowing the page refcount. All callers converted to handle a failure. Reported-by: Jann Horn Signed-off-by: Matthew Wilcox Cc: stable@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 12 ++++++------ fs/pipe.c | 4 ++-- fs/splice.c | 12 ++++++++++-- include/linux/pipe_fs_i.h | 10 ++++++---- kernel/trace/trace.c | 6 +++++- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 809c0f2f9942..64f4de983468 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2034,10 +2034,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len; ret = -EINVAL; - if (rem < len) { - pipe_unlock(pipe); - goto out; - } + if (rem < len) + goto out_free; rem = len; while (rem) { @@ -2055,7 +2053,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); pipe->nrbufs--; } else { - pipe_buf_get(pipe, ibuf); + if (!pipe_buf_get(pipe, ibuf)) + goto out_free; + *obuf = *ibuf; obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->len = rem; @@ -2078,11 +2078,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ret = fuse_dev_do_write(fud, &cs, len); pipe_lock(pipe); +out_free: for (idx = 0; idx < nbuf; idx++) pipe_buf_release(pipe, &bufs[idx]); pipe_unlock(pipe); -out: kvfree(bufs); return ret; } diff --git a/fs/pipe.c b/fs/pipe.c index c51750ed4011..2a297bce381f 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -189,9 +189,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal); * in the tee() system call, when we duplicate the buffers in one * pipe into another. */ -void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) +bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - get_page(buf->page); + return try_get_page(buf->page); } EXPORT_SYMBOL(generic_pipe_buf_get); diff --git a/fs/splice.c b/fs/splice.c index 7da7d5437472..c38c7e7a49c9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1588,7 +1588,11 @@ retry: * Get a reference to this pipe buffer, * so we can copy the contents over. */ - pipe_buf_get(ipipe, ibuf); + if (!pipe_buf_get(ipipe, ibuf)) { + if (ret == 0) + ret = -EFAULT; + break; + } *obuf = *ibuf; /* @@ -1662,7 +1666,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, * Get a reference to this pipe buffer, * so we can copy the contents over. */ - pipe_buf_get(ipipe, ibuf); + if (!pipe_buf_get(ipipe, ibuf)) { + if (ret == 0) + ret = -EFAULT; + break; + } obuf = opipe->bufs + nbuf; *obuf = *ibuf; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 66ee63cd5968..7897a3cc05b9 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -108,18 +108,20 @@ struct pipe_buf_operations { /* * Get a reference to the pipe buffer. */ - void (*get)(struct pipe_inode_info *, struct pipe_buffer *); + bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to + * + * Return: %true if the reference was successfully obtained. */ -static inline void pipe_buf_get(struct pipe_inode_info *pipe, +static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - buf->ops->get(pipe, buf); + return buf->ops->get(pipe, buf); } /** @@ -178,7 +180,7 @@ struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); +bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d07fc2836786..3842773b8aee 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6843,12 +6843,16 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, buf->private = 0; } -static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, +static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; + if (refcount_read(&ref->refcount) > INT_MAX/2) + return false; + refcount_inc(&ref->refcount); + return true; } /* Pipe buffer operations for a buffer. */ -- cgit v1.2.3 From 9979cd3953b972f841eda028e9bf635e9419f658 Mon Sep 17 00:00:00 2001 From: Takeshi Kihara Date: Thu, 21 Feb 2019 13:59:38 +0100 Subject: arm64: dts: renesas: r8a77990: Fix SCIF5 DMA channels [ Upstream commit e20119f7eaaaf6aad5b44f35155ce500429e17f6 ] According to the R-Car Gen3 Hardware Manual Errata for Rev 1.50 of Feb 12, 2019, the DMA channels for SCIF5 are corrected from 16..47 to 0..15 on R-Car E3. Signed-off-by: Takeshi Kihara Fixes: a5ebe5e49a862e21 ("arm64: dts: renesas: r8a77990: Add SCIF-{0,1,3,4,5} device nodes") Signed-off-by: Geert Uytterhoeven Reviewed-by: Fabrizio Castro Signed-off-by: Simon Horman Signed-off-by: Sasha Levin (Microsoft) --- arch/arm64/boot/dts/renesas/r8a77990.dtsi | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index b2f606e286ce..327d12097643 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -2,7 +2,7 @@ /* * Device Tree Source for the R-Car E3 (R8A77990) SoC * - * Copyright (C) 2018 Renesas Electronics Corp. + * Copyright (C) 2018-2019 Renesas Electronics Corp. */ #include @@ -1040,9 +1040,8 @@ <&cpg CPG_CORE R8A77990_CLK_S3D1C>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; - dmas = <&dmac1 0x5b>, <&dmac1 0x5a>, - <&dmac2 0x5b>, <&dmac2 0x5a>; - dma-names = "tx", "rx", "tx", "rx"; + dmas = <&dmac0 0x5b>, <&dmac0 0x5a>; + dma-names = "tx", "rx"; power-domains = <&sysc R8A77990_PD_ALWAYS_ON>; resets = <&cpg 202>; status = "disabled"; -- cgit v1.2.3 From 87240adabb460a1ddf205a0cd6e0f3fc731b4654 Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Mon, 4 Mar 2019 18:48:37 -0300 Subject: ARM: dts: bcm283x: Fix hdmi hpd gpio pull [ Upstream commit 544e784188f1dd7c797c70b213385e67d92005b6 ] Raspberry pi board model B revison 2 have the hot plug detector gpio active high (and not low as it was in the dts). Signed-off-by: Helen Koike Fixes: 49ac67e0c39c ("ARM: bcm2835: Add VC4 to the device tree.") Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts b/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts index 5641d162dfdb..28e7513ce617 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts @@ -93,7 +93,7 @@ }; &hdmi { - hpd-gpios = <&gpio 46 GPIO_ACTIVE_LOW>; + hpd-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>; }; &pwm { -- cgit v1.2.3 From 1d568d7476dac6cf2e312b33b5e68aba2402184c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 4 Mar 2019 12:33:28 +0100 Subject: s390: limit brk randomization to 32MB [ Upstream commit cd479eccd2e057116d504852814402a1e68ead80 ] For a 64-bit process the randomization of the program break is quite large with 1GB. That is as big as the randomization of the anonymous mapping base, for a test case started with '/lib/ld64.so.1 ' it can happen that the heap is placed after the stack. To avoid this limit the program break randomization to 32MB for 64-bit and keep 8MB for 31-bit. Reported-by: Stefan Liebler Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin (Microsoft) --- arch/s390/include/asm/elf.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 7d22a474a040..f74639a05f0f 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -252,11 +252,14 @@ do { \ /* * Cache aliasing on the latest machines calls for a mapping granularity - * of 512KB. For 64-bit processes use a 512KB alignment and a randomization - * of up to 1GB. For 31-bit processes the virtual address space is limited, - * use no alignment and limit the randomization to 8MB. + * of 512KB for the anonymous mapping base. For 64-bit processes use a + * 512KB alignment and a randomization of up to 1GB. For 31-bit processes + * the virtual address space is limited, use no alignment and limit the + * randomization to 8MB. + * For the additional randomization of the program break use 32MB for + * 64-bit and 8MB for 31-bit. */ -#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ffffUL) +#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x1fffUL) #define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL) #define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL) #define STACK_RND_MASK MMAP_RND_MASK -- cgit v1.2.3 From a495f4c9afbf4dc5da8955ff26035eaeee510e2d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 28 Feb 2019 16:11:06 +0100 Subject: mt76x02: fix hdr pointer in write txwi for USB [ Upstream commit 7b25d3b8e485c7721cba9c71b44d1c286e61c8e7 ] Since we add txwi at the begining of skb->data, it no longer point to ieee80211_hdr. This breaks settings TS bit for probe response and beacons. Acked-by: Lorenzo Bianconi Signed-off-by: Stanislaw Gruszka Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c index 81970cf777c0..8cafa5a749ca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c @@ -81,8 +81,9 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data, mt76x02_insert_hdr_pad(skb); - txwi = skb_push(skb, sizeof(struct mt76x02_txwi)); + txwi = (struct mt76x02_txwi *)(skb->data - sizeof(struct mt76x02_txwi)); mt76x02_mac_write_txwi(dev, txwi, skb, wcid, sta, len); + skb_push(skb, sizeof(struct mt76x02_txwi)); pid = mt76_tx_status_skb_add(mdev, wcid, skb); txwi->pktid = pid; -- cgit v1.2.3 From 63019ec07c615ebb1dc06f4a4bac758a180a0939 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 15:10:00 +0100 Subject: mt76: mt76x2: fix external LNA gain settings [ Upstream commit 45a042e3026824a7e910db7a4dd38fef0540b902 ] Devices with external LNA need different values for AGC registers 8 and 9 Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/wireless/mediatek/mt76/mt76x2/phy.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c index c9634a774705..11167b7af668 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c @@ -260,10 +260,15 @@ mt76x2_phy_set_gain_val(struct mt76x02_dev *dev) gain_val[0] = dev->cal.agc_gain_cur[0] - dev->cal.agc_gain_adjust; gain_val[1] = dev->cal.agc_gain_cur[1] - dev->cal.agc_gain_adjust; - if (dev->mt76.chandef.width >= NL80211_CHAN_WIDTH_40) + val = 0x1836 << 16; + if (!mt76x2_has_ext_lna(dev) && + dev->mt76.chandef.width >= NL80211_CHAN_WIDTH_40) val = 0x1e42 << 16; - else - val = 0x1836 << 16; + + if (mt76x2_has_ext_lna(dev) && + dev->mt76.chandef.chan->band == NL80211_BAND_2GHZ && + dev->mt76.chandef.width < NL80211_CHAN_WIDTH_40) + val = 0x0f36 << 16; val |= 0xf8; -- cgit v1.2.3 From edb6168384083131af64ba77813c3d56cd83c082 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 15:12:14 +0100 Subject: mt76: mt76x2: fix 2.4 GHz channel gain settings [ Upstream commit b8cfd87ac24273e36fbd3ecda631f3ba6566d493 ] AGC register 35, 37 override for the low gain setting should only be done on 5 GHz. Also, 2.4 GHz needs a different value for register 35 Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/wireless/mediatek/mt76/mt76x2/phy.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c index 11167b7af668..2f618536ef2a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c @@ -285,6 +285,7 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) { u8 *gain = dev->cal.agc_gain_init; u8 low_gain_delta, gain_delta; + u32 agc_35, agc_37; bool gain_change; int low_gain; u32 val; @@ -321,6 +322,16 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) else low_gain_delta = 14; + agc_37 = 0x2121262c; + if (dev->mt76.chandef.chan->band == NL80211_BAND_2GHZ) + agc_35 = 0x11111516; + else if (low_gain == 2) + agc_35 = agc_37 = 0x08080808; + else if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80) + agc_35 = 0x10101014; + else + agc_35 = 0x11111116; + if (low_gain == 2) { mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a990); mt76_wr(dev, MT_BBP(AGC, 35), 0x08080808); @@ -329,15 +340,13 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) dev->cal.agc_gain_adjust = 0; } else { mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a991); - if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80) - mt76_wr(dev, MT_BBP(AGC, 35), 0x10101014); - else - mt76_wr(dev, MT_BBP(AGC, 35), 0x11111116); - mt76_wr(dev, MT_BBP(AGC, 37), 0x2121262C); gain_delta = 0; dev->cal.agc_gain_adjust = low_gain_delta; } + mt76_wr(dev, MT_BBP(AGC, 35), agc_35); + mt76_wr(dev, MT_BBP(AGC, 37), agc_37); + dev->cal.agc_gain_cur[0] = gain[0] - gain_delta; dev->cal.agc_gain_cur[1] = gain[1] - gain_delta; mt76x2_phy_set_gain_val(dev); -- cgit v1.2.3 From 645dc42f97d85f3940275f13831c14c4f93da299 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 11 Mar 2019 02:25:17 -0500 Subject: net: ieee802154: fix a potential NULL pointer dereference [ Upstream commit 2795e8c251614ac0784c9d41008551109f665716 ] In case alloc_ordered_workqueue fails, the fix releases sources and returns -ENOMEM to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Acked-by: Michael Hennerich Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ieee802154/adf7242.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index cd1d8faccca5..cd6b95e673a5 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1268,6 +1268,10 @@ static int adf7242_probe(struct spi_device *spi) INIT_DELAYED_WORK(&lp->work, adf7242_rx_cal_work); lp->wqueue = alloc_ordered_workqueue(dev_name(&spi->dev), WQ_MEM_RECLAIM); + if (unlikely(!lp->wqueue)) { + ret = -ENOMEM; + goto err_hw_init; + } ret = adf7242_hw_init(lp); if (ret) -- cgit v1.2.3 From 61a9e0f4cc865289335d46ac5a46a601c5b4f098 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Feb 2019 13:10:29 +0800 Subject: ieee802154: hwsim: propagate genlmsg_reply return code [ Upstream commit 19b39a25388e71390e059906c979f87be4ef0c71 ] genlmsg_reply can fail, so propagate its return code Signed-off-by: Li RongQing Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ieee802154/mac802154_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index b6743f03dce0..3b88846de31b 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -324,7 +324,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) goto out_err; } - genlmsg_reply(skb, info); + res = genlmsg_reply(skb, info); break; } -- cgit v1.2.3 From 099a26557db93b71751d9e380c5e58de83ac2fd5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Feb 2019 13:42:30 +0000 Subject: Btrfs: fix file corruption after snapshotting due to mix of buffered/DIO writes [ Upstream commit 609e804d771f59dc5d45a93e5ee0053c74bbe2bf ] When we are mixing buffered writes with direct IO writes against the same file and snapshotting is happening concurrently, we can end up with a corrupt file content in the snapshot. Example: 1) Inode/file is empty. 2) Snapshotting starts. 2) Buffered write at offset 0 length 256Kb. This updates the i_size of the inode to 256Kb, disk_i_size remains zero. This happens after the task doing the snapshot flushes all existing delalloc. 3) DIO write at offset 256Kb length 768Kb. Once the ordered extent completes it sets the inode's disk_i_size to 1Mb (256Kb + 768Kb) and updates the inode item in the fs tree with a size of 1Mb (which is the value of disk_i_size). 4) The dealloc for the range [0, 256Kb[ did not start yet. 5) The transaction used in the DIO ordered extent completion, which updated the inode item, is committed by the snapshotting task. 6) Snapshot creation completes. 7) Dealloc for the range [0, 256Kb[ is flushed. After that when reading the file from the snapshot we always get zeroes for the range [0, 256Kb[, the file has a size of 1Mb and the data written by the direct IO write is found. From an application's point of view this is a corruption, since in the source subvolume it could never read a version of the file that included the data from the direct IO write without the data from the buffered write included as well. In the snapshot's tree, file extent items are missing for the range [0, 256Kb[. The issue, obviously, does not happen when using the -o flushoncommit mount option. Fix this by flushing delalloc for all the roots that are about to be snapshotted when committing a transaction. This guarantees total ordering when updating the disk_i_size of an inode since the flush for dealloc is done when a transaction is in the TRANS_STATE_COMMIT_START state and wait is done once no more external writers exist. This is similar to what we do when using the flushoncommit mount option, but we do it only if the transaction has snapshots to create and only for the roots of the subvolumes to be snapshotted. The bulk of the dealloc is flushed in the snapshot creation ioctl, so the flush work we do inside the transaction is minimized. This issue, involving buffered and direct IO writes with snapshotting, is often triggered by fstest btrfs/078, and got reported by fsck when not using the NO_HOLES features, for example: $ cat results/btrfs/078.full (...) _check_btrfs_filesystem: filesystem on /dev/sdc is inconsistent *** fsck.btrfs output *** [1/7] checking root items [2/7] checking extents [3/7] checking free space cache [4/7] checking fs roots root 258 inode 264 errors 100, file extent discount Found file extent holes: start: 524288, len: 65536 ERROR: errors found in fs roots Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin (Microsoft) --- fs/btrfs/transaction.c | 49 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4ec2b660d014..7f3ece91a4d0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) } } -static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) +static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; + /* * We use writeback_inodes_sb here because if we used * btrfs_start_delalloc_roots we would deadlock with fs freeze. @@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) * from already being in a transaction and our join_transaction doesn't * have to re-take the fs freeze lock. */ - if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) + if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); + } else { + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + + /* + * Flush dellaloc for any root that is going to be snapshotted. + * This is done to avoid a corrupted version of files, in the + * snapshots, that had both buffered and direct IO writes (even + * if they were done sequentially) due to an unordered update of + * the inode's size on disk. + */ + list_for_each_entry(pending, head, list) { + int ret; + + ret = btrfs_start_delalloc_snapshot(pending->root); + if (ret) + return ret; + } + } return 0; } -static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) +static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans) { - if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) + struct btrfs_fs_info *fs_info = trans->fs_info; + + if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); + } else { + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + + /* + * Wait for any dellaloc that we started previously for the roots + * that are going to be snapshotted. This is to avoid a corrupted + * version of files in the snapshots that had both buffered and + * direct IO writes (even if they were done sequentially). + */ + list_for_each_entry(pending, head, list) + btrfs_wait_ordered_extents(pending->root, + U64_MAX, 0, U64_MAX); + } } int btrfs_commit_transaction(struct btrfs_trans_handle *trans) @@ -2024,7 +2061,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) extwriter_counter_dec(cur_trans, trans->type); - ret = btrfs_start_delalloc_flush(fs_info); + ret = btrfs_start_delalloc_flush(trans); if (ret) goto cleanup_transaction; @@ -2040,7 +2077,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto cleanup_transaction; - btrfs_wait_delalloc_flush(fs_info); + btrfs_wait_delalloc_flush(trans); btrfs_scrub_pause(fs_info); /* -- cgit v1.2.3 From 53485ee41fc31bab225d9bf8fe6b8301929825f9 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 14 Mar 2019 21:43:19 +0200 Subject: net: stmmac: don't set own bit too early for jumbo frames [ Upstream commit 80acbed9f8fca1db3fbe915540b756f048aa0fd7 ] Commit 0e80bdc9a72d ("stmmac: first frame prep at the end of xmit routine") overlooked jumbo frames when re-ordering the code, and as a result the own bit was not getting set anymore for the first jumbo frame descriptor. Commit 487e2e22ab79 ("net: stmmac: Set OWN bit for jumbo frames") tried to fix this, but now the bit is getting set too early and the DMA may start while we are still setting up the remaining descriptors. And with the chain mode the own bit remains still unset. Fix by setting the own bit at the end of xmit also with jumbo frames. Fixes: 0e80bdc9a72d ("stmmac: first frame prep at the end of xmit routine") Fixes: 487e2e22ab79 ("net: stmmac: Set OWN bit for jumbo frames") Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index c0c75c111abb..afed0f0f4027 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -59,7 +59,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, - STMMAC_RING_MODE, 1, false, skb->len); + STMMAC_RING_MODE, 0, false, skb->len); tx_q->tx_skbuff[entry] = NULL; entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); @@ -91,7 +91,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum, - STMMAC_RING_MODE, 1, true, skb->len); + STMMAC_RING_MODE, 0, true, skb->len); } tx_q->cur_tx = entry; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1d8d6f2ddfd6..0bc3632880b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3190,14 +3190,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_prepare_tx_desc(priv, first, 1, nopaged_len, csum_insertion, priv->mode, 1, last_segment, skb->len); - - /* The own bit must be the latest setting done when prepare the - * descriptor and then barrier is needed to make sure that - * all is coherent before granting the DMA engine. - */ - wmb(); + } else { + stmmac_set_tx_owner(priv, first); } + /* The own bit must be the latest setting done when prepare the + * descriptor and then barrier is needed to make sure that + * all is coherent before granting the DMA engine. + */ + wmb(); + netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); stmmac_enable_dma_transmission(priv, priv->ioaddr); -- cgit v1.2.3 From 2e75be8dd6faf8cf21ec1006df732e9e8a947df1 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 14 Mar 2019 21:43:20 +0200 Subject: net: stmmac: fix jumbo frame sending with non-linear skbs [ Upstream commit 58f2ce6f61615dfd8dd3cc01c9e5bb54ed35637e ] When sending non-linear skbs with jumbo frames, we set up the non-paged data and mark that as a last segment, although the paged fragments are also prepared. This will stall the TX queue and trigger a watchdog warning (a simple reproducer is to run an iperf client mode TCP test with a large MTU - networking fails instantly). Fix by checking if the skb is non-linear. Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index afed0f0f4027..4d9bcb4d0378 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -79,7 +79,8 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 0, len, csum, - STMMAC_RING_MODE, 1, true, skb->len); + STMMAC_RING_MODE, 1, !skb_is_nonlinear(skb), + skb->len); } else { des2 = dma_map_single(priv->device, skb->data, nopaged_len, DMA_TO_DEVICE); @@ -91,7 +92,8 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum, - STMMAC_RING_MODE, 0, true, skb->len); + STMMAC_RING_MODE, 0, !skb_is_nonlinear(skb), + skb->len); } tx_q->cur_tx = entry; -- cgit v1.2.3 From 9fc5c6e8fce8d322c97d479579e06da0bbb7e806 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Thu, 14 Mar 2019 15:31:40 -0500 Subject: qlcnic: Avoid potential NULL pointer dereference [ Upstream commit 5bf7295fe34a5251b1d241b9736af4697b590670 ] netdev_alloc_skb can fail and return a NULL pointer which is dereferenced without a check. The patch avoids such a scenario. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 3b0adda7cc9c..a4cd6f2cfb86 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1048,6 +1048,8 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode) for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) { skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE); + if (!skb) + break; qlcnic_create_loopback_buff(skb->data, adapter->mac_addr); skb_put(skb, QLCNIC_ILB_PKT_SIZE); adapter->ahw->diag_cnt = 0; -- cgit v1.2.3 From 5227fe96b107d7cbf91d2d8f5093b0a007bb2e7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 13 Mar 2019 15:15:49 +0100 Subject: xsk: fix umem memory leak on cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 044175a06706d516aa42874bb44dbbfc3c4d20eb ] When the umem is cleaned up, the task that created it might already be gone. If the task was gone, the xdp_umem_release function did not free the pages member of struct xdp_umem. It turned out that the task lookup was not needed at all; The code was a left-over when we moved from task accounting to user accounting [1]. This patch fixes the memory leak by removing the task lookup logic completely. [1] https://lore.kernel.org/netdev/20180131135356.19134-3-bjorn.topel@gmail.com/ Link: https://lore.kernel.org/netdev/c1cb2ca8-6a14-3980-8672-f3de0bb38dfd@suse.cz/ Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt") Reported-by: Jiri Slaby Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin (Microsoft) --- include/net/xdp_sock.h | 1 - net/xdp/xdp_umem.c | 19 +------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 13acb9803a6d..05d39e579953 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -36,7 +36,6 @@ struct xdp_umem { u32 headroom; u32 chunk_size_nohr; struct user_struct *user; - struct pid *pid; unsigned long address; refcount_t users; struct work_struct work; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 37e1fe180769..9c767c68ed3a 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -189,9 +189,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem) static void xdp_umem_release(struct xdp_umem *umem) { - struct task_struct *task; - struct mm_struct *mm; - xdp_umem_clear_dev(umem); if (umem->fq) { @@ -208,21 +205,10 @@ static void xdp_umem_release(struct xdp_umem *umem) xdp_umem_unpin_pages(umem); - task = get_pid_task(umem->pid, PIDTYPE_PID); - put_pid(umem->pid); - if (!task) - goto out; - mm = get_task_mm(task); - put_task_struct(task); - if (!mm) - goto out; - - mmput(mm); kfree(umem->pages); umem->pages = NULL; xdp_umem_unaccount_pages(umem); -out: kfree(umem); } @@ -351,7 +337,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (size_chk < 0) return -EINVAL; - umem->pid = get_task_pid(current, PIDTYPE_PID); umem->address = (unsigned long)addr; umem->chunk_mask = ~((u64)chunk_size - 1); umem->size = size; @@ -367,7 +352,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) err = xdp_umem_account_pages(umem); if (err) - goto out; + return err; err = xdp_umem_pin_pages(umem); if (err) @@ -386,8 +371,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) out_account: xdp_umem_unaccount_pages(umem); -out: - put_pid(umem->pid); return err; } -- cgit v1.2.3 From 6c41d389904c62758ade7d09a47f18c0125b9f3c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 20:43:00 +0100 Subject: staging: axis-fifo: add CONFIG_OF dependency [ Upstream commit 1beea6204e2304dd11600791d8dad8e7350af6ad ] When building without CONFIG_OF, the compiler loses track of the flow control in axis_fifo_probe(), and thinks that many variables are used without an initialization even though we actually leave the function before the first use: drivers/staging/axis-fifo/axis-fifo.c: In function 'axis_fifo_probe': drivers/staging/axis-fifo/axis-fifo.c:900:5: error: 'rxd_tdata_width' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (rxd_tdata_width != 32) { ^ drivers/staging/axis-fifo/axis-fifo.c:907:5: error: 'txd_tdata_width' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (txd_tdata_width != 32) { ^ drivers/staging/axis-fifo/axis-fifo.c:914:5: error: 'has_tdest' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (has_tdest) { ^ drivers/staging/axis-fifo/axis-fifo.c:919:5: error: 'has_tid' may be used uninitialized in this function [-Werror=maybe-uninitialized] When CONFIG_OF is set, this does not happen, and since the driver cannot work without it, just add that option as a Kconfig dependency. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/staging/axis-fifo/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/axis-fifo/Kconfig b/drivers/staging/axis-fifo/Kconfig index 687537203d9c..d9725888af6f 100644 --- a/drivers/staging/axis-fifo/Kconfig +++ b/drivers/staging/axis-fifo/Kconfig @@ -3,6 +3,7 @@ # config XIL_AXIS_FIFO tristate "Xilinx AXI-Stream FIFO IP core driver" + depends on OF default n help This adds support for the Xilinx AXI-Stream -- cgit v1.2.3 From 7f0d096410affc37659366b16663e12debd00c7c Mon Sep 17 00:00:00 2001 From: Maxim Zhukov Date: Sat, 9 Mar 2019 12:54:00 +0300 Subject: staging, mt7621-pci: fix build without pci support [ Upstream commit 90cd9bed5adb3e3bd4d3ac4cbcecbc4a8028bbaf ] Add depends on PCI for PCI_MT7621 Signed-off-by: Maxim Zhukov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/staging/mt7621-pci/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/mt7621-pci/Kconfig b/drivers/staging/mt7621-pci/Kconfig index d33533872a16..c8fa17cfa807 100644 --- a/drivers/staging/mt7621-pci/Kconfig +++ b/drivers/staging/mt7621-pci/Kconfig @@ -1,6 +1,7 @@ config PCI_MT7621 tristate "MediaTek MT7621 PCI Controller" depends on RALINK + depends on PCI select PCI_DRIVERS_GENERIC help This selects a driver for the MediaTek MT7621 PCI Controller. -- cgit v1.2.3 From 0f9c88c5ccc9fb7421d43edcd11622565e8fcdfc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 12 Mar 2019 12:10:59 +0100 Subject: netfilter: nft_set_rbtree: check for inactive element after flag mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 05b7639da55f5555b9866a1f4b7e8995232a6323 ] Otherwise, we hit bogus ENOENT when removing elements. Fixes: e701001e7cbe ("netfilter: nft_rbtree: allow adjacent intervals with dynamic updates") Reported-by: Václav Zindulka Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (Microsoft) --- net/netfilter/nft_set_rbtree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fa61208371f8..321a0036fdf5 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -308,10 +308,6 @@ static void *nft_rbtree_deactivate(const struct net *net, else if (d > 0) parent = parent->rb_right; else { - if (!nft_set_elem_active(&rbe->ext, genmask)) { - parent = parent->rb_left; - continue; - } if (nft_rbtree_interval_end(rbe) && !nft_rbtree_interval_end(this)) { parent = parent->rb_left; @@ -320,6 +316,9 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; + } else if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; } nft_rbtree_flush(net, set, rbe); return rbe; -- cgit v1.2.3 From eb213c54aaacbad41496302d805c9522a85c2d61 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 13 Mar 2019 16:33:29 +0800 Subject: netfilter: bridge: set skb transport_header before entering NF_INET_PRE_ROUTING [ Upstream commit e166e4fdaced850bee3d5ee12a5740258fb30587 ] Since Commit 21d1196a35f5 ("ipv4: set transport header earlier"), skb->transport_header has been always set before entering INET netfilter. This patch is to set skb->transport_header for bridge before entering INET netfilter by bridge-nf-call-iptables. It also fixes an issue that sctp_error() couldn't compute a right csum due to unset skb->transport_header. Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code") Reported-by: Li Shuang Suggested-by: Pablo Neira Ayuso Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (Microsoft) --- net/bridge/br_netfilter_hooks.c | 1 + net/bridge/br_netfilter_ipv6.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 40d058378b52..fc605758323b 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -502,6 +502,7 @@ static unsigned int br_nf_pre_routing(void *priv, nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IP); + skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 564710f88f93..e88d6641647b 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -235,6 +235,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv, nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IPV6); + skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); -- cgit v1.2.3 From 21fde4fcbbd2f64d9805abb3ecbe047ec47f3018 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2019 22:15:59 +0100 Subject: netfilter: fix NETFILTER_XT_TARGET_TEE dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d1fa381033eb718df5c602f64b6e88676138dfc6 ] With NETFILTER_XT_TARGET_TEE=y and IP6_NF_IPTABLES=m, we get a link error when referencing the NF_DUP_IPV6 module: net/netfilter/xt_TEE.o: In function `tee_tg6': xt_TEE.c:(.text+0x14): undefined reference to `nf_dup_ipv6' The problem here is the 'select NF_DUP_IPV6 if IP6_NF_IPTABLES' that forces NF_DUP_IPV6 to be =m as well rather than setting it to =y as was intended here. Adding a soft dependency on IP6_NF_IPTABLES avoids that broken configuration. Fixes: 5d400a4933e8 ("netfilter: Kconfig: Change select IPv6 dependencies") Cc: Máté Eckl Cc: Taehee Yoo Link: https://patchwork.ozlabs.org/patch/999498/ Link: https://lore.kernel.org/patchwork/patch/960062/ Reported-by: Randy Dunlap Reported-by: Stephen Rothwell Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (Microsoft) --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index beb3a69ce1d4..0f0e5806bf77 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -995,6 +995,7 @@ config NETFILTER_XT_TARGET_TEE depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK + depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES select NF_DUP_IPV4 select NF_DUP_IPV6 if IP6_NF_IPTABLES ---help--- -- cgit v1.2.3 From f0028f723428b623dd990d34f711acb84e0d9e51 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 02:58:18 -0500 Subject: netfilter: ip6t_srh: fix NULL pointer dereferences [ Upstream commit 6d65561f3d5ec933151939c543d006b79044e7a6 ] skb_header_pointer may return NULL. The current code dereference its return values without a NULL check. The fix inserts the checks to avoid NULL pointer dereferences. Fixes: 202a8ff545cc ("netfilter: add IPv6 segment routing header 'srh' match") Signed-off-by: Kangjie Lu Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin (Microsoft) --- net/ipv6/netfilter/ip6t_srh.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c index 1059894a6f4c..4cb83fb69844 100644 --- a/net/ipv6/netfilter/ip6t_srh.c +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -210,6 +210,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) psidoff = srhoff + sizeof(struct ipv6_sr_hdr) + ((srh->segments_left + 1) * sizeof(struct in6_addr)); psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid); + if (!psid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID, ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk, &srhinfo->psid_addr))) @@ -223,6 +225,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) + ((srh->segments_left - 1) * sizeof(struct in6_addr)); nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid); + if (!nsid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID, ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk, &srhinfo->nsid_addr))) @@ -233,6 +237,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (srhinfo->mt_flags & IP6T_SRH_LSID) { lsidoff = srhoff + sizeof(struct ipv6_sr_hdr); lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid); + if (!lsid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID, ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk, &srhinfo->lsid_addr))) -- cgit v1.2.3 From 42ed22a8b969cbea8c66f829a0deb991551f3d73 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Mar 2019 16:40:55 +0100 Subject: s390/qeth: fix race when initializing the IP address table [ Upstream commit 7221b727f0079a32aca91f657141e1de564d4b97 ] The ucast IP table is utilized by some of the L3-specific sysfs attributes that qeth_l3_create_device_attributes() provides. So initialize the table _before_ registering the attributes. Fixes: ebccc7397e4a ("s390/qeth: add missing hash table initializations") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/s390/net/qeth_l3_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index df34bff4ac31..f73ce96e9603 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2316,12 +2316,14 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; + hash_init(card->ip_htable); + if (gdev->dev.type == &qeth_generic_devtype) { rc = qeth_l3_create_device_attributes(&gdev->dev); if (rc) return rc; } - hash_init(card->ip_htable); + hash_init(card->ip_mc_htable); card->info.hwtrap = 0; return 0; -- cgit v1.2.3 From 0bb1f79a14273a388f5a667cfa649d2eb6efdaab Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 1 Mar 2019 16:56:46 +0800 Subject: ARM: imx51: fix a leaked reference by adding missing of_node_put [ Upstream commit 0c17e83fe423467e3ccf0a02f99bd050a73bbeb4 ] The call to of_get_next_child returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./arch/arm/mach-imx/mach-imx51.c:64:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 57, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Russell King Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: Lucas Stach Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/mach-imx/mach-imx51.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-imx/mach-imx51.c b/arch/arm/mach-imx/mach-imx51.c index c7169c2f94c4..08c7892866c2 100644 --- a/arch/arm/mach-imx/mach-imx51.c +++ b/arch/arm/mach-imx/mach-imx51.c @@ -59,6 +59,7 @@ static void __init imx51_m4if_setup(void) return; m4if_base = of_iomap(np, 0); + of_node_put(np); if (!m4if_base) { pr_err("Unable to map M4IF registers\n"); return; -- cgit v1.2.3 From d25b0c89b3b9ab045154497ca344cea5713ced10 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Fri, 8 Mar 2019 22:08:31 +0800 Subject: sc16is7xx: missing unregister/delete driver on error in sc16is7xx_init() [ Upstream commit ac0cdb3d990108df795b676cd0d0e65ac34b2273 ] Add the missing uart_unregister_driver() and i2c_del_driver() before return from sc16is7xx_init() in the error handling case. Signed-off-by: Mao Wenan Reviewed-by: Vladimir Zapolskiy Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/tty/serial/sc16is7xx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 268098681856..114e94f476c6 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1509,7 +1509,7 @@ static int __init sc16is7xx_init(void) ret = i2c_add_driver(&sc16is7xx_i2c_uart_driver); if (ret < 0) { pr_err("failed to init sc16is7xx i2c --> %d\n", ret); - return ret; + goto err_i2c; } #endif @@ -1517,10 +1517,18 @@ static int __init sc16is7xx_init(void) ret = spi_register_driver(&sc16is7xx_spi_uart_driver); if (ret < 0) { pr_err("failed to init sc16is7xx spi --> %d\n", ret); - return ret; + goto err_spi; } #endif return ret; + +err_spi: +#ifdef CONFIG_SERIAL_SC16IS7XX_I2C + i2c_del_driver(&sc16is7xx_i2c_uart_driver); +#endif +err_i2c: + uart_unregister_driver(&sc16is7xx_uart); + return ret; } module_init(sc16is7xx_init); -- cgit v1.2.3 From 4343cbf52e36d45631efdfb3867475f07cea4e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Wed, 6 Mar 2019 17:54:03 +0100 Subject: serial: ar933x_uart: Fix build failure with disabled console MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 72ff51d8dd262d1fef25baedc2ac35116435be47 ] Andrey has reported on OpenWrt's bug tracking system[1], that he currently can't use ar93xx_uart as pure serial UART without console (CONFIG_SERIAL_8250_CONSOLE and CONFIG_SERIAL_AR933X_CONSOLE undefined), because compilation ends with following error: ar933x_uart.c: In function 'ar933x_uart_console_write': ar933x_uart.c:550:14: error: 'struct uart_port' has no member named 'sysrq' So this patch moves all the code related to console handling behind series of CONFIG_SERIAL_AR933X_CONSOLE ifdefs. 1. https://bugs.openwrt.org/index.php?do=details&task_id=2152 Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Andrey Batyiev Reported-by: Andrey Batyiev Tested-by: Andrey Batyiev Signed-off-by: Petr Štetiar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/tty/serial/ar933x_uart.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index db5df3d54818..3bdd56a1021b 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -49,11 +49,6 @@ struct ar933x_uart_port { struct clk *clk; }; -static inline bool ar933x_uart_console_enabled(void) -{ - return IS_ENABLED(CONFIG_SERIAL_AR933X_CONSOLE); -} - static inline unsigned int ar933x_uart_read(struct ar933x_uart_port *up, int offset) { @@ -508,6 +503,7 @@ static const struct uart_ops ar933x_uart_ops = { .verify_port = ar933x_uart_verify_port, }; +#ifdef CONFIG_SERIAL_AR933X_CONSOLE static struct ar933x_uart_port * ar933x_console_ports[CONFIG_SERIAL_AR933X_NR_UARTS]; @@ -604,14 +600,7 @@ static struct console ar933x_uart_console = { .index = -1, .data = &ar933x_uart_driver, }; - -static void ar933x_uart_add_console_port(struct ar933x_uart_port *up) -{ - if (!ar933x_uart_console_enabled()) - return; - - ar933x_console_ports[up->port.line] = up; -} +#endif /* CONFIG_SERIAL_AR933X_CONSOLE */ static struct uart_driver ar933x_uart_driver = { .owner = THIS_MODULE, @@ -700,7 +689,9 @@ static int ar933x_uart_probe(struct platform_device *pdev) baud = ar933x_uart_get_baud(port->uartclk, 0, AR933X_UART_MAX_STEP); up->max_baud = min_t(unsigned int, baud, AR933X_UART_MAX_BAUD); - ar933x_uart_add_console_port(up); +#ifdef CONFIG_SERIAL_AR933X_CONSOLE + ar933x_console_ports[up->port.line] = up; +#endif ret = uart_add_one_port(&ar933x_uart_driver, &up->port); if (ret) @@ -749,8 +740,9 @@ static int __init ar933x_uart_init(void) { int ret; - if (ar933x_uart_console_enabled()) - ar933x_uart_driver.cons = &ar933x_uart_console; +#ifdef CONFIG_SERIAL_AR933X_CONSOLE + ar933x_uart_driver.cons = &ar933x_uart_console; +#endif ret = uart_register_driver(&ar933x_uart_driver); if (ret) -- cgit v1.2.3 From e38694c6b9a8e5a29d8b044acdb53b3e18cc04f1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Mar 2019 17:37:44 +0000 Subject: KVM: arm64: Reset the PMU in preemptible context [ Upstream commit ebff0b0e3d3c862c16c487959db5e0d879632559 ] We've become very cautious to now always reset the vcpu when nothing is loaded on the physical CPU. To do so, we now disable preemption and do a kvm_arch_vcpu_put() to make sure we have all the state in memory (and that it won't be loaded behind out back). This now causes issues with resetting the PMU, which calls into perf. Perf itself uses mutexes, which clashes with the lack of preemption. It is worth realizing that the PMU is fully emulated, and that no PMU state is ever loaded on the physical CPU. This means we can perfectly reset the PMU outside of the non-preemptible section. Fixes: e761a927bc9a ("KVM: arm/arm64: Reset the VCPU without preemption and vcpu state loaded") Reported-by: Julien Grall Tested-by: Julien Grall Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin (Microsoft) --- arch/arm64/kvm/reset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f16a5f8ff2b4..e2a0500cd7a2 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -123,6 +123,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) int ret = -EINVAL; bool loaded; + /* Reset PMU outside of the non-preemptible section */ + kvm_pmu_vcpu_reset(vcpu); + preempt_disable(); loaded = (vcpu->cpu != -1); if (loaded) @@ -170,9 +173,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.reset_state.reset = false; } - /* Reset PMU */ - kvm_pmu_vcpu_reset(vcpu); - /* Default workaround setup is enabled (if supported) */ if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL) vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; -- cgit v1.2.3 From d6d23d3356721c3949208f9ac35535f104b6e801 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Mar 2019 18:07:50 +0000 Subject: arm64: KVM: Always set ICH_HCR_EL2.EN if GICv4 is enabled [ Upstream commit ca71228b42a96908eca7658861eafacd227856c9 ] The normal interrupt flow is not to enable the vgic when no virtual interrupt is to be injected (i.e. the LRs are empty). But when a guest is likely to use GICv4 for LPIs, we absolutely need to switch it on at all times. Otherwise, VLPIs only get delivered when there is something in the LRs, which doesn't happen very often. Reported-by: Nianyao Tang Tested-by: Shameerali Kolothum Thodi Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin (Microsoft) --- virt/kvm/arm/hyp/vgic-v3-sr.c | 4 ++-- virt/kvm/arm/vgic/vgic.c | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 9652c453480f..3c3f7cda95c7 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -222,7 +222,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) } } - if (used_lrs) { + if (used_lrs || cpu_if->its_vpe.its_vm) { int i; u32 elrsr; @@ -247,7 +247,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; int i; - if (used_lrs) { + if (used_lrs || cpu_if->its_vpe.its_vm) { write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); for (i = 0; i < used_lrs; i++) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index abd9c7352677..3af69f2a3866 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -867,15 +867,21 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * either observe the new interrupt before or after doing this check, * and introducing additional synchronization mechanism doesn't change * this. + * + * Note that we still need to go through the whole thing if anything + * can be directly injected (GICv4). */ - if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && + !vgic_supports_direct_msis(vcpu->kvm)) return; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); - raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); - vgic_flush_lr_state(vcpu); - raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { + raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); + vgic_flush_lr_state(vcpu); + raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + } if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); -- cgit v1.2.3 From d2eecdbc6ecab9719113fc448d6df4eb387f1148 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Mar 2019 12:47:11 +0000 Subject: KVM: arm/arm64: vgic-its: Take the srcu lock when writing to guest memory [ Upstream commit a6ecfb11bf37743c1ac49b266595582b107b61d4 ] When halting a guest, QEMU flushes the virtual ITS caches, which amounts to writing to the various tables that the guest has allocated. When doing this, we fail to take the srcu lock, and the kernel shouts loudly if running a lockdep kernel: [ 69.680416] ============================= [ 69.680819] WARNING: suspicious RCU usage [ 69.681526] 5.1.0-rc1-00008-g600025238f51-dirty #18 Not tainted [ 69.682096] ----------------------------- [ 69.682501] ./include/linux/kvm_host.h:605 suspicious rcu_dereference_check() usage! [ 69.683225] [ 69.683225] other info that might help us debug this: [ 69.683225] [ 69.683975] [ 69.683975] rcu_scheduler_active = 2, debug_locks = 1 [ 69.684598] 6 locks held by qemu-system-aar/4097: [ 69.685059] #0: 0000000034196013 (&kvm->lock){+.+.}, at: vgic_its_set_attr+0x244/0x3a0 [ 69.686087] #1: 00000000f2ed935e (&its->its_lock){+.+.}, at: vgic_its_set_attr+0x250/0x3a0 [ 69.686919] #2: 000000005e71ea54 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.687698] #3: 00000000c17e548d (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.688475] #4: 00000000ba386017 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.689978] #5: 00000000c2c3c335 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.690729] [ 69.690729] stack backtrace: [ 69.691151] CPU: 2 PID: 4097 Comm: qemu-system-aar Not tainted 5.1.0-rc1-00008-g600025238f51-dirty #18 [ 69.691984] Hardware name: rockchip evb_rk3399/evb_rk3399, BIOS 2019.04-rc3-00124-g2feec69fb1 03/15/2019 [ 69.692831] Call trace: [ 69.694072] lockdep_rcu_suspicious+0xcc/0x110 [ 69.694490] gfn_to_memslot+0x174/0x190 [ 69.694853] kvm_write_guest+0x50/0xb0 [ 69.695209] vgic_its_save_tables_v0+0x248/0x330 [ 69.695639] vgic_its_set_attr+0x298/0x3a0 [ 69.696024] kvm_device_ioctl_attr+0x9c/0xd8 [ 69.696424] kvm_device_ioctl+0x8c/0xf8 [ 69.696788] do_vfs_ioctl+0xc8/0x960 [ 69.697128] ksys_ioctl+0x8c/0xa0 [ 69.697445] __arm64_sys_ioctl+0x28/0x38 [ 69.697817] el0_svc_common+0xd8/0x138 [ 69.698173] el0_svc_handler+0x38/0x78 [ 69.698528] el0_svc+0x8/0xc The fix is to obviously take the srcu lock, just like we do on the read side of things since bf308242ab98. One wonders why this wasn't fixed at the same time, but hey... Fixes: bf308242ab98 ("KVM: arm/arm64: VGIC/ITS: protect kvm_read_guest() calls with SRCU lock") Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/include/asm/kvm_mmu.h | 11 +++++++++++ arch/arm64/include/asm/kvm_mmu.h | 11 +++++++++++ virt/kvm/arm/vgic/vgic-its.c | 8 ++++---- virt/kvm/arm/vgic/vgic-v3.c | 4 ++-- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 3a875fc1b63c..cee06509f00a 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -381,6 +381,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm, return ret; } +static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, + const void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_write_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + static inline void *kvm_get_hyp_vector(void) { switch(read_cpuid_part()) { diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8af4b1befa42..c246effd1b67 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -444,6 +444,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm, return ret; } +static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, + const void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_write_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + #ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index ab3f47745d9c..c41e11fd841c 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1919,7 +1919,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | ite->collection->collection_id; val = cpu_to_le64(val); - return kvm_write_guest(kvm, gpa, &val, ite_esz); + return kvm_write_guest_lock(kvm, gpa, &val, ite_esz); } /** @@ -2066,7 +2066,7 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | (dev->num_eventid_bits - 1)); val = cpu_to_le64(val); - return kvm_write_guest(kvm, ptr, &val, dte_esz); + return kvm_write_guest_lock(kvm, ptr, &val, dte_esz); } /** @@ -2246,7 +2246,7 @@ static int vgic_its_save_cte(struct vgic_its *its, ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | collection->collection_id); val = cpu_to_le64(val); - return kvm_write_guest(its->dev->kvm, gpa, &val, esz); + return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz); } static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) @@ -2317,7 +2317,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) */ val = 0; BUG_ON(cte_esz > sizeof(val)); - ret = kvm_write_guest(its->dev->kvm, gpa, &val, cte_esz); + ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz); return ret; } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 4ee0aeb9a905..89260964be73 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -358,7 +358,7 @@ retry: if (status) { /* clear consumed data */ val &= ~(1 << bit_nr); - ret = kvm_write_guest(kvm, ptr, &val, 1); + ret = kvm_write_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; } @@ -409,7 +409,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) else val &= ~(1 << bit_nr); - ret = kvm_write_guest(kvm, ptr, &val, 1); + ret = kvm_write_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; } -- cgit v1.2.3 From 7f482404c11eecd024dbe434272de9fe4acf7c4f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Mar 2019 12:56:23 +0000 Subject: KVM: arm/arm64: vgic-its: Take the srcu lock when parsing the memslots [ Upstream commit 7494cec6cb3ba7385a6a223b81906384f15aae34 ] Calling kvm_is_visible_gfn() implies that we're parsing the memslots, and doing this without the srcu lock is frown upon: [12704.164532] ============================= [12704.164544] WARNING: suspicious RCU usage [12704.164560] 5.1.0-rc1-00008-g600025238f51-dirty #16 Tainted: G W [12704.164573] ----------------------------- [12704.164589] ./include/linux/kvm_host.h:605 suspicious rcu_dereference_check() usage! [12704.164602] other info that might help us debug this: [12704.164616] rcu_scheduler_active = 2, debug_locks = 1 [12704.164631] 6 locks held by qemu-system-aar/13968: [12704.164644] #0: 000000007ebdae4f (&kvm->lock){+.+.}, at: vgic_its_set_attr+0x244/0x3a0 [12704.164691] #1: 000000007d751022 (&its->its_lock){+.+.}, at: vgic_its_set_attr+0x250/0x3a0 [12704.164726] #2: 00000000219d2706 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [12704.164761] #3: 00000000a760aecd (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [12704.164794] #4: 000000000ef8e31d (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [12704.164827] #5: 000000007a872093 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [12704.164861] stack backtrace: [12704.164878] CPU: 2 PID: 13968 Comm: qemu-system-aar Tainted: G W 5.1.0-rc1-00008-g600025238f51-dirty #16 [12704.164887] Hardware name: rockchip evb_rk3399/evb_rk3399, BIOS 2019.04-rc3-00124-g2feec69fb1 03/15/2019 [12704.164896] Call trace: [12704.164910] dump_backtrace+0x0/0x138 [12704.164920] show_stack+0x24/0x30 [12704.164934] dump_stack+0xbc/0x104 [12704.164946] lockdep_rcu_suspicious+0xcc/0x110 [12704.164958] gfn_to_memslot+0x174/0x190 [12704.164969] kvm_is_visible_gfn+0x28/0x70 [12704.164980] vgic_its_check_id.isra.0+0xec/0x1e8 [12704.164991] vgic_its_save_tables_v0+0x1ac/0x330 [12704.165001] vgic_its_set_attr+0x298/0x3a0 [12704.165012] kvm_device_ioctl_attr+0x9c/0xd8 [12704.165022] kvm_device_ioctl+0x8c/0xf8 [12704.165035] do_vfs_ioctl+0xc8/0x960 [12704.165045] ksys_ioctl+0x8c/0xa0 [12704.165055] __arm64_sys_ioctl+0x28/0x38 [12704.165067] el0_svc_common+0xd8/0x138 [12704.165078] el0_svc_handler+0x38/0x78 [12704.165089] el0_svc+0x8/0xc Make sure the lock is taken when doing this. Fixes: bf308242ab98 ("KVM: arm/arm64: VGIC/ITS: protect kvm_read_guest() calls with SRCU lock") Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin (Microsoft) --- virt/kvm/arm/vgic/vgic-its.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index c41e11fd841c..fcb2fceaa4a5 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -754,8 +754,9 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, u64 indirect_ptr, type = GITS_BASER_TYPE(baser); phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); int esz = GITS_BASER_ENTRY_SIZE(baser); - int index; + int index, idx; gfn_t gfn; + bool ret; switch (type) { case GITS_BASER_TYPE_DEVICE: @@ -782,7 +783,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (eaddr) *eaddr = addr; - return kvm_is_visible_gfn(its->dev->kvm, gfn); + + goto out; } /* calculate and check the index into the 1st level */ @@ -812,7 +814,12 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (eaddr) *eaddr = indirect_ptr; - return kvm_is_visible_gfn(its->dev->kvm, gfn); + +out: + idx = srcu_read_lock(&its->dev->kvm->srcu); + ret = kvm_is_visible_gfn(its->dev->kvm, gfn); + srcu_read_unlock(&its->dev->kvm->srcu, idx); + return ret; } static int vgic_its_alloc_collection(struct vgic_its *its, -- cgit v1.2.3 From e5a7f0905664cb60d1acc1081102374c298379df Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 31 Jan 2019 11:04:19 +0200 Subject: usb: dwc3: pci: add support for Comet Lake PCH ID [ Upstream commit 7ae622c978db6b2e28b4fced6ecd2a174492059d ] This patch simply adds a new PCI Device ID Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin (Microsoft) --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index fdc6e4e403e8..8cced3609e24 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -29,6 +29,7 @@ #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa #define PCI_DEVICE_ID_INTEL_APL 0x5aaa #define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 +#define PCI_DEVICE_ID_INTEL_CMLH 0x02ee #define PCI_DEVICE_ID_INTEL_GLK 0x31aa #define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e @@ -305,6 +306,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD), (kernel_ulong_t) &dwc3_pci_mrfld_properties, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLH), + (kernel_ulong_t) &dwc3_pci_intel_properties, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTLP), (kernel_ulong_t) &dwc3_pci_intel_properties, }, -- cgit v1.2.3 From ad7be5bfe9694eb9f71b260a4e409cecd4b97c77 Mon Sep 17 00:00:00 2001 From: Guido Kiener Date: Tue, 19 Mar 2019 19:12:03 +0100 Subject: usb: gadget: net2280: Fix overrun of OUT messages [ Upstream commit 9d6a54c1430647355a5e23434881b2ca3d192b48 ] The OUT endpoint normally blocks (NAK) subsequent packets when a short packet was received and returns an incomplete queue entry to the gadget driver. Thereby the gadget driver can detect a short packet when reading queue entries with a length that is not equal to a multiple of packet size. The start_queue() function enables receiving OUT packets regardless of the content of the OUT FIFO. This results in a race: With the current code, it's possible that the "!ep->is_in && (readl(&ep->regs->ep_stat) & BIT(NAK_OUT_PACKETS))" test in start_dma() will fail, then a short packet will be received, and then start_queue() will call stop_out_naking(). That's what we don't want (OUT naking gets turned off while there is data in the FIFO) because then the next driver request might receive a mixture of old and new packets. With the patch, this race can't occur because the FIFO's state is tested after we know that OUT naking is already turned on, and OUT naking is stopped only when both of the conditions are met. This ensures that all received data is delivered to the gadget driver, which can detect a short packet now before new packets are appended to the last short packet. Acked-by: Alan Stern Signed-off-by: Guido Kiener Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin (Microsoft) --- drivers/usb/gadget/udc/net2280.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index e7dae5379e04..dc6f5a78d124 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -866,9 +866,6 @@ static void start_queue(struct net2280_ep *ep, u32 dmactl, u32 td_dma) (void) readl(&ep->dev->pci->pcimstctl); writel(BIT(DMA_START), &dma->dmastat); - - if (!ep->is_in) - stop_out_naking(ep); } static void start_dma(struct net2280_ep *ep, struct net2280_request *req) @@ -907,6 +904,7 @@ static void start_dma(struct net2280_ep *ep, struct net2280_request *req) writel(BIT(DMA_START), &dma->dmastat); return; } + stop_out_naking(ep); } tmp = dmactl_default; -- cgit v1.2.3 From 202db3b5c17c709f36460af47bcc26aea705472b Mon Sep 17 00:00:00 2001 From: Guido Kiener Date: Mon, 18 Mar 2019 09:18:33 +0100 Subject: usb: gadget: net2280: Fix net2280_dequeue() [ Upstream commit f1d3fba17cd4eeea20397f1324b7b9c69a6a935c ] When a request must be dequeued with net2280_dequeue() e.g. due to a device clear action and the same request is finished by the function scan_dma_completions() then the function net2280_dequeue() does not find the request in the following search loop and returns the error -EINVAL without restoring the status ep->stopped. Thus the endpoint keeps blocked and does not receive any data anymore. This fix restores the status and does not issue an error message. Acked-by: Alan Stern Signed-off-by: Guido Kiener Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin (Microsoft) --- drivers/usb/gadget/udc/net2280.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index dc6f5a78d124..d93cf4171953 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1273,9 +1273,9 @@ static int net2280_dequeue(struct usb_ep *_ep, struct usb_request *_req) break; } if (&req->req != _req) { + ep->stopped = stopped; spin_unlock_irqrestore(&ep->dev->lock, flags); - dev_err(&ep->dev->pdev->dev, "%s: Request mismatch\n", - __func__); + ep_dbg(ep->dev, "%s: Request mismatch\n", __func__); return -EINVAL; } -- cgit v1.2.3 From 2af87002a5e4545c9c66323b7fbbb3adb17dff20 Mon Sep 17 00:00:00 2001 From: Guido Kiener Date: Mon, 18 Mar 2019 09:18:34 +0100 Subject: usb: gadget: net2272: Fix net2272_dequeue() [ Upstream commit 091dacc3cc10979ab0422f0a9f7fcc27eee97e69 ] Restore the status of ep->stopped in function net2272_dequeue(). When the given request is not found in the endpoint queue the function returns -EINVAL without restoring the state of ep->stopped. Thus the endpoint keeps blocked and does not transfer any data anymore. This fix is only compile-tested, since we do not have a corresponding hardware. An analogous fix was tested in the sibling driver. See "usb: gadget: net2280: Fix net2280_dequeue()" Acked-by: Alan Stern Signed-off-by: Guido Kiener Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin (Microsoft) --- drivers/usb/gadget/udc/net2272.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index b77f3126580e..c2011cd7df8c 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -945,6 +945,7 @@ net2272_dequeue(struct usb_ep *_ep, struct usb_request *_req) break; } if (&req->req != _req) { + ep->stopped = stopped; spin_unlock_irqrestore(&ep->dev->lock, flags); return -EINVAL; } -- cgit v1.2.3 From bcc78b63f0b44db998062901d4e160d890137d98 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 4 Mar 2019 11:49:40 +0100 Subject: ARM: dts: pfla02: increase phy reset duration [ Upstream commit 032f85c9360fb1a08385c584c2c4ed114b33c260 ] Increase the reset duration to ensure correct phy functionality. The reset duration is taken from barebox commit 52fdd510de ("ARM: dts: pfla02: use long enough reset for ethernet phy"): Use a longer reset time for ethernet phy Micrel KSZ9031RNX. Otherwise a small percentage of modules have 'transmission timeouts' errors like barebox@Phytec phyFLEX-i.MX6 Quad Carrier-Board:/ ifup eth0 warning: No MAC address set. Using random address 7e:94:4d:02:f8:f3 eth0: 1000Mbps full duplex link detected eth0: transmission timeout T eth0: transmission timeout T eth0: transmission timeout T eth0: transmission timeout T eth0: transmission timeout Cc: Stefan Christ Cc: Christian Hemp Signed-off-by: Marco Felsch Fixes: 3180f956668e ("ARM: dts: Phytec imx6q pfla02 and pbab01 support") Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi index 1b50b01e9bac..65d03c5d409b 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi @@ -90,6 +90,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-reset-duration = <10>; /* in msecs */ phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>; phy-supply = <&vdd_eth_io_reg>; status = "disabled"; -- cgit v1.2.3 From e68cc902c624d7a5cf1ebde8f27e484ab1756da6 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 15 Mar 2019 12:56:49 +0200 Subject: i2c: i801: Add support for Intel Comet Lake [ Upstream commit 5cd1c56c42beb6d228cc8d4373fdc5f5ec78a5ad ] Add PCI ID for Intel Comet Lake PCH. Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin (Microsoft) --- Documentation/i2c/busses/i2c-i801 | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index d1ee484a787d..ee9984f35868 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -36,6 +36,7 @@ Supported adapters: * Intel Cannon Lake (PCH) * Intel Cedar Fork (PCH) * Intel Ice Lake (PCH) + * Intel Comet Lake (PCH) Datasheets: Publicly available at the Intel website On Intel Patsburg and later chipsets, both the normal host SMBus controller diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index f2c681971201..f8979abb9a19 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -131,6 +131,7 @@ config I2C_I801 Cannon Lake (PCH) Cedar Fork (PCH) Ice Lake (PCH) + Comet Lake (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index c91e145ef5a5..679c6c41f64b 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -71,6 +71,7 @@ * Cannon Lake-LP (PCH) 0x9da3 32 hard yes yes yes * Cedar Fork (PCH) 0x18df 32 hard yes yes yes * Ice Lake-LP (PCH) 0x34a3 32 hard yes yes yes + * Comet Lake (PCH) 0x02a3 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -240,6 +241,7 @@ #define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS 0xa223 #define PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS 0xa2a3 #define PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS 0xa323 +#define PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS 0x02a3 struct i801_mux_config { char *gpio_chip; @@ -1038,6 +1040,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS) }, { 0, } }; @@ -1534,6 +1537,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_DNV_SMBUS: case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS: case PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS: + case PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS: priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; priv->features |= FEATURE_SMBUS_PEC; -- cgit v1.2.3 From 07c5093a04f824fcbee179d7c7408d47f1ac6931 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 20 Mar 2019 14:57:19 +0000 Subject: KVM: arm/arm64: Fix handling of stage2 huge mappings [ Upstream commit 3c3736cd32bf5197aed1410ae826d2d254a5b277 ] We rely on the mmu_notifier call backs to handle the split/merge of huge pages and thus we are guaranteed that, while creating a block mapping, either the entire block is unmapped at stage2 or it is missing permission. However, we miss a case where the block mapping is split for dirty logging case and then could later be made block mapping, if we cancel the dirty logging. This not only creates inconsistent TLB entries for the pages in the the block, but also leakes the table pages for PMD level. Handle this corner case for the huge mappings at stage2 by unmapping the non-huge mapping for the block. This could potentially release the upper level table. So we need to restart the table walk once we unmap the range. Fixes : ad361f093c1e31d ("KVM: ARM: Support hugetlbfs backed huge pages") Reported-by: Zheng Xiang Cc: Zheng Xiang Cc: Zenghui Yu Cc: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/include/asm/stage2_pgtable.h | 2 ++ virt/kvm/arm/mmu.c | 59 +++++++++++++++++++++++++---------- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h index de2089501b8b..9e11dce55e06 100644 --- a/arch/arm/include/asm/stage2_pgtable.h +++ b/arch/arm/include/asm/stage2_pgtable.h @@ -75,6 +75,8 @@ static inline bool kvm_stage2_has_pud(struct kvm *kvm) #define S2_PMD_MASK PMD_MASK #define S2_PMD_SIZE PMD_SIZE +#define S2_PUD_MASK PUD_MASK +#define S2_PUD_SIZE PUD_SIZE static inline bool kvm_stage2_has_pmd(struct kvm *kvm) { diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 5cc22cdaa5ba..31e22b615d99 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1060,25 +1060,43 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache { pmd_t *pmd, old_pmd; +retry: pmd = stage2_get_pmd(kvm, cache, addr); VM_BUG_ON(!pmd); old_pmd = *pmd; + /* + * Multiple vcpus faulting on the same PMD entry, can + * lead to them sequentially updating the PMD with the + * same value. Following the break-before-make + * (pmd_clear() followed by tlb_flush()) process can + * hinder forward progress due to refaults generated + * on missing translations. + * + * Skip updating the page table if the entry is + * unchanged. + */ + if (pmd_val(old_pmd) == pmd_val(*new_pmd)) + return 0; + if (pmd_present(old_pmd)) { /* - * Multiple vcpus faulting on the same PMD entry, can - * lead to them sequentially updating the PMD with the - * same value. Following the break-before-make - * (pmd_clear() followed by tlb_flush()) process can - * hinder forward progress due to refaults generated - * on missing translations. + * If we already have PTE level mapping for this block, + * we must unmap it to avoid inconsistent TLB state and + * leaking the table page. We could end up in this situation + * if the memory slot was marked for dirty logging and was + * reverted, leaving PTE level mappings for the pages accessed + * during the period. So, unmap the PTE level mapping for this + * block and retry, as we could have released the upper level + * table in the process. * - * Skip updating the page table if the entry is - * unchanged. + * Normal THP split/merge follows mmu_notifier callbacks and do + * get handled accordingly. */ - if (pmd_val(old_pmd) == pmd_val(*new_pmd)) - return 0; - + if (!pmd_thp_or_huge(old_pmd)) { + unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); + goto retry; + } /* * Mapping in huge pages should only happen through a * fault. If a page is merged into a transparent huge @@ -1090,8 +1108,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache * should become splitting first, unmapped, merged, * and mapped back in on-demand. */ - VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); - + WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { @@ -1107,6 +1124,7 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac { pud_t *pudp, old_pud; +retry: pudp = stage2_get_pud(kvm, cache, addr); VM_BUG_ON(!pudp); @@ -1114,14 +1132,23 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac /* * A large number of vcpus faulting on the same stage 2 entry, - * can lead to a refault due to the - * stage2_pud_clear()/tlb_flush(). Skip updating the page - * tables if there is no change. + * can lead to a refault due to the stage2_pud_clear()/tlb_flush(). + * Skip updating the page tables if there is no change. */ if (pud_val(old_pud) == pud_val(*new_pudp)) return 0; if (stage2_pud_present(kvm, old_pud)) { + /* + * If we already have table level mapping for this block, unmap + * the range for this block and retry. + */ + if (!stage2_pud_huge(kvm, old_pud)) { + unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); + goto retry; + } + + WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); stage2_pud_clear(kvm, pudp); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { -- cgit v1.2.3 From 75e3e76a45c6fb4f7bfa43fe5a0b623fe3a9545e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Dequeue RX packets explicitly [ Upstream commit 536d3680fd2dab5c39857d62a3e084198fc74ff9 ] The ks8851 driver lets the chip auto-dequeue received packets once they have been read in full. It achieves that by setting the ADRFE flag in the RXQCR register ("Auto-Dequeue RXQ Frame Enable"). However if allocation of a packet's socket buffer or retrieval of the packet over the SPI bus fails, the packet will not have been read in full and is not auto-dequeued. Such partial retrieval of a packet confuses the chip's RX queue management: On the next RX interrupt, the first packet read from the queue will be the one left there previously and this one can be retrieved without issues. But for any newly received packets, the frame header status and byte count registers (RXFHSR and RXFHBCR) contain bogus values, preventing their retrieval. The chip allows explicitly dequeueing a packet from the RX queue by setting the RRXEF flag in the RXQCR register ("Release RX Error Frame"). This could be used to dequeue the packet in case of an error, but if that error is a failed SPI transfer, it is unknown if the packet was transferred in full and was auto-dequeued or if it was only transferred in part and requires an explicit dequeue. The safest approach is thus to always dequeue packets explicitly and forgo auto-dequeueing. Without this change, I've witnessed packet retrieval break completely when an SPI DMA transfer fails, requiring a chip reset. Explicit dequeueing magically fixes this and makes packet retrieval absolutely robust for me. The chip's documentation suggests auto-dequeuing and uses the RRXEF flag only to dequeue error frames which the driver doesn't want to retrieve. But that seems to be a fair-weather approach. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/micrel/ks8851.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index bd6e9014bc74..a93f8e842c07 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -535,9 +535,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) /* set dma read address */ ks8851_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI | 0x00); - /* start the packet dma process, and set auto-dequeue rx */ - ks8851_wrreg16(ks, KS_RXQCR, - ks->rc_rxqcr | RXQCR_SDA | RXQCR_ADRFE); + /* start DMA access */ + ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); if (rxlen > 4) { unsigned int rxalign; @@ -568,7 +567,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) } } - ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); + /* end DMA access and dequeue packet */ + ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_RRXEF); } } -- cgit v1.2.3 From 860fd08630a6ee7ef0ef78f3820805c2d928b93f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Reassert reset pin if chip ID check fails [ Upstream commit 761cfa979a0c177d6c2d93ef5585cd79ae49a7d5 ] Commit 73fdeb82e963 ("net: ks8851: Add optional vdd_io regulator and reset gpio") amended the ks8851 driver to briefly assert the chip's reset pin on probe. It also amended the probe routine's error path to reassert the reset pin if a subsequent initialization step fails. However the commit misplaced reassertion of the reset pin in the error path such that it is not performed if the check of the Chip ID and Enable Register (CIDER) fails. The error path is therefore slightly asymmetrical to the probe routine's body. Fix it. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Stephen Boyd Cc: Nishanth Menon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/micrel/ks8851.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index a93f8e842c07..1633fa5c709c 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1554,9 +1554,9 @@ err_netdev: free_irq(ndev->irq, ks); err_irq: +err_id: if (gpio_is_valid(gpio)) gpio_set_value(gpio, 0); -err_id: regulator_disable(ks->vdd_reg); err_reg: regulator_disable(ks->vdd_io); -- cgit v1.2.3 From 372b41d6efe32629c6eb4b4f60aa60c76aad20ae Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Delay requesting IRQ until opened [ Upstream commit d268f31552794abf5b6aa5af31021643411f25f5 ] The ks8851 driver currently requests the IRQ before registering the net_device. Because the net_device name is used as IRQ name and is still "eth%d" when the IRQ is requested, it's impossibe to tell IRQs apart if multiple ks8851 chips are present. Most other drivers delay requesting the IRQ until the net_device is opened. Do the same. The driver doesn't enable interrupts on the chip before opening the net_device and disables them when closing it, so there doesn't seem to be a need to request the IRQ already on probe. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/micrel/ks8851.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 1633fa5c709c..c9faec4c5b25 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -785,6 +785,15 @@ static void ks8851_tx_work(struct work_struct *work) static int ks8851_net_open(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); + int ret; + + ret = request_threaded_irq(dev->irq, NULL, ks8851_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + dev->name, ks); + if (ret < 0) { + netdev_err(dev, "failed to get irq\n"); + return ret; + } /* lock the card, even if we may not actually be doing anything * else at the moment */ @@ -899,6 +908,8 @@ static int ks8851_net_stop(struct net_device *dev) dev_kfree_skb(txb); } + free_irq(dev->irq, ks); + return 0; } @@ -1529,14 +1540,6 @@ static int ks8851_probe(struct spi_device *spi) ks8851_read_selftest(ks); ks8851_init_mac(ks); - ret = request_threaded_irq(spi->irq, NULL, ks8851_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - ndev->name, ks); - if (ret < 0) { - dev_err(&spi->dev, "failed to get irq\n"); - goto err_irq; - } - ret = register_netdev(ndev); if (ret) { dev_err(&spi->dev, "failed to register network device\n"); @@ -1549,11 +1552,7 @@ static int ks8851_probe(struct spi_device *spi) return 0; - err_netdev: - free_irq(ndev->irq, ks); - -err_irq: err_id: if (gpio_is_valid(gpio)) gpio_set_value(gpio, 0); @@ -1574,7 +1573,6 @@ static int ks8851_remove(struct spi_device *spi) dev_info(&spi->dev, "remove\n"); unregister_netdev(priv->netdev); - free_irq(spi->irq, priv); if (gpio_is_valid(priv->gpio)) gpio_set_value(priv->gpio, 0); regulator_disable(priv->vdd_reg); -- cgit v1.2.3 From fe4e7a03c7428e6d7d464270a2f5e61345fb73d0 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Set initial carrier state to down [ Upstream commit 9624bafa5f6418b9ca5b3f66d1f6a6a2e8bf6d4c ] The ks8851 chip's initial carrier state is down. A Link Change Interrupt is signaled once interrupts are enabled if the carrier is up. The ks8851 driver has it backwards by assuming that the initial carrier state is up. The state is therefore misrepresented if the interface is opened with no cable attached. Fix it. The Link Change interrupt is sometimes not signaled unless the P1MBSR register (which contains the Link Status bit) is read on ->ndo_open(). This might be a hardware erratum. Read the register by calling mii_check_link(), which has the desirable side effect of setting the carrier state to down if the cable was detached while the interface was closed. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/micrel/ks8851.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index c9faec4c5b25..b83b070a9eec 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -858,6 +858,7 @@ static int ks8851_net_open(struct net_device *dev) netif_dbg(ks, ifup, ks->netdev, "network device up\n"); mutex_unlock(&ks->lock); + mii_check_link(&ks->mii); return 0; } @@ -1519,6 +1520,7 @@ static int ks8851_probe(struct spi_device *spi) spi_set_drvdata(spi, ks); + netif_carrier_off(ks->netdev); ndev->if_port = IF_PORT_100BASET; ndev->netdev_ops = &ks8851_netdev_ops; ndev->irq = spi->irq; -- cgit v1.2.3 From 5fad07771fb815935c651ca05d754b9fdcdd2a07 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Wed, 20 Mar 2019 12:21:35 -0500 Subject: staging: rtl8188eu: Fix potential NULL pointer dereference of kcalloc [ Upstream commit 7671ce0d92933762f469266daf43bd34d422d58c ] hwxmits is allocated via kcalloc and not checked for failure before its dereference. The patch fixes this problem by returning error upstream in rtl8723bs, rtl8188eu. Signed-off-by: Aditya Pakki Acked-by: Mukesh Ojha Reviewed-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/staging/rtl8188eu/core/rtw_xmit.c | 9 +++++++-- drivers/staging/rtl8188eu/include/rtw_xmit.h | 2 +- drivers/staging/rtl8723bs/core/rtw_xmit.c | 14 +++++++------- drivers/staging/rtl8723bs/include/rtw_xmit.h | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_xmit.c b/drivers/staging/rtl8188eu/core/rtw_xmit.c index 3b1ccd138c3f..6fb6ea29a8b6 100644 --- a/drivers/staging/rtl8188eu/core/rtw_xmit.c +++ b/drivers/staging/rtl8188eu/core/rtw_xmit.c @@ -174,7 +174,9 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf; - rtw_alloc_hwxmits(padapter); + res = rtw_alloc_hwxmits(padapter); + if (res == _FAIL) + goto exit; rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry); for (i = 0; i < 4; i++) @@ -1503,7 +1505,7 @@ exit: return res; } -void rtw_alloc_hwxmits(struct adapter *padapter) +s32 rtw_alloc_hwxmits(struct adapter *padapter) { struct hw_xmit *hwxmits; struct xmit_priv *pxmitpriv = &padapter->xmitpriv; @@ -1512,6 +1514,8 @@ void rtw_alloc_hwxmits(struct adapter *padapter) pxmitpriv->hwxmits = kcalloc(pxmitpriv->hwxmit_entry, sizeof(struct hw_xmit), GFP_KERNEL); + if (!pxmitpriv->hwxmits) + return _FAIL; hwxmits = pxmitpriv->hwxmits; @@ -1519,6 +1523,7 @@ void rtw_alloc_hwxmits(struct adapter *padapter) hwxmits[1] .sta_queue = &pxmitpriv->vi_pending; hwxmits[2] .sta_queue = &pxmitpriv->be_pending; hwxmits[3] .sta_queue = &pxmitpriv->bk_pending; + return _SUCCESS; } void rtw_free_hwxmits(struct adapter *padapter) diff --git a/drivers/staging/rtl8188eu/include/rtw_xmit.h b/drivers/staging/rtl8188eu/include/rtw_xmit.h index 788f59c74ea1..ba7e15fbde72 100644 --- a/drivers/staging/rtl8188eu/include/rtw_xmit.h +++ b/drivers/staging/rtl8188eu/include/rtw_xmit.h @@ -336,7 +336,7 @@ s32 rtw_txframes_sta_ac_pending(struct adapter *padapter, void rtw_init_hwxmits(struct hw_xmit *phwxmit, int entry); s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter); void _rtw_free_xmit_priv(struct xmit_priv *pxmitpriv); -void rtw_alloc_hwxmits(struct adapter *padapter); +s32 rtw_alloc_hwxmits(struct adapter *padapter); void rtw_free_hwxmits(struct adapter *padapter); s32 rtw_xmit(struct adapter *padapter, struct sk_buff **pkt); diff --git a/drivers/staging/rtl8723bs/core/rtw_xmit.c b/drivers/staging/rtl8723bs/core/rtw_xmit.c index 625e67f39889..a36b2213d8ee 100644 --- a/drivers/staging/rtl8723bs/core/rtw_xmit.c +++ b/drivers/staging/rtl8723bs/core/rtw_xmit.c @@ -260,7 +260,9 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) } } - rtw_alloc_hwxmits(padapter); + res = rtw_alloc_hwxmits(padapter); + if (res == _FAIL) + goto exit; rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry); for (i = 0; i < 4; i++) { @@ -2144,7 +2146,7 @@ exit: return res; } -void rtw_alloc_hwxmits(struct adapter *padapter) +s32 rtw_alloc_hwxmits(struct adapter *padapter) { struct hw_xmit *hwxmits; struct xmit_priv *pxmitpriv = &padapter->xmitpriv; @@ -2155,10 +2157,8 @@ void rtw_alloc_hwxmits(struct adapter *padapter) pxmitpriv->hwxmits = rtw_zmalloc(sizeof(struct hw_xmit) * pxmitpriv->hwxmit_entry); - if (pxmitpriv->hwxmits == NULL) { - DBG_871X("alloc hwxmits fail!...\n"); - return; - } + if (!pxmitpriv->hwxmits) + return _FAIL; hwxmits = pxmitpriv->hwxmits; @@ -2204,7 +2204,7 @@ void rtw_alloc_hwxmits(struct adapter *padapter) } - + return _SUCCESS; } void rtw_free_hwxmits(struct adapter *padapter) diff --git a/drivers/staging/rtl8723bs/include/rtw_xmit.h b/drivers/staging/rtl8723bs/include/rtw_xmit.h index 1b38b9182b31..37f42b2f22f1 100644 --- a/drivers/staging/rtl8723bs/include/rtw_xmit.h +++ b/drivers/staging/rtl8723bs/include/rtw_xmit.h @@ -487,7 +487,7 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter); void _rtw_free_xmit_priv (struct xmit_priv *pxmitpriv); -void rtw_alloc_hwxmits(struct adapter *padapter); +s32 rtw_alloc_hwxmits(struct adapter *padapter); void rtw_free_hwxmits(struct adapter *padapter); -- cgit v1.2.3 From 6300a60f50b91cedd52baa38ec6623ef49449c08 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Wed, 20 Mar 2019 12:02:49 -0500 Subject: staging: rtlwifi: rtl8822b: fix to avoid potential NULL pointer dereference [ Upstream commit d70d70aec9632679dd00dcc1b1e8b2517e2c7da0 ] skb allocated via dev_alloc_skb can fail and return a NULL pointer. This patch avoids such a scenario and returns, consistent with other invocations. Signed-off-by: Aditya Pakki Reviewed-by: Mukesh Ojha Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/staging/rtlwifi/rtl8822be/fw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/rtlwifi/rtl8822be/fw.c b/drivers/staging/rtlwifi/rtl8822be/fw.c index a40396614814..c1ed52df05f0 100644 --- a/drivers/staging/rtlwifi/rtl8822be/fw.c +++ b/drivers/staging/rtlwifi/rtl8822be/fw.c @@ -741,6 +741,8 @@ void rtl8822be_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) u1_rsvd_page_loc, 3); skb = dev_alloc_skb(totalpacketlen); + if (!skb) + return; memcpy((u8 *)skb_put(skb, totalpacketlen), &reserved_page_packet, totalpacketlen); -- cgit v1.2.3 From 62d91f5a1ea4598375beca40dc3c094f1854f25a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Mar 2019 09:26:38 +0300 Subject: staging: rtl8712: uninitialized memory in read_bbreg_hdl() [ Upstream commit 22c971db7dd4b0ad8dd88e99c407f7a1f4231a2e ] Colin King reported a bug in read_bbreg_hdl(): memcpy(pcmd->rsp, (u8 *)&val, pcmd->rspsz); The problem is that "val" is uninitialized. This code is obviously not useful, but so far as I can tell "pcmd->cmdcode" is never GEN_CMD_CODE(_Read_BBREG) so it's not harmful either. For now the easiest fix is to just call r8712_free_cmd_obj() and return. Fixes: 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel") Reported-by: Colin Ian King Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/staging/rtl8712/rtl8712_cmd.c | 10 +--------- drivers/staging/rtl8712/rtl8712_cmd.h | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index 1920d02f7c9f..8c36acedf507 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -147,17 +147,9 @@ static u8 write_macreg_hdl(struct _adapter *padapter, u8 *pbuf) static u8 read_bbreg_hdl(struct _adapter *padapter, u8 *pbuf) { - u32 val; - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; - if (pcmd->rsp && pcmd->rspsz > 0) - memcpy(pcmd->rsp, (u8 *)&val, pcmd->rspsz); - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (!pcmd_callback) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); + r8712_free_cmd_obj(pcmd); return H2C_SUCCESS; } diff --git a/drivers/staging/rtl8712/rtl8712_cmd.h b/drivers/staging/rtl8712/rtl8712_cmd.h index 92fb77666d44..1ef86b8c592f 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.h +++ b/drivers/staging/rtl8712/rtl8712_cmd.h @@ -140,7 +140,7 @@ enum rtl8712_h2c_cmd { static struct _cmd_callback cmd_callback[] = { {GEN_CMD_CODE(_Read_MACREG), NULL}, /*0*/ {GEN_CMD_CODE(_Write_MACREG), NULL}, - {GEN_CMD_CODE(_Read_BBREG), &r8712_getbbrfreg_cmdrsp_callback}, + {GEN_CMD_CODE(_Read_BBREG), NULL}, {GEN_CMD_CODE(_Write_BBREG), NULL}, {GEN_CMD_CODE(_Read_RFREG), &r8712_getbbrfreg_cmdrsp_callback}, {GEN_CMD_CODE(_Write_RFREG), NULL}, /*5*/ -- cgit v1.2.3 From 9bd87bbd2988f3a5cd5c463418b2012b92913a5f Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Wed, 20 Mar 2019 10:42:32 -0500 Subject: staging: rtlwifi: Fix potential NULL pointer dereference of kzalloc [ Upstream commit 6a8ca24590a2136921439b376c926c11a6effc0e ] phydm.internal is allocated using kzalloc which is used multiple times without a check for NULL pointer. This patch avoids such a scenario by returning 0, consistent with the failure case. Signed-off-by: Aditya Pakki Reviewed-by: Mukesh Ojha Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/staging/rtlwifi/phydm/rtl_phydm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/rtlwifi/phydm/rtl_phydm.c b/drivers/staging/rtlwifi/phydm/rtl_phydm.c index 9930ed954abb..4cc77b2016e1 100644 --- a/drivers/staging/rtlwifi/phydm/rtl_phydm.c +++ b/drivers/staging/rtlwifi/phydm/rtl_phydm.c @@ -180,6 +180,8 @@ static int rtl_phydm_init_priv(struct rtl_priv *rtlpriv, rtlpriv->phydm.internal = kzalloc(sizeof(struct phy_dm_struct), GFP_KERNEL); + if (!rtlpriv->phydm.internal) + return 0; _rtl_phydm_init_com_info(rtlpriv, ic, params); -- cgit v1.2.3 From ead946dc38247eef58aa7d01cdd494267fd41385 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 20 Mar 2019 07:36:55 -0500 Subject: net: phy: Add DP83825I to the DP83822 driver [ Upstream commit 06acc17a96215a11134114aee26532b12dc8fde1 ] Add the DP83825I ethernet PHY to the DP83822 driver. These devices share the same WoL register bits and addresses. The phy_driver init was made into a macro as there may be future devices appended to this driver that will share the register space. http://www.ti.com/lit/gpn/dp83825i Reviewed-by: Florian Fainelli Signed-off-by: Dan Murphy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/phy/dp83822.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 24c7f149f3e6..e11057892f07 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -23,6 +23,8 @@ #include #define DP83822_PHY_ID 0x2000a240 +#define DP83825I_PHY_ID 0x2000a150 + #define DP83822_DEVADDR 0x1f #define MII_DP83822_PHYSCR 0x11 @@ -312,26 +314,30 @@ static int dp83822_resume(struct phy_device *phydev) return 0; } +#define DP83822_PHY_DRIVER(_id, _name) \ + { \ + PHY_ID_MATCH_MODEL(_id), \ + .name = (_name), \ + .features = PHY_BASIC_FEATURES, \ + .soft_reset = dp83822_phy_reset, \ + .config_init = dp83822_config_init, \ + .get_wol = dp83822_get_wol, \ + .set_wol = dp83822_set_wol, \ + .ack_interrupt = dp83822_ack_interrupt, \ + .config_intr = dp83822_config_intr, \ + .suspend = dp83822_suspend, \ + .resume = dp83822_resume, \ + } + static struct phy_driver dp83822_driver[] = { - { - .phy_id = DP83822_PHY_ID, - .phy_id_mask = 0xfffffff0, - .name = "TI DP83822", - .features = PHY_BASIC_FEATURES, - .config_init = dp83822_config_init, - .soft_reset = dp83822_phy_reset, - .get_wol = dp83822_get_wol, - .set_wol = dp83822_set_wol, - .ack_interrupt = dp83822_ack_interrupt, - .config_intr = dp83822_config_intr, - .suspend = dp83822_suspend, - .resume = dp83822_resume, - }, + DP83822_PHY_DRIVER(DP83822_PHY_ID, "TI DP83822"), + DP83822_PHY_DRIVER(DP83825I_PHY_ID, "TI DP83825I"), }; module_phy_driver(dp83822_driver); static struct mdio_device_id __maybe_unused dp83822_tbl[] = { { DP83822_PHY_ID, 0xfffffff0 }, + { DP83825I_PHY_ID, 0xfffffff0 }, { }, }; MODULE_DEVICE_TABLE(mdio, dp83822_tbl); -- cgit v1.2.3 From fe9fae9c5bff1389ad5045bd9baba0bd144a56da Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Wed, 20 Mar 2019 19:12:22 +0530 Subject: net: macb: Add null check for PCLK and HCLK [ Upstream commit cd5afa91f078c0787be0a62b5ef90301c00b0271 ] Both PCLK and HCLK are "required" clocks according to macb devicetree documentation. There is a chance that devm_clk_get doesn't return a negative error but just a NULL clock structure instead. In such a case the driver proceeds as usual and uses pclk value 0 to calculate MDC divisor which is incorrect. Hence fix the same in clock initialization. Signed-off-by: Harini Katakam Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/cadence/macb_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2b2882615e8b..6cbe515bfdeb 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3318,14 +3318,20 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, *hclk = devm_clk_get(&pdev->dev, "hclk"); } - if (IS_ERR(*pclk)) { + if (IS_ERR_OR_NULL(*pclk)) { err = PTR_ERR(*pclk); + if (!err) + err = -ENODEV; + dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); return err; } - if (IS_ERR(*hclk)) { + if (IS_ERR_OR_NULL(*hclk)) { err = PTR_ERR(*hclk); + if (!err) + err = -ENODEV; + dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); return err; } -- cgit v1.2.3 From 4da511ff427b43f7db6ee6368223bde1b925e4f4 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:15 +0100 Subject: net/sched: don't dereference a->goto_chain to read the chain index [ Upstream commit fe384e2fa36ca084a456fd30558cccc75b4b3fbd ] callers of tcf_gact_goto_chain_index() can potentially read an old value of the chain index, or even dereference a NULL 'goto_chain' pointer, because 'goto_chain' and 'tcfa_action' are read in the traffic path without caring of concurrent write in the control path. The most recent value of chain index can be read also from a->tcfa_action (it's encoded there together with TC_ACT_GOTO_CHAIN bits), so we don't really need to dereference 'goto_chain': just read the chain id from the control action. Fixes: e457d86ada27 ("net: sched: add couple of goto_chain helpers") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- include/net/tc_act/tc_gact.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index ef8dd0db70ce..56935bf027a7 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -56,7 +56,7 @@ static inline bool is_tcf_gact_goto_chain(const struct tc_action *a) static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) { - return a->goto_chain->index; + return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK; } #endif /* __NET_TC_GACT_H */ -- cgit v1.2.3 From ec44a9a47e7e7fb83221facf77566c2d3173a011 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 19 Mar 2019 01:30:09 +0900 Subject: ARM: dts: imx6qdl: Fix typo in imx6qdl-icore-rqs.dtsi [ Upstream commit 41b37f4c0fa67185691bcbd30201cad566f2f0d1 ] This patch fixes a spelling typo. Signed-off-by: Masanari Iida Fixes: cc42603de320 ("ARM: dts: imx6q-icore-rqs: Add Engicam IMX6 Q7 initial support") Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi b/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi index 1d1b4bd0670f..a4217f564a53 100644 --- a/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi +++ b/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi @@ -264,7 +264,7 @@ pinctrl-2 = <&pinctrl_usdhc3_200mhz>; vmcc-supply = <®_sd3_vmmc>; cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; - bus-witdh = <4>; + bus-width = <4>; no-1-8-v; status = "okay"; }; @@ -275,7 +275,7 @@ pinctrl-1 = <&pinctrl_usdhc4_100mhz>; pinctrl-2 = <&pinctrl_usdhc4_200mhz>; vmcc-supply = <®_sd4_vmmc>; - bus-witdh = <8>; + bus-width = <8>; no-1-8-v; non-removable; status = "okay"; -- cgit v1.2.3 From 4d88134c2f7ca1935cb29c5534d214bf60281e68 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 11 Feb 2019 11:51:20 +0100 Subject: drm/tegra: hub: Fix dereference before check [ Upstream commit 7cf77b273a8fc51e7de622fa6691abd4436a9a6b ] Reported-by: Dan Carpenter Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin (Microsoft) --- drivers/gpu/drm/tegra/hub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 922a48d5a483..c7c612579270 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -378,14 +378,16 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane, static void tegra_shared_plane_atomic_disable(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct tegra_dc *dc = to_tegra_dc(old_state->crtc); struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_dc *dc; u32 value; /* rien ne va plus */ if (!old_state || !old_state->crtc) return; + dc = to_tegra_dc(old_state->crtc); + /* * XXX Legacy helpers seem to sometimes call ->atomic_disable() even * on planes that are already disabled. Make sure we fallback to the -- cgit v1.2.3 From 8dcf6dce402312bbec71826f6293a559e4897d38 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Mar 2019 17:57:56 -0400 Subject: NFS: Fix a typo in nfs_init_timeout_values() [ Upstream commit 5a698243930c441afccec04e4d5dc8febfd2b775 ] Specifying a retrans=0 mount parameter to a NFS/TCP mount, is inadvertently causing the NFS client to rewrite any specified timeout parameter to the default of 60 seconds. Fixes: a956beda19a6 ("NFS: Allow the mount option retrans=0") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin (Microsoft) --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index fb1cf1a4bda2..90d71fda65ce 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -453,7 +453,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, case XPRT_TRANSPORT_RDMA: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_TCP_RETRANS; - if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0) + if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0) to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; -- cgit v1.2.3 From 3da122192cde6f8cc6465e2f824867149378d114 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:07 +0800 Subject: net: xilinx: fix possible object reference leak [ Upstream commit fa3a419d2f674b431d38748cb58fb7da17ee8949 ] The call to of_parse_phandle returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1624:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1569, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Anirudha Sarangi Cc: John Linn Cc: "David S. Miller" Cc: Michal Simek Cc: netdev@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 0789d8af7d72..1ef56edb3918 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1575,12 +1575,14 @@ static int axienet_probe(struct platform_device *pdev) ret = of_address_to_resource(np, 0, &dmares); if (ret) { dev_err(&pdev->dev, "unable to get DMA resource\n"); + of_node_put(np); goto free_netdev; } lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); if (IS_ERR(lp->dma_regs)) { dev_err(&pdev->dev, "could not map DMA regs\n"); ret = PTR_ERR(lp->dma_regs); + of_node_put(np); goto free_netdev; } lp->rx_irq = irq_of_parse_and_map(np, 1); -- cgit v1.2.3 From 6e4f7c129b2400f375ec8a65459e3e7f8ab9506d Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:08 +0800 Subject: net: ibm: fix possible object reference leak [ Upstream commit be693df3cf9dd113ff1d2c0d8150199efdba37f6 ] The call to ehea_get_eth_dn returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/ibm/ehea/ehea_main.c:3163:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3154, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Douglas Miller Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 3baabdc89726..90b62c1412c8 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3160,6 +3160,7 @@ static ssize_t ehea_probe_port(struct device *dev, if (ehea_add_adapter_mr(adapter)) { pr_err("creating MR failed\n"); + of_node_put(eth_dn); return -EIO; } -- cgit v1.2.3 From c4cdbd4075a6d910cbf8e58b5e87cde491f424c8 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:09 +0800 Subject: net: ethernet: ti: fix possible object reference leak [ Upstream commit 75eac7b5f68b0a0671e795ac636457ee27cc11d8 ] The call to of_get_child_by_name returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/ti/netcp_ethss.c:3661:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3654, but without a corresponding object release within this function. ./drivers/net/ethernet/ti/netcp_ethss.c:3665:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3654, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Wingman Kwok Cc: Murali Karicheri Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/ti/netcp_ethss.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 5174d318901e..0a920c5936b2 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3657,12 +3657,16 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, ret = netcp_txpipe_init(&gbe_dev->tx_pipe, netcp_device, gbe_dev->dma_chan_name, gbe_dev->tx_queue_id); - if (ret) + if (ret) { + of_node_put(interfaces); return ret; + } ret = netcp_txpipe_open(&gbe_dev->tx_pipe); - if (ret) + if (ret) { + of_node_put(interfaces); return ret; + } /* Create network interfaces */ INIT_LIST_HEAD(&gbe_dev->gbe_intf_head); -- cgit v1.2.3 From e9917fd844ecfa08591c08d2fd4dc3f71a962e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Fri, 8 Feb 2019 15:01:02 +0100 Subject: drm: Fix drm_release() and device unplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3f04e0a6cfebf48152ac64502346cdc258811f79 ] If userspace has open fd(s) when drm_dev_unplug() is run, it will result in drm_dev_unregister() being called twice. First in drm_dev_unplug() and then later in drm_release() through the call to drm_put_dev(). Since userspace already holds a ref on drm_device through the drm_minor, it's not necessary to add extra ref counting based on no open file handles. Instead just drm_dev_put() unconditionally in drm_dev_unplug(). We now have this: - Userpace holds a ref on drm_device as long as there's open fd(s) - The driver holds a ref on drm_device as long as it's bound to the struct device When both sides are done with drm_device, it is released. Signed-off-by: Noralf Trønnes Reviewed-by: Oleksandr Andrushchenko Reviewed-by: Daniel Vetter Reviewed-by: Sean Paul Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190208140103.28919-2-noralf@tronnes.org Signed-off-by: Sasha Levin (Microsoft) --- drivers/gpu/drm/drm_drv.c | 6 +----- drivers/gpu/drm/drm_file.c | 6 ++---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 12e5e2be7890..7a59b8b3ed5a 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -381,11 +381,7 @@ void drm_dev_unplug(struct drm_device *dev) synchronize_srcu(&drm_unplug_srcu); drm_dev_unregister(dev); - - mutex_lock(&drm_global_mutex); - if (dev->open_count == 0) - drm_dev_put(dev); - mutex_unlock(&drm_global_mutex); + drm_dev_put(dev); } EXPORT_SYMBOL(drm_dev_unplug); diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 46f48f245eb5..3f20f598cd7c 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -479,11 +479,9 @@ int drm_release(struct inode *inode, struct file *filp) drm_file_free(file_priv); - if (!--dev->open_count) { + if (!--dev->open_count) drm_lastclose(dev); - if (drm_dev_is_unplugged(dev)) - drm_put_dev(dev); - } + mutex_unlock(&drm_global_mutex); drm_minor_release(minor); -- cgit v1.2.3 From 55a5c46268ff48cc2c734d831f30b5b5878d2e42 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sun, 24 Mar 2019 18:10:02 -0500 Subject: gpio: aspeed: fix a potential NULL pointer dereference [ Upstream commit 6cf4511e9729c00a7306cf94085f9cc3c52ee723 ] In case devm_kzalloc, the patch returns ENOMEM to avoid potential NULL pointer dereference. Signed-off-by: Kangjie Lu Reviewed-by: Andrew Jeffery Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin (Microsoft) --- drivers/gpio/gpio-aspeed.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 854bce4fb9e7..217507002dbc 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -1224,6 +1224,8 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev) gpio->offset_timer = devm_kzalloc(&pdev->dev, gpio->chip.ngpio, GFP_KERNEL); + if (!gpio->offset_timer) + return -ENOMEM; return aspeed_gpio_setup_irqs(gpio, pdev); } -- cgit v1.2.3 From da9c64dc934a57b9404a87e4ad8869e10b0bea29 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 22 Mar 2019 15:26:56 +0000 Subject: drm/meson: Fix invalid pointer in meson_drv_unbind() [ Upstream commit 776e78677f514ecddd12dba48b9040958999bd5a ] meson_drv_bind() registers a meson_drm struct as the device's privdata, but meson_drv_unbind() tries to retrieve a drm_device. This may cause a segfault on shutdown: [ 5194.593429] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000197 ... [ 5194.788850] Call trace: [ 5194.791349] drm_dev_unregister+0x1c/0x118 [drm] [ 5194.795848] meson_drv_unbind+0x50/0x78 [meson_drm] Retrieve the right pointer in meson_drv_unbind(). Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Signed-off-by: Jean-Philippe Brucker Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190322152657.13752-1-jean-philippe.brucker@arm.com Signed-off-by: Sasha Levin (Microsoft) --- drivers/gpu/drm/meson/meson_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 12ff47b13668..c1115a96453f 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -336,8 +336,8 @@ static int meson_drv_bind(struct device *dev) static void meson_drv_unbind(struct device *dev) { - struct drm_device *drm = dev_get_drvdata(dev); - struct meson_drm *priv = drm->dev_private; + struct meson_drm *priv = dev_get_drvdata(dev); + struct drm_device *drm = priv->drm; if (priv->canvas) { meson_canvas_free(priv->canvas, priv->canvas_id_osd1); -- cgit v1.2.3 From e24c5428060f4041e5878bdb863ccb375246588a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 22 Mar 2019 15:26:57 +0000 Subject: drm/meson: Uninstall IRQ handler [ Upstream commit 2d8f92897ad816f5dda54b2ed2fd9f2d7cb1abde ] meson_drv_unbind() doesn't unregister the IRQ handler, which can lead to use-after-free if the IRQ fires after unbind: [ 64.656876] Unable to handle kernel paging request at virtual address ffff000011706dbc ... [ 64.662001] pc : meson_irq+0x18/0x30 [meson_drm] I'm assuming that a similar problem could happen on the error path of bind(), so uninstall the IRQ handler there as well. Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Signed-off-by: Jean-Philippe Brucker Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190322152657.13752-2-jean-philippe.brucker@arm.com Signed-off-by: Sasha Levin (Microsoft) --- drivers/gpu/drm/meson/meson_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index c1115a96453f..a13704ab5d11 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -317,12 +317,14 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) ret = drm_dev_register(drm, 0); if (ret) - goto free_drm; + goto uninstall_irq; drm_fbdev_generic_setup(drm, 32); return 0; +uninstall_irq: + drm_irq_uninstall(drm); free_drm: drm_dev_put(drm); @@ -347,6 +349,7 @@ static void meson_drv_unbind(struct device *dev) } drm_dev_unregister(drm); + drm_irq_uninstall(drm); drm_kms_helper_poll_fini(drm); drm_mode_config_cleanup(drm); drm_dev_put(drm); -- cgit v1.2.3 From 44a7849ecbd24805b13be92a83455bb419e18886 Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Wed, 20 Feb 2019 16:36:52 +0530 Subject: ARM: davinci: fix build failure with allnoconfig [ Upstream commit 2dbed152e2d4c3fe2442284918d14797898b1e8a ] allnoconfig build with just ARCH_DAVINCI enabled fails because drivers/clk/davinci/* depends on REGMAP being enabled. Fix it by selecting REGMAP_MMIO when building in DaVinci support. Signed-off-by: Sekhar Nori Reviewed-by: David Lechner Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 26524b75970a..e5d56d9b712c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -593,6 +593,7 @@ config ARCH_DAVINCI select HAVE_IDE select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF + select REGMAP_MMIO select RESET_CONTROLLER select USE_OF select ZONE_DMA -- cgit v1.2.3 From b2814ce32d478878ddcc44bc98e5d6ca282b1554 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Mar 2019 09:13:51 +0800 Subject: sbitmap: order READ/WRITE freed instance and setting clear bit [ Upstream commit e6d1fa584e0dd9bfebaf345e9feea588cf75ead2 ] Inside sbitmap_queue_clear(), once the clear bit is set, it will be visiable to allocation path immediately. Meantime READ/WRITE on old associated instance(such as request in case of blk-mq) may be out-of-order with the setting clear bit, so race with re-allocation may be triggered. Adds one memory barrier for ordering READ/WRITE of the freed associated instance with setting clear bit for avoiding race with re-allocation. The following kernel oops triggerd by block/006 on aarch64 may be fixed: [ 142.330954] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000330 [ 142.338794] Mem abort info: [ 142.341554] ESR = 0x96000005 [ 142.344632] Exception class = DABT (current EL), IL = 32 bits [ 142.350500] SET = 0, FnV = 0 [ 142.353544] EA = 0, S1PTW = 0 [ 142.356678] Data abort info: [ 142.359528] ISV = 0, ISS = 0x00000005 [ 142.363343] CM = 0, WnR = 0 [ 142.366305] user pgtable: 64k pages, 48-bit VAs, pgdp = 000000002a3c51c0 [ 142.372983] [0000000000000330] pgd=0000000000000000, pud=0000000000000000 [ 142.379777] Internal error: Oops: 96000005 [#1] SMP [ 142.384613] Modules linked in: null_blk ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp vfat fat rpcrdma sunrpc rdma_ucm ib_iser rdma_cm iw_cm libiscsi ib_umad scsi_transport_iscsi ib_ipoib ib_cm mlx5_ib ib_uverbs ib_core sbsa_gwdt crct10dif_ce ghash_ce ipmi_ssif sha2_ce ipmi_devintf sha256_arm64 sg sha1_ce ipmi_msghandler ip_tables xfs libcrc32c mlx5_core sdhci_acpi mlxfw ahci_platform at803x sdhci libahci_platform qcom_emac mmc_core hdma hdma_mgmt i2c_dev [last unloaded: null_blk] [ 142.429753] CPU: 7 PID: 1983 Comm: fio Not tainted 5.0.0.cki #2 [ 142.449458] pstate: 00400005 (nzcv daif +PAN -UAO) [ 142.454239] pc : __blk_mq_free_request+0x4c/0xa8 [ 142.458830] lr : blk_mq_free_request+0xec/0x118 [ 142.463344] sp : ffff00003360f6a0 [ 142.466646] x29: ffff00003360f6a0 x28: ffff000010e70000 [ 142.471941] x27: ffff801729a50048 x26: 0000000000010000 [ 142.477232] x25: ffff00003360f954 x24: ffff7bdfff021440 [ 142.482529] x23: 0000000000000000 x22: 00000000ffffffff [ 142.487830] x21: ffff801729810000 x20: 0000000000000000 [ 142.493123] x19: ffff801729a50000 x18: 0000000000000000 [ 142.498413] x17: 0000000000000000 x16: 0000000000000001 [ 142.503709] x15: 00000000000000ff x14: ffff7fe000000000 [ 142.509003] x13: ffff8017dcde09a0 x12: 0000000000000000 [ 142.514308] x11: 0000000000000001 x10: 0000000000000008 [ 142.519597] x9 : ffff8017dcde09a0 x8 : 0000000000002000 [ 142.524889] x7 : ffff8017dcde0a00 x6 : 000000015388f9be [ 142.530187] x5 : 0000000000000001 x4 : 0000000000000000 [ 142.535478] x3 : 0000000000000000 x2 : 0000000000000000 [ 142.540777] x1 : 0000000000000001 x0 : ffff00001041b194 [ 142.546071] Process fio (pid: 1983, stack limit = 0x000000006460a0ea) [ 142.552500] Call trace: [ 142.554926] __blk_mq_free_request+0x4c/0xa8 [ 142.559181] blk_mq_free_request+0xec/0x118 [ 142.563352] blk_mq_end_request+0xfc/0x120 [ 142.567444] end_cmd+0x3c/0xa8 [null_blk] [ 142.571434] null_complete_rq+0x20/0x30 [null_blk] [ 142.576194] blk_mq_complete_request+0x108/0x148 [ 142.580797] null_handle_cmd+0x1d4/0x718 [null_blk] [ 142.585662] null_queue_rq+0x60/0xa8 [null_blk] [ 142.590171] blk_mq_try_issue_directly+0x148/0x280 [ 142.594949] blk_mq_try_issue_list_directly+0x9c/0x108 [ 142.600064] blk_mq_sched_insert_requests+0xb0/0xd0 [ 142.604926] blk_mq_flush_plug_list+0x16c/0x2a0 [ 142.609441] blk_flush_plug_list+0xec/0x118 [ 142.613608] blk_finish_plug+0x3c/0x4c [ 142.617348] blkdev_direct_IO+0x3b4/0x428 [ 142.621336] generic_file_read_iter+0x84/0x180 [ 142.625761] blkdev_read_iter+0x50/0x78 [ 142.629579] aio_read.isra.6+0xf8/0x190 [ 142.633409] __io_submit_one.isra.8+0x148/0x738 [ 142.637912] io_submit_one.isra.9+0x88/0xb8 [ 142.642078] __arm64_sys_io_submit+0xe0/0x238 [ 142.646428] el0_svc_handler+0xa0/0x128 [ 142.650238] el0_svc+0x8/0xc [ 142.653104] Code: b9402a63 f9000a7f 3100047f 540000a0 (f9419a81) [ 142.659202] ---[ end trace 467586bc175eb09d ]--- Fixes: ea86ea2cdced20057da ("sbitmap: ammortize cost of clearing bits") Reported-and-bisected_and_tested-by: Yi Zhang Cc: Yi Zhang Cc: "jianchao.wang" Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin (Microsoft) --- lib/sbitmap.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 5b382c1244ed..155fe38756ec 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -591,6 +591,17 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { + /* + * Once the clear bit is set, the bit may be allocated out. + * + * Orders READ/WRITE on the asssociated instance(such as request + * of blk_mq) by this bit for avoiding race with re-allocation, + * and its pair is the memory barrier implied in __sbitmap_get_word. + * + * One invariant is that the clear bit has to be zero when the bit + * is in use. + */ + smp_mb__before_atomic(); sbitmap_deferred_clear_bit(&sbq->sb, nr); /* -- cgit v1.2.3 From 08dd1acccb43bad1df5a893341f517fdac517ac1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 25 Mar 2019 11:56:59 +0300 Subject: staging: vc04_services: Fix an error code in vchiq_probe() [ Upstream commit 9b9c87cf51783cbe7140c51472762094033cfeab ] We need to set "err" on this error path. Fixes: 187ac53e590c ("staging: vchiq_arm: rework probe and init functions") Signed-off-by: Dan Carpenter Acked-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 804daf83be35..064d0db4c51e 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -3513,6 +3513,7 @@ static int vchiq_probe(struct platform_device *pdev) struct device_node *fw_node; const struct of_device_id *of_id; struct vchiq_drvdata *drvdata; + struct device *vchiq_dev; int err; of_id = of_match_node(vchiq_of_match, pdev->dev.of_node); @@ -3547,9 +3548,12 @@ static int vchiq_probe(struct platform_device *pdev) goto failed_platform_init; } - if (IS_ERR(device_create(vchiq_class, &pdev->dev, vchiq_devid, - NULL, "vchiq"))) + vchiq_dev = device_create(vchiq_class, &pdev->dev, vchiq_devid, NULL, + "vchiq"); + if (IS_ERR(vchiq_dev)) { + err = PTR_ERR(vchiq_dev); goto failed_device_create; + } vchiq_debugfs_init(); -- cgit v1.2.3 From 9a5795520b91e1ccba3cb581b67e1c1280dec734 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 4 Mar 2019 07:26:35 -0500 Subject: scsi: mpt3sas: Fix kernel panic during expander reset [ Upstream commit c2fe742ff6e77c5b4fe4ad273191ddf28fdea25e ] During expander reset handling, the driver invokes kernel function scsi_host_find_tag() to obtain outstanding requests associated with the scsi host managed by the driver. Driver loops from tag value zero to hba queue depth to obtain the outstanding scmds. But when blk-mq is enabled, the block layer may return stale entry for one or more requests. This may lead to kernel panic if the returned value is inaccessible or the memory pointed by the returned value is reused. Reference of upstream discussion: https://patchwork.kernel.org/patch/10734933/ Instead of calling scsi_host_find_tag() API for each and every smid (smid is tag +1) from one to shost->can_queue, now driver will call this API (to obtain the outstanding scmd) only for those smid's which are outstanding at the driver level. Driver will determine whether this smid is outstanding at driver level by looking into it's corresponding MPI request frame, if its MPI request frame is empty, then it means that this smid is free and does not need to call scsi_host_find_tag() for it. By doing this, driver will invoke scsi_host_find_tag() for only those tags which are outstanding at the driver level. Driver will check whether particular MPI request frame is empty or not by looking into the "DevHandle" field. If this field is zero then it means that this MPI request is empty. For active MPI request DevHandle must be non-zero. Also driver will memset the MPI request frame once the corresponding scmd is processed (i.e. just before calling scmd->done function). Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (Microsoft) --- drivers/scsi/mpt3sas/mpt3sas_base.c | 6 ++++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 0a6cb8f0680c..c39f88100f31 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3281,12 +3281,18 @@ mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid) if (smid < ioc->hi_priority_smid) { struct scsiio_tracker *st; + void *request; st = _get_st_from_smid(ioc, smid); if (!st) { _base_recovery_check(ioc); return; } + + /* Clear MPI request frame */ + request = mpt3sas_base_get_msg_frame(ioc, smid); + memset(request, 0, ioc->request_sz); + mpt3sas_base_clear_st(ioc, st); _base_recovery_check(ioc); return; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6be39dc27103..6173c211a5e5 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1462,11 +1462,23 @@ mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid) { struct scsi_cmnd *scmd = NULL; struct scsiio_tracker *st; + Mpi25SCSIIORequest_t *mpi_request; if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) { u32 unique_tag = smid - 1; + mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); + + /* + * If SCSI IO request is outstanding at driver level then + * DevHandle filed must be non-zero. If DevHandle is zero + * then it means that this smid is free at driver level, + * so return NULL. + */ + if (!mpi_request->DevHandle) + return scmd; + scmd = scsi_host_find_tag(ioc->shost, unique_tag); if (scmd) { st = scsi_cmd_priv(scmd); -- cgit v1.2.3 From ab602d0043a4915a21c749656c56319ca3c35ab0 Mon Sep 17 00:00:00 2001 From: Dave Carroll Date: Fri, 22 Mar 2019 12:16:03 -0600 Subject: scsi: aacraid: Insure we don't access PCIe space during AER/EEH [ Upstream commit b6554cfe09e1f610aed7d57164ab7760be57acd9 ] There are a few windows during AER/EEH when we can access PCIe I/O mapped registers. This will harden the access to insure we do not allow PCIe access during errors Signed-off-by: Dave Carroll Reviewed-by: Sagar Biradar Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (Microsoft) --- drivers/scsi/aacraid/aacraid.h | 7 ++++++- drivers/scsi/aacraid/commsup.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 3291d1c16864..8bd09b96ea18 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -2640,9 +2640,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor) return capacity; } +static inline int aac_pci_offline(struct aac_dev *dev) +{ + return pci_channel_offline(dev->pdev) || dev->handle_pci_error; +} + static inline int aac_adapter_check_health(struct aac_dev *dev) { - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -1; return (dev)->a_ops.adapter_check_health(dev); diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index a3adc954f40f..09367b8a3885 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -672,7 +672,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, return -ETIMEDOUT; } - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -EFAULT; if ((blink = aac_adapter_check_health(dev)) > 0) { @@ -772,7 +772,7 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, spin_unlock_irqrestore(&fibptr->event_lock, flags); - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -EFAULT; fibptr->flags |= FIB_CONTEXT_FLAG_WAIT; -- cgit v1.2.3 From 3ce3290dee8440f60b59ad7da0be3ba97662eaa4 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 01:30:59 -0500 Subject: scsi: qla4xxx: fix a potential NULL pointer dereference [ Upstream commit fba1bdd2a9a93f3e2181ec1936a3c2f6b37e7ed6 ] In case iscsi_lookup_endpoint fails, the fix returns -EINVAL to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Acked-by: Manish Rangankar Reviewed-by: Mukesh Ojha Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (Microsoft) --- drivers/scsi/qla4xxx/ql4_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index a77bfb224248..80289c885c07 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -3203,6 +3203,8 @@ static int qla4xxx_conn_bind(struct iscsi_cls_session *cls_session, if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) return -EINVAL; ep = iscsi_lookup_endpoint(transport_fd); + if (!ep) + return -EINVAL; conn = cls_conn->dd_data; qla_conn = conn->dd_data; qla_conn->qla_ep = ep->dd_data; -- cgit v1.2.3 From facfcac8357da74e84a2a89fa836f668766b1fca Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Wed, 20 Mar 2019 10:27:11 -0500 Subject: usb: usb251xb: fix to avoid potential NULL pointer dereference [ Upstream commit 41f00e6e9e55546390031996b773e7f3c1d95928 ] of_match_device in usb251xb_probe can fail and returns a NULL pointer. The patch avoids a potential NULL pointer dereference in this scenario. Signed-off-by: Aditya Pakki Reviewed-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/usb/misc/usb251xb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index a6efb9a72939..5f7734c729b1 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -601,7 +601,7 @@ static int usb251xb_probe(struct usb251xb *hub) dev); int err; - if (np) { + if (np && of_id) { err = usb251xb_get_ofdata(hub, (struct usb251xb_data *)of_id->data); if (err) { -- cgit v1.2.3 From ec44e4273006d34d67765faaf002036ae2d03cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 28 Feb 2019 22:57:33 +0100 Subject: leds: trigger: netdev: fix refcnt leak on interface rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4cb6560514fa19d556954b88128f3846fee66a03 ] Renaming a netdev-trigger-tracked interface was resulting in an unbalanced dev_hold(). Example: > iw phy phy0 interface add foo type __ap > echo netdev > trigger > echo foo > device_name > ip link set foo name bar > iw dev bar del [ 237.355366] unregister_netdevice: waiting for bar to become free. Usage count = 1 [ 247.435362] unregister_netdevice: waiting for bar to become free. Usage count = 1 [ 257.545366] unregister_netdevice: waiting for bar to become free. Usage count = 1 Above problem was caused by trigger checking a dev->name which obviously changes after renaming an interface. It meant missing all further events including the NETDEV_UNREGISTER which is required for calling dev_put(). This change fixes that by: 1) Comparing device struct *address* for notification-filtering purposes 2) Dropping unneeded NETDEV_CHANGENAME code (no behavior change) Fixes: 06f502f57d0d ("leds: trigger: Introduce a NETDEV trigger") Signed-off-by: Rafał Miłecki Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski Signed-off-by: Sasha Levin (Microsoft) --- drivers/leds/trigger/ledtrig-netdev.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index 3dd3ed46d473..167a94c02d05 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -301,11 +301,11 @@ static int netdev_trig_notify(struct notifier_block *nb, container_of(nb, struct led_netdev_data, notifier); if (evt != NETDEV_UP && evt != NETDEV_DOWN && evt != NETDEV_CHANGE - && evt != NETDEV_REGISTER && evt != NETDEV_UNREGISTER - && evt != NETDEV_CHANGENAME) + && evt != NETDEV_REGISTER && evt != NETDEV_UNREGISTER) return NOTIFY_DONE; - if (strcmp(dev->name, trigger_data->device_name)) + if (!(dev == trigger_data->net_dev || + (evt == NETDEV_REGISTER && !strcmp(dev->name, trigger_data->device_name)))) return NOTIFY_DONE; cancel_delayed_work_sync(&trigger_data->work); @@ -320,12 +320,9 @@ static int netdev_trig_notify(struct notifier_block *nb, dev_hold(dev); trigger_data->net_dev = dev; break; - case NETDEV_CHANGENAME: case NETDEV_UNREGISTER: - if (trigger_data->net_dev) { - dev_put(trigger_data->net_dev); - trigger_data->net_dev = NULL; - } + dev_put(trigger_data->net_dev); + trigger_data->net_dev = NULL; break; case NETDEV_UP: case NETDEV_CHANGE: -- cgit v1.2.3 From 89ae17728a98b7a36811d2d5e6b86d2ba4f0bb5e Mon Sep 17 00:00:00 2001 From: Alakesh Haloi Date: Tue, 26 Mar 2019 02:00:01 +0000 Subject: SUNRPC: fix uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 01f2f5b82a2b523ae76af53f2ff43c48dde10a00 ] Avoid following compiler warning on uninitialized variable net/sunrpc/xprtsock.c: In function ‘xs_read_stream_request.constprop’: net/sunrpc/xprtsock.c:525:10: warning: ‘read’ may be used uninitialized in this function [-Wmaybe-uninitialized] return read; ^~~~ net/sunrpc/xprtsock.c:529:23: warning: ‘ret’ may be used uninitialized in this function [-Wmaybe-uninitialized] return ret < 0 ? ret : read; ~~~~~~~~~~~~~~^~~~~~ Signed-off-by: Alakesh Haloi Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin (Microsoft) --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7754aa3e434f..f88c2bd1335a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -486,8 +486,8 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, int flags, struct rpc_rqst *req) { struct xdr_buf *buf = &req->rq_private_buf; - size_t want, read; - ssize_t ret; + size_t want, uninitialized_var(read); + ssize_t uninitialized_var(ret); xs_read_header(transport, buf); -- cgit v1.2.3 From 4f5d11bf2ecafbb04c7a2b841cfb88ae1f8b0c90 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 26 Mar 2019 21:30:46 +0100 Subject: x86/realmode: Don't leak the trampoline kernel address [ Upstream commit b929a500d68479163c48739d809cbf4c1335db6f ] Since commit ad67b74d2469 ("printk: hash addresses printed with %p") at boot "____ptrval____" is printed instead of the trampoline addresses: Base memory trampoline at [(____ptrval____)] 99000 size 24576 Remove the print as we don't want to leak kernel addresses and this statement is not needed anymore. Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Signed-off-by: Matteo Croce Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190326203046.20787-1-mcroce@redhat.com Signed-off-by: Sasha Levin (Microsoft) --- arch/x86/realmode/init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index d10105825d57..47d097946872 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -20,8 +20,6 @@ void __init set_real_mode_mem(phys_addr_t mem, size_t size) void *base = __va(mem); real_mode_header = (struct real_mode_header *) base; - printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", - base, (unsigned long long)mem, size); } void __init reserve_real_mode(void) -- cgit v1.2.3 From 1ab2566c5329bda0c06036669dcb936b1eb6f881 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Tue, 26 Mar 2019 13:42:22 +0530 Subject: usb: u132-hcd: fix resource leak [ Upstream commit f276e002793cdb820862e8ea8f76769d56bba575 ] if platform_driver_register fails, cleanup the allocated resource gracefully. Signed-off-by: Mukesh Ojha Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin (Microsoft) --- drivers/usb/host/u132-hcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c index 5b8a3d9530c4..5cac83aaeac3 100644 --- a/drivers/usb/host/u132-hcd.c +++ b/drivers/usb/host/u132-hcd.c @@ -3202,6 +3202,9 @@ static int __init u132_hcd_init(void) printk(KERN_INFO "driver %s\n", hcd_name); workqueue = create_singlethread_workqueue("u132"); retval = platform_driver_register(&u132_platform_driver); + if (retval) + destroy_workqueue(workqueue); + return retval; } -- cgit v1.2.3 From b0921da055f17621210fdf42d34d78b49590b7b5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2019 01:38:58 +0000 Subject: ceph: fix use-after-free on symlink traversal [ Upstream commit daf5cc27eed99afdea8d96e71b89ba41f5406ef6 ] free the symlink body after the same RCU delay we have for freeing the struct inode itself, so that traversal during RCU pathwalk wouldn't step into freed memory. Signed-off-by: Al Viro Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin (Microsoft) --- fs/ceph/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9d1f34d46627..f7f9e305aaf8 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -524,6 +524,7 @@ static void ceph_i_callback(struct rcu_head *head) struct inode *inode = container_of(head, struct inode, i_rcu); struct ceph_inode_info *ci = ceph_inode(inode); + kfree(ci->i_symlink); kmem_cache_free(ceph_inode_cachep, ci); } @@ -561,7 +562,6 @@ void ceph_destroy_inode(struct inode *inode) ceph_put_snap_realm(mdsc, realm); } - kfree(ci->i_symlink); while ((n = rb_first(&ci->i_fragtree)) != NULL) { frag = rb_entry(n, struct ceph_inode_frag, node); rb_erase(n, &ci->i_fragtree); -- cgit v1.2.3 From ab4a5df699b58fb17c2bfd88570a4342d0a566a7 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:37:00 +0100 Subject: scsi: zfcp: reduce flood of fcrscn1 trace records on multi-element RSCN [ Upstream commit c8206579175c34a2546de8a74262456278a7795a ] If an incoming ELS of type RSCN contains more than one element, zfcp suboptimally causes repeated erp trigger NOP trace records for each previously failed port. These could be ports that went away. It loops over each RSCN element, and for each of those in an inner loop over all zfcp_ports. The trigger to recover failed ports should be just the reception of some RSCN, no matter how many elements it has. So we can loop over failed ports separately, and only then loop over each RSCN element to handle the non-failed ports. The call chain was: zfcp_fc_incoming_rscn for (i = 1; i < no_entries; i++) _zfcp_fc_incoming_rscn list_for_each_entry(port, &adapter->port_list, list) if (masked port->d_id match) zfcp_fc_test_link if (!port->d_id) zfcp_erp_port_reopen "fcrscn1" <=== In order the reduce the "flooding" of the REC trace area in such cases, we factor out handling the failed ports to be outside of the entries loop: zfcp_fc_incoming_rscn if (no_entries > 1) <=== list_for_each_entry(port, &adapter->port_list, list) <=== if (!port->d_id) zfcp_erp_port_reopen "fcrscn1" <=== for (i = 1; i < no_entries; i++) _zfcp_fc_incoming_rscn list_for_each_entry(port, &adapter->port_list, list) if (masked port->d_id match) zfcp_fc_test_link Abbreviated example trace records before this code change: Tag : fcrscn1 WWPN : 0x500507630310d327 ERP want : 0x02 ERP need : 0x02 Tag : fcrscn1 WWPN : 0x500507630310d327 ERP want : 0x02 ERP need : 0x00 NOP => superfluous trace record The last trace entry repeats if there are more than 2 RSCN elements. Signed-off-by: Steffen Maier Reviewed-by: Benjamin Block Reviewed-by: Jens Remus Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (Microsoft) --- drivers/s390/scsi/zfcp_fc.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index db00b5e3abbe..33eddb02ee30 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -239,10 +239,6 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range, list_for_each_entry(port, &adapter->port_list, list) { if ((port->d_id & range) == (ntoh24(page->rscn_fid) & range)) zfcp_fc_test_link(port); - if (!port->d_id) - zfcp_erp_port_reopen(port, - ZFCP_STATUS_COMMON_ERP_FAILED, - "fcrscn1"); } read_unlock_irqrestore(&adapter->port_list_lock, flags); } @@ -250,6 +246,7 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range, static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req) { struct fsf_status_read_buffer *status_buffer = (void *)fsf_req->data; + struct zfcp_adapter *adapter = fsf_req->adapter; struct fc_els_rscn *head; struct fc_els_rscn_page *page; u16 i; @@ -263,6 +260,22 @@ static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req) no_entries = be16_to_cpu(head->rscn_plen) / sizeof(struct fc_els_rscn_page); + if (no_entries > 1) { + /* handle failed ports */ + unsigned long flags; + struct zfcp_port *port; + + read_lock_irqsave(&adapter->port_list_lock, flags); + list_for_each_entry(port, &adapter->port_list, list) { + if (port->d_id) + continue; + zfcp_erp_port_reopen(port, + ZFCP_STATUS_COMMON_ERP_FAILED, + "fcrscn1"); + } + read_unlock_irqrestore(&adapter->port_list_lock, flags); + } + for (i = 1; i < no_entries; i++) { /* skip head and start with 1st element */ page++; -- cgit v1.2.3 From 8d3c7282cec428196344f4dcf3aab78d59f1a398 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 25 Mar 2019 17:18:17 -0700 Subject: x86/mm: Don't exceed the valid physical address space [ Upstream commit 92c77f7c4d5dfaaf45b2ce19360e69977c264766 ] valid_phys_addr_range() is used to sanity check the physical address range of an operation, e.g., access to /dev/mem. It uses __pa(high_memory) internally. If memory is populated at the end of the physical address space, then __pa(high_memory) is outside of the physical address space because: high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; For the comparison in valid_phys_addr_range() this is not an issue, but if CONFIG_DEBUG_VIRTUAL is enabled, __pa() maps to __phys_addr(), which verifies that the resulting physical address is within the valid physical address space of the CPU. So in the case that memory is populated at the end of the physical address space, this is not true and triggers a VIRTUAL_BUG_ON(). Use __pa(high_memory - 1) to prevent the conversion from going beyond the end of valid physical addresses. Fixes: be62a3204406 ("x86/mm: Limit mmap() of /dev/mem to valid physical addresses") Signed-off-by: Ralph Campbell Signed-off-by: Thomas Gleixner Cc: Craig Bergstrom Cc: Linus Torvalds Cc: Boris Ostrovsky Cc: Fengguang Wu Cc: Greg Kroah-Hartman Cc: Hans Verkuil Cc: Mauro Carvalho Chehab Cc: Peter Zijlstra Cc: Sander Eikelenboom Cc: Sean Young Link: https://lkml.kernel.org/r/20190326001817.15413-2-rcampbell@nvidia.com Signed-off-by: Sasha Levin (Microsoft) --- arch/x86/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index db3165714521..dc726e07d8ba 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -230,7 +230,7 @@ bool mmap_address_hint_valid(unsigned long addr, unsigned long len) /* Can we access it for direct reading/writing? Must be RAM: */ int valid_phys_addr_range(phys_addr_t addr, size_t count) { - return addr + count <= __pa(high_memory); + return addr + count - 1 <= __pa(high_memory - 1); } /* Can we access it through mmap? Must be a valid physical address: */ -- cgit v1.2.3 From 479e1afd232e0bf502f2a92a21fd09c0ee6747ca Mon Sep 17 00:00:00 2001 From: raymond pang Date: Thu, 28 Mar 2019 12:19:25 +0000 Subject: libata: fix using DMA buffers on stack [ Upstream commit dd08a8d9a66de4b54575c294a92630299f7e0fe7 ] When CONFIG_VMAP_STACK=y, __pa() returns incorrect physical address for a stack virtual address. Stack DMA buffers must be avoided. Signed-off-by: raymond pang Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin (Microsoft) --- drivers/ata/libata-zpodd.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index b3ed8f9953a8..173e6f2dd9af 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -52,38 +52,52 @@ static int eject_tray(struct ata_device *dev) /* Per the spec, only slot type and drawer type ODD can be supported */ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) { - char buf[16]; + char *buf; unsigned int ret; - struct rm_feature_desc *desc = (void *)(buf + 8); + struct rm_feature_desc *desc; struct ata_taskfile tf; static const char cdb[] = { GPCMD_GET_CONFIGURATION, 2, /* only 1 feature descriptor requested */ 0, 3, /* 3, removable medium feature */ 0, 0, 0,/* reserved */ - 0, sizeof(buf), + 0, 16, 0, 0, 0, }; + buf = kzalloc(16, GFP_KERNEL); + if (!buf) + return ODD_MECH_TYPE_UNSUPPORTED; + desc = (void *)(buf + 8); + ata_tf_init(dev, &tf); tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; tf.protocol = ATAPI_PROT_PIO; - tf.lbam = sizeof(buf); + tf.lbam = 16; ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, - buf, sizeof(buf), 0); - if (ret) + buf, 16, 0); + if (ret) { + kfree(buf); return ODD_MECH_TYPE_UNSUPPORTED; + } - if (be16_to_cpu(desc->feature_code) != 3) + if (be16_to_cpu(desc->feature_code) != 3) { + kfree(buf); return ODD_MECH_TYPE_UNSUPPORTED; + } - if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) + if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) { + kfree(buf); return ODD_MECH_TYPE_SLOT; - else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1) + } else if (desc->mech_type == 1 && desc->load == 0 && + desc->eject == 1) { + kfree(buf); return ODD_MECH_TYPE_DRAWER; - else + } else { + kfree(buf); return ODD_MECH_TYPE_UNSUPPORTED; + } } /* Test if ODD is zero power ready by sense code */ -- cgit v1.2.3 From c6d02b1ea4d7c58e3f631d4bc7b5287b3f72ca53 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Mar 2019 13:02:19 +0900 Subject: kbuild: skip parsing pre sub-make code for recursion [ Upstream commit 221cc2d27ddc49b3e06d4637db02bf78e70c573c ] When Make recurses to the top Makefile with sub-make-done unset, the code block surrounded by 'ifneq ($(sub-make-done),1) ... endif' is parsed multiple times. This happens for in-tree building of include/config/auto.conf, *-pkg, etc. with GNU Make 4.x. This is a slight regression by commit 688931a5ad4e ("kbuild: skip sub-make for in-tree build with GNU Make 4.x") in terms of performance since that code block contains one $(shell ...) invocation. Fix it by exporting the variable irrespective of sub-make being run. I renamed it because GNU Make cannot properly export variables containing hyphens. This is probably a bug of GNU Make, and the issue in Kbuild had already been reported by commit 2bfbe7881ee0 ("kbuild: Do not use hyphen in exported variable name"). Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin (Microsoft) --- Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index c3daaefa979c..12870303a029 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ _all: # descending is started. They are now explicitly listed as the # prepare rule. -ifneq ($(sub-make-done),1) +ifneq ($(sub_make_done),1) # Do not use make's built-in rules and variables # (this increases performance and avoids hard-to-debug behaviour) @@ -159,6 +159,8 @@ need-sub-make := 1 $(lastword $(MAKEFILE_LIST)): ; endif +export sub_make_done := 1 + ifeq ($(need-sub-make),1) PHONY += $(MAKECMDGOALS) sub-make @@ -168,12 +170,12 @@ $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make # Invoke a second make in the output directory, passing relevant variables sub-make: - $(Q)$(MAKE) sub-make-done=1 \ + $(Q)$(MAKE) \ $(if $(KBUILD_OUTPUT),-C $(KBUILD_OUTPUT) KBUILD_SRC=$(CURDIR)) \ -f $(CURDIR)/Makefile $(filter-out _all sub-make,$(MAKECMDGOALS)) endif # need-sub-make -endif # sub-make-done +endif # sub_make_done # We process the rest of the Makefile if this is the final invocation of make ifeq ($(need-sub-make),) -- cgit v1.2.3 From fb853a4a77483848e42fafb1270fd5cff0a97338 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Mar 2019 22:48:02 +0000 Subject: afs: Fix StoreData op marshalling [ Upstream commit 8c7ae38d1ce12a0eaeba655df8562552b3596c7f ] The marshalling of AFS.StoreData, AFS.StoreData64 and YFS.StoreData64 calls generated by ->setattr() ops for the purpose of expanding a file is incorrect due to older documentation incorrectly describing the way the RPC 'FileLength' parameter is meant to work. The older documentation says that this is the length the file is meant to end up at the end of the operation; however, it was never implemented this way in any of the servers, but rather the file is truncated down to this before the write operation is effected, and never expanded to it (and, indeed, it was renamed to 'TruncPos' in 2014). Fix this by setting the position parameter to the new file length and doing a zero-lengh write there. The bug causes Xwayland to SIGBUS due to unexpected non-expansion of a file it then mmaps. This can be tested by giving the following test program a filename in an AFS directory: #include #include #include #include #include int main(int argc, char *argv[]) { char *p; int fd; if (argc != 2) { fprintf(stderr, "Format: test-trunc-mmap \n"); exit(2); } fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC); if (fd < 0) { perror(argv[1]); exit(1); } if (ftruncate(fd, 0x140008) == -1) { perror("ftruncate"); exit(1); } p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (p == MAP_FAILED) { perror("mmap"); exit(1); } p[0] = 'a'; if (munmap(p, 4096) < 0) { perror("munmap"); exit(1); } if (close(fd) < 0) { perror("close"); exit(1); } exit(0); } Fixes: 31143d5d515e ("AFS: implement basic file write support") Reported-by: Jonathan Billings Tested-by: Jonathan Billings Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin (Microsoft) --- fs/afs/fsclient.c | 6 +++--- fs/afs/yfsclient.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index ca08c83168f5..0b37867b5c20 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1515,8 +1515,8 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = 0; /* position of start of write */ - *bp++ = 0; + *bp++ = htonl(attr->ia_size >> 32); /* position of start of write */ + *bp++ = htonl((u32) attr->ia_size); *bp++ = 0; /* size of write */ *bp++ = 0; *bp++ = htonl(attr->ia_size >> 32); /* new file length */ @@ -1564,7 +1564,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = 0; /* position of start of write */ + *bp++ = htonl(attr->ia_size); /* position of start of write */ *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 5aa57929e8c2..6e97a42d24d1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -1514,7 +1514,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) bp = xdr_encode_u32(bp, 0); /* RPC flags */ bp = xdr_encode_YFSFid(bp, &vnode->fid); bp = xdr_encode_YFS_StoreStatus(bp, attr); - bp = xdr_encode_u64(bp, 0); /* position of start of write */ + bp = xdr_encode_u64(bp, attr->ia_size); /* position of start of write */ bp = xdr_encode_u64(bp, 0); /* size of write */ bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */ yfs_check_req(call, bp); -- cgit v1.2.3 From 0e48494c0edf1d7d90ff32c8a116664c3c0e1eb0 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 25 Mar 2019 23:32:08 -0700 Subject: gpio: of: Check propname before applying "cs-gpios" quirks [ Upstream commit e5545c94e43b8f6599ffc01df8d1aedf18ee912a ] SPI GPIO device has more than just "cs-gpio" property in its node and would request those GPIOs as a part of its initialization. To avoid applying CS-specific quirk to all of them add a check to make sure that propname is "cs-gpios". Signed-off-by: Andrey Smirnov Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Chris Healy Cc: linux-gpio@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin (Microsoft) --- drivers/gpio/gpiolib-of.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index a1dd2f1c0d02..9470563f2506 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -119,7 +119,8 @@ static void of_gpio_flags_quirks(struct device_node *np, * to determine if the flags should have inverted semantics. */ if (IS_ENABLED(CONFIG_SPI_MASTER) && - of_property_read_bool(np, "cs-gpios")) { + of_property_read_bool(np, "cs-gpios") && + !strcmp(propname, "cs-gpios")) { struct device_node *child; u32 cs; int ret; -- cgit v1.2.3 From e8b8dde69f819253e7a6cd83a834618f166a52b7 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 25 Mar 2019 23:32:09 -0700 Subject: gpio: of: Check for "spi-cs-high" in child instead of parent node [ Upstream commit 7ce40277bf848391705011ba37eac2e377cbd9e6 ] "spi-cs-high" is going to be specified in child node of an SPI controller's representing attached SPI device, so change the code to look for it there, instead of checking parent node. Signed-off-by: Andrey Smirnov Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Chris Healy Cc: linux-gpio@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin (Microsoft) --- drivers/gpio/gpiolib-of.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 9470563f2506..f1ae28289a67 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -142,16 +142,16 @@ static void of_gpio_flags_quirks(struct device_node *np, * conflict and the "spi-cs-high" flag will * take precedence. */ - if (of_property_read_bool(np, "spi-cs-high")) { + if (of_property_read_bool(child, "spi-cs-high")) { if (*flags & OF_GPIO_ACTIVE_LOW) { pr_warn("%s GPIO handle specifies active low - ignored\n", - of_node_full_name(np)); + of_node_full_name(child)); *flags &= ~OF_GPIO_ACTIVE_LOW; } } else { if (!(*flags & OF_GPIO_ACTIVE_LOW)) pr_info("%s enforce active low on chipselect handle\n", - of_node_full_name(np)); + of_node_full_name(child)); *flags |= OF_GPIO_ACTIVE_LOW; } break; -- cgit v1.2.3 From 5369433777ecc0330e60eb5b7f0b2d92361d63b0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 7 Mar 2019 15:27:43 -0800 Subject: KVM: nVMX: Do not inherit quadrant and invalid for the root shadow EPT [ Upstream commit 552c69b1dc714854a5f4e27d37a43c6d797adf7d ] Explicitly zero out quadrant and invalid instead of inheriting them from the root_mmu. Functionally, this patch is a nop as we (should) never set quadrant for a direct mapped (EPT) root_mmu and nested EPT is only allowed if EPT is used for L1, and the root_mmu will never be invalid at this point. Explicitly setting flags sets the stage for repurposing the legacy paging bits in role, e.g. nxe, cr0_wp, and sm{a,e}p_andnot_wp, at which point 'smm' would be the only flag to be inherited from root_mmu. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- arch/x86/kvm/mmu.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9ab33cab9486..acab95dcffb6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4915,11 +4915,15 @@ static union kvm_mmu_role kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly) { - union kvm_mmu_role role; + union kvm_mmu_role role = {0}; + union kvm_mmu_page_role root_base = vcpu->arch.root_mmu.mmu_role.base; - /* Base role is inherited from root_mmu */ - role.base.word = vcpu->arch.root_mmu.mmu_role.base.word; - role.ext = kvm_calc_mmu_role_ext(vcpu); + /* Legacy paging and SMM flags are inherited from root_mmu */ + role.base.smm = root_base.smm; + role.base.nxe = root_base.nxe; + role.base.cr0_wp = root_base.cr0_wp; + role.base.smep_andnot_wp = root_base.smep_andnot_wp; + role.base.smap_andnot_wp = root_base.smap_andnot_wp; role.base.level = PT64_ROOT_4LEVEL; role.base.direct = false; @@ -4927,6 +4931,7 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, role.base.guest_mode = true; role.base.access = ACC_ALL; + role.ext = kvm_calc_mmu_role_ext(vcpu); role.ext.execonly = execonly; return role; -- cgit v1.2.3 From 3d714ded80f65bcf68667e272495536e85bc3daf Mon Sep 17 00:00:00 2001 From: "Singh, Brijesh" Date: Fri, 15 Feb 2019 17:24:12 +0000 Subject: KVM: SVM: Workaround errata#1096 (insn_len maybe zero on SMAP violation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 05d5a48635259e621ea26d01e8316c6feeb34190 ] Errata#1096: On a nested data page fault when CR.SMAP=1 and the guest data read generates a SMAP violation, GuestInstrBytes field of the VMCB on a VMEXIT will incorrectly return 0h instead the correct guest instruction bytes . Recommend Workaround: To determine what instruction the guest was executing the hypervisor will have to decode the instruction at the instruction pointer. The recommended workaround can not be implemented for the SEV guest because guest memory is encrypted with the guest specific key, and instruction decoder will not be able to decode the instruction bytes. If we hit this errata in the SEV guest then log the message and request a guest shutdown. Reported-by: Venkatesh Srinivas Cc: Jim Mattson Cc: Tom Lendacky Cc: Borislav Petkov Cc: Joerg Roedel Cc: "Radim Krčmář" Cc: Paolo Bonzini Signed-off-by: Brijesh Singh Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/mmu.c | 8 +++++--- arch/x86/kvm/svm.c | 32 ++++++++++++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.c | 6 ++++++ 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 71d763ad2637..9f2d890733a9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1198,6 +1198,8 @@ struct kvm_x86_ops { int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); + + bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index acab95dcffb6..77dbb57412cc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5395,10 +5395,12 @@ emulate: * This can happen if a guest gets a page-fault on data access but the HW * table walker is not able to read the instruction page (e.g instruction * page is not present in memory). In those cases we simply restart the - * guest. + * guest, with the exception of AMD Erratum 1096 which is unrecoverable. */ - if (unlikely(insn && !insn_len)) - return 1; + if (unlikely(insn && !insn_len)) { + if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu)) + return 1; + } er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 516c1de03d47..e544cec812f9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7114,6 +7114,36 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, return -ENODEV; } +static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + bool is_user, smap; + + is_user = svm_get_cpl(vcpu) == 3; + smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); + + /* + * Detect and workaround Errata 1096 Fam_17h_00_0Fh + * + * In non SEV guest, hypervisor will be able to read the guest + * memory to decode the instruction pointer when insn_len is zero + * so we return true to indicate that decoding is possible. + * + * But in the SEV guest, the guest memory is encrypted with the + * guest specific key and hypervisor will not be able to decode the + * instruction pointer so we will not able to workaround it. Lets + * print the error and request to kill the guest. + */ + if (is_user && smap) { + if (!sev_guest(vcpu->kvm)) + return true; + + pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n"); + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } + + return false; +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -7247,6 +7277,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .nested_enable_evmcs = nested_enable_evmcs, .nested_get_evmcs_version = nested_get_evmcs_version, + + .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 34499081022c..e7fe8c692362 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7526,6 +7526,11 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) return 0; } +static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + return 0; +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; @@ -7828,6 +7833,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .set_nested_state = NULL, .get_vmcs12_pages = NULL, .nested_enable_evmcs = NULL, + .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, }; static void vmx_cleanup_l1d_flush(void) -- cgit v1.2.3 From e10edda577cc4db621e0eb713c56e8f749c72b69 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Fri, 8 Mar 2019 15:57:20 +0800 Subject: kvm/x86: Move MSR_IA32_ARCH_CAPABILITIES to array emulated_msrs [ Upstream commit 2bdb76c015df7125783d8394d6339d181cb5bc30 ] Since MSR_IA32_ARCH_CAPABILITIES is emualted unconditionally even if host doesn't suppot it. We should move it to array emulated_msrs from arry msrs_to_save, to report to userspace that guest support this msr. Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2db58067bb59..8c9fb6453b2f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1127,7 +1127,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_SPEC_CTRL, MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, @@ -1160,6 +1160,7 @@ static u32 emulated_msrs[] = { MSR_IA32_TSC_ADJUST, MSR_IA32_TSCDEADLINE, + MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, -- cgit v1.2.3 From 02a7fc6c77d5f06b351f1aa8f72f2e42a0d4049a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 13 Mar 2019 18:13:42 +0100 Subject: x86/kvm/hyper-v: avoid spurious pending stimer on vCPU init [ Upstream commit 013cc6ebbf41496ce4badedd71ea6d4a6d198c14 ] When userspace initializes guest vCPUs it may want to zero all supported MSRs including Hyper-V related ones including HV_X64_MSR_STIMERn_CONFIG/ HV_X64_MSR_STIMERn_COUNT. With commit f3b138c5d89a ("kvm/x86: Update SynIC timers on guest entry only") we began doing stimer_mark_pending() unconditionally on every config change. The issue I'm observing manifests itself as following: - Qemu writes 0 to STIMERn_{CONFIG,COUNT} MSRs and marks all stimers as pending in stimer_pending_bitmap, arms KVM_REQ_HV_STIMER; - kvm_hv_has_stimer_pending() starts returning true; - kvm_vcpu_has_events() starts returning true; - kvm_arch_vcpu_runnable() starts returning true; - when kvm_arch_vcpu_ioctl_run() gets into (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) case: - kvm_vcpu_block() gets in 'kvm_vcpu_check_block(vcpu) < 0' and returns immediately, avoiding normal wait path; - -EAGAIN is returned from kvm_arch_vcpu_ioctl_run() immediately forcing userspace to retry. So instead of normal wait path we get a busy loop on all secondary vCPUs before they get INIT signal. This seems to be undesirable, especially given that this happens even when Hyper-V extensions are not used. Generally, it seems to be pointless to mark an stimer as pending in stimer_pending_bitmap and arm KVM_REQ_HV_STIMER as the only thing kvm_hv_process_stimers() will do is clear the corresponding bit. We may just not mark disabled timers as pending instead. Fixes: f3b138c5d89a ("kvm/x86: Update SynIC timers on guest entry only") Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- arch/x86/kvm/hyperv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 89d20ed1d2e8..371c669696d7 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -526,7 +526,9 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, new_config.enable = 0; stimer->config.as_uint64 = new_config.as_uint64; - stimer_mark_pending(stimer, false); + if (stimer->config.enable) + stimer_mark_pending(stimer, false); + return 0; } @@ -542,7 +544,10 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, stimer->config.enable = 0; else if (stimer->config.auto_enable) stimer->config.enable = 1; - stimer_mark_pending(stimer, false); + + if (stimer->config.enable) + stimer_mark_pending(stimer, false); + return 0; } -- cgit v1.2.3 From e478d1c9b5853998e3bab7668c457e9890a7967d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Mar 2019 13:19:26 -0700 Subject: KVM: selftests: assert on exit reason in CR4/cpuid sync test [ Upstream commit 8df98ae0ab2ead9a02228756eec26f8d7b17f499 ] ...so that the test doesn't end up in an infinite loop if it fails for whatever reason, e.g. SHUTDOWN due to gcc inserting stack canary code into ucall() and attempting to derefence a null segment. Fixes: ca359066889f7 ("kvm: selftests: add cr4_cpuid_sync_test") Cc: Wei Huang Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- .../selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 35 ++++++++++++---------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index d503a51fad30..7c2c4d4055a8 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -87,22 +87,25 @@ int main(int argc, char *argv[]) while (1) { rc = _vcpu_run(vm, VCPU_ID); - if (run->exit_reason == KVM_EXIT_IO) { - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_SYNC: - /* emulate hypervisor clearing CR4.OSXSAVE */ - vcpu_sregs_get(vm, VCPU_ID, &sregs); - sregs.cr4 &= ~X86_CR4_OSXSAVE; - vcpu_sregs_set(vm, VCPU_ID, &sregs); - break; - case UCALL_ABORT: - TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); - break; - case UCALL_DONE: - goto done; - default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); - } + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + /* emulate hypervisor clearing CR4.OSXSAVE */ + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.cr4 &= ~X86_CR4_OSXSAVE; + vcpu_sregs_set(vm, VCPU_ID, &sregs); + break; + case UCALL_ABORT: + TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } } -- cgit v1.2.3 From 77507936fea3c1533432f33023d3f9274c445868 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Mar 2019 16:19:30 -0700 Subject: KVM: selftests: explicitly disable PIE for tests [ Upstream commit 0a3f29b5a77d6c27796d7a7adabafd199dc066d5 ] KVM selftests embed the guest "image" as a function in the test itself and extract the guest code at runtime by manually parsing the elf headers. The parsing is very simple and doesn't supporting fancy things like position independent executables. Recent versions of gcc enable pie by default, which results in triple fault shutdowns in the guest due to the virtual address in the headers not matching up with the virtual address retrieved from the function pointer. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- tools/testing/selftests/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index f9a0e9938480..212b8f0032ae 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -28,7 +28,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$( Date: Wed, 13 Mar 2019 12:43:14 -0700 Subject: KVM: selftests: disable stack protector for all KVM tests [ Upstream commit ffac839d040619847217647434b2b02469926871 ] Since 4.8.3, gcc has enabled -fstack-protector by default. This is problematic for the KVM selftests as they do not configure fs or gs segments (the stack canary is pulled from fs:0x28). With the default behavior, gcc will insert a stack canary on any function that creates buffers of 8 bytes or more. As a result, ucall() will hit a triple fault shutdown due to reading a bad fs segment when inserting its stack canary, i.e. every test fails with an unexpected SHUTDOWN. Fixes: 14c47b7530e2d ("kvm: selftests: introduce ucall") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- tools/testing/selftests/kvm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 212b8f0032ae..cb4a992d6dd3 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -28,8 +28,8 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -CFLAGS += -O2 -g -std=gnu99 -no-pie -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$( Date: Wed, 13 Mar 2019 16:49:31 -0700 Subject: KVM: selftests: complete IO before migrating guest state [ Upstream commit 0f73bbc851ed32d22bbd86be09e0365c460bcd2e ] Documentation/virtual/kvm/api.txt states: NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace can re-enter the guest with an unmasked signal pending to complete pending operations. Because guest state may be inconsistent, starting state migration after an IO exit without first completing IO may result in test failures, e.g. a proposed change to KVM's handling of %rip in its fast PIO handling[1] will cause the new VM, i.e. the post-migration VM, to have its %rip set to the IN instruction that triggered KVM_EXIT_IO, leading to a test assertion due to a stage mismatch. For simplicitly, require KVM_CAP_IMMEDIATE_EXIT to complete IO and skip the test if it's not available. The addition of KVM_CAP_IMMEDIATE_EXIT predates the state selftest by more than a year. [1] https://patchwork.kernel.org/patch/10848545/ Fixes: fa3899add1056 ("kvm: selftests: add basic test for state save and restore") Reported-by: Jim Mattson Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 16 ++++++++++++++++ tools/testing/selftests/kvm/x86_64/state_test.c | 18 ++++++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index a84785b02557..07b71ad9734a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -102,6 +102,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b52cfdefecbf..efa0aad8b3c6 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1121,6 +1121,22 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) return rc; } +void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + vcpu->state->immediate_exit = 1; + ret = ioctl(vcpu->fd, KVM_RUN, NULL); + vcpu->state->immediate_exit = 0; + + TEST_ASSERT(ret == -1 && errno == EINTR, + "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", + ret, errno); +} + /* * VM VCPU Set MP State * diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4b3f556265f1..30f75856cf39 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -134,6 +134,11 @@ int main(int argc, char *argv[]) struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { + fprintf(stderr, "immediate_exit not available, skipping test\n"); + exit(KSFT_SKIP); + } + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -156,8 +161,6 @@ int main(int argc, char *argv[]) stage, run->exit_reason, exit_reason_str(run->exit_reason)); - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, ®s1); switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], @@ -176,6 +179,17 @@ int main(int argc, char *argv[]) uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", stage, (ulong)uc.args[1]); + /* + * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees + * guest state is consistent only after userspace re-enters the + * kernel with KVM_RUN. Complete IO prior to migrating state + * to a new VM. + */ + vcpu_run_complete_io(vm, VCPU_ID); + + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, ®s1); + state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); -- cgit v1.2.3 From a72f60dbb55745c943ec304dba7911ed27dbc172 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Mar 2019 14:13:47 +0100 Subject: gpio: of: Fix of_gpiochip_add() error path [ Upstream commit f7299d441a4da8a5088e651ea55023525a793a13 ] If the call to of_gpiochip_scan_gpios() in of_gpiochip_add() fails, no error handling is performed. This lead to the need of callers to call of_gpiochip_remove() on failure, which causes "BAD of_node_put() on ..." if the failure happened before the call to of_node_get(). Fix this by adding proper error handling. Note that calling gpiochip_remove_pin_ranges() multiple times causes no harm: subsequent calls are a no-op. Fixes: dfbd379ba9b7431e ("gpio: of: Return error if gpio hog configuration failed") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mukesh Ojha Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin (Microsoft) --- drivers/gpio/gpiolib-of.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index f1ae28289a67..13a402ede07a 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -712,7 +712,13 @@ int of_gpiochip_add(struct gpio_chip *chip) of_node_get(chip->of_node); - return of_gpiochip_scan_gpios(chip); + status = of_gpiochip_scan_gpios(chip); + if (status) { + of_node_put(chip->of_node); + gpiochip_remove_pin_ranges(chip); + } + + return status; } void of_gpiochip_remove(struct gpio_chip *chip) -- cgit v1.2.3 From 2e7d5b2e1fa0060de5ed2b1c2432b9d877296e1e Mon Sep 17 00:00:00 2001 From: Martin George Date: Wed, 27 Mar 2019 09:52:56 +0100 Subject: nvme-multipath: relax ANA state check [ Upstream commit cc2278c413c3a06a93c23ee8722e4dd3d621de12 ] When undergoing state transitions I/O might be requeued, hence we should always call nvme_mpath_set_live() to schedule requeue_work whenever the nvme device is live, independent on whether the old state was live or not. Signed-off-by: Martin George Signed-off-by: Gargi Srinivas Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin (Microsoft) --- drivers/nvme/host/multipath.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index b9fff3b8ed1b..23da7beadd62 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -366,15 +366,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state) static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, struct nvme_ns *ns) { - enum nvme_ana_state old; - mutex_lock(&ns->head->lock); - old = ns->ana_state; ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); - if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old)) + if (nvme_state_is_live(ns->ana_state)) nvme_mpath_set_live(ns); mutex_unlock(&ns->head->lock); } -- cgit v1.2.3 From 163719bdcd9c1b84c9840e72e9a4f43ae1936c2c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Mar 2019 17:07:22 +0800 Subject: nvmet: fix building bvec from sg list [ Upstream commit 02db99548d3608a625cf481cff2bb7b626829b3f ] There are two mistakes for building bvec from sg list for file backed ns: - use request data length to compute number of io vector, this way doesn't consider sg->offset, and the result may be smaller than required io vectors - bvec->bv_len isn't capped by sg->length This patch fixes this issue by building bvec from sg directly, given the whole IO stack is ready for multi-page bvec. Reported-by: Yi Zhang Fixes: 3a85a5de29ea ("nvme-loop: add a NVMe loopback host driver") Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin (Microsoft) --- drivers/nvme/target/io-cmd-file.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 517522305e5c..9a0fa3943ca7 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -75,11 +75,11 @@ err: return ret; } -static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) +static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg) { - bv->bv_page = sg_page_iter_page(iter); - bv->bv_offset = iter->sg->offset; - bv->bv_len = PAGE_SIZE - iter->sg->offset; + bv->bv_page = sg_page(sg); + bv->bv_offset = sg->offset; + bv->bv_len = sg->length; } static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, @@ -128,14 +128,14 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) { - ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); - struct sg_page_iter sg_pg_iter; + ssize_t nr_bvec = req->sg_cnt; unsigned long bv_cnt = 0; bool is_sync = false; size_t len = 0, total_len = 0; ssize_t ret = 0; loff_t pos; - + int i; + struct scatterlist *sg; if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC) is_sync = true; @@ -147,8 +147,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) } memset(&req->f.iocb, 0, sizeof(struct kiocb)); - for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { - nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); + for_each_sg(req->sg, sg, req->sg_cnt, i) { + nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg); len += req->f.bvec[bv_cnt].bv_len; total_len += req->f.bvec[bv_cnt].bv_len; bv_cnt++; @@ -225,7 +225,7 @@ static void nvmet_file_submit_buffered_io(struct nvmet_req *req) static void nvmet_file_execute_rw(struct nvmet_req *req) { - ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); + ssize_t nr_bvec = req->sg_cnt; if (!req->sg_cnt || !nr_bvec) { nvmet_req_complete(req, 0); -- cgit v1.2.3 From b8bc52cd62e883929d1ad465ad9ad084fb9bb628 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 Mar 2019 12:54:03 +0200 Subject: nvmet: fix error flow during ns enable [ Upstream commit a536b49785759bf99465fdf6e248d34322123fcd ] In case we fail to enable p2pmem on the current namespace, disable the backing store device before exiting. Cc: Stephen Bates Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin (Microsoft) --- drivers/nvme/target/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 02c63c463222..7bad21a2283f 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -517,7 +517,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) ret = nvmet_p2pmem_ns_enable(ns); if (ret) - goto out_unlock; + goto out_dev_disable; list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) nvmet_p2pmem_ns_add_p2p(ctrl, ns); @@ -558,7 +558,7 @@ out_unlock: out_dev_put: list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); - +out_dev_disable: nvmet_ns_dev_disable(ns); goto out_unlock; } -- cgit v1.2.3 From 441d23c8764449c3219e7d97eb3b5a98be7c534f Mon Sep 17 00:00:00 2001 From: Solomon Tan Date: Fri, 22 Mar 2019 13:22:55 +0800 Subject: perf cs-etm: Add missing case value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c8fa7a807f3c5f946bd92076fbaf7826edb650dc ] The following error was thrown when compiling `tools/perf` using OpenCSD v0.11.1. This patch fixes said error. CC util/intel-pt-decoder/intel-pt-log.o CC util/cs-etm-decoder/cs-etm-decoder.o util/cs-etm-decoder/cs-etm-decoder.c: In function ‘cs_etm_decoder__buffer_range’: util/cs-etm-decoder/cs-etm-decoder.c:370:2: error: enumeration value ‘OCSD_INSTR_WFI_WFE’ not handled in switch [-Werror=switch-enum] switch (elem->last_i_type) { ^~~~~~ CC util/intel-pt-decoder/intel-pt-decoder.o cc1: all warnings being treated as errors Because `OCSD_INSTR_WFI_WFE` case was added only in v0.11.0, the minimum required OpenCSD library version for this patch is no longer v0.10.0. Signed-off-by: Solomon Tan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Walker Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190322052255.GA4809@w-OptiPlex-7050 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin (Microsoft) --- tools/build/feature/test-libopencsd.c | 4 ++-- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index d68eb4fb40cc..2b0e02c38870 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0)) +#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 0.10.0 is required" +#error "OpenCSD >= 0.11.0 is required" #endif int main(void) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 8c155575c6c5..2a8bf6b45a30 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -374,6 +374,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, break; case OCSD_INSTR_ISB: case OCSD_INSTR_DSB_DMB: + case OCSD_INSTR_WFI_WFE: case OCSD_INSTR_OTHER: default: packet->last_instr_taken_branch = false; -- cgit v1.2.3 From 44fe72aeaae884358549acd27a434825f1cb3ca3 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 28 Feb 2019 17:20:03 +0800 Subject: perf machine: Update kernel map address and re-order properly [ Upstream commit 977c7a6d1e263ff1d755f28595b99e4bc0c48a9f ] Since commit 1fb87b8e9599 ("perf machine: Don't search for active kernel start in __machine__create_kernel_maps"), the __machine__create_kernel_maps() just create a map what start and end are both zero. Though the address will be updated later, the order of map in the rbtree may be incorrect. The commit ee05d21791db ("perf machine: Set main kernel end address properly") fixed the logic in machine__create_kernel_maps(), but it's still wrong in function machine__process_kernel_mmap_event(). To reproduce this issue, we need an environment which the module address is before the kernel text segment. I tested it on an aarch64 machine with kernel 4.19.25: [root@localhost hulk]# grep _stext /proc/kallsyms ffff000008081000 T _stext [root@localhost hulk]# grep _etext /proc/kallsyms ffff000009780000 R _etext [root@localhost hulk]# tail /proc/modules hisi_sas_v2_hw 77824 0 - Live 0xffff00000191d000 nvme_core 126976 7 nvme, Live 0xffff0000018b6000 mdio 20480 1 ixgbe, Live 0xffff0000018ab000 hisi_sas_main 106496 1 hisi_sas_v2_hw, Live 0xffff000001861000 hns_mdio 20480 2 - Live 0xffff000001822000 hnae 28672 3 hns_dsaf,hns_enet_drv, Live 0xffff000001815000 dm_mirror 40960 0 - Live 0xffff000001804000 dm_region_hash 32768 1 dm_mirror, Live 0xffff0000017f5000 dm_log 32768 2 dm_mirror,dm_region_hash, Live 0xffff0000017e7000 dm_mod 315392 17 dm_mirror,dm_log, Live 0xffff000001780000 [root@localhost hulk]# Before fix: [root@localhost bin]# perf record sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data (9 samples) ] [root@localhost bin]# perf buildid-list -i perf.data 4c4e46c971ca935f781e603a09b52a92e8bdfee8 [vdso] [root@localhost bin]# perf buildid-list -i perf.data -H 0000000000000000000000000000000000000000 /proc/kcore [root@localhost bin]# After fix: [root@localhost tools]# ./perf/perf record sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data (9 samples) ] [root@localhost tools]# ./perf/perf buildid-list -i perf.data 28a6c690262896dbd1b5e1011ed81623e6db0610 [kernel.kallsyms] 106c14ce6e4acea3453e484dc604d66666f08a2f [vdso] [root@localhost tools]# ./perf/perf buildid-list -i perf.data -H 28a6c690262896dbd1b5e1011ed81623e6db0610 /proc/kcore Signed-off-by: Wei Li Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: David Ahern Cc: Hanjun Guo Cc: Kim Phillips Cc: Li Bin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190228092003.34071-1-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin (Microsoft) --- tools/perf/util/machine.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 143f7057d581..596db1daee35 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1358,6 +1358,20 @@ static void machine__set_kernel_mmap(struct machine *machine, machine->vmlinux_map->end = ~0ULL; } +static void machine__update_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + struct map *map = machine__kernel_map(machine); + + map__get(map); + map_groups__remove(&machine->kmaps, map); + + machine__set_kernel_mmap(machine, start, end); + + map_groups__insert(&machine->kmaps, map); + map__put(map); +} + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); @@ -1390,17 +1404,11 @@ int machine__create_kernel_maps(struct machine *machine) goto out_put; } - /* we have a real start address now, so re-order the kmaps */ - map = machine__kernel_map(machine); - - map__get(map); - map_groups__remove(&machine->kmaps, map); - - /* assume it's the last in the kmaps */ - machine__set_kernel_mmap(machine, addr, ~0ULL); - - map_groups__insert(&machine->kmaps, map); - map__put(map); + /* + * we have a real start address now, so re-order the kmaps + * assume it's the last in the kmaps + */ + machine__update_kernel_mmap(machine, addr, ~0ULL); } if (machine__create_extra_kernel_maps(machine, kernel)) @@ -1536,7 +1544,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__set_kernel_mmap(machine, event->mmap.start, + machine__update_kernel_mmap(machine, event->mmap.start, event->mmap.start + event->mmap.len); /* -- cgit v1.2.3 From 8bd7d23a4a76241c748aa5e2a2359b44839acc80 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 25 Mar 2019 15:16:47 +0000 Subject: kconfig/[mn]conf: handle backspace (^H) key [ Upstream commit 9c38f1f044080392603c497ecca4d7d09876ff99 ] Backspace is not working on some terminal emulators which do not send the key code defined by terminfo. Terminals either send '^H' (8) or '^?' (127). But currently only '^?' is handled. Let's also handle '^H' for those terminals. Signed-off-by: Changbin Du Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin (Microsoft) --- scripts/kconfig/lxdialog/inputbox.c | 3 ++- scripts/kconfig/nconf.c | 2 +- scripts/kconfig/nconf.gui.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c index 611945611bf8..1dcfb288ee63 100644 --- a/scripts/kconfig/lxdialog/inputbox.c +++ b/scripts/kconfig/lxdialog/inputbox.c @@ -113,7 +113,8 @@ do_resize: case KEY_DOWN: break; case KEY_BACKSPACE: - case 127: + case 8: /* ^H */ + case 127: /* ^? */ if (pos) { wattrset(dialog, dlg.inputbox.atr); if (input_x == 0) { diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index a4670f4e825a..ac92c0ded6c5 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -1048,7 +1048,7 @@ static int do_match(int key, struct match_state *state, int *ans) state->match_direction = FIND_NEXT_MATCH_UP; *ans = get_mext_match(state->pattern, state->match_direction); - } else if (key == KEY_BACKSPACE || key == 127) { + } else if (key == KEY_BACKSPACE || key == 8 || key == 127) { state->pattern[strlen(state->pattern)-1] = '\0'; adj_match_dir(&state->match_direction); } else diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 7be620a1fcdb..77f525a8617c 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -439,7 +439,8 @@ int dialog_inputbox(WINDOW *main_window, case KEY_F(F_EXIT): case KEY_F(F_BACK): break; - case 127: + case 8: /* ^H */ + case 127: /* ^? */ case KEY_BACKSPACE: if (cursor_position > 0) { memmove(&result[cursor_position-1], -- cgit v1.2.3 From 514860ee66333ed2baa99aa2c43ec83320105ed3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 Mar 2019 11:44:59 +0100 Subject: iommu/amd: Reserve exclusion range in iova-domain [ Upstream commit 8aafaaf2212192012f5bae305bb31cdf7681d777 ] If a device has an exclusion range specified in the IVRS table, this region needs to be reserved in the iova-domain of that device. This hasn't happened until now and can cause data corruption on data transfered with these devices. Treat exclusion ranges as reserved regions in the iommu-core to fix the problem. Fixes: be2a022c0dd0 ('x86, AMD IOMMU: add functions to parse IOMMU memory mapping requirements for devices') Signed-off-by: Joerg Roedel Reviewed-by: Gary R Hook Signed-off-by: Sasha Levin (Microsoft) --- drivers/iommu/amd_iommu.c | 9 ++++++--- drivers/iommu/amd_iommu_init.c | 7 ++++--- drivers/iommu/amd_iommu_types.h | 2 ++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e628ef23418f..55b3e4b9d5dc 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3166,21 +3166,24 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; list_for_each_entry(entry, &amd_iommu_unity_map, list) { + int type, prot = 0; size_t length; - int prot = 0; if (devid < entry->devid_start || devid > entry->devid_end) continue; + type = IOMMU_RESV_DIRECT; length = entry->address_end - entry->address_start; if (entry->prot & IOMMU_PROT_IR) prot |= IOMMU_READ; if (entry->prot & IOMMU_PROT_IW) prot |= IOMMU_WRITE; + if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE) + /* Exclusion range */ + type = IOMMU_RESV_RESERVED; region = iommu_alloc_resv_region(entry->address_start, - length, prot, - IOMMU_RESV_DIRECT); + length, prot, type); if (!region) { pr_err("Out of memory allocating dm-regions for %s\n", dev_name(dev)); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 66123b911ec8..84fa5b22371e 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2013,6 +2013,9 @@ static int __init init_unity_map_range(struct ivmd_header *m) if (e == NULL) return -ENOMEM; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + switch (m->type) { default: kfree(e); @@ -2059,9 +2062,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) while (p < end) { m = (struct ivmd_header *)p; - if (m->flags & IVMD_FLAG_EXCL_RANGE) - init_exclusion_range(m); - else if (m->flags & IVMD_FLAG_UNITY_MAP) + if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) init_unity_map_range(m); p += m->length; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index eae0741f72dc..87965e4d9647 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -374,6 +374,8 @@ #define IOMMU_PROT_IR 0x01 #define IOMMU_PROT_IW 0x02 +#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE (1 << 2) + /* IOMMU capabilities */ #define IOMMU_CAP_IOTLB 24 #define IOMMU_CAP_NPCACHE 26 -- cgit v1.2.3 From 87eaf3d8ac51577c6123b43245e547c94c54dc06 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 28 Mar 2019 20:43:15 -0700 Subject: kasan: fix variable 'tag' set but not used warning [ Upstream commit c412a769d2452161e97f163c4c4f31efc6626f06 ] set_tag() compiles away when CONFIG_KASAN_SW_TAGS=n, so make arch_kasan_set_tag() a static inline function to fix warnings below. mm/kasan/common.c: In function '__kasan_kmalloc': mm/kasan/common.c:475:5: warning: variable 'tag' set but not used [-Wunused-but-set-variable] u8 tag; ^~~ Link: http://lkml.kernel.org/r/20190307185244.54648-1-cai@lca.pw Signed-off-by: Qian Cai Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin (Microsoft) --- mm/kasan/kasan.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index ea51b2d898ec..c980ce43e3ba 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -164,7 +164,10 @@ static inline u8 random_tag(void) #endif #ifndef arch_kasan_set_tag -#define arch_kasan_set_tag(addr, tag) ((void *)(addr)) +static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) +{ + return addr; +} #endif #ifndef arch_kasan_reset_tag #define arch_kasan_reset_tag(addr) ((void *)(addr)) -- cgit v1.2.3 From 107cfb99c81f37a8df6dc4726495db7b5e22967f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 28 Mar 2019 20:44:13 -0700 Subject: ptrace: take into account saved_sigmask in PTRACE{GET,SET}SIGMASK [ Upstream commit fcfc2aa0185f4a731d05a21e9f359968fdfd02e7 ] There are a few system calls (pselect, ppoll, etc) which replace a task sigmask while they are running in a kernel-space When a task calls one of these syscalls, the kernel saves a current sigmask in task->saved_sigmask and sets a syscall sigmask. On syscall-exit-stop, ptrace traps a task before restoring the saved_sigmask, so PTRACE_GETSIGMASK returns the syscall sigmask and PTRACE_SETSIGMASK does nothing, because its sigmask is replaced by saved_sigmask, when the task returns to user-space. This patch fixes this problem. PTRACE_GETSIGMASK returns saved_sigmask if it's set. PTRACE_SETSIGMASK drops the TIF_RESTORE_SIGMASK flag. Link: http://lkml.kernel.org/r/20181120060616.6043-1-avagin@gmail.com Fixes: 29000caecbe8 ("ptrace: add ability to get/set signal-blocked mask") Signed-off-by: Andrei Vagin Acked-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin (Microsoft) --- include/linux/sched/signal.h | 18 ++++++++++++++++++ kernel/ptrace.c | 15 +++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 13789d10a50e..76b8399b17f6 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -417,10 +417,20 @@ static inline void set_restore_sigmask(void) set_thread_flag(TIF_RESTORE_SIGMASK); WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } + +static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); +} + static inline void clear_restore_sigmask(void) { clear_thread_flag(TIF_RESTORE_SIGMASK); } +static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) +{ + return test_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); +} static inline bool test_restore_sigmask(void) { return test_thread_flag(TIF_RESTORE_SIGMASK); @@ -438,6 +448,10 @@ static inline void set_restore_sigmask(void) current->restore_sigmask = true; WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } +static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) +{ + tsk->restore_sigmask = false; +} static inline void clear_restore_sigmask(void) { current->restore_sigmask = false; @@ -446,6 +460,10 @@ static inline bool test_restore_sigmask(void) { return current->restore_sigmask; } +static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) +{ + return tsk->restore_sigmask; +} static inline bool test_and_clear_restore_sigmask(void) { if (!current->restore_sigmask) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 771e93f9c43f..6f357f4fc859 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * Access another process' address space via ptrace. @@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request, ret = ptrace_setsiginfo(child, &siginfo); break; - case PTRACE_GETSIGMASK: + case PTRACE_GETSIGMASK: { + sigset_t *mask; + if (addr != sizeof(sigset_t)) { ret = -EINVAL; break; } - if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t))) + if (test_tsk_restore_sigmask(child)) + mask = &child->saved_sigmask; + else + mask = &child->blocked; + + if (copy_to_user(datavp, mask, sizeof(sigset_t))) ret = -EFAULT; else ret = 0; break; + } case PTRACE_SETSIGMASK: { sigset_t new_set; @@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request, child->blocked = new_set; spin_unlock_irq(&child->sighand->siglock); + clear_tsk_restore_sigmask(child); + ret = 0; break; } -- cgit v1.2.3 From 004ec01370bc183a83d715d975aa742d99a556f0 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sat, 9 Mar 2019 00:04:11 -0600 Subject: leds: pca9532: fix a potential NULL pointer dereference [ Upstream commit 0aab8e4df4702b31314a27ec4b0631dfad0fae0a ] In case of_match_device cannot find a match, return -EINVAL to avoid NULL pointer dereference. Fixes: fa4191a609f2 ("leds: pca9532: Add device tree support") Signed-off-by: Kangjie Lu Signed-off-by: Jacek Anaszewski Signed-off-by: Sasha Levin (Microsoft) --- drivers/leds/leds-pca9532.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 7fea18b0c15d..7cb4d685a1f1 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -513,6 +513,7 @@ static int pca9532_probe(struct i2c_client *client, const struct i2c_device_id *id) { int devid; + const struct of_device_id *of_id; struct pca9532_data *data = i2c_get_clientdata(client); struct pca9532_platform_data *pca9532_pdata = dev_get_platdata(&client->dev); @@ -528,8 +529,11 @@ static int pca9532_probe(struct i2c_client *client, dev_err(&client->dev, "no platform data\n"); return -EINVAL; } - devid = (int)(uintptr_t)of_match_device( - of_pca9532_leds_match, &client->dev)->data; + of_id = of_match_device(of_pca9532_leds_match, + &client->dev); + if (unlikely(!of_id)) + return -EINVAL; + devid = (int)(uintptr_t) of_id->data; } else { devid = id->driver_data; } -- cgit v1.2.3 From 310f9b1e52c00715b0413bc47deeb2c997da1fcf Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 14 Mar 2019 15:06:14 +0100 Subject: leds: trigger: netdev: use memcpy in device_name_store [ Upstream commit 909346433064b8d840dc82af26161926b8d37558 ] If userspace doesn't end the input with a newline (which can easily happen if the write happens from a C program that does write(fd, iface, strlen(iface))), we may end up including garbage from a previous, longer value in the device_name. For example # cat device_name # printf 'eth12' > device_name # cat device_name eth12 # printf 'eth3' > device_name # cat device_name eth32 I highly doubt anybody is relying on this behaviour, so switch to simply copying the bytes (we've already checked that size is < IFNAMSIZ) and unconditionally zero-terminate it; of course, we also still have to strip a trailing newline. This is also preparation for future patches. Fixes: 06f502f57d0d ("leds: trigger: Introduce a NETDEV trigger") Signed-off-by: Rasmus Villemoes Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski Signed-off-by: Sasha Levin (Microsoft) --- drivers/leds/trigger/ledtrig-netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index 167a94c02d05..136f86a1627d 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -122,7 +122,8 @@ static ssize_t device_name_store(struct device *dev, trigger_data->net_dev = NULL; } - strncpy(trigger_data->device_name, buf, size); + memcpy(trigger_data->device_name, buf, size); + trigger_data->device_name[size] = 0; if (size > 0 && trigger_data->device_name[size - 1] == '\n') trigger_data->device_name[size - 1] = 0; -- cgit v1.2.3 From 6006d5b025224f2db8751d317e2e6bcdad69d4b6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 4 May 2019 09:21:23 +0200 Subject: Linux 5.0.12 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 12870303a029..fd044f594bbf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 11 +SUBLEVEL = 12 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3 From 5a06ba5ed2ae21eb79bae39de67101c768f33927 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 29 Apr 2019 16:39:30 +0300 Subject: ipv4: ip_do_fragment: Preserve skb_iif during fragmentation [ Upstream commit d2f0c961148f65bc73eda72b9fa3a4e80973cb49 ] Previously, during fragmentation after forwarding, skb->skb_iif isn't preserved, i.e. 'ip_copy_metadata' does not copy skb_iif from given 'from' skb. As a result, ip_do_fragment's creates fragments with zero skb_iif, leading to inconsistent behavior. Assume for example an eBPF program attached at tc egress (post forwarding) that examines __sk_buff->ingress_ifindex: - the correct iif is observed if forwarding path does not involve fragmentation/refragmentation - a bogus iif is observed if forwarding path involves fragmentation/refragmentatiom Fix, by preserving skb_iif during 'ip_copy_metadata'. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c80188875f39..e8bb2e85c5a4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -519,6 +519,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; + to->skb_iif = from->skb_iif; skb_dst_drop(to); skb_dst_copy(to, from); to->dev = from->dev; -- cgit v1.2.3 From c61a4beb31f0c92580a66c660d359049692dc1aa Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 30 Apr 2019 10:45:12 -0700 Subject: ipv6: A few fixes on dereferencing rt->from [ Upstream commit 886b7a50100a50f1cbd08a6f8ec5884dfbe082dc ] It is a followup after the fix in commit 9c69a1320515 ("route: Avoid crash from dereferencing NULL rt->from") rt6_do_redirect(): 1. NULL checking is needed on rt->from because a parallel fib6_info delete could happen that sets rt->from to NULL. (e.g. rt6_remove_exception() and fib6_drop_pcpu_from()). 2. fib6_info_hold() is not enough. Same reason as (1). Meaning, holding dst->__refcnt cannot ensure rt->from is not NULL or rt->from->fib6_ref is not 0. Instead of using fib6_info_hold_safe() which ip6_rt_cache_alloc() is already doing, this patch chooses to extend the rcu section to keep "from" dereference-able after checking for NULL. inet6_rtm_getroute(): 1. NULL checking is also needed on rt->from for a similar reason. Note that inet6_rtm_getroute() is using RTNL_FLAG_DOIT_UNLOCKED. Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Martin KaFai Lau Acked-by: Wei Wang Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b6a97115a906..9f132e471d7a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3403,11 +3403,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu rcu_read_lock(); from = rcu_dereference(rt->from); - /* This fib6_info_hold() is safe here because we hold reference to rt - * and rt already holds reference to fib6_info. - */ - fib6_info_hold(from); - rcu_read_unlock(); + if (!from) + goto out; nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); if (!nrt) @@ -3419,10 +3416,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - /* No need to remove rt from the exception table if rt is - * a cached route because rt6_insert_exception() will - * takes care of it - */ + /* rt6_insert_exception() will take care of duplicated exceptions */ if (rt6_insert_exception(nrt, from)) { dst_release_immediate(&nrt->dst); goto out; @@ -3435,7 +3429,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: - fib6_info_release(from); + rcu_read_unlock(); neigh_release(neigh); } @@ -4957,16 +4951,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, rcu_read_lock(); from = rcu_dereference(rt->from); - - if (fibmatch) - err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); - else - err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, - &fl6.saddr, iif, RTM_NEWROUTE, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - 0); + if (from) { + if (fibmatch) + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, + iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + else + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, + &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + } else { + err = -ENETUNREACH; + } rcu_read_unlock(); if (err < 0) { -- cgit v1.2.3 From d0aa794674e5df18e98f6fa2c2a80f8a4d134ad4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Apr 2019 12:22:25 -0700 Subject: ipv6: fix races in ip6_dst_destroy() [ Upstream commit 0e2338749192ce0e52e7174c5352f627632f478a ] We had many syzbot reports that seem to be caused by use-after-free of struct fib6_info. ip6_dst_destroy(), fib6_drop_pcpu_from() and rt6_remove_exception() are writers vs rt->from, and use non consistent synchronization among themselves. Switching to xchg() will solve the issues with no possible lockdep issues. BUG: KASAN: user-memory-access in atomic_dec_and_test include/asm-generic/atomic-instrumented.h:747 [inline] BUG: KASAN: user-memory-access in fib6_info_release include/net/ip6_fib.h:294 [inline] BUG: KASAN: user-memory-access in fib6_info_release include/net/ip6_fib.h:292 [inline] BUG: KASAN: user-memory-access in fib6_drop_pcpu_from net/ipv6/ip6_fib.c:927 [inline] BUG: KASAN: user-memory-access in fib6_purge_rt+0x4f6/0x670 net/ipv6/ip6_fib.c:960 Write of size 4 at addr 0000000000ffffb4 by task syz-executor.1/7649 CPU: 0 PID: 7649 Comm: syz-executor.1 Not tainted 5.1.0-rc6+ #183 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 kasan_report.cold+0x5/0x40 mm/kasan/report.c:321 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 kasan_check_write+0x14/0x20 mm/kasan/common.c:108 atomic_dec_and_test include/asm-generic/atomic-instrumented.h:747 [inline] fib6_info_release include/net/ip6_fib.h:294 [inline] fib6_info_release include/net/ip6_fib.h:292 [inline] fib6_drop_pcpu_from net/ipv6/ip6_fib.c:927 [inline] fib6_purge_rt+0x4f6/0x670 net/ipv6/ip6_fib.c:960 fib6_del_route net/ipv6/ip6_fib.c:1813 [inline] fib6_del+0xac2/0x10a0 net/ipv6/ip6_fib.c:1844 fib6_clean_node+0x3a8/0x590 net/ipv6/ip6_fib.c:2006 fib6_walk_continue+0x495/0x900 net/ipv6/ip6_fib.c:1928 fib6_walk+0x9d/0x100 net/ipv6/ip6_fib.c:1976 fib6_clean_tree+0xe0/0x120 net/ipv6/ip6_fib.c:2055 __fib6_clean_all+0x118/0x2a0 net/ipv6/ip6_fib.c:2071 fib6_clean_all+0x2b/0x40 net/ipv6/ip6_fib.c:2082 rt6_sync_down_dev+0x134/0x150 net/ipv6/route.c:4057 rt6_disable_ip+0x27/0x5f0 net/ipv6/route.c:4062 addrconf_ifdown+0xa2/0x1220 net/ipv6/addrconf.c:3705 addrconf_notify+0x19a/0x2260 net/ipv6/addrconf.c:3630 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1753 call_netdevice_notifiers_extack net/core/dev.c:1765 [inline] call_netdevice_notifiers net/core/dev.c:1779 [inline] dev_close_many+0x33f/0x6f0 net/core/dev.c:1522 rollback_registered_many+0x43b/0xfd0 net/core/dev.c:8177 rollback_registered+0x109/0x1d0 net/core/dev.c:8242 unregister_netdevice_queue net/core/dev.c:9289 [inline] unregister_netdevice_queue+0x1ee/0x2c0 net/core/dev.c:9282 unregister_netdevice include/linux/netdevice.h:2658 [inline] __tun_detach+0xd5b/0x1000 drivers/net/tun.c:727 tun_detach drivers/net/tun.c:744 [inline] tun_chr_close+0xe0/0x180 drivers/net/tun.c:3443 __fput+0x2e5/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x90a/0x2fa0 kernel/exit.c:876 do_group_exit+0x135/0x370 kernel/exit.c:980 __do_sys_exit_group kernel/exit.c:991 [inline] __se_sys_exit_group kernel/exit.c:989 [inline] __x64_sys_exit_group+0x44/0x50 kernel/exit.c:989 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458da9 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffeafc2a6a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 000000000000001c RCX: 0000000000458da9 RDX: 0000000000412a80 RSI: 0000000000a54ef0 RDI: 0000000000000043 RBP: 00000000004be552 R08: 000000000000000c R09: 000000000004c0d1 R10: 0000000002341940 R11: 0000000000000246 R12: 00000000ffffffff R13: 00007ffeafc2a7f0 R14: 000000000004c065 R15: 00007ffeafc2a800 Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: David Ahern Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Acked-by: Wei Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 4 +--- net/ipv6/route.c | 9 ++------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6613d8dbb0e5..91247a6fc67f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -921,9 +921,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, if (pcpu_rt) { struct fib6_info *from; - from = rcu_dereference_protected(pcpu_rt->from, - lockdep_is_held(&table->tb6_lock)); - rcu_assign_pointer(pcpu_rt->from, NULL); + from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); fib6_info_release(from); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9f132e471d7a..59c90bba048c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -379,11 +379,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - rcu_read_lock(); - from = rcu_dereference(rt->from); - rcu_assign_pointer(rt->from, NULL); + from = xchg((__force struct fib6_info **)&rt->from, NULL); fib6_info_release(from); - rcu_read_unlock(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1288,9 +1285,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = rcu_dereference_protected(rt6_ex->rt6i->from, - lockdep_is_held(&rt6_exception_lock)); - rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); -- cgit v1.2.3 From 787a77cccb1813de3bfd655a9d6338b1a0d4491f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Apr 2019 16:49:06 -0700 Subject: ipv6/flowlabel: wait rcu grace period before put_pid() [ Upstream commit 6c0afef5fb0c27758f4d52b2210c61b6bd8b4470 ] syzbot was able to catch a use-after-free read in pid_nr_ns() [1] ip6fl_seq_show() seems to use RCU protection, dereferencing fl->owner.pid but fl_free() releases fl->owner.pid before rcu grace period is started. [1] BUG: KASAN: use-after-free in pid_nr_ns+0x128/0x140 kernel/pid.c:407 Read of size 4 at addr ffff888094012a04 by task syz-executor.0/18087 CPU: 0 PID: 18087 Comm: syz-executor.0 Not tainted 5.1.0-rc6+ #89 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131 pid_nr_ns+0x128/0x140 kernel/pid.c:407 ip6fl_seq_show+0x2f8/0x4f0 net/ipv6/ip6_flowlabel.c:794 seq_read+0xad3/0x1130 fs/seq_file.c:268 proc_reg_read+0x1fe/0x2c0 fs/proc/inode.c:227 do_loop_readv_writev fs/read_write.c:701 [inline] do_loop_readv_writev fs/read_write.c:688 [inline] do_iter_read+0x4a9/0x660 fs/read_write.c:922 vfs_readv+0xf0/0x160 fs/read_write.c:984 kernel_readv fs/splice.c:358 [inline] default_file_splice_read+0x475/0x890 fs/splice.c:413 do_splice_to+0x12a/0x190 fs/splice.c:876 splice_direct_to_actor+0x2d2/0x970 fs/splice.c:953 do_splice_direct+0x1da/0x2a0 fs/splice.c:1062 do_sendfile+0x597/0xd00 fs/read_write.c:1443 __do_sys_sendfile64 fs/read_write.c:1498 [inline] __se_sys_sendfile64 fs/read_write.c:1490 [inline] __x64_sys_sendfile64+0x15a/0x220 fs/read_write.c:1490 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458da9 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f300d24bc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 0000000000458da9 RDX: 00000000200000c0 RSI: 0000000000000008 RDI: 0000000000000007 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000005a R11: 0000000000000246 R12: 00007f300d24c6d4 R13: 00000000004c5fa3 R14: 00000000004da748 R15: 00000000ffffffff Allocated by task 17543: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_kmalloc mm/kasan/common.c:497 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:470 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:505 slab_post_alloc_hook mm/slab.h:437 [inline] slab_alloc mm/slab.c:3393 [inline] kmem_cache_alloc+0x11a/0x6f0 mm/slab.c:3555 alloc_pid+0x55/0x8f0 kernel/pid.c:168 copy_process.part.0+0x3b08/0x7980 kernel/fork.c:1932 copy_process kernel/fork.c:1709 [inline] _do_fork+0x257/0xfd0 kernel/fork.c:2226 __do_sys_clone kernel/fork.c:2333 [inline] __se_sys_clone kernel/fork.c:2327 [inline] __x64_sys_clone+0xbf/0x150 kernel/fork.c:2327 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 7789: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:459 kasan_slab_free+0xe/0x10 mm/kasan/common.c:467 __cache_free mm/slab.c:3499 [inline] kmem_cache_free+0x86/0x260 mm/slab.c:3765 put_pid.part.0+0x111/0x150 kernel/pid.c:111 put_pid+0x20/0x30 kernel/pid.c:105 fl_free+0xbe/0xe0 net/ipv6/ip6_flowlabel.c:102 ip6_fl_gc+0x295/0x3e0 net/ipv6/ip6_flowlabel.c:152 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 The buggy address belongs to the object at ffff888094012a00 which belongs to the cache pid_2 of size 88 The buggy address is located 4 bytes inside of 88-byte region [ffff888094012a00, ffff888094012a58) The buggy address belongs to the page: page:ffffea0002500480 count:1 mapcount:0 mapping:ffff88809a483080 index:0xffff888094012980 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea00018a3508 ffffea0002524a88 ffff88809a483080 raw: ffff888094012980 ffff888094012000 000000010000001b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888094012900: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ffff888094012980: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc >ffff888094012a00: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ^ ffff888094012a80: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ffff888094012b00: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc Fixes: 4f82f45730c6 ("net ip6 flowlabel: Make owner a union of struct pid * and kuid_t") Signed-off-by: Eric Dumazet Cc: Eric W. Biederman Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_flowlabel.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index cb54a8a3c273..23a525c0a9be 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) return fl; } +static void fl_free_rcu(struct rcu_head *head) +{ + struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); + + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); + kfree(fl->opt); + kfree(fl); +} + static void fl_free(struct ip6_flowlabel *fl) { - if (fl) { - if (fl->share == IPV6_FL_S_PROCESS) - put_pid(fl->owner.pid); - kfree(fl->opt); - kfree_rcu(fl, rcu); - } + if (fl) + call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) -- cgit v1.2.3 From d1fa4f8784bb09e6e48765bd71eee7b702f088be Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 25 Apr 2019 12:06:54 -0400 Subject: ipv6: invert flowlabel sharing check in process and user mode [ Upstream commit 95c169251bf734aa555a1e8043e4d88ec97a04ec ] A request for a flowlabel fails in process or user exclusive mode must fail if the caller pid or uid does not match. Invert the test. Previously, the test was unsafe wrt PID recycling, but indeed tested for inequality: fl1->owner != fl->owner Fixes: 4f82f45730c68 ("net ip6 flowlabel: Make owner a union of struct pid* and kuid_t") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_flowlabel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 23a525c0a9be..be5f3d7ceb96 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -639,9 +639,9 @@ recheck: if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid == fl->owner.pid)) || + (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && - uid_eq(fl1->owner.uid, fl->owner.uid))) + !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; -- cgit v1.2.3 From f86adc57e7db0c46e70f81bcbe4dfc0e18403762 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Apr 2019 06:27:58 -0700 Subject: l2ip: fix possible use-after-free [ Upstream commit a622b40035d16196bf19b2b33b854862595245fc ] Before taking a refcount on a rcu protected structure, we need to make sure the refcount is not zero. syzbot reported : refcount_t: increment on 0; use-after-free. WARNING: CPU: 1 PID: 23533 at lib/refcount.c:156 refcount_inc_checked lib/refcount.c:156 [inline] WARNING: CPU: 1 PID: 23533 at lib/refcount.c:156 refcount_inc_checked+0x61/0x70 lib/refcount.c:154 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 23533 Comm: syz-executor.2 Not tainted 5.1.0-rc7+ #93 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 panic+0x2cb/0x65c kernel/panic.c:214 __warn.cold+0x20/0x45 kernel/panic.c:571 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:179 [inline] fixup_bug arch/x86/kernel/traps.c:174 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:refcount_inc_checked lib/refcount.c:156 [inline] RIP: 0010:refcount_inc_checked+0x61/0x70 lib/refcount.c:154 Code: 1d 98 2b 2a 06 31 ff 89 de e8 db 2c 40 fe 84 db 75 dd e8 92 2b 40 fe 48 c7 c7 20 7a a1 87 c6 05 78 2b 2a 06 01 e8 7d d9 12 fe <0f> 0b eb c1 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 57 41 RSP: 0018:ffff888069f0fba8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 000000000000f353 RSI: ffffffff815afcb6 RDI: ffffed100d3e1f67 RBP: ffff888069f0fbb8 R08: ffff88809b1845c0 R09: ffffed1015d23ef1 R10: ffffed1015d23ef0 R11: ffff8880ae91f787 R12: ffff8880a8f26968 R13: 0000000000000004 R14: dffffc0000000000 R15: ffff8880a49a6440 l2tp_tunnel_inc_refcount net/l2tp/l2tp_core.h:240 [inline] l2tp_tunnel_get+0x250/0x580 net/l2tp/l2tp_core.c:173 pppol2tp_connect+0xc00/0x1c70 net/l2tp/l2tp_ppp.c:702 __sys_connect+0x266/0x330 net/socket.c:1808 __do_sys_connect net/socket.c:1819 [inline] __se_sys_connect net/socket.c:1816 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1816 Fixes: 54652eb12c1b ("l2tp: hold tunnel while looking up sessions in l2tp_netlink") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fed6becc5daf..fa789c082c3e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -169,8 +169,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id) { - l2tp_tunnel_inc_refcount(tunnel); + if (tunnel->tunnel_id == tunnel_id && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; @@ -190,8 +190,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth) { - l2tp_tunnel_inc_refcount(tunnel); + if (++count > nth && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } -- cgit v1.2.3 From 31f4cb2730d8098323b96745621aa2766bdcb00e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Apr 2019 09:43:26 -0700 Subject: l2tp: use rcu_dereference_sk_user_data() in l2tp_udp_encap_recv() [ Upstream commit c1c477217882c610a2ba0268f5faf36c9c092528 ] Canonical way to fetch sk_user_data from an encap_rcv() handler called from UDP stack in rcu protected section is to use rcu_dereference_sk_user_data(), otherwise compiler might read it multiple times. Fixes: d00fa9adc528 ("il2tp: fix races with tunnel socket close") Signed-off-by: Eric Dumazet Cc: James Chapman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fa789c082c3e..52b5a2797c0c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -909,7 +909,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - tunnel = l2tp_tunnel(sk); + tunnel = rcu_dereference_sk_user_data(sk); if (tunnel == NULL) goto pass_up; -- cgit v1.2.3 From 801038cca48cac5ff005f6f785a3d407cc705aa9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Apr 2019 13:44:19 +0300 Subject: net: dsa: bcm_sf2: fix buffer overflow doing set_rxnfc [ Upstream commit f949a12fd697479f68d99dc65e9bbab68ee49043 ] The "fs->location" is a u32 that comes from the user in ethtool_set_rxnfc(). We can't pass unclamped values to test_bit() or it results in an out of bounds access beyond the end of the bitmap. Fixes: 7318166cacad ("net: dsa: bcm_sf2: Add support for ethtool::rxnfc") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2_cfp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index e14663ab6dbc..8dd74700a2ef 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -854,6 +854,9 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, fs->m_ext.data[1])) return -EINVAL; + if (fs->location != RX_CLS_LOC_ANY && fs->location >= CFP_NUM_RULES) + return -EINVAL; + if (fs->location != RX_CLS_LOC_ANY && test_bit(fs->location, priv->cfp.used)) return -EBUSY; @@ -942,6 +945,9 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, u32 loc) struct cfp_rule *rule; int ret; + if (loc >= CFP_NUM_RULES) + return -EINVAL; + /* Refuse deleting unused rules, and those that are not unique since * that could leave IPv6 rules with one of the chained rule in the * table. -- cgit v1.2.3 From 52c89e455b5c97b935ea248ce753289297931871 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 25 Apr 2019 00:33:00 +0200 Subject: net: phy: marvell: Fix buffer overrun with stats counters [ Upstream commit fdfdf86720a34527f777cbe0d8599bf0528fa146 ] marvell_get_sset_count() returns how many statistics counters there are. If the PHY supports fibre, there are 3, otherwise two. marvell_get_strings() does not make this distinction, and always returns 3 strings. This then often results in writing past the end of the buffer for the strings. Fixes: 2170fef78a40 ("Marvell phy: add field to get errors from fiber link.") Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/marvell.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index abb7876a8776..66573a218df5 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1494,9 +1494,10 @@ static int marvell_get_sset_count(struct phy_device *phydev) static void marvell_get_strings(struct phy_device *phydev, u8 *data) { + int count = marvell_get_sset_count(phydev); int i; - for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) { + for (i = 0; i < count; i++) { strlcpy(data + i * ETH_GSTRING_LEN, marvell_hw_stats[i].string, ETH_GSTRING_LEN); } @@ -1524,9 +1525,10 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i) static void marvell_get_stats(struct phy_device *phydev, struct ethtool_stats *stats, u64 *data) { + int count = marvell_get_sset_count(phydev); int i; - for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) + for (i = 0; i < count; i++) data[i] = marvell_get_stat(phydev, i); } -- cgit v1.2.3 From 90cb17d3df6f72e4d80081f29f4f668120bc9c42 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2019 12:19:12 -0700 Subject: net/tls: avoid NULL pointer deref on nskb->sk in fallback [ Upstream commit 2dcb003314032c6efb13a065ffae60d164b2dd35 ] update_chksum() accesses nskb->sk before it has been set by complete_skb(), move the init up. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_device_fallback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index ef8934fd8698..426dd97725e4 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -200,13 +200,14 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) skb_put(nskb, skb->len); memcpy(nskb->data, skb->data, headln); - update_chksum(nskb, headln); nskb->destructor = skb->destructor; nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; + update_chksum(nskb, headln); + delta = nskb->truesize - skb->truesize; if (likely(delta < 0)) WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); -- cgit v1.2.3 From f8970584f08c250b7b666c80ec1a0d077b98ba87 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Apr 2019 08:34:08 +0100 Subject: rxrpc: Fix net namespace cleanup [ Upstream commit b13023421b5179413421333f602850914f6a7ad8 ] In rxrpc_destroy_all_calls(), there are two phases: (1) make sure the ->calls list is empty, emitting error messages if not, and (2) wait for the RCU cleanup to happen on outstanding calls (ie. ->nr_calls becomes 0). To avoid taking the call_lock, the function prechecks ->calls and if empty, it returns to avoid taking the lock - this is wrong, however: it still needs to go and do the second phase and wait for ->nr_calls to become 0. Without this, the rxrpc_net struct may get deallocated before we get to the RCU cleanup for the last calls. This can lead to: Slab corruption (Not tainted): kmalloc-16k start=ffff88802b178000, len=16384 050: 6b 6b 6b 6b 6b 6b 6b 6b 61 6b 6b 6b 6b 6b 6b 6b kkkkkkkkakkkkkkk Note the "61" at offset 0x58. This corresponds to the ->nr_calls member of struct rxrpc_net (which is >9k in size, and thus allocated out of the 16k slab). Fix this by flipping the condition on the if-statement, putting the locked section inside the if-body and dropping the return from there. The function will then always go on to wait for the RCU cleanup on outstanding calls. Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing") Signed-off-by: David Howells Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/call_object.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8aa2937b069f..fe96881a334d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -604,30 +604,30 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); - if (list_empty(&rxnet->calls)) - return; + if (!list_empty(&rxnet->calls)) { + write_lock(&rxnet->call_lock); - write_lock(&rxnet->call_lock); + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, + struct rxrpc_call, link); + _debug("Zapping call %p", call); - while (!list_empty(&rxnet->calls)) { - call = list_entry(rxnet->calls.next, struct rxrpc_call, link); - _debug("Zapping call %p", call); + rxrpc_see_call(call); + list_del_init(&call->link); - rxrpc_see_call(call); - list_del_init(&call->link); + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + rxrpc_call_states[call->state], + call->flags, call->events); - pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - rxrpc_call_states[call->state], - call->flags, call->events); + write_unlock(&rxnet->call_lock); + cond_resched(); + write_lock(&rxnet->call_lock); + } write_unlock(&rxnet->call_lock); - cond_resched(); - write_lock(&rxnet->call_lock); } - write_unlock(&rxnet->call_lock); - atomic_dec(&rxnet->nr_calls); wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls)); } -- cgit v1.2.3 From 484d404fdc2a64261f95692d7e242ec717e4ea7b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 29 Apr 2019 14:16:19 +0800 Subject: sctp: avoid running the sctp state machine recursively [ Upstream commit fbd019737d71e405f86549fd738f81e2ff3dd073 ] Ying triggered a call trace when doing an asconf testing: BUG: scheduling while atomic: swapper/12/0/0x10000100 Call Trace: [] dump_stack+0x19/0x1b [] __schedule_bug+0x64/0x72 [] __schedule+0x9ba/0xa00 [] __cond_resched+0x26/0x30 [] _cond_resched+0x3a/0x50 [] kmem_cache_alloc_node+0x38/0x200 [] __alloc_skb+0x5d/0x2d0 [] sctp_packet_transmit+0x610/0xa20 [sctp] [] sctp_outq_flush+0x2ce/0xc00 [sctp] [] sctp_outq_uncork+0x1c/0x20 [sctp] [] sctp_cmd_interpreter.isra.22+0xc8/0x1460 [sctp] [] sctp_do_sm+0xe1/0x350 [sctp] [] sctp_primitive_ASCONF+0x3d/0x50 [sctp] [] sctp_cmd_interpreter.isra.22+0x114/0x1460 [sctp] [] sctp_do_sm+0xe1/0x350 [sctp] [] sctp_assoc_bh_rcv+0xf4/0x1b0 [sctp] [] sctp_inq_push+0x51/0x70 [sctp] [] sctp_rcv+0xa8b/0xbd0 [sctp] As it shows, the first sctp_do_sm() running under atomic context (NET_RX softirq) invoked sctp_primitive_ASCONF() that uses GFP_KERNEL flag later, and this flag is supposed to be used in non-atomic context only. Besides, sctp_do_sm() was called recursively, which is not expected. Vlad tried to fix this recursive call in Commit c0786693404c ("sctp: Fix oops when sending queued ASCONF chunks") by introducing a new command SCTP_CMD_SEND_NEXT_ASCONF. But it didn't work as this command is still used in the first sctp_do_sm() call, and sctp_primitive_ASCONF() will be called in this command again. To avoid calling sctp_do_sm() recursively, we send the next queued ASCONF not by sctp_primitive_ASCONF(), but by sctp_sf_do_prm_asconf() in the 1st sctp_do_sm() directly. Reported-by: Ying Xu Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sctp/command.h | 1 - net/sctp/sm_sideeffect.c | 29 ----------------------------- net/sctp/sm_statefuns.c | 35 +++++++++++++++++++++++++++-------- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6640f84fe536..6d5beac29bc1 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -105,7 +105,6 @@ enum sctp_verb { SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */ SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ SCTP_CMD_SEND_MSG, /* Send the whole use message */ - SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1d143bc3f73d..4aa03588f87b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1112,32 +1112,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, } -/* Sent the next ASCONF packet currently stored in the association. - * This happens after the ASCONF_ACK was succeffully processed. - */ -static void sctp_cmd_send_asconf(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - - /* Send the next asconf chunk from the addip chunk - * queue. - */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - struct sctp_chunk *asconf = list_entry(entry, - struct sctp_chunk, list); - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(net, asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } -} - - /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1783,9 +1757,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, } sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; - case SCTP_CMD_SEND_NEXT_ASCONF: - sctp_cmd_send_asconf(asoc); - break; case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c9ae3404b1bb..713a669d2058 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3824,6 +3824,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return SCTP_DISPOSITION_CONSUME; } +static enum sctp_disposition sctp_send_next_asconf( + struct net *net, + const struct sctp_endpoint *ep, + struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *asconf; + struct list_head *entry; + + if (list_empty(&asoc->addip_chunk_list)) + return SCTP_DISPOSITION_CONSUME; + + entry = asoc->addip_chunk_list.next; + asconf = list_entry(entry, struct sctp_chunk, list); + + list_del_init(entry); + sctp_chunk_hold(asconf); + asoc->addip_last_asconf = asconf; + + return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands); +} + /* * ADDIP Section 4.3 General rules for address manipulation * When building TLV parameters for the ASCONF Chunk that will add or @@ -3915,14 +3938,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) { - /* Successfully processed ASCONF_ACK. We can - * release the next asconf if we have one. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, - SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; - } + asconf_ack)) + return sctp_send_next_asconf(net, ep, + (struct sctp_association *)asoc, + type, commands); abort = sctp_make_abort(asoc, asconf_ack, sizeof(struct sctp_errhdr)); -- cgit v1.2.3 From 02694885a2be1baf10a795272a506a7a11bb6d61 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 30 Apr 2019 10:46:10 +0800 Subject: selftests: fib_rule_tests: print the result and return 1 if any tests failed [ Upstream commit f68d7c44e76532e46f292ad941aa3706cb9e6e40 ] Fixes: 65b2b4939a64 ("selftests: net: initial fib rule tests") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/fib_rule_tests.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index d4cfb6a7a086..d84193bdc307 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -27,6 +27,7 @@ log_test() nsuccess=$((nsuccess+1)) printf "\n TEST: %-50s [ OK ]\n" "${msg}" else + ret=1 nfail=$((nfail+1)) printf "\n TEST: %-50s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then @@ -245,4 +246,9 @@ setup run_fibrule_tests cleanup +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + exit $ret -- cgit v1.2.3 From 3a0701b8d93a2515bad52563351791130b8ee54c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Apr 2019 11:53:18 -0400 Subject: packet: validate msg_namelen in send directly [ Upstream commit 486efdc8f6ce802b27e15921d2353cc740c55451 ] Packet sockets in datagram mode take a destination address. Verify its length before passing to dev_hard_header. Prior to 2.6.14-rc3, the send code ignored sll_halen. This is established behavior. Directly compare msg_namelen to dev->addr_len. Change v1->v2: initialize addr in all paths Fixes: 6b8d95f1795c4 ("packet: validate address length if non-zero") Suggested-by: David Laight Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8406bf11eef4..97f79580ae38 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2603,8 +2603,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); + unsigned char *addr = NULL; int tp_len, size_max; - unsigned char *addr; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; @@ -2615,7 +2615,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2625,10 +2624,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_put; + if (po->sk.sk_socket->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_put; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -2800,7 +2802,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; __be16 proto; - unsigned char *addr; + unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2817,7 +2819,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2825,10 +2826,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_unlock; + if (sock->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_unlock; + addr = saddr->sll_addr; + } } err = -ENXIO; -- cgit v1.2.3 From 3362ece2719cd505dac2a0ee860a50b08ddd20c0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Apr 2019 11:46:55 -0400 Subject: packet: in recvmsg msg_name return at least sizeof sockaddr_ll [ Upstream commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c ] Packet send checks that msg_name is at least sizeof sockaddr_ll. Packet recv must return at least this length, so that its output can be passed unmodified to packet send. This ceased to be true since adding support for lladdr longer than sll_addr. Since, the return value uses true address length. Always return at least sizeof sockaddr_ll, even if address length is shorter. Zero the padding bytes. Change v1->v2: do not overwrite zeroed padding again. use copy_len. Fixes: 0fb375fb9b93 ("[AF_PACKET]: Allow for > 8 byte hardware addresses.") Suggested-by: David Laight Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 97f79580ae38..faa2bc50cfa0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3349,20 +3349,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (pkt_sk(sk)->auxdata) { -- cgit v1.2.3 From 4d5aaae5fa0e5415f1cec4bb22d193c656343f4e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 29 Apr 2019 10:30:09 -0700 Subject: selftests: fib_rule_tests: Fix icmp proto with ipv6 [ Upstream commit 15d55bae4e3c43cd9f87fd93c73a263e172d34e1 ] A recent commit returns an error if icmp is used as the ip-proto for IPv6 fib rules. Update fib_rule_tests to send ipv6-icmp instead of icmp. Fixes: 5e1a99eae8499 ("ipv4: Add ICMPv6 support when parse route ipproto") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/fib_rule_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index d84193bdc307..4b7e107865bf 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -148,8 +148,8 @@ fib_rule6_test() fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then - match="ipproto icmp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match" + match="ipproto ipv6-icmp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match" fi } -- cgit v1.2.3 From b58d12d306ae210c3e5943b6422b4e0c0ef91012 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Apr 2019 10:10:05 -0700 Subject: tcp: add sanity tests in tcp_add_backlog() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ca2fe2956acef2f87f6c55549874fdd2e92d9824 ] Richard and Bruno both reported that my commit added a bug, and Bruno was able to determine the problem came when a segment wih a FIN packet was coalesced to a prior one in tcp backlog queue. It turns out the header prediction in tcp_rcv_established() looks back to TCP headers in the packet, not in the metadata (aka TCP_SKB_CB(skb)->tcp_flags) The fast path in tcp_rcv_established() is not supposed to handle a FIN flag (it does not call tcp_fin()) Therefore we need to make sure to propagate the FIN flag, so that the coalesced packet does not go through the fast path, the same than a GRO packet carrying a FIN flag. While we are at it, make sure we do not coalesce packets with RST or SYN, or if they do not have ACK set. Many thanks to Richard and Bruno for pinpointing the bad commit, and to Richard for providing a first version of the fix. Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue") Signed-off-by: Eric Dumazet Reported-by: Richard Purdie Reported-by: Bruno Prémont Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 00852f47a73d..9a2ff79a93ad 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq || TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield || ((TCP_SKB_CB(tail)->tcp_flags | - TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) || + TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) || + !((TCP_SKB_CB(tail)->tcp_flags & + TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || #ifdef CONFIG_TLS_DEVICE @@ -1692,6 +1694,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + /* We have to update both TCP_SKB_CB(tail)->tcp_flags and + * thtail->fin, so that the fast path in tcp_rcv_established() + * is not entered if we append a packet with a FIN. + * SYN, RST, URG are not present. + * ACK is set on both packets. + * PSH : we do not really care in TCP stack, + * at least for 'GRO' packets. + */ + thtail->fin |= th->fin; TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; if (TCP_SKB_CB(skb)->has_rxtstamp) { -- cgit v1.2.3 From 8858d72345421cff614861c83543fb3e87f6189a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Apr 2019 12:50:44 +0200 Subject: udp: fix GRO reception in case of length mismatch [ Upstream commit 21f1b8a6636c4dbde4aa1ec0343f42eaf653ffcc ] Currently, the UDP GRO code path does bad things on some edge conditions - Aggregation can happen even on packet with different lengths. Fix the above by rewriting the 'complete' condition for GRO packets. While at it, note explicitly that we allow merging the first packet per burst below gso_size. Reported-by: Sean Tong Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp_offload.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 64f9715173ac..d8776b2110c1 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -377,13 +377,14 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values + * leading to execessive truesize values. + * On len mismatch merge the first packet shorter than gso_size, + * otherwise complete the GRO packet. */ - if (!skb_gro_receive(p, skb) && + if (uh->len > uh2->len || skb_gro_receive(p, skb) || + uh->len != uh2->len || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; - else if (uh->len != uh2->len) - pp = p; return pp; } -- cgit v1.2.3 From 7a0d81d12f62626aecef35ebc9849978d8b4e6e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 May 2019 18:56:28 -0700 Subject: udp: fix GRO packet of death [ Upstream commit 4dd2b82d5adfbe0b1587ccad7a8f76d826120f37 ] syzbot was able to crash host by sending UDP packets with a 0 payload. TCP does not have this issue since we do not aggregate packets without payload. Since dev_gro_receive() sets gso_size based on skb_gro_len(skb) it seems not worth trying to cope with padded packets. BUG: KASAN: slab-out-of-bounds in skb_gro_receive+0xf5f/0x10e0 net/core/skbuff.c:3826 Read of size 16 at addr ffff88808893fff0 by task syz-executor612/7889 CPU: 0 PID: 7889 Comm: syz-executor612 Not tainted 5.1.0-rc7+ #96 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load16_noabort+0x14/0x20 mm/kasan/generic_report.c:133 skb_gro_receive+0xf5f/0x10e0 net/core/skbuff.c:3826 udp_gro_receive_segment net/ipv4/udp_offload.c:382 [inline] call_gro_receive include/linux/netdevice.h:2349 [inline] udp_gro_receive+0xb61/0xfd0 net/ipv4/udp_offload.c:414 udp4_gro_receive+0x763/0xeb0 net/ipv4/udp_offload.c:478 inet_gro_receive+0xe72/0x1110 net/ipv4/af_inet.c:1510 dev_gro_receive+0x1cd0/0x23c0 net/core/dev.c:5581 napi_gro_frags+0x36b/0xd10 net/core/dev.c:5843 tun_get_user+0x2f24/0x3fb0 drivers/net/tun.c:1981 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2027 call_write_iter include/linux/fs.h:1866 [inline] do_iter_readv_writev+0x5e1/0x8e0 fs/read_write.c:681 do_iter_write fs/read_write.c:957 [inline] do_iter_write+0x184/0x610 fs/read_write.c:938 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1002 do_writev+0x15e/0x370 fs/read_write.c:1037 __do_sys_writev fs/read_write.c:1110 [inline] __se_sys_writev fs/read_write.c:1107 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1107 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x441cc0 Code: 05 48 3d 01 f0 ff ff 0f 83 9d 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 83 3d 51 93 29 00 00 75 14 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 74 09 fc ff c3 48 83 ec 08 e8 ba 2b 00 00 RSP: 002b:00007ffe8c716118 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00007ffe8c716150 RCX: 0000000000441cc0 RDX: 0000000000000001 RSI: 00007ffe8c716170 RDI: 00000000000000f0 RBP: 0000000000000000 R08: 000000000000ffff R09: 0000000000a64668 R10: 0000000020000040 R11: 0000000000000246 R12: 000000000000c2d9 R13: 0000000000402b50 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 5143: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_kmalloc mm/kasan/common.c:497 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:470 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:505 slab_post_alloc_hook mm/slab.h:437 [inline] slab_alloc mm/slab.c:3393 [inline] kmem_cache_alloc+0x11a/0x6f0 mm/slab.c:3555 mm_alloc+0x1d/0xd0 kernel/fork.c:1030 bprm_mm_init fs/exec.c:363 [inline] __do_execve_file.isra.0+0xaa3/0x23f0 fs/exec.c:1791 do_execveat_common fs/exec.c:1865 [inline] do_execve fs/exec.c:1882 [inline] __do_sys_execve fs/exec.c:1958 [inline] __se_sys_execve fs/exec.c:1953 [inline] __x64_sys_execve+0x8f/0xc0 fs/exec.c:1953 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 5351: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:459 kasan_slab_free+0xe/0x10 mm/kasan/common.c:467 __cache_free mm/slab.c:3499 [inline] kmem_cache_free+0x86/0x260 mm/slab.c:3765 __mmdrop+0x238/0x320 kernel/fork.c:677 mmdrop include/linux/sched/mm.h:49 [inline] finish_task_switch+0x47b/0x780 kernel/sched/core.c:2746 context_switch kernel/sched/core.c:2880 [inline] __schedule+0x81b/0x1cc0 kernel/sched/core.c:3518 preempt_schedule_irq+0xb5/0x140 kernel/sched/core.c:3745 retint_kernel+0x1b/0x2d arch_local_irq_restore arch/x86/include/asm/paravirt.h:767 [inline] kmem_cache_free+0xab/0x260 mm/slab.c:3766 anon_vma_chain_free mm/rmap.c:134 [inline] unlink_anon_vmas+0x2ba/0x870 mm/rmap.c:401 free_pgtables+0x1af/0x2f0 mm/memory.c:394 exit_mmap+0x2d1/0x530 mm/mmap.c:3144 __mmput kernel/fork.c:1046 [inline] mmput+0x15f/0x4c0 kernel/fork.c:1067 exec_mmap fs/exec.c:1046 [inline] flush_old_exec+0x8d9/0x1c20 fs/exec.c:1279 load_elf_binary+0x9bc/0x53f0 fs/binfmt_elf.c:864 search_binary_handler fs/exec.c:1656 [inline] search_binary_handler+0x17f/0x570 fs/exec.c:1634 exec_binprm fs/exec.c:1698 [inline] __do_execve_file.isra.0+0x1394/0x23f0 fs/exec.c:1818 do_execveat_common fs/exec.c:1865 [inline] do_execve fs/exec.c:1882 [inline] __do_sys_execve fs/exec.c:1958 [inline] __se_sys_execve fs/exec.c:1953 [inline] __x64_sys_execve+0x8f/0xc0 fs/exec.c:1953 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff88808893f7c0 which belongs to the cache mm_struct of size 1496 The buggy address is located 600 bytes to the right of 1496-byte region [ffff88808893f7c0, ffff88808893fd98) The buggy address belongs to the page: page:ffffea0002224f80 count:1 mapcount:0 mapping:ffff88821bc40ac0 index:0xffff88808893f7c0 compound_mapcount: 0 flags: 0x1fffc0000010200(slab|head) raw: 01fffc0000010200 ffffea00025b4f08 ffffea00027b9d08 ffff88821bc40ac0 raw: ffff88808893f7c0 ffff88808893e440 0000000100000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88808893fe80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88808893ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88808893ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff888088940000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff888088940080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp_offload.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d8776b2110c1..065334b41d57 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -352,6 +352,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; + unsigned int ulen; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -359,6 +360,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return NULL; } + /* Do not deal with padded or malicious packets, sorry ! */ + ulen = ntohs(uh->len); + if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } /* pull encapsulating udp header */ skb_gro_pull(skb, sizeof(struct udphdr)); skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); @@ -377,12 +384,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values. + * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (uh->len > uh2->len || skb_gro_receive(p, skb) || - uh->len != uh2->len || + if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) || + ulen != ntohs(uh2->len) || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; -- cgit v1.2.3 From 4aaaa658466a0b30a6839c4c519ce6571ac76cfa Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:50 -0400 Subject: bnxt_en: Improve multicast address setup logic. [ Upstream commit b4e30e8e7ea1d1e35ffd64ca46f7d9a7f227b4bf ] The driver builds a list of multicast addresses and sends it to the firmware when the driver's ndo_set_rx_mode() is called. In rare cases, the firmware can fail this call if internal resources to add multicast addresses are exhausted. In that case, we should try the call again by setting the ALL_MCAST flag which is more guaranteed to succeed. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 40ca339ec3df..ef07baacc762 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8889,8 +8889,15 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp) skip_uc: rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); + if (rc && vnic->mc_list_count) { + netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n", + rc); + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); + } if (rc) - netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", + netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %d\n", rc); return rc; -- cgit v1.2.3 From 76dba4d8d0db4e764da159470338bf80669e954d Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 25 Apr 2019 22:31:51 -0400 Subject: bnxt_en: Free short FW command HWRM memory in error path in bnxt_init_one() [ Upstream commit f9099d611449836a51a65f40ea7dc9cb5f2f665e ] In the bnxt_init_one() error path, short FW command request memory is not freed. This patch fixes it. Fixes: e605db801bde ("bnxt_en: Support for Short Firmware Message") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ef07baacc762..32097f838d2a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10632,6 +10632,7 @@ init_err_cleanup_tc: bnxt_clear_int_mode(bp); init_err_pci_clean: + bnxt_free_hwrm_short_cmd_req(bp); bnxt_free_hwrm_resources(bp); bnxt_free_ctx_mem(bp); kfree(bp->ctx); -- cgit v1.2.3 From 4348cae3a6e1ccf5dcc73759ed2734f88c331135 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:52 -0400 Subject: bnxt_en: Fix possible crash in bnxt_hwrm_ring_free() under error conditions. [ Upstream commit 1f83391bd6fc48f92f627b0ec0bce686d100c6a5 ] If we encounter errors during open and proceed to clean up, bnxt_hwrm_ring_free() may crash if the rings we try to free have never been allocated. bnxt_cp_ring_for_rx() or bnxt_cp_ring_for_tx() may reference pointers that have not been allocated. Fix it by checking for valid fw_ring_id first before calling bnxt_cp_ring_for_rx() or bnxt_cp_ring_for_tx(). Fixes: 2c61d2117ecb ("bnxt_en: Add helper functions to get firmware CP ring ID.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 32097f838d2a..e943f4138929 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5131,10 +5131,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; struct bnxt_ring_struct *ring = &txr->tx_ring_struct; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_TX, close_path ? cmpl_ring_id : @@ -5147,10 +5147,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring = &rxr->rx_ring_struct; u32 grp_idx = rxr->bnapi->index; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_RX, close_path ? cmpl_ring_id : @@ -5169,10 +5169,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring = &rxr->rx_agg_ring_struct; u32 grp_idx = rxr->bnapi->index; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); + hwrm_ring_free_send_msg(bp, ring, type, close_path ? cmpl_ring_id : INVALID_HW_RING_ID); -- cgit v1.2.3 From 1c38ed7b5ef637afe0a5d7bc5a456e20885054ff Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:53 -0400 Subject: bnxt_en: Pass correct extended TX port statistics size to firmware. [ Upstream commit ad361adf0d08f1135f3845c6b3a36be7cc0bfda5 ] If driver determines that extended TX port statistics are not supported or allocation of the data structure fails, make sure to pass 0 TX stats size to firmware to disable it. The firmware returned TX stats size should also be set to 0 for consistency. This will prevent bnxt_get_ethtool_stats() from accessing the NULL TX stats pointer in case there is mismatch between firmware and driver. Fixes: 36e53349b60b ("bnxt_en: Add additional extended port statistics.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e943f4138929..ab200d8bc119 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6745,6 +6745,7 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp) struct hwrm_queue_pri2cos_qcfg_input req2 = {0}; struct hwrm_port_qstats_ext_input req = {0}; struct bnxt_pf_info *pf = &bp->pf; + u32 tx_stat_size; int rc; if (!(bp->flags & BNXT_FLAG_PORT_STATS_EXT)) @@ -6754,13 +6755,16 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp) req.port_id = cpu_to_le16(pf->port_id); req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext)); req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_ext_map); - req.tx_stat_size = cpu_to_le16(sizeof(struct tx_port_stats_ext)); + tx_stat_size = bp->hw_tx_port_stats_ext ? + sizeof(*bp->hw_tx_port_stats_ext) : 0; + req.tx_stat_size = cpu_to_le16(tx_stat_size); req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_ext_map); mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8; - bp->fw_tx_stats_ext_size = le16_to_cpu(resp->tx_stat_size) / 8; + bp->fw_tx_stats_ext_size = tx_stat_size ? + le16_to_cpu(resp->tx_stat_size) / 8 : 0; } else { bp->fw_rx_stats_ext_size = 0; bp->fw_tx_stats_ext_size = 0; -- cgit v1.2.3 From 95fc8b4b43674a32572eb8fa3db3d08197fb7c68 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:54 -0400 Subject: bnxt_en: Fix statistics context reservation logic. [ Upstream commit 3f93cd3f098e284c851acb89265ebe35b994a5c8 ] In an earlier commit that fixes the number of stats contexts to reserve for the RDMA driver, we added a function parameter to pass in the number of stats contexts to all the relevant functions. The passed in parameter should have been used to set the enables field of the firmware message. Fixes: 780baad44f0f ("bnxt_en: Reserve 1 stat_ctx for RDMA driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ab200d8bc119..4dd99c28ab5b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5311,17 +5311,16 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req, req->num_tx_rings = cpu_to_le16(tx_rings); if (BNXT_NEW_RM(bp)) { enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; + enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; if (bp->flags & BNXT_FLAG_CHIP_P5) { enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0; enables |= tx_rings + ring_grps ? - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; } else { enables |= cp_rings ? - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= ring_grps ? FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS | FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; @@ -5361,14 +5360,13 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS | FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; + enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; if (bp->flags & BNXT_FLAG_CHIP_P5) { enables |= tx_rings + ring_grps ? - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; } else { enables |= cp_rings ? - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; } -- cgit v1.2.3 From 126255f1ef2b61d3ca59488d02cdd81219cb84d2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:55 -0400 Subject: bnxt_en: Fix uninitialized variable usage in bnxt_rx_pkt(). [ Upstream commit 0b397b17a4120cb80f7bf89eb30587b3dd9b0d1d ] In bnxt_rx_pkt(), if the driver encounters BD errors, it will recycle the buffers and jump to the end where the uninitailized variable "len" is referenced. Fix it by adding a new jump label that will skip the length update. This is the most correct fix since the length may not be valid when we get this type of error. Fixes: 6a8788f25625 ("bnxt_en: add support for software dynamic interrupt moderation") Reported-by: Nathan Chancellor Cc: Nathan Chancellor Signed-off-by: Michael Chan Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4dd99c28ab5b..c6ddbc0e084e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1621,7 +1621,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, netdev_warn(bp->dev, "RX buffer error %x\n", rx_err); bnxt_sched_reset(bp, rxr); } - goto next_rx; + goto next_rx_no_len; } len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT; @@ -1702,12 +1702,13 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = 1; next_rx: - rxr->rx_prod = NEXT_RX(prod); - rxr->rx_next_cons = NEXT_RX(cons); - cpr->rx_packets += 1; cpr->rx_bytes += len; +next_rx_no_len: + rxr->rx_prod = NEXT_RX(prod); + rxr->rx_next_cons = NEXT_RX(cons); + next_rx_no_prod_no_len: *raw_cons = tmp_raw_cons; -- cgit v1.2.3 From f7f4d4b85b83d0296dcf805b61f64c50833f5f5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Apr 2019 17:35:09 -0700 Subject: net/tls: don't copy negative amounts of data in reencrypt [ Upstream commit 97e1caa517e22d62a283b876fb8aa5f4672c83dd ] There is no guarantee the record starts before the skb frags. If we don't check for this condition copy amount will get negative, leading to reads and writes to random memory locations. Familiar hilarity ensues. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_device.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 5f1d937c4be9..4068101d43ea 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -610,14 +610,16 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) else err = 0; - copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + if (skb_pagelen(skb) > offset) { + copy = min_t(int, skb_pagelen(skb) - offset, + rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); - if (skb->decrypted) - skb_store_bits(skb, offset, buf, copy); + if (skb->decrypted) + skb_store_bits(skb, offset, buf, copy); - offset += copy; - buf += copy; + offset += copy; + buf += copy; + } skb_walk_frags(skb, skb_iter) { copy = min_t(int, skb_iter->len, -- cgit v1.2.3 From a31a8f55c8e8463948775297d79db945061479d4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Apr 2019 17:35:10 -0700 Subject: net/tls: fix copy to fragments in reencrypt [ Upstream commit eb3d38d5adb520435d4e4af32529ccb13ccc9935 ] Fragments may contain data from other records so we have to account for that when we calculate the destination and max length of copy we can perform. Note that 'offset' is the offset within the message, so it can't be passed as offset within the frag.. Here skb_store_bits() would have realised the call is wrong and simply not copy data. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_device.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4068101d43ea..7d5136ecee78 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -579,7 +579,7 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) { struct strp_msg *rxm = strp_msg(skb); - int err = 0, offset = rxm->offset, copy, nsg; + int err = 0, offset = rxm->offset, copy, nsg, data_len, pos; struct sk_buff *skb_iter, *unused; struct scatterlist sg[1]; char *orig_buf, *buf; @@ -610,9 +610,10 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) else err = 0; + data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE; + if (skb_pagelen(skb) > offset) { - copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + copy = min_t(int, skb_pagelen(skb) - offset, data_len); if (skb->decrypted) skb_store_bits(skb, offset, buf, copy); @@ -621,16 +622,30 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) buf += copy; } + pos = skb_pagelen(skb); skb_walk_frags(skb, skb_iter) { - copy = min_t(int, skb_iter->len, - rxm->full_len - offset + rxm->offset - - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + int frag_pos; + + /* Practically all frags must belong to msg if reencrypt + * is needed with current strparser and coalescing logic, + * but strparser may "get optimized", so let's be safe. + */ + if (pos + skb_iter->len <= offset) + goto done_with_frag; + if (pos >= data_len + rxm->offset) + break; + + frag_pos = offset - pos; + copy = min_t(int, skb_iter->len - frag_pos, + data_len + rxm->offset - offset); if (skb_iter->decrypted) - skb_store_bits(skb_iter, offset, buf, copy); + skb_store_bits(skb_iter, frag_pos, buf, copy); offset += copy; buf += copy; +done_with_frag: + pos += skb_iter->len; } free_buf: -- cgit v1.2.3 From 119ac69c07728d5ab5b182a43d726223c3b53212 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 29 Apr 2019 07:04:15 -0700 Subject: KVM: x86: Whitelist port 0x7e for pre-incrementing %rip commit 8764ed55c9705e426d889ff16c26f398bba70b9b upstream. KVM's recent bug fix to update %rip after emulating I/O broke userspace that relied on the previous behavior of incrementing %rip prior to exiting to userspace. When running a Windows XP guest on AMD hardware, Qemu may patch "OUT 0x7E" instructions in reaction to the OUT itself. Because KVM's old behavior was to increment %rip before exiting to userspace to handle the I/O, Qemu manually adjusted %rip to account for the OUT instruction. Arguably this is a userspace bug as KVM requires userspace to re-enter the kernel to complete instruction emulation before taking any other actions. That being said, this is a bit of a grey area and breaking userspace that has worked for many years is bad. Pre-increment %rip on OUT to port 0x7e before exiting to userspace to hack around the issue. Fixes: 45def77ebf79e ("KVM: x86: update %rip after emulating IO") Reported-by: Simon Becherer Reported-and-tested-by: Iakov Karpov Reported-by: Gabriele Balducci Reported-by: Antti Antinoja Cc: stable@vger.kernel.org Cc: Takashi Iwai Cc: Jiri Slaby Cc: Greg Kroah-Hartman Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/x86.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index dabfcf7c3941..7a0e64ccd6ff 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -381,6 +381,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c9fb6453b2f..7e413ea19a9a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6536,6 +6536,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); +static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pio.count = 0; + return 1; +} + static int complete_fast_pio_out(struct kvm_vcpu *vcpu) { vcpu->arch.pio.count = 0; @@ -6552,12 +6558,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, size, port, &val, 1); + if (ret) + return ret; - if (!ret) { + /* + * Workaround userspace that relies on old KVM behavior of %rip being + * incremented prior to exiting to userspace to handle "OUT 0x7e". + */ + if (port == 0x7e && + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) { + vcpu->arch.complete_userspace_io = + complete_fast_pio_out_port_0x7e; + kvm_skip_emulated_instruction(vcpu); + } else { vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); vcpu->arch.complete_userspace_io = complete_fast_pio_out; } - return ret; + return 0; } static int complete_fast_pio_in(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 4c38bd0d4cd68fbcacfda8b66a86891ffcf396e1 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 17 Jan 2019 11:55:58 -0800 Subject: KVM: nVMX: Fix size checks in vmx_set_nested_state commit e8ab8d24b488632d07ce5ddb261f1d454114415b upstream. The size checks in vmx_nested_state are wrong because the calculations are made based on the size of a pointer to a struct kvm_nested_state rather than the size of a struct kvm_nested_state. Reported-by: Felix Wilhelm Signed-off-by: Jim Mattson Reviewed-by: Drew Schmitt Reviewed-by: Marc Orr Reviewed-by: Peter Shier Reviewed-by: Krish Sadhukhan Fixes: 8fcc4b5923af5de58b80b53a069453b135693304 Cc: stable@ver.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/nested.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f90b3a948291..a4bcac94392c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5407,7 +5407,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, return ret; /* Empty 'VMXON' state is permitted */ - if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12)) + if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) return 0; if (kvm_state->vmx.vmcs_pa != -1ull) { @@ -5451,7 +5451,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); - if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12)) + if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) return -EINVAL; if (copy_from_user(shadow_vmcs12, -- cgit v1.2.3 From c391e6198768b56b81295bccc473f578b79fed2d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 28 Apr 2019 18:04:11 +0200 Subject: ALSA: line6: use dynamic buffers commit e5c812e84f0dece3400d5caf42522287e6ef139f upstream. The line6 driver uses a lot of USB buffers off of the stack, which is not allowed on many systems, causing the driver to crash on some of them. Fix this up by dynamically allocating the buffers with kmalloc() which allows for proper DMA-able memory. Reported-by: Christo Gouws Reported-by: Alan Stern Tested-by: Christo Gouws Cc: stable Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/line6/driver.c | 60 +++++++++++++++++++++++++++------------------- sound/usb/line6/podhd.c | 21 +++++++++------- sound/usb/line6/toneport.c | 24 ++++++++++++++----- 3 files changed, 65 insertions(+), 40 deletions(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index c1376bfdc90b..aa28510d23ad 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -351,12 +351,16 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, { struct usb_device *usbdev = line6->usbdev; int ret; - unsigned char len; + unsigned char *len; unsigned count; if (address > 0xffff || datalen > 0xff) return -EINVAL; + len = kmalloc(sizeof(*len), GFP_KERNEL); + if (!len) + return -ENOMEM; + /* query the serial number: */ ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, @@ -365,7 +369,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, if (ret < 0) { dev_err(line6->ifcdev, "read request failed (error %d)\n", ret); - return ret; + goto exit; } /* Wait for data length. We'll get 0xff until length arrives. */ @@ -375,28 +379,29 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0x0012, 0x0000, &len, 1, + 0x0012, 0x0000, len, 1, LINE6_TIMEOUT * HZ); if (ret < 0) { dev_err(line6->ifcdev, "receive length failed (error %d)\n", ret); - return ret; + goto exit; } - if (len != 0xff) + if (*len != 0xff) break; } - if (len == 0xff) { + ret = -EIO; + if (*len == 0xff) { dev_err(line6->ifcdev, "read failed after %d retries\n", count); - return -EIO; - } else if (len != datalen) { + goto exit; + } else if (*len != datalen) { /* should be equal or something went wrong */ dev_err(line6->ifcdev, "length mismatch (expected %d, got %d)\n", - (int)datalen, (int)len); - return -EIO; + (int)datalen, (int)*len); + goto exit; } /* receive the result: */ @@ -405,12 +410,12 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, 0x0013, 0x0000, data, datalen, LINE6_TIMEOUT * HZ); - if (ret < 0) { + if (ret < 0) dev_err(line6->ifcdev, "read failed (error %d)\n", ret); - return ret; - } - return 0; +exit: + kfree(len); + return ret; } EXPORT_SYMBOL_GPL(line6_read_data); @@ -422,12 +427,16 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, { struct usb_device *usbdev = line6->usbdev; int ret; - unsigned char status; + unsigned char *status; int count; if (address > 0xffff || datalen > 0xffff) return -EINVAL; + status = kmalloc(sizeof(*status), GFP_KERNEL); + if (!status) + return -ENOMEM; + ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x0022, address, data, datalen, @@ -436,7 +445,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, if (ret < 0) { dev_err(line6->ifcdev, "write request failed (error %d)\n", ret); - return ret; + goto exit; } for (count = 0; count < LINE6_READ_WRITE_MAX_RETRIES; count++) { @@ -447,28 +456,29 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x0012, 0x0000, - &status, 1, LINE6_TIMEOUT * HZ); + status, 1, LINE6_TIMEOUT * HZ); if (ret < 0) { dev_err(line6->ifcdev, "receiving status failed (error %d)\n", ret); - return ret; + goto exit; } - if (status != 0xff) + if (*status != 0xff) break; } - if (status == 0xff) { + if (*status == 0xff) { dev_err(line6->ifcdev, "write failed after %d retries\n", count); - return -EIO; - } else if (status != 0) { + ret = -EIO; + } else if (*status != 0) { dev_err(line6->ifcdev, "write failed (error %d)\n", ret); - return -EIO; + ret = -EIO; } - - return 0; +exit: + kfree(status); + return ret; } EXPORT_SYMBOL_GPL(line6_write_data); diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 36ed9c85c0eb..5f3c87264e66 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -225,28 +225,32 @@ static void podhd_startup_start_workqueue(struct timer_list *t) static int podhd_dev_start(struct usb_line6_podhd *pod) { int ret; - u8 init_bytes[8]; + u8 *init_bytes; int i; struct usb_device *usbdev = pod->line6.usbdev; + init_bytes = kmalloc(8, GFP_KERNEL); + if (!init_bytes) + return -ENOMEM; + ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x11, 0, NULL, 0, LINE6_TIMEOUT * HZ); if (ret < 0) { dev_err(pod->line6.ifcdev, "read request failed (error %d)\n", ret); - return ret; + goto exit; } /* NOTE: looks like some kind of ping message */ ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x11, 0x0, - &init_bytes, 3, LINE6_TIMEOUT * HZ); + init_bytes, 3, LINE6_TIMEOUT * HZ); if (ret < 0) { dev_err(pod->line6.ifcdev, "receive length failed (error %d)\n", ret); - return ret; + goto exit; } pod->firmware_version = @@ -255,7 +259,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) for (i = 0; i <= 16; i++) { ret = line6_read_data(&pod->line6, 0xf000 + 0x08 * i, init_bytes, 8); if (ret < 0) - return ret; + goto exit; } ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), @@ -263,10 +267,9 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) USB_TYPE_STANDARD | USB_RECIP_DEVICE | USB_DIR_OUT, 1, 0, NULL, 0, LINE6_TIMEOUT * HZ); - if (ret < 0) - return ret; - - return 0; +exit: + kfree(init_bytes); + return ret; } static void podhd_startup_workqueue(struct work_struct *work) diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c index f47ba94e6f4a..19bee725de00 100644 --- a/sound/usb/line6/toneport.c +++ b/sound/usb/line6/toneport.c @@ -365,16 +365,21 @@ static bool toneport_has_source_select(struct usb_line6_toneport *toneport) /* Setup Toneport device. */ -static void toneport_setup(struct usb_line6_toneport *toneport) +static int toneport_setup(struct usb_line6_toneport *toneport) { - u32 ticks; + u32 *ticks; struct usb_line6 *line6 = &toneport->line6; struct usb_device *usbdev = line6->usbdev; + ticks = kmalloc(sizeof(*ticks), GFP_KERNEL); + if (!ticks) + return -ENOMEM; + /* sync time on device with host: */ /* note: 32-bit timestamps overflow in year 2106 */ - ticks = (u32)ktime_get_real_seconds(); - line6_write_data(line6, 0x80c6, &ticks, 4); + *ticks = (u32)ktime_get_real_seconds(); + line6_write_data(line6, 0x80c6, ticks, 4); + kfree(ticks); /* enable device: */ toneport_send_cmd(usbdev, 0x0301, 0x0000); @@ -389,6 +394,7 @@ static void toneport_setup(struct usb_line6_toneport *toneport) toneport_update_led(toneport); mod_timer(&toneport->timer, jiffies + TONEPORT_PCM_DELAY * HZ); + return 0; } /* @@ -451,7 +457,9 @@ static int toneport_init(struct usb_line6 *line6, return err; } - toneport_setup(toneport); + err = toneport_setup(toneport); + if (err) + return err; /* register audio system: */ return snd_card_register(line6->card); @@ -463,7 +471,11 @@ static int toneport_init(struct usb_line6 *line6, */ static int toneport_reset_resume(struct usb_interface *interface) { - toneport_setup(usb_get_intfdata(interface)); + int err; + + err = toneport_setup(usb_get_intfdata(interface)); + if (err) + return err; return line6_resume(interface); } #endif -- cgit v1.2.3 From 3c13f6cd254daabdd5c2505e11bc8043005e7faf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 30 Mar 2019 10:31:52 +0100 Subject: iwlwifi: mvm: properly check debugfs dentry before using it commit 154d4899e4111ae24e68d6ba955f46856cb046bc upstream. debugfs can now report an error code if something went wrong instead of just NULL. So if the return value is to be used as a "real" dentry, it needs to be checked if it is an error before dereferencing it. This is now happening because of ff9fb72bc077 ("debugfs: return error values, not NULL"). If multiple iwlwifi devices are in the system, this can cause problems when the driver attempts to create the main debugfs directory again. Later on in the code we fail horribly by trying to dereference a pointer that is an error value. Reported-by: Laura Abbott Reported-by: Gabriel Ramirez Cc: Johannes Berg Cc: Emmanuel Grumbach Cc: Luca Coelho Cc: Intel Linux Wireless Cc: Kalle Valo Cc: stable # 5.0 Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 33b0af24a537..d61ab3e80759 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -1482,6 +1482,11 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) return; mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir); + if (IS_ERR_OR_NULL(mvmvif->dbgfs_dir)) { + IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", + dbgfs_dir); + return; + } if (!mvmvif->dbgfs_dir) { IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", -- cgit v1.2.3 From d467f3acebc5c0d816b7e7008ea8ee07d1633e72 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 3 Mar 2019 18:24:33 +0100 Subject: ath10k: Drop WARN_ON()s that always trigger during system resume commit 9e80ad37f6788ed52b89a3cfcd593e0aa69b216d upstream. ath10k_mac_vif_chan() always returns an error for the given vif during system-wide resume which reliably triggers two WARN_ON()s in ath10k_bss_info_changed() and they are not particularly useful in that code path, so drop them. Tested: QCA6174 hw3.2 PCI with WLAN.RM.2.0-00180-QCARMSWPZ-1 Tested: QCA6174 hw3.2 SDIO with WLAN.RMH.4.4.1-00007-QCARMSWP-1 Fixes: cd93b83ad927 ("ath10k: support for multicast rate control") Fixes: f279294e9ee2 ("ath10k: add support for configuring management packet rate") Cc: stable@vger.kernel.org Reviewed-by: Brian Norris Tested-by: Brian Norris Tested-by: Claire Chang Signed-off-by: Rafael J. Wysocki Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 49758490eaba..9560acc5f7da 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5705,7 +5705,7 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_MCAST_RATE && - !WARN_ON(ath10k_mac_vif_chan(arvif->vif, &def))) { + !ath10k_mac_vif_chan(arvif->vif, &def)) { band = def.chan->band; rateidx = vif->bss_conf.mcast_rate[band] - 1; @@ -5743,7 +5743,7 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_BASIC_RATES) { - if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) { + if (ath10k_mac_vif_chan(vif, &def)) { mutex_unlock(&ar->conf_mutex); return; } -- cgit v1.2.3 From e5b9547b1aa39164a8df1d01f2996391c0356d71 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 5 May 2019 14:43:41 +0200 Subject: Linux 5.0.13 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fd044f594bbf..51a819544505 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 12 +SUBLEVEL = 13 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3 From d956e63340308f817a50a6e69f3510980c859b09 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 24 Apr 2019 09:32:55 -0700 Subject: selftests/seccomp: Prepare for exclusive seccomp flags commit 4ee0776760af03f181e6b80baf5fb1cc1a980f50 upstream. Some seccomp flags will become exclusive, so the selftest needs to be adjusted to mask those out and test them individually for the "all flags" tests. Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Kees Cook Reviewed-by: Tycho Andersen Acked-by: James Morris Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 34 ++++++++++++++++++++------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 6d7a81306f8a..1c2509104924 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -2166,11 +2166,14 @@ TEST(detect_seccomp_filter_flags) SECCOMP_FILTER_FLAG_LOG, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_NEW_LISTENER }; - unsigned int flag, all_flags; + unsigned int exclusive[] = { + SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_NEW_LISTENER }; + unsigned int flag, all_flags, exclusive_mask; int i; long ret; - /* Test detection of known-good filter flags */ + /* Test detection of individual known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { int bits = 0; @@ -2197,16 +2200,29 @@ TEST(detect_seccomp_filter_flags) all_flags |= flag; } - /* Test detection of all known-good filter flags */ - ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); - EXPECT_EQ(-1, ret); - EXPECT_EQ(EFAULT, errno) { - TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", - all_flags); + /* + * Test detection of all known-good filter flags combined. But + * for the exclusive flags we need to mask them out and try them + * individually for the "all flags" testing. + */ + exclusive_mask = 0; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) + exclusive_mask |= exclusive[i]; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) { + flag = all_flags & ~exclusive_mask; + flag |= exclusive[i]; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + flag); + } } - /* Test detection of an unknown filter flag */ + /* Test detection of an unknown filter flags, without exclusives. */ flag = -1; + flag &= ~exclusive_mask; ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); EXPECT_EQ(-1, ret); EXPECT_EQ(EINVAL, errno) { -- cgit v1.2.3 From 00ddbaed26634eaa8841f692e238f6994b4f261f Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Wed, 6 Mar 2019 13:14:13 -0700 Subject: seccomp: Make NEW_LISTENER and TSYNC flags exclusive commit 7a0df7fbc14505e2e2be19ed08654a09e1ed5bf6 upstream. As the comment notes, the return codes for TSYNC and NEW_LISTENER conflict, because they both return positive values, one in the case of success and one in the case of error. So, let's disallow both of these flags together. While this is technically a userspace break, all the users I know of are still waiting on me to land this feature in libseccomp, so I think it'll be safe. Also, at present my use case doesn't require TSYNC at all, so this isn't a big deal to disallow. If someone wanted to support this, a path forward would be to add a new flag like TSYNC_AND_LISTENER_YES_I_UNDERSTAND_THAT_TSYNC_WILL_JUST_RETURN_EAGAIN, but the use cases are so different I don't see it really happening. Finally, it's worth noting that this does actually fix a UAF issue: at the end of seccomp_set_mode_filter(), we have: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { if (ret < 0) { listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); } else { fd_install(listener, listener_f); ret = listener; } } out_free: seccomp_filter_free(prepared); But if ret > 0 because TSYNC raced, we'll install the listener fd and then free the filter out from underneath it, causing a UAF when the task closes it or dies. This patch also switches the condition to be simply if (ret), so that if someone does add the flag mentioned above, they won't have to remember to fix this too. Reported-by: syzbot+b562969adb2e04af3442@syzkaller.appspotmail.com Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") CC: stable@vger.kernel.org # v5.0+ Signed-off-by: Tycho Andersen Signed-off-by: Kees Cook Acked-by: James Morris Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e815781ed751..181e72718434 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -500,7 +500,10 @@ out: * * Caller must be holding current->sighand->siglock lock. * - * Returns 0 on success, -ve on error. + * Returns 0 on success, -ve on error, or + * - in TSYNC mode: the pid of a thread which was either not in the correct + * seccomp mode or did not have an ancestral seccomp filter + * - in NEW_LISTENER mode: the fd of the new listener */ static long seccomp_attach_filter(unsigned int flags, struct seccomp_filter *filter) @@ -1256,6 +1259,16 @@ static long seccomp_set_mode_filter(unsigned int flags, if (flags & ~SECCOMP_FILTER_FLAG_MASK) return -EINVAL; + /* + * In the successful case, NEW_LISTENER returns the new listener fd. + * But in the failure case, TSYNC returns the thread that died. If you + * combine these two flags, there's no way to tell whether something + * succeeded or failed. So, let's disallow this combination. + */ + if ((flags & SECCOMP_FILTER_FLAG_TSYNC) && + (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER)) + return -EINVAL; + /* Prepare the new filter before holding any locks. */ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) @@ -1302,7 +1315,7 @@ out: mutex_unlock(¤t->signal->cred_guard_mutex); out_put_fd: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { - if (ret < 0) { + if (ret) { listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); -- cgit v1.2.3 From fb1f2cb221f56f6c4509ddf189e6f2d906839bab Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 8 Apr 2019 16:04:38 +0300 Subject: ARC: memset: fix build with L1_CACHE_SHIFT != 6 commit 55c0c4c793b538fb438bcc72481b9dc2f79fe5a9 upstream. In case of 'L1_CACHE_SHIFT != 6' we define dummy assembly macroses PREALLOC_INSTR and PREFETCHW_INSTR without arguments. However we pass arguments to them in code which cause build errors. Fix that. Signed-off-by: Eugeniy Paltsev Cc: [5.0] Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/lib/memset-archs.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index f230bb7092fd..b3373f5c88e0 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -30,10 +30,10 @@ #else -.macro PREALLOC_INSTR +.macro PREALLOC_INSTR reg, off .endm -.macro PREFETCHW_INSTR +.macro PREFETCHW_INSTR reg, off .endm #endif -- cgit v1.2.3 From 7e4e59c961263bb26b637be911f7d55bb8742676 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 21 Apr 2019 17:58:11 +0300 Subject: iwlwifi: fix driver operation for 5350 commit 5c9adef9789148d382d7d1307c3d6bfaf51d143d upstream. We introduced a bug that prevented this old device from working. The driver would simply not be able to complete the INIT flow while spewing this warning: CSR addresses aren't configured WARNING: CPU: 0 PID: 819 at drivers/net/wireless/intel/iwlwifi/pcie/drv.c:917 iwl_pci_probe+0x160/0x1e0 [iwlwifi] Cc: stable@vger.kernel.org # v4.18+ Fixes: a8cbb46f831d ("iwlwifi: allow different csr flags for different device families") Signed-off-by: Emmanuel Grumbach Fixes: c8f1b51e506d ("iwlwifi: allow different csr flags for different device families") Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/cfg/5000.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/5000.c b/drivers/net/wireless/intel/iwlwifi/cfg/5000.c index 575a7022d045..3846064d51a5 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/5000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/5000.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -136,6 +136,7 @@ const struct iwl_cfg iwl5350_agn_cfg = { .ht_params = &iwl5000_ht_params, .led_mode = IWL_LED_BLINK, .internal_wimax_coex = true, + .csr = &iwl_csr_v1, }; #define IWL_DEVICE_5150 \ -- cgit v1.2.3 From 761360f3302196680f683799ae5311623e012c1f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 3 Apr 2019 21:01:06 -0700 Subject: mwifiex: Make resume actually do something useful again on SDIO cards commit b82d6c1f8f8288f744a9dcc16cd3085d535decca upstream. The commit fc3a2fcaa1ba ("mwifiex: use atomic bitops to represent adapter status variables") had a fairly straightforward bug in it. It contained this bit of diff: - if (!adapter->is_suspended) { + if (test_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags)) { As you can see the patch missed the "!" when converting to the atomic bitops. This meant that the resume hasn't done anything at all since that commit landed and suspend/resume for mwifiex SDIO cards has been totally broken. After fixing this mwifiex suspend/resume appears to work again, at least with the simple testing I've done. Fixes: fc3a2fcaa1ba ("mwifiex: use atomic bitops to represent adapter status variables") Cc: Signed-off-by: Douglas Anderson Reviewed-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index d49fbd58afa7..bfbe3aa058d9 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -181,7 +181,7 @@ static int mwifiex_sdio_resume(struct device *dev) adapter = card->adapter; - if (test_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags)) { + if (!test_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags)) { mwifiex_dbg(adapter, WARN, "device already resumed\n"); return 0; -- cgit v1.2.3 From 3ca91b5353a208c9c32548bd74ecb5f2486d7ced Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 8 Apr 2019 10:31:45 +0200 Subject: mtd: rawnand: marvell: Clean the controller state before each operation commit 9a8f612ca0d6a436e6471c9bed516d34a2cc626f upstream. Since the migration of the driver to stop using the legacy ->select_chip() hook, there is nothing deselecting the target anymore, thus the selection is not forced at the next access. Ensure the ND_RUN bit and the interrupts are always in a clean state. Cc: Daniel Mack Cc: stable@vger.kernel.org Fixes: b25251414f6e00 ("mtd: rawnand: marvell: Stop implementing ->select_chip()") Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Tested-by: Daniel Mack Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/marvell_nand.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 84283c6bb0ff..66a161b8f745 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -722,12 +722,6 @@ static void marvell_nfc_select_target(struct nand_chip *chip, struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); u32 ndcr_generic; - if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) - return; - - writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); - writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); - /* * Reset the NDCR register to a clean state for this particular chip, * also clear ND_RUN bit. @@ -739,6 +733,12 @@ static void marvell_nfc_select_target(struct nand_chip *chip, /* Also reset the interrupt status register */ marvell_nfc_clear_int(nfc, NDCR_ALL_INT); + if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) + return; + + writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); + writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); + nfc->selected_chip = chip; marvell_nand->selected_die = die_nr; } -- cgit v1.2.3 From 5074530caf7c96f0e0083fccfaff3a868aef3460 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Apr 2019 11:39:33 +0200 Subject: mac80211: don't attempt to rename ERR_PTR() debugfs dirs commit 517879147493a5e1df6b89a50f708f1133fcaddb upstream. We need to dereference the directory to get its parent to be able to rename it, so it's clearly not safe to try to do this with ERR_PTR() pointers. Skip in this case. It seems that this is most likely what was causing the report by syzbot, but I'm not entirely sure as it didn't come with a reproducer this time. Cc: stable@vger.kernel.org Reported-by: syzbot+4ece1a28b8f4730547c9@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/debugfs_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cff0fb3578c9..deb3faf08337 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -841,7 +841,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) dir = sdata->vif.debugfs_dir; - if (!dir) + if (IS_ERR_OR_NULL(dir)) return; sprintf(buf, "netdev:%s", sdata->name); -- cgit v1.2.3 From 8f5a04152e26d1803f1bde5b2f722e7a4db96935 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 30 Apr 2019 11:47:34 +0200 Subject: i2c: synquacer: fix enumeration of slave devices commit 95e0cf3caeb11e1b0398c747b5cfa12828263824 upstream. The I2C host driver for SynQuacer fails to populate the of_node and ACPI companion fields of the struct i2c_adapter it instantiates, resulting in enumeration of the subordinate I2C bus to fail. Fixes: 0d676a6c4390 ("i2c: add support for Socionext SynQuacer I2C controller") Cc: # v4.19+ Signed-off-by: Ard Biesheuvel Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-synquacer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index 2184b7c3580e..6b8d803bd30e 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -602,6 +602,8 @@ static int synquacer_i2c_probe(struct platform_device *pdev) i2c->adapter = synquacer_i2c_ops; i2c_set_adapdata(&i2c->adapter, i2c); i2c->adapter.dev.parent = &pdev->dev; + i2c->adapter.dev.of_node = pdev->dev.of_node; + ACPI_COMPANION_SET(&i2c->adapter.dev, ACPI_COMPANION(&pdev->dev)); i2c->adapter.nr = pdev->id; init_completion(&i2c->completion); -- cgit v1.2.3 From c4e6b01c07a01c1db6cf76b157b9e8358785daad Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 17 Apr 2019 01:59:34 +0000 Subject: i2c: imx: correct the method of getting private data in notifier_call commit d386bb9042f4629bf62cdc5952ea8aab225f24a7 upstream. The way of getting private imx_i2c_struct in i2c_imx_clk_notifier_call() is incorrect, should use clk_change_nb element to get correct address and avoid below kernel dump during POST_RATE_CHANGE notify by clk framework: Unable to handle kernel paging request at virtual address 03ef1488 pgd = (ptrval) [03ef1488] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) Workqueue: events reduce_bus_freq_handler PC is at i2c_imx_set_clk+0x10/0xb8 LR is at i2c_imx_clk_notifier_call+0x20/0x28 pc : [<806a893c>] lr : [<806a8a04>] psr: a0080013 sp : bf399dd8 ip : bf3432ac fp : bf7c1dc0 r10: 00000002 r9 : 00000000 r8 : 00000000 r7 : 03ef1480 r6 : bf399e50 r5 : ffffffff r4 : 00000000 r3 : bf025300 r2 : bf399e50 r1 : 00b71b00 r0 : bf399be8 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 4e03004a DAC: 00000051 Process kworker/2:1 (pid: 38, stack limit = 0x(ptrval)) Stack: (0xbf399dd8 to 0xbf39a000) 9dc0: 806a89e4 00000000 9de0: ffffffff bf399e50 00000002 806a8a04 806a89e4 80142900 ffffffff 00000000 9e00: bf34ef18 bf34ef04 00000000 ffffffff bf399e50 80142d84 00000000 bf399e6c 9e20: bf34ef00 80f214c4 bf025300 00000002 80f08d08 bf017480 00000000 80142df0 9e40: 00000000 80166ed8 80c27638 8045de58 bf352340 03ef1480 00b71b00 0f82e242 9e60: bf025300 00000002 03ef1480 80f60e5c 00000001 8045edf0 00000002 8045eb08 9e80: bf025300 00000002 03ef1480 8045ee10 03ef1480 8045eb08 bf01be40 00000002 9ea0: 03ef1480 8045ee10 07de2900 8045eb08 bf01b780 00000002 07de2900 8045ee10 9ec0: 80c27898 bf399ee4 bf020a80 00000002 1f78a400 8045ee10 80f60e5c 80460514 9ee0: 80f60e5c bf01b600 bf01b480 80460460 0f82e242 bf383a80 bf383a00 80f60e5c 9f00: 00000000 bf7c1dc0 80f60e70 80460564 80f60df0 80f60d24 80f60df0 8011e72c 9f20: 00000000 80f60df0 80f60e6c bf7c4f00 00000000 8011e7ac bf274000 8013bd84 9f40: bf7c1dd8 80f03d00 bf274000 bf7c1dc0 bf274014 bf7c1dd8 80f03d00 bf398000 9f60: 00000008 8013bfb4 00000000 bf25d100 bf25d0c0 00000000 bf274000 8013bf88 9f80: bf25d11c bf0cfebc 00000000 8014140c bf25d0c0 801412ec 00000000 00000000 9fa0: 00000000 00000000 00000000 801010e8 00000000 00000000 00000000 00000000 9fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [<806a893c>] (i2c_imx_set_clk) from [<806a8a04>] (i2c_imx_clk_notifier_call+0x20/0x28) [<806a8a04>] (i2c_imx_clk_notifier_call) from [<80142900>] (notifier_call_chain+0x44/0x84) [<80142900>] (notifier_call_chain) from [<80142d84>] (__srcu_notifier_call_chain+0x44/0x98) [<80142d84>] (__srcu_notifier_call_chain) from [<80142df0>] (srcu_notifier_call_chain+0x18/0x20) [<80142df0>] (srcu_notifier_call_chain) from [<8045de58>] (__clk_notify+0x78/0xa4) [<8045de58>] (__clk_notify) from [<8045edf0>] (__clk_recalc_rates+0x60/0xb4) [<8045edf0>] (__clk_recalc_rates) from [<8045ee10>] (__clk_recalc_rates+0x80/0xb4) Code: e92d40f8 e5903298 e59072a0 e1530001 (e5975008) ---[ end trace fc7f5514b97b6cbb ]--- Fixes: 90ad2cbe88c2 ("i2c: imx: use clk notifier for rate changes") Signed-off-by: Anson Huang Reviewed-by: Dong Aisheng Signed-off-by: Wolfram Sang Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index fa9ad53845d9..d4b72e4ffd71 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -510,9 +510,9 @@ static int i2c_imx_clk_notifier_call(struct notifier_block *nb, unsigned long action, void *data) { struct clk_notifier_data *ndata = data; - struct imx_i2c_struct *i2c_imx = container_of(&ndata->clk, + struct imx_i2c_struct *i2c_imx = container_of(nb, struct imx_i2c_struct, - clk); + clk_change_nb); if (action & POST_RATE_CHANGE) i2c_imx_set_clk(i2c_imx, ndata->new_rate); -- cgit v1.2.3 From e4f53dc963f9fa86932796424a921b0a6a8538e5 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 30 Apr 2019 17:23:22 +0300 Subject: i2c: Prevent runtime suspend of adapter when Host Notify is required commit 72bfcee11cf89509795c56b0e40a3785ab00bbdd upstream. Multiple users have reported their Synaptics touchpad has stopped working between v4.20.1 and v4.20.2 when using SMBus interface. The culprit for this appeared to be commit c5eb1190074c ("PCI / PM: Allow runtime PM without callback functions") that fixed the runtime PM for i2c-i801 SMBus adapter. Those Synaptics touchpad are using i2c-i801 for SMBus communication and testing showed they are able to get back working by preventing the runtime suspend of adapter. Normally when i2c-i801 SMBus adapter transmits with the client it resumes before operation and autosuspends after. However, if client requires SMBus Host Notify protocol, what those Synaptics touchpads do, then the host adapter must not go to runtime suspend since then it cannot process incoming SMBus Host Notify commands the client may send. Fix this by keeping I2C/SMBus adapter active in case client requires Host Notify. Reported-by: Keijo Vaara Link: https://bugzilla.kernel.org/show_bug.cgi?id=203297 Fixes: c5eb1190074c ("PCI / PM: Allow runtime PM without callback functions") Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Jarkko Nikula Acked-by: Rafael J. Wysocki Tested-by: Keijo Vaara Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core-base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index af87a16ac3a5..60fb2afc0e50 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -327,6 +327,8 @@ static int i2c_device_probe(struct device *dev) if (client->flags & I2C_CLIENT_HOST_NOTIFY) { dev_dbg(dev, "Using Host Notify IRQ\n"); + /* Keep adapter active when Host Notify is required */ + pm_runtime_get_sync(&client->adapter->dev); irq = i2c_smbus_host_notify_to_irq(client); } else if (dev->of_node) { irq = of_irq_get_byname(dev->of_node, "irq"); @@ -431,6 +433,8 @@ static int i2c_device_remove(struct device *dev) device_init_wakeup(&client->dev, false); client->irq = client->init_irq; + if (client->flags & I2C_CLIENT_HOST_NOTIFY) + pm_runtime_put(&client->adapter->dev); return status; } -- cgit v1.2.3 From b5fb5b38639ac569c0902ef5e49b31b240d6bf58 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 24 Apr 2019 16:34:25 +0800 Subject: ALSA: hda/realtek - Add new Dell platform for headset mode commit 0a29c57b76624723b6b00c027e0e992d130ace49 upstream. Add two Dell platform for headset mode. [ Note: this is a further correction / addition of the previous pin-based quirks for Dell machines; another entry for ALC236 with the d-mic pin 0x12 and an entry for ALC295 -- tiwai ] Fixes: b26e36b7ef36 ("ALSA: hda/realtek - add two more pin configuration sets to quirk table") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a9f69c3a3e0b..791c1f80b6cc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7266,6 +7266,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x40000000}, + {0x14, 0x90170110}, + {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x02211020}), @@ -7539,6 +7543,9 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x17, 0x90170110}, {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x14, 0x90170110}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, ALC295_STANDARD_PINS, {0x17, 0x21014020}, -- cgit v1.2.3 From e11f632ca8e9e5e21a30ee2e4dfb43a6518d1923 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 26 Apr 2019 16:13:54 +0800 Subject: ALSA: hda/realtek - Fixed Dell AIO speaker noise commit 0700d3d117a7f110ddddbd83873e13652f69c54b upstream. Fixed Dell AIO speaker noise. spec->gen.auto_mute_via_amp = 1, this option was solved speaker white noise at boot. codec->power_save_node = 0, this option was solved speaker noise at resume back. Fixes: 9226665159f0 ("ALSA: hda/realtek - Fix Dell AIO LineOut issue") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 791c1f80b6cc..89e52238fe34 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5448,6 +5448,8 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, return; spec->gen.preferred_dacs = preferred_pairs; + spec->gen.auto_mute_via_amp = 1; + codec->power_save_node = 0; } /* The DAC of NID 0x3 will introduce click/pop noise on headphones, so invalidate it */ -- cgit v1.2.3 From 0ee051c78e8277041c4e60ca6fe492bacab0b789 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 Apr 2019 15:10:01 +0200 Subject: ALSA: hda/realtek - Apply the fixup for ASUS Q325UAR commit 3887c26c0e24d50a4d0ce20cf4726737cee1a2fd upstream. Some ASUS models like Q325UAR with ALC295 codec requires the same fixup that has been applied to ALC294 codec. Just copy the entry with the pin matching to cover ALC295 too. BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1784485 Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 89e52238fe34..5ce28b4f0218 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7545,6 +7545,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x17, 0x90170110}, {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC294_FIXUP_ASUS_SPK, + {0x12, 0x90a60130}, + {0x17, 0x90170110}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x04211020}), -- cgit v1.2.3 From 03040fd71d5d05902643acde75a3c213dfa580b2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 23 Apr 2019 14:48:29 -0400 Subject: USB: yurex: Fix protection fault after device removal commit ef61eb43ada6c1d6b94668f0f514e4c268093ff3 upstream. The syzkaller USB fuzzer found a general-protection-fault bug in the yurex driver. The fault occurs when a device has been unplugged; the driver's interrupt-URB handler logs an error message referring to the device by name, after the device has been unregistered and its name deallocated. This problem is caused by the fact that the interrupt URB isn't cancelled until the driver's private data structure is released, which can happen long after the device is gone. The cure is to make sure that the interrupt URB is killed before yurex_disconnect() returns; this is exactly the sort of thing that usb_poison_urb() was meant for. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+2eb9121678bdb36e6d57@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 6d9fd5f64903..7b306aa22d25 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -314,6 +314,7 @@ static void yurex_disconnect(struct usb_interface *interface) usb_deregister_dev(interface, &yurex_class); /* prevent more I/O from starting */ + usb_poison_urb(dev->urb); mutex_lock(&dev->io_mutex); dev->interface = NULL; mutex_unlock(&dev->io_mutex); -- cgit v1.2.3 From a3c42a175c5c2419c4616c7239cf234388404732 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 22 Apr 2019 11:16:04 -0400 Subject: USB: w1 ds2490: Fix bug caused by improper use of altsetting array commit c114944d7d67f24e71562fcfc18d550ab787e4d4 upstream. The syzkaller USB fuzzer spotted a slab-out-of-bounds bug in the ds2490 driver. This bug is caused by improper use of the altsetting array in the usb_interface structure (the array's entries are not always stored in numerical order), combined with a naive assumption that all interfaces probed by the driver will have the expected number of altsettings. The bug can be fixed by replacing references to the possibly non-existent intf->altsetting[alt] entry with the guaranteed-to-exist intf->cur_altsetting entry. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+d65f673b847a1a96cdba@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/ds2490.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c index 0f4ecfcdb549..a9fb77585272 100644 --- a/drivers/w1/masters/ds2490.c +++ b/drivers/w1/masters/ds2490.c @@ -1016,15 +1016,15 @@ static int ds_probe(struct usb_interface *intf, /* alternative 3, 1ms interrupt (greatly speeds search), 64 byte bulk */ alt = 3; err = usb_set_interface(dev->udev, - intf->altsetting[alt].desc.bInterfaceNumber, alt); + intf->cur_altsetting->desc.bInterfaceNumber, alt); if (err) { dev_err(&dev->udev->dev, "Failed to set alternative setting %d " "for %d interface: err=%d.\n", alt, - intf->altsetting[alt].desc.bInterfaceNumber, err); + intf->cur_altsetting->desc.bInterfaceNumber, err); goto err_out_clear; } - iface_desc = &intf->altsetting[alt]; + iface_desc = intf->cur_altsetting; if (iface_desc->desc.bNumEndpoints != NUM_EP-1) { pr_info("Num endpoints=%d. It is not DS9490R.\n", iface_desc->desc.bNumEndpoints); -- cgit v1.2.3 From c0b68c8de3700b37fd7e1884d61ecfe559f11dc3 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 18 Apr 2019 13:12:07 -0400 Subject: USB: dummy-hcd: Fix failure to give back unlinked URBs commit fc834e607ae3d18e1a20bca3f9a2d7f52ea7a2be upstream. The syzkaller USB fuzzer identified a failure mode in which dummy-hcd would never give back an unlinked URB. This causes usb_kill_urb() to hang, leading to WARNINGs and unkillable threads. In dummy-hcd, all URBs are given back by the dummy_timer() routine as it scans through the list of pending URBS. Failure to give back URBs can be caused by failure to start or early exit from the scanning loop. The code currently has two such pathways: One is triggered when an unsupported bus transfer speed is encountered, and the other by exhausting the simulated bandwidth for USB transfers during a frame. This patch removes those two paths, thereby allowing all unlinked URBs to be given back in a timely manner. It adds a check for the bus speed when the gadget first starts running, so that dummy_timer() will never thereafter encounter an unsupported speed. And it prevents the loop from exiting as soon as the total bandwidth has been used up (the scanning loop continues, giving back unlinked URBs as they are found, but not transferring any more data). Thanks to Andrey Konovalov for manually running the syzkaller fuzzer to help track down the source of the bug. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+d919b0f29d7b5a4994b9@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index baf72f95f0f1..213b52508621 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -979,8 +979,18 @@ static int dummy_udc_start(struct usb_gadget *g, struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; - if (driver->max_speed == USB_SPEED_UNKNOWN) + switch (g->speed) { + /* All the speeds we support */ + case USB_SPEED_LOW: + case USB_SPEED_FULL: + case USB_SPEED_HIGH: + case USB_SPEED_SUPER: + break; + default: + dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", + driver->max_speed); return -EINVAL; + } /* * SLAVE side init ... the layer above hardware, which @@ -1784,9 +1794,10 @@ static void dummy_timer(struct timer_list *t) /* Bus speed is 500000 bytes/ms, so use a little less */ total = 490000; break; - default: + default: /* Can't happen */ dev_err(dummy_dev(dum_hcd), "bogus device speed\n"); - return; + total = 0; + break; } /* FIXME if HZ != 1000 this will probably misbehave ... */ @@ -1828,7 +1839,7 @@ restart: /* Used up this frame's bandwidth? */ if (total <= 0) - break; + continue; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); -- cgit v1.2.3 From ecc9d5c32adda175417e5bda3254b641fa420b44 Mon Sep 17 00:00:00 2001 From: Malte Leip Date: Sun, 14 Apr 2019 12:00:12 +0200 Subject: usb: usbip: fix isoc packet num validation in get_pipe commit c409ca3be3c6ff3a1eeb303b191184e80d412862 upstream. Change the validation of number_of_packets in get_pipe to compare the number of packets to a fixed maximum number of packets allowed, set to be 1024. This number was chosen due to it being used by other drivers as well, for example drivers/usb/host/uhci-q.c Background/reason: The get_pipe function in stub_rx.c validates the number of packets in isochronous mode and aborts with an error if that number is too large, in order to prevent malicious input from possibly triggering large memory allocations. This was previously done by checking whether pdu->u.cmd_submit.number_of_packets is bigger than the number of packets that would be needed for pdu->u.cmd_submit.transfer_buffer_length bytes if all except possibly the last packet had maximum length, given by usb_endpoint_maxp(epd) * usb_endpoint_maxp_mult(epd). This leads to an error if URBs with packets shorter than the maximum possible length are submitted, which is allowed according to Documentation/driver-api/usb/URB.rst and occurs for example with the snd-usb-audio driver. Fixes: c6688ef9f297 ("usbip: fix stub_rx: harden CMD_SUBMIT path to handle malicious input") Signed-off-by: Malte Leip Cc: stable Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 12 +++--------- drivers/usb/usbip/usbip_common.h | 7 +++++++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 97b09a42a10c..dbfb2f24d71e 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -361,16 +361,10 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) } if (usb_endpoint_xfer_isoc(epd)) { - /* validate packet size and number of packets */ - unsigned int maxp, packets, bytes; - - maxp = usb_endpoint_maxp(epd); - maxp *= usb_endpoint_maxp_mult(epd); - bytes = pdu->u.cmd_submit.transfer_buffer_length; - packets = DIV_ROUND_UP(bytes, maxp); - + /* validate number of packets */ if (pdu->u.cmd_submit.number_of_packets < 0 || - pdu->u.cmd_submit.number_of_packets > packets) { + pdu->u.cmd_submit.number_of_packets > + USBIP_MAX_ISO_PACKETS) { dev_err(&sdev->udev->dev, "CMD_SUBMIT: isoc invalid num packets %d\n", pdu->u.cmd_submit.number_of_packets); diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index bf8afe9b5883..8be857a4fa13 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -121,6 +121,13 @@ extern struct device_attribute dev_attr_usbip_debug; #define USBIP_DIR_OUT 0x00 #define USBIP_DIR_IN 0x01 +/* + * Arbitrary limit for the maximum number of isochronous packets in an URB, + * compare for example the uhci_submit_isochronous function in + * drivers/usb/host/uhci-q.c + */ +#define USBIP_MAX_ISO_PACKETS 1024 + /** * struct usbip_header_basic - data pertinent to every request * @command: the usbip request type -- cgit v1.2.3 From 0bc45b6b63c3ea91be81425b13e45b8bc582c733 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Apr 2019 11:51:38 -0400 Subject: USB: core: Fix unterminated string returned by usb_string() commit c01c348ecdc66085e44912c97368809612231520 upstream. Some drivers (such as the vub300 MMC driver) expect usb_string() to return a properly NUL-terminated string, even when an error occurs. (In fact, vub300's probe routine doesn't bother to check the return code from usb_string().) When the driver goes on to use an unterminated string, it leads to kernel errors such as stack-out-of-bounds, as found by the syzkaller USB fuzzer. An out-of-range string index argument is not at all unlikely, given that some devices don't provide string descriptors and therefore list 0 as the value for their string indexes. This patch makes usb_string() return a properly terminated empty string along with the -EINVAL error code when an out-of-range index is encountered. And since a USB string index is a single-byte value, indexes >= 256 are just as invalid as values of 0 or below. Signed-off-by: Alan Stern Reported-by: syzbot+b75b85111c10b8d680f1@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 4f33eb632a88..4020ce8db6ce 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -820,9 +820,11 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; - if (size <= 0 || !buf || !index) + if (size <= 0 || !buf) return -EINVAL; buf[0] = 0; + if (index <= 0 || index >= 256) + return -EINVAL; tbuf = kmalloc(256, GFP_NOIO); if (!tbuf) return -ENOMEM; -- cgit v1.2.3 From 3be814567ee619e6bfaca7a1a93903e5d553e040 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 19 Apr 2019 13:52:38 -0400 Subject: USB: core: Fix bug caused by duplicate interface PM usage counter commit c2b71462d294cf517a0bc6e4fd6424d7cee5596f upstream. The syzkaller fuzzer reported a bug in the USB hub driver which turned out to be caused by a negative runtime-PM usage counter. This allowed a hub to be runtime suspended at a time when the driver did not expect it. The symptom is a WARNING issued because the hub's status URB is submitted while it is already active: URB 0000000031fb463e submitted while active WARNING: CPU: 0 PID: 2917 at drivers/usb/core/urb.c:363 The negative runtime-PM usage count was caused by an unfortunate design decision made when runtime PM was first implemented for USB. At that time, USB class drivers were allowed to unbind from their interfaces without balancing the usage counter (i.e., leaving it with a positive count). The core code would take care of setting the counter back to 0 before allowing another driver to bind to the interface. Later on when runtime PM was implemented for the entire kernel, the opposite decision was made: Drivers were required to balance their runtime-PM get and put calls. In order to maintain backward compatibility, however, the USB subsystem adapted to the new implementation by keeping an independent usage counter for each interface and using it to automatically adjust the normal usage counter back to 0 whenever a driver was unbound. This approach involves duplicating information, but what is worse, it doesn't work properly in cases where a USB class driver delays decrementing the usage counter until after the driver's disconnect() routine has returned and the counter has been adjusted back to 0. Doing so would cause the usage counter to become negative. There's even a warning about this in the USB power management documentation! As it happens, this is exactly what the hub driver does. The kick_hub_wq() routine increments the runtime-PM usage counter, and the corresponding decrement is carried out by hub_event() in the context of the hub_wq work-queue thread. This work routine may sometimes run after the driver has been unbound from its interface, and when it does it causes the usage counter to go negative. It is not possible for hub_disconnect() to wait for a pending hub_event() call to finish, because hub_disconnect() is called with the device lock held and hub_event() acquires that lock. The only feasible fix is to reverse the original design decision: remove the duplicate interface-specific usage counter and require USB drivers to balance their runtime PM gets and puts. As far as I know, all existing drivers currently do this. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+7634edaea4d0b341c625@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/usb/power-management.rst | 14 +++++++++----- drivers/usb/core/driver.c | 13 ------------- drivers/usb/storage/realtek_cr.c | 13 +++++-------- include/linux/usb.h | 2 -- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst index 79beb807996b..4a74cf6f2797 100644 --- a/Documentation/driver-api/usb/power-management.rst +++ b/Documentation/driver-api/usb/power-management.rst @@ -370,11 +370,15 @@ autosuspend the interface's device. When the usage counter is = 0 then the interface is considered to be idle, and the kernel may autosuspend the device. -Drivers need not be concerned about balancing changes to the usage -counter; the USB core will undo any remaining "get"s when a driver -is unbound from its interface. As a corollary, drivers must not call -any of the ``usb_autopm_*`` functions after their ``disconnect`` -routine has returned. +Drivers must be careful to balance their overall changes to the usage +counter. Unbalanced "get"s will remain in effect when a driver is +unbound from its interface, preventing the device from going into +runtime suspend should the interface be bound to a driver again. On +the other hand, drivers are allowed to achieve this balance by calling +the ``usb_autopm_*`` functions even after their ``disconnect`` routine +has returned -- say from within a work-queue routine -- provided they +retain an active reference to the interface (via ``usb_get_intf`` and +``usb_put_intf``). Drivers using the async routines are responsible for their own synchronization and mutual exclusion. diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 8987cec9549d..ebcadaad89d1 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -473,11 +473,6 @@ static int usb_unbind_interface(struct device *dev) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); - /* Undo any residual pm_autopm_get_interface_* calls */ - for (r = atomic_read(&intf->pm_usage_cnt); r > 0; --r) - usb_autopm_put_interface_no_suspend(intf); - atomic_set(&intf->pm_usage_cnt, 0); - if (!error) usb_autosuspend_device(udev); @@ -1633,7 +1628,6 @@ void usb_autopm_put_interface(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1662,7 +1656,6 @@ void usb_autopm_put_interface_async(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1684,7 +1677,6 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); pm_runtime_put_noidle(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend); @@ -1715,8 +1707,6 @@ int usb_autopm_get_interface(struct usb_interface *intf) status = pm_runtime_get_sync(&intf->dev); if (status < 0) pm_runtime_put_sync(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1750,8 +1740,6 @@ int usb_autopm_get_interface_async(struct usb_interface *intf) status = pm_runtime_get(&intf->dev); if (status < 0 && status != -EINPROGRESS) pm_runtime_put_noidle(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1775,7 +1763,6 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_inc(&intf->pm_usage_cnt); pm_runtime_get_noresume(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume); diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 31b024441938..cc794e25a0b6 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -763,18 +763,16 @@ static void rts51x_suspend_timer_fn(struct timer_list *t) break; case RTS51X_STAT_IDLE: case RTS51X_STAT_SS: - usb_stor_dbg(us, "RTS51X_STAT_SS, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) > 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) { usb_stor_dbg(us, "Ready to enter SS state\n"); rts51x_set_stat(chip, RTS51X_STAT_SS); /* ignore mass storage interface's children */ pm_suspend_ignore_children(&us->pusb_intf->dev, true); usb_autopm_put_interface_async(us->pusb_intf); - usb_stor_dbg(us, "RTS51X_STAT_SS 01, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); } break; @@ -807,11 +805,10 @@ static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) int ret; if (working_scsi(srb)) { - usb_stor_dbg(us, "working scsi, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "working scsi, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) <= 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) { ret = usb_autopm_get_interface(us->pusb_intf); usb_stor_dbg(us, "working scsi, ret=%d\n", ret); } diff --git a/include/linux/usb.h b/include/linux/usb.h index 5e49e82c4368..ff010d1fd1c7 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -200,7 +200,6 @@ usb_find_last_int_out_endpoint(struct usb_host_interface *alt, * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. - * @pm_usage_cnt: PM usage counter for this interface * @reset_ws: Used for scheduling resets from atomic context. * @resetting_device: USB core reset the device, so use alt setting 0 as * current; needs bandwidth alloc after reset. @@ -257,7 +256,6 @@ struct usb_interface { struct device dev; /* interface specific device info */ struct device *usb_dev; - atomic_t pm_usage_cnt; /* usage counter for autosuspend */ struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) -- cgit v1.2.3 From 042b6d11e00fd3614760454744ce492825201c2d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2019 10:15:31 -0700 Subject: KVM: lapic: Disable timer advancement if adaptive tuning goes haywire commit 57bf67e73ce9bcce2258890f5abf2adf5f619f1a upstream. To minimize the latency of timer interrupts as observed by the guest, KVM adjusts the values it programs into the host timers to account for the host's overhead of programming and handling the timer event. Now that the timer advancement is automatically tuned during runtime, it's effectively unbounded by default, e.g. if KVM is running as L1 the advancement can measure in hundreds of milliseconds. Disable timer advancement if adaptive tuning yields an advancement of more than 5000ns, as large advancements can break reasonable assumptions of the guest, e.g. that a timer configured to fire after 1ms won't arrive on the next instruction. Although KVM busy waits to mitigate the case of a timer event arriving too early, complications can arise when shifting the interrupt too far, e.g. kvm-unit-test's vmx.interrupt test will fail when its "host" exits on interrupts as KVM may inject the INTR before the guest executes STI+HLT. Arguably the unit test is "broken" in the sense that delaying a timer interrupt by 1ms doesn't technically guarantee the interrupt will arrive after STI+HLT, but it's a reasonable assumption that KVM should support. Furthermore, an unbounded advancement also effectively unbounds the time spent busy waiting, e.g. if the guest programs a timer with a very large delay. 5000ns is a somewhat arbitrary threshold. When running on bare metal, which is the intended use case, timer advancement is expected to be in the general vicinity of 1000ns. 5000ns is high enough that false positives are unlikely, while not being so high as to negatively affect the host's performance/stability. Note, a future patch will enable userspace to disable KVM's adaptive tuning, which will allow priveleged userspace will to specifying an advancement value in excess of this arbitrary threshold in order to satisfy an abnormal use case. Cc: Liran Alon Cc: Wanpeng Li Cc: stable@vger.kernel.org Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4b6c2da7265c..debd0410f10c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1519,6 +1519,10 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) } if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) lapic_timer_advance_adjust_done = true; + if (unlikely(lapic_timer_advance_ns > 5000)) { + lapic_timer_advance_ns = 0; + lapic_timer_advance_adjust_done = true; + } } } -- cgit v1.2.3 From a72f524964fb0310156804ba121249f1eebd8c13 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Tue, 16 Apr 2019 20:36:34 +0300 Subject: KVM: x86: Consider LAPIC TSC-Deadline timer expired if deadline too short commit c09d65d9eab69985c75f98ed64541229f6fa9aa6 upstream. If guest sets MSR_IA32_TSCDEADLINE to value such that in host time-domain it's shorter than lapic_timer_advance_ns, we can reach a case that we call hrtimer_start() with expiration time set at the past. Because lapic_timer.timer is init with HRTIMER_MODE_ABS_PINNED, it is not allowed to run in softirq and therefore will never expire. To avoid such a scenario, verify that deadline expiration time is set on host time-domain further than (now + lapic_timer_advance_ns). A future patch can also consider adding a min_timer_deadline_ns module parameter, similar to min_timer_period_us to avoid races that amount of ns it takes to run logic could still call hrtimer_start() with expiration timer set at the past. Reviewed-by: Joao Martins Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index debd0410f10c..34678ea56b4b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1543,9 +1543,12 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) now = ktime_get(); guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - if (likely(tscdeadline > guest_tsc)) { - ns = (tscdeadline - guest_tsc) * 1000000ULL; - do_div(ns, this_tsc_khz); + + ns = (tscdeadline - guest_tsc) * 1000000ULL; + do_div(ns, this_tsc_khz); + + if (likely(tscdeadline > guest_tsc) && + likely(ns > lapic_timer_advance_ns)) { expire = ktime_add_ns(now, ns); expire = ktime_sub_ns(expire, lapic_timer_advance_ns); hrtimer_start(&apic->lapic_timer.timer, -- cgit v1.2.3 From a3ca780612e5ec67cb716c47d31404b5f313afb9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2019 10:15:32 -0700 Subject: KVM: lapic: Track lapic timer advance per vCPU commit 39497d7660d9866a47a2dc9055672358da57ad3d upstream. Automatically adjusting the globally-shared timer advancement could corrupt the timer, e.g. if multiple vCPUs are concurrently adjusting the advancement value. That could be partially fixed by using a local variable for the arithmetic, but it would still be susceptible to a race when setting timer_advance_adjust_done. And because virtual_tsc_khz and tsc_scaling_ratio are per-vCPU, the correct calibration for a given vCPU may not apply to all vCPUs. Furthermore, lapic_timer_advance_ns is marked __read_mostly, which is effectively violated when finding a stable advancement takes an extended amount of timer. Opportunistically change the definition of lapic_timer_advance_ns to a u32 so that it matches the style of struct kvm_timer. Explicitly pass the param to kvm_create_lapic() so that it doesn't have to be exposed to lapic.c, thus reducing the probability of unintentionally using the global value instead of the per-vCPU value. Cc: Liran Alon Cc: Wanpeng Li Reviewed-by: Liran Alon Cc: stable@vger.kernel.org Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 36 +++++++++++++++++++----------------- arch/x86/kvm/lapic.h | 4 +++- arch/x86/kvm/vmx/vmx.c | 4 +++- arch/x86/kvm/x86.c | 7 +++---- arch/x86/kvm/x86.h | 2 -- 5 files changed, 28 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 34678ea56b4b..b6b4b44940f0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -70,7 +70,6 @@ #define APIC_BROADCAST 0xFF #define X2APIC_BROADCAST 0xFFFFFFFFul -static bool lapic_timer_advance_adjust_done = false; #define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -1482,6 +1481,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) void wait_lapic_expire(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; + u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; u64 guest_tsc, tsc_deadline, ns; if (!lapic_in_kernel(vcpu)) @@ -1501,34 +1501,36 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) __delay(min(tsc_deadline - guest_tsc, - nsec_to_cycles(vcpu, lapic_timer_advance_ns))); + nsec_to_cycles(vcpu, timer_advance_ns))); - if (!lapic_timer_advance_adjust_done) { + if (!apic->lapic_timer.timer_advance_adjust_done) { /* too early */ if (guest_tsc < tsc_deadline) { ns = (tsc_deadline - guest_tsc) * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - lapic_timer_advance_ns -= min((unsigned int)ns, - lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns -= min((u32)ns, + timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); } else { /* too late */ ns = (guest_tsc - tsc_deadline) * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - lapic_timer_advance_ns += min((unsigned int)ns, - lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns += min((u32)ns, + timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); } if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) - lapic_timer_advance_adjust_done = true; - if (unlikely(lapic_timer_advance_ns > 5000)) { - lapic_timer_advance_ns = 0; - lapic_timer_advance_adjust_done = true; + apic->lapic_timer.timer_advance_adjust_done = true; + if (unlikely(timer_advance_ns > 5000)) { + timer_advance_ns = 0; + apic->lapic_timer.timer_advance_adjust_done = true; } + apic->lapic_timer.timer_advance_ns = timer_advance_ns; } } static void start_sw_tscdeadline(struct kvm_lapic *apic) { - u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; + struct kvm_timer *ktimer = &apic->lapic_timer; + u64 guest_tsc, tscdeadline = ktimer->tscdeadline; u64 ns = 0; ktime_t expire; struct kvm_vcpu *vcpu = apic->vcpu; @@ -1548,11 +1550,10 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) do_div(ns, this_tsc_khz); if (likely(tscdeadline > guest_tsc) && - likely(ns > lapic_timer_advance_ns)) { + likely(ns > apic->lapic_timer.timer_advance_ns)) { expire = ktime_add_ns(now, ns); - expire = ktime_sub_ns(expire, lapic_timer_advance_ns); - hrtimer_start(&apic->lapic_timer.timer, - expire, HRTIMER_MODE_ABS_PINNED); + expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); + hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED); } else apic_timer_expired(apic); @@ -2259,7 +2260,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) return HRTIMER_NORESTART; } -int kvm_create_lapic(struct kvm_vcpu *vcpu) +int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns) { struct kvm_lapic *apic; @@ -2283,6 +2284,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); apic->lapic_timer.timer.function = apic_timer_fn; + apic->lapic_timer.timer_advance_ns = timer_advance_ns; /* * APIC is created enabled. This will prevent kvm_lapic_set_base from diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index ff6ef9c3d760..3e97f8a68967 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -31,8 +31,10 @@ struct kvm_timer { u32 timer_mode_mask; u64 tscdeadline; u64 expired_tscdeadline; + u32 timer_advance_ns; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; + bool timer_advance_adjust_done; }; struct kvm_lapic { @@ -62,7 +64,7 @@ struct kvm_lapic { struct dest_map; -int kvm_create_lapic(struct kvm_vcpu *vcpu); +int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns); void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e7fe8c692362..72f1d17711f2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7133,6 +7133,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) { struct vcpu_vmx *vmx; u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; + struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; if (kvm_mwait_in_guest(vcpu->kvm)) return -EOPNOTSUPP; @@ -7141,7 +7142,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) tscl = rdtsc(); guest_tscl = kvm_read_l1_tsc(vcpu, tscl); delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; - lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns); + lapic_timer_advance_cycles = nsec_to_cycles(vcpu, + ktimer->timer_advance_ns); if (delta_tsc > lapic_timer_advance_cycles) delta_tsc -= lapic_timer_advance_cycles; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7e413ea19a9a..3244994b111d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -137,9 +137,8 @@ static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); /* lapic timer advance (tscdeadline mode only) in nanoseconds */ -unsigned int __read_mostly lapic_timer_advance_ns = 1000; +static u32 __read_mostly lapic_timer_advance_ns = 1000; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); -EXPORT_SYMBOL_GPL(lapic_timer_advance_ns); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); @@ -7882,7 +7881,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } trace_kvm_entry(vcpu->vcpu_id); - if (lapic_timer_advance_ns) + if (vcpu->arch.apic->lapic_timer.timer_advance_ns) wait_lapic_expire(vcpu); guest_enter_irqoff(); @@ -9070,7 +9069,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_pio_data; if (irqchip_in_kernel(vcpu->kvm)) { - r = kvm_create_lapic(vcpu); + r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); if (r < 0) goto fail_mmu_destroy; } else diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index de3d46769ee3..b457160dc7ba 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -294,8 +294,6 @@ extern u64 kvm_supported_xcr0(void); extern unsigned int min_timer_period_us; -extern unsigned int lapic_timer_advance_ns; - extern bool enable_vmware_backdoor; extern struct static_key kvm_no_apic_vcpu; -- cgit v1.2.3 From 63be90093cf54523dd158ca555319c413fc151b7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2019 10:15:33 -0700 Subject: KVM: lapic: Allow user to disable adaptive tuning of timer advancement commit c3941d9e0ccd48920e4811f133235b3597e5310b upstream. The introduction of adaptive tuning of lapic timer advancement did not allow for the scenario where userspace would want to disable adaptive tuning but still employ timer advancement, e.g. for testing purposes or to handle a use case where adaptive tuning is unable to settle on a suitable time. This is epecially pertinent now that KVM places a hard threshold on the maximum advancment time. Rework the timer semantics to accept signed values, with a value of '-1' being interpreted as "use adaptive tuning with KVM's internal default", and any other value being used as an explicit advancement time, e.g. a time of '0' effectively disables advancement. Note, this does not completely restore the original behavior of lapic_timer_advance_ns. Prior to tracking the advancement per vCPU, which is necessary to support autotuning, userspace could adjust lapic_timer_advance_ns for *running* vCPU. With per-vCPU tracking, the module params are snapshotted at vCPU creation, i.e. applying a new advancement effectively requires restarting a VM. Dynamically updating a running vCPU is possible, e.g. a helper could be added to retrieve the desired delay, choosing between the global module param and the per-VCPU value depending on whether or not auto-tuning is (globally) enabled, but introduces a great deal of complexity. The wrapper itself is not complex, but understanding and documenting the effects of dynamically toggling auto-tuning and/or adjusting the timer advancement is nigh impossible since the behavior would be dependent on KVM's implementation as well as compiler optimizations. In other words, providing stable behavior would require extremely careful consideration now and in the future. Given that the expected use of a manually-tuned timer advancement is to "tune once, run many", use the vastly simpler approach of recognizing changes to the module params only when creating a new vCPU. Cc: Liran Alon Cc: Wanpeng Li Reviewed-by: Liran Alon Cc: stable@vger.kernel.org Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 11 +++++++++-- arch/x86/kvm/lapic.h | 2 +- arch/x86/kvm/x86.c | 9 +++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b6b4b44940f0..cdff0248b596 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2260,7 +2260,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) return HRTIMER_NORESTART; } -int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns) +int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) { struct kvm_lapic *apic; @@ -2284,7 +2284,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); apic->lapic_timer.timer.function = apic_timer_fn; - apic->lapic_timer.timer_advance_ns = timer_advance_ns; + if (timer_advance_ns == -1) { + apic->lapic_timer.timer_advance_ns = 1000; + apic->lapic_timer.timer_advance_adjust_done = false; + } else { + apic->lapic_timer.timer_advance_ns = timer_advance_ns; + apic->lapic_timer.timer_advance_adjust_done = true; + } + /* * APIC is created enabled. This will prevent kvm_lapic_set_base from diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 3e97f8a68967..d6d049ba3045 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -64,7 +64,7 @@ struct kvm_lapic { struct dest_map; -int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns); +int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns); void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3244994b111d..613dd3c89b94 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -136,8 +136,13 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); -/* lapic timer advance (tscdeadline mode only) in nanoseconds */ -static u32 __read_mostly lapic_timer_advance_ns = 1000; +/* + * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables + * adaptive tuning starting from default advancment of 1000ns. '0' disables + * advancement entirely. Any other value is used as-is and disables adaptive + * tuning, i.e. allows priveleged userspace to set an exact advancement time. + */ +static int __read_mostly lapic_timer_advance_ns = -1; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; -- cgit v1.2.3 From b2e3c7d43f71989189fe34d72f7158fb0ad3fb89 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2019 10:15:34 -0700 Subject: KVM: lapic: Convert guest TSC to host time domain if necessary commit b6aa57c69cb26ea0160c51f7cf45f1af23542686 upstream. To minimize the latency of timer interrupts as observed by the guest, KVM adjusts the values it programs into the host timers to account for the host's overhead of programming and handling the timer event. In the event that the adjustments are too aggressive, i.e. the timer fires earlier than the guest expects, KVM busy waits immediately prior to entering the guest. Currently, KVM manually converts the delay from nanoseconds to clock cycles. But, the conversion is done in the guest's time domain, while the delay occurs in the host's time domain. This is perfectly ok when the guest and host are using the same TSC ratio, but if the guest is using a different ratio then the delay may not be accurate and could wait too little or too long. When the guest is not using the host's ratio, convert the delay from guest clock cycles to host nanoseconds and use ndelay() instead of __delay() to provide more accurate timing. Because converting to nanoseconds is relatively expensive, e.g. requires division and more multiplication ops, continue using __delay() directly when guest and host TSCs are running at the same ratio. Cc: Liran Alon Cc: Wanpeng Li Cc: stable@vger.kernel.org Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cdff0248b596..4ae142eea96a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1478,6 +1478,26 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) return false; } +static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) +{ + u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns; + + /* + * If the guest TSC is running at a different ratio than the host, then + * convert the delay to nanoseconds to achieve an accurate delay. Note + * that __delay() uses delay_tsc whenever the hardware has TSC, thus + * always for VMX enabled hardware. + */ + if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) { + __delay(min(guest_cycles, + nsec_to_cycles(vcpu, timer_advance_ns))); + } else { + u64 delay_ns = guest_cycles * 1000000ULL; + do_div(delay_ns, vcpu->arch.virtual_tsc_khz); + ndelay(min_t(u32, delay_ns, timer_advance_ns)); + } +} + void wait_lapic_expire(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -1498,10 +1518,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); - /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) - __delay(min(tsc_deadline - guest_tsc, - nsec_to_cycles(vcpu, timer_advance_ns))); + __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); if (!apic->lapic_timer.timer_advance_adjust_done) { /* too early */ -- cgit v1.2.3 From 4c01ab81fa27e84502737785716931be76007541 Mon Sep 17 00:00:00 2001 From: "Leonidas P. Papadakos" Date: Fri, 1 Mar 2019 00:29:23 +0200 Subject: arm64: dts: rockchip: fix rk3328-roc-cc gmac2io tx/rx_delay [ Upstream commit 924726888f660b2a86382a5dd051ec9ca1b18190 ] The rk3328-roc-cc board exhibits tx stability issues with large packets, as does the rock64 board, which was fixed with this patch https://patchwork.kernel.org/patch/10178969/ A similar patch was merged for the rk3328-roc-cc here https://patchwork.kernel.org/patch/10804863/ but it doesn't include the tx/rx_delay tweaks, and I find that they help with an issue where large transfers would bring the ethernet link down, causing a link reset regularly. Signed-off-by: Leonidas P. Papadakos Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin (Microsoft) --- arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index 99d0d9912950..a91f87df662e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -107,8 +107,8 @@ snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - tx_delay = <0x25>; - rx_delay = <0x11>; + tx_delay = <0x24>; + rx_delay = <0x18>; status = "okay"; }; -- cgit v1.2.3 From 8c284dd4a8522160b95a4500cb871637a7ed169f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 8 Mar 2019 13:11:17 +0800 Subject: HID: Increase maximum report size allowed by hid_field_extract() [ Upstream commit 94a9992f7dbdfb28976b565af220e0c4a117144a ] Commit 71f6fa90a353 ("HID: increase maximum global item tag report size to 256") increases the max report size from 128 to 256. We also need to update the report size in hid_field_extract() otherwise it complains and truncates now valid report size: [ 406.165461] hid-sensor-hub 001F:8086:22D8.0002: hid_field_extract() called with n (192) > 32! (kworker/5:1) BugLink: https://bugs.launchpad.net/bugs/1818547 Fixes: 71f6fa90a353 ("HID: increase maximum global item tag report size to 256") Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin (Microsoft) --- drivers/hid/hid-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 9993b692598f..860e21ec6a49 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1301,10 +1301,10 @@ static u32 __extract(u8 *report, unsigned offset, int n) u32 hid_field_extract(const struct hid_device *hid, u8 *report, unsigned offset, unsigned n) { - if (n > 32) { - hid_warn(hid, "hid_field_extract() called with n (%d) > 32! (%s)\n", + if (n > 256) { + hid_warn(hid, "hid_field_extract() called with n (%d) > 256! (%s)\n", n, current->comm); - n = 32; + n = 256; } return __extract(report, offset, n); -- cgit v1.2.3 From d6cba891f1f891d39c6dbfd12eef83b1a17e5101 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 00:24:02 -0500 Subject: HID: logitech: check the return value of create_singlethread_workqueue [ Upstream commit 6c44b15e1c9076d925d5236ddadf1318b0a25ce2 ] create_singlethread_workqueue may fail and return NULL. The fix checks if it is NULL to avoid NULL pointer dereference. Also, the fix moves the call of create_singlethread_workqueue earlier to avoid resource-release issues. Signed-off-by: Kangjie Lu Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin (Microsoft) --- drivers/hid/hid-logitech-hidpp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index f040c8a7f9a9..199cc256e9d9 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -2111,6 +2111,13 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) kfree(data); return -ENOMEM; } + data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue"); + if (!data->wq) { + kfree(data->effect_ids); + kfree(data); + return -ENOMEM; + } + data->hidpp = hidpp; data->feature_index = feature_index; data->version = version; @@ -2155,7 +2162,6 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) /* ignore boost value at response.fap.params[2] */ /* init the hardware command queue */ - data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue"); atomic_set(&data->workqueue_size, 0); /* initialize with zero autocenter to get wheel in usable state */ -- cgit v1.2.3 From 458c92229d70524f9854cdce0f43c3774a542254 Mon Sep 17 00:00:00 2001 From: "He, Bo" Date: Thu, 14 Mar 2019 02:28:21 +0000 Subject: HID: debug: fix race condition with between rdesc_show() and device removal [ Upstream commit cef0d4948cb0a02db37ebfdc320e127c77ab1637 ] There is a race condition that could happen if hid_debug_rdesc_show() is running while hdev is in the process of going away (device removal, system suspend, etc) which could result in NULL pointer dereference: BUG: unable to handle kernel paging request at 0000000783316040 CPU: 1 PID: 1512 Comm: getevent Tainted: G U O 4.19.20-quilt-2e5dc0ac-00029-gc455a447dd55 #1 RIP: 0010:hid_dump_device+0x9b/0x160 Call Trace: hid_debug_rdesc_show+0x72/0x1d0 seq_read+0xe0/0x410 full_proxy_read+0x5f/0x90 __vfs_read+0x3a/0x170 vfs_read+0xa0/0x150 ksys_read+0x58/0xc0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x55/0x110 entry_SYSCALL_64_after_hwframe+0x49/0xbe Grab driver_input_lock to make sure the input device exists throughout the whole process of dumping the rdesc. [jkosina@suse.cz: update changelog a bit] Signed-off-by: he, bo Signed-off-by: "Zhang, Jun" Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin (Microsoft) --- drivers/hid/hid-debug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index ac9fda1b5a72..1384e57182af 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1060,10 +1060,15 @@ static int hid_debug_rdesc_show(struct seq_file *f, void *p) seq_printf(f, "\n\n"); /* dump parsed data and input mappings */ + if (down_interruptible(&hdev->driver_input_lock)) + return 0; + hid_dump_device(hdev, f); seq_printf(f, "\n"); hid_dump_input_mapping(hdev, f); + up(&hdev->driver_input_lock); + return 0; } -- cgit v1.2.3 From 9be94b95acb66b5f5fb8123664e2d8a033484242 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 15 Mar 2019 11:51:12 -0700 Subject: rtc: cros-ec: Fail suspend/resume if wake IRQ can't be configured [ Upstream commit d6752e185c3168771787a02dc6a55f32260943cc ] If we encounter a failure during suspend where this RTC was programmed to wakeup the system from suspend, but that wakeup couldn't be configured because the system didn't support wakeup interrupts, we'll run into the following warning: Unbalanced IRQ 166 wake disable WARNING: CPU: 7 PID: 3071 at kernel/irq/manage.c:669 irq_set_irq_wake+0x108/0x278 This happens because the suspend process isn't aborted when the RTC fails to configure the wakeup IRQ. Instead, we continue suspending the system and then another suspend callback fails the suspend process and "unwinds" the previously suspended drivers by calling their resume callbacks. When we get back to resuming this RTC driver, we'll call disable_irq_wake() on an IRQ that hasn't been configured for wake. Let's just fail suspend/resume here if we can't configure the system to wake and the user has chosen to wakeup with this device. This fixes this warning and makes the code more robust in case there are systems out there that can't wakeup from suspend on this line but the user has chosen to do so. Cc: Enric Balletbo i Serra Cc: Evan Green Cc: Benson Leung Cc: Guenter Roeck Signed-off-by: Stephen Boyd Acked-By: Benson Leung Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin (Microsoft) --- drivers/rtc/rtc-cros-ec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index e5444296075e..4d6bf9304ceb 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -298,7 +298,7 @@ static int cros_ec_rtc_suspend(struct device *dev) struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev); if (device_may_wakeup(dev)) - enable_irq_wake(cros_ec_rtc->cros_ec->irq); + return enable_irq_wake(cros_ec_rtc->cros_ec->irq); return 0; } @@ -309,7 +309,7 @@ static int cros_ec_rtc_resume(struct device *dev) struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev); if (device_may_wakeup(dev)) - disable_irq_wake(cros_ec_rtc->cros_ec->irq); + return disable_irq_wake(cros_ec_rtc->cros_ec->irq); return 0; } -- cgit v1.2.3 From c832bbc4fff052593dbfc6e8c4304d10b481d502 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Mar 2019 11:32:14 +0100 Subject: rtc: sh: Fix invalid alarm warning for non-enabled alarm [ Upstream commit 15d82d22498784966df8e4696174a16b02cc1052 ] When no alarm has been programmed on RSK-RZA1, an error message is printed during boot: rtc rtc0: invalid alarm value: 2019-03-14T255:255:255 sh_rtc_read_alarm_value() returns 0xff when querying a hardware alarm field that is not enabled. __rtc_read_alarm() validates the received alarm values, and fills in missing fields when needed. While 0xff is handled fine for the year, month, and day fields, and corrected as considered being out-of-range, this is not the case for the hour, minute, and second fields, where -1 is expected for missing fields. Fix this by returning -1 instead, as this value is handled fine for all fields. Signed-off-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin (Microsoft) --- drivers/rtc/rtc-sh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index d417b203cbc5..1d3de2a3d1a4 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -374,7 +374,7 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm) static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off) { unsigned int byte; - int value = 0xff; /* return 0xff for ignored values */ + int value = -1; /* return -1 for ignored values */ byte = readb(rtc->regbase + reg_off); if (byte & AR_ENB) { -- cgit v1.2.3 From 13d47cf66df77b0a48aff523d132cb13a28f28ca Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Feb 2019 14:20:42 +0100 Subject: ARM: OMAP2+: add missing of_node_put after of_device_is_available [ Upstream commit 30645307e5d2c8a4caf978558c66121ac91ad17e ] Add an of_node_put when a tested device node is not available. The semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ identifier f; local idexpression e; expression x; @@ e = f(...); ... when != of_node_put(e) when != x = e when != e = x when any if (<+...of_device_is_available(e)...+>) { ... when != of_node_put(e) ( return e; | + of_node_put(e); return ...; ) } // Fixes: e0c827aca0730 ("drm/omap: Populate DSS children in omapdss driver") Signed-off-by: Julia Lawall Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/mach-omap2/display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 1444b4b4bd9f..439e143cad7b 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -250,8 +250,10 @@ static int __init omapdss_init_of(void) if (!node) return 0; - if (!of_device_is_available(node)) + if (!of_device_is_available(node)) { + of_node_put(node); return 0; + } pdev = of_find_device_by_node(node); -- cgit v1.2.3 From 810cfcc0f2a47ca0ab1ec8970177b33fef2a1a70 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: batman-adv: Reduce claim hash refcnt only for removed entry [ Upstream commit 4ba104f468bbfc27362c393815d03aa18fb7a20f ] The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_bla_del_claim is not itself protected in anyway to avoid that any other function is modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such an situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free. Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin (Microsoft) --- net/batman-adv/bridge_loop_avoidance.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 5fdde2947802..cf2bcea7df82 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -803,6 +803,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const u8 *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; + struct batadv_bla_claim *claim_removed_entry; + struct hlist_node *claim_removed_node; ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; @@ -813,10 +815,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__, mac, batadv_print_vid(vid)); - batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, - batadv_choose_claim, claim); - batadv_claim_put(claim); /* reference from the hash is gone */ + claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash, + batadv_compare_claim, + batadv_choose_claim, claim); + if (!claim_removed_node) + goto free_claim; + /* reference from the hash is gone */ + claim_removed_entry = hlist_entry(claim_removed_node, + struct batadv_bla_claim, hash_entry); + batadv_claim_put(claim_removed_entry); + +free_claim: /* don't need the reference from hash_find() anymore */ batadv_claim_put(claim); } -- cgit v1.2.3 From 0536ffa505bc63a5d18ec5e324177c284297b057 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: batman-adv: Reduce tt_local hash refcnt only for removed entry [ Upstream commit 3d65b9accab4a7ed5038f6df403fbd5e298398c7 ] The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_tt_local_remove is not itself protected in anyway to avoid that any other function is modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such an situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free. Fixes: ef72706a0543 ("batman-adv: protect tt_local_entry from concurrent delete events") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin (Microsoft) --- net/batman-adv/translation-table.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 8dcd4968cde7..2ee87d3ca6d0 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1337,9 +1337,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, bool roaming) { + struct batadv_tt_local_entry *tt_removed_entry; struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - void *tt_entry_exists; + struct hlist_node *tt_removed_node; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1368,15 +1369,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local_entry->common); - if (!tt_entry_exists) + if (!tt_removed_node) goto out; - /* extra call to free the local tt entry */ - batadv_tt_local_entry_put(tt_local_entry); + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_local_entry, + common.hash_entry); + batadv_tt_local_entry_put(tt_removed_entry); out: if (tt_local_entry) -- cgit v1.2.3 From 2ea41a71b191ce59a99aa6ddc108b063911aaaf4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: batman-adv: Reduce tt_global hash refcnt only for removed entry [ Upstream commit f131a56880d10932931e74773fb8702894a94a75 ] The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_tt_global_free is not itself protected in anyway to avoid that any other function is modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such an situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free. Fixes: 7683fdc1e886 ("batman-adv: protect the local and the global trans-tables with rcu") Reported-by: Martin Weinelt Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin (Microsoft) --- net/batman-adv/translation-table.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 2ee87d3ca6d0..6ec0e67be560 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -616,14 +616,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global, const char *message) { + struct batadv_tt_global_entry *tt_removed_entry; + struct hlist_node *tt_removed_node; + batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(tt_global->common.vid), message); - batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_tt, &tt_global->common); - batadv_tt_global_entry_put(tt_global); + tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_global->common); + if (!tt_removed_node) + return; + + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_global_entry, + common.hash_entry); + batadv_tt_global_entry_put(tt_removed_entry); } /** -- cgit v1.2.3 From 10db6b5653f16e3c833f3127c9792a09cd7829c7 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 22 Feb 2019 16:25:54 +0100 Subject: batman-adv: fix warning in function batadv_v_elp_get_throughput MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ca8c3b922e7032aff6cc3fd05548f4df1f3df90e ] When CONFIG_CFG80211 isn't enabled the compiler correcly warns about 'sinfo.pertid' may be unused. It can also happen for other error conditions that it not warn about. net/batman-adv/bat_v_elp.c: In function ‘batadv_v_elp_get_throughput.isra.0’: include/net/cfg80211.h:6370:13: warning: ‘sinfo.pertid’ may be used uninitialized in this function [-Wmaybe-uninitialized] kfree(sinfo->pertid); ~~~~~^~~~~~~~ Rework so that we only release '&sinfo' if cfg80211_get_station returns zero. Fixes: 7d652669b61d ("batman-adv: release station info tidstats") Signed-off-by: Anders Roxell Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin (Microsoft) --- net/batman-adv/bat_v_elp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index ef0dec20c7d8..5da183b2f4c9 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,8 +104,10 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); - /* free the TID stats immediately */ - cfg80211_sinfo_release_content(&sinfo); + if (!ret) { + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + } dev_put(real_netdev); if (ret == -ENOENT) { -- cgit v1.2.3 From 18d841b848c2856553c04693b6dc7656affec971 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:00 -0700 Subject: ARM: dts: rockchip: Fix gpu opp node names for rk3288 [ Upstream commit d040e4e8deeaa8257d6aa260e29ad69832b5d630 ] The device tree compiler yells like this: Warning (unit_address_vs_reg): /gpu-opp-table/opp@100000000: node has a unit name, but no reg property Let's match the cpu opp node names and use a dash. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/boot/dts/rk3288.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 09868dcee34b..df0c5456c94f 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1282,27 +1282,27 @@ gpu_opp_table: gpu-opp-table { compatible = "operating-points-v2"; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; opp-microvolt = <950000>; }; - opp@200000000 { + opp-200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <950000>; }; - opp@300000000 { + opp-300000000 { opp-hz = /bits/ 64 <300000000>; opp-microvolt = <1000000>; }; - opp@400000000 { + opp-400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <1100000>; }; - opp@500000000 { + opp-500000000 { opp-hz = /bits/ 64 <500000000>; opp-microvolt = <1200000>; }; - opp@600000000 { + opp-600000000 { opp-hz = /bits/ 64 <600000000>; opp-microvolt = <1250000>; }; -- cgit v1.2.3 From 4b6d04ef0102cde5681c436d3bde84ed1a220017 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 18 Mar 2019 22:03:52 +0800 Subject: reset: meson-audio-arb: Fix missing .owner setting of reset_controller_dev [ Upstream commit 13e8a05b922457761ddef39cfff6231bd4ed9eef ] Set .owner to prevent module unloading while being used. Signed-off-by: Axel Lin Fixes: d903779b58be ("reset: meson: add meson audio arb driver") Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin (Microsoft) --- drivers/reset/reset-meson-audio-arb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/reset/reset-meson-audio-arb.c b/drivers/reset/reset-meson-audio-arb.c index 91751617b37a..c53a2185a039 100644 --- a/drivers/reset/reset-meson-audio-arb.c +++ b/drivers/reset/reset-meson-audio-arb.c @@ -130,6 +130,7 @@ static int meson_audio_arb_probe(struct platform_device *pdev) arb->rstc.nr_resets = ARRAY_SIZE(axg_audio_arb_reset_bits); arb->rstc.ops = &meson_audio_arb_rstc_ops; arb->rstc.of_node = dev->of_node; + arb->rstc.owner = THIS_MODULE; /* * Enable general : -- cgit v1.2.3 From f600213a3930fef4b0af33d1bc2f116138ce305b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Mar 2019 10:32:23 -0700 Subject: ARM: dts: Fix dcan clkctrl clock for am3 [ Upstream commit 7d56bedb2730dc2ea8abf0fd7240ee99ecfee3c9 ] We must not use legacy clock defines for dts clckctrl clocks as the offsets will be wrong. Fixes: 87fc89ced3a7 ("ARM: dts: am335x: Move l4 child devices to probe them with ti-sysc") Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/boot/dts/am33xx-l4.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index 7b818d9d2eab..8396faa9ac28 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -1763,7 +1763,7 @@ reg = <0xcc000 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM3_D_CAN0_CLKCTRL 0>; + clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; @@ -1786,7 +1786,7 @@ reg = <0xd0000 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM3_D_CAN1_CLKCTRL 0>; + clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From b95b96893c86e1f478875fe65b076753a69e1ddc Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 25 Feb 2019 11:20:05 -0800 Subject: i40e: fix i40e_ptp_adjtime when given a negative delta [ Upstream commit b3ccbbce1e455b8454d3935eb9ae0a5f18939e24 ] Commit 0ac30ce43323 ("i40e: fix up 32 bit timespec references", 2017-07-26) claims to be cleaning up references to 32-bit timespecs. The actual contents of the commit make no sense, as it converts a call to timespec64_add into timespec64_add_ns. This would seem ok, if (a) the change was documented in the commit message, and (b) timespec64_add_ns supported negative numbers. timespec64_add_ns doesn't work with signed deltas, because the implementation is based around iter_div_u64_rem. This change resulted in a regression where i40e_ptp_adjtime would interpret small negative adjustments as large positive additions, resulting in incorrect behavior. This commit doesn't appear to fix anything, is not well explained, and introduces a bug, so lets just revert it. Reverts: 0ac30ce43323 ("i40e: fix up 32 bit timespec references", 2017-07-26) Signed-off-by: Jacob Keller Reviewed-by: Arnd Bergmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 5fb4353c742b..31575c0bb884 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -146,12 +146,13 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - struct timespec64 now; + struct timespec64 now, then; + then = ns_to_timespec64(delta); mutex_lock(&pf->tmreg_lock); i40e_ptp_read(pf, &now, NULL); - timespec64_add_ns(&now, delta); + now = timespec64_add(now, then); i40e_ptp_write(pf, (const struct timespec64 *)&now); mutex_unlock(&pf->tmreg_lock); -- cgit v1.2.3 From 29cdcf7c4d5942b65611e74ae3366de847d52c9e Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Sat, 2 Mar 2019 11:01:17 -0500 Subject: igb: Fix WARN_ONCE on runtime suspend [ Upstream commit dabb8338be533c18f50255cf39ff4f66d4dabdbe ] The runtime_suspend device callbacks are not supposed to save configuration state or change the power state. Commit fb29f76cc566 ("igb: Fix an issue that PME is not enabled during runtime suspend") changed the driver to not save configuration state during runtime suspend, however the driver callback still put the device into a low-power state. This causes a warning in the pci pm core and results in pci_pm_runtime_suspend not calling pci_save_state or pci_finish_runtime_suspend. Fix this by not changing the power state either, leaving that to pci pm core, and make the same change for suspend callback as well. Also move a couple of defines into the appropriate header file instead of inline in the .c file. Fixes: fb29f76cc566 ("igb: Fix an issue that PME is not enabled during runtime suspend") Signed-off-by: Arvind Sankar Reviewed-by: Kai-Heng Feng Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/intel/igb/e1000_defines.h | 2 + drivers/net/ethernet/intel/igb/igb_main.c | 57 ++++---------------------- 2 files changed, 10 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 01fcfc6f3415..d2e2c50ce257 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -194,6 +194,8 @@ /* enable link status from external LINK_0 and LINK_1 pins */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ +#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */ #define E1000_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ #define E1000_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ #define E1000_CTRL_RST 0x04000000 /* Global reset */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7137e7f9c7f3..21ccadb720d1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8755,9 +8755,7 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, struct e1000_hw *hw = &adapter->hw; u32 ctrl, rctl, status; u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; -#ifdef CONFIG_PM - int retval = 0; -#endif + bool wake; rtnl_lock(); netif_device_detach(netdev); @@ -8770,14 +8768,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, igb_clear_interrupt_scheme(adapter); rtnl_unlock(); -#ifdef CONFIG_PM - if (!runtime) { - retval = pci_save_state(pdev); - if (retval) - return retval; - } -#endif - status = rd32(E1000_STATUS); if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; @@ -8794,10 +8784,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, } ctrl = rd32(E1000_CTRL); - /* advertise wake from D3Cold */ - #define E1000_CTRL_ADVD3WUC 0x00100000 - /* phy power management enable */ - #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ctrl |= E1000_CTRL_ADVD3WUC; wr32(E1000_CTRL, ctrl); @@ -8811,12 +8797,15 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, wr32(E1000_WUFC, 0); } - *enable_wake = wufc || adapter->en_mng_pt; - if (!*enable_wake) + wake = wufc || adapter->en_mng_pt; + if (!wake) igb_power_down_link(adapter); else igb_power_up_link(adapter); + if (enable_wake) + *enable_wake = wake; + /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ @@ -8859,22 +8848,7 @@ static void igb_deliver_wake_packet(struct net_device *netdev) static int __maybe_unused igb_suspend(struct device *dev) { - int retval; - bool wake; - struct pci_dev *pdev = to_pci_dev(dev); - - retval = __igb_shutdown(pdev, &wake, 0); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 0); } static int __maybe_unused igb_resume(struct device *dev) @@ -8945,22 +8919,7 @@ static int __maybe_unused igb_runtime_idle(struct device *dev) static int __maybe_unused igb_runtime_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - int retval; - bool wake; - - retval = __igb_shutdown(pdev, &wake, 1); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 1); } static int __maybe_unused igb_runtime_resume(struct device *dev) -- cgit v1.2.3 From ee5088ba4b2366cd78cd8155be0452f1f71be465 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 15 Mar 2019 09:45:15 +0100 Subject: ixgbe: fix mdio bus registration [ Upstream commit 7ec52b9df7d7472240fa96223185894b1897aeb0 ] The ixgbe ignores errors returned from mdiobus_register() and leaves adapter->mii_bus non-NULL and MDIO bus state as MDIOBUS_ALLOCATED. This triggers a BUG from mdiobus_unregister() during ixgbe_remove() call. Fixes: 8fa10ef01260 ("ixgbe: register a mdiobus") Signed-off-by: Ivan Vecera Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index cc4907f9ff02..2fb97967961c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -905,13 +905,12 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) struct pci_dev *pdev = adapter->pdev; struct device *dev = &adapter->netdev->dev; struct mii_bus *bus; + int err = -ENODEV; - adapter->mii_bus = devm_mdiobus_alloc(dev); - if (!adapter->mii_bus) + bus = devm_mdiobus_alloc(dev); + if (!bus) return -ENOMEM; - bus = adapter->mii_bus; - switch (hw->device_id) { /* C3000 SoCs */ case IXGBE_DEV_ID_X550EM_A_KR: @@ -949,12 +948,15 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) */ hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22; - return mdiobus_register(bus); + err = mdiobus_register(bus); + if (!err) { + adapter->mii_bus = bus; + return 0; + } ixgbe_no_mii_bus: devm_mdiobus_free(dev, bus); - adapter->mii_bus = NULL; - return -ENODEV; + return err; } /** -- cgit v1.2.3 From 56e41e46e860b672bd35022b28e2e6fb5977888b Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 21 Mar 2019 13:45:35 +0100 Subject: i40e: fix WoL support check [ Upstream commit f669d24f3dd00beab452c0fc9257f6a942ffca9b ] The current check for WoL on i40e is broken. Code comment says only magic packet is supported, so only check for that. Fixes: 540a152da762 (i40e/ixgbe/igb: fail on new WoL flag setting WAKE_MAGICSECURE) Signed-off-by: Stefan Assmann Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a6bc7847346b..5d544e661445 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2378,8 +2378,7 @@ static int i40e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return -EOPNOTSUPP; /* only magic packet is supported */ - if (wol->wolopts && (wol->wolopts != WAKE_MAGIC) - | (wol->wolopts != WAKE_FILTER)) + if (wol->wolopts & ~WAKE_MAGIC) return -EOPNOTSUPP; /* is this a new value? */ -- cgit v1.2.3 From 603ae6024626ae227fcac0ec75b9a2f907827765 Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Fri, 22 Mar 2019 14:37:04 +0800 Subject: riscv: fix accessing 8-byte variable from RV32 [ Upstream commit dbee9c9c45846f003ec2f819710c2f4835630a6a ] A memory save operation to 8-byte variable in RV32 is divided into two sw instructions in the put_user macro. The current fixup returns execution flow to the second sw instead of the one after it. This patch fixes this fixup code according to the load access part. Signed-off-by: Alan Kao Cc: Greentime Hu Cc: Vincent Chen Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin (Microsoft) --- arch/riscv/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 637b896894fc..aa82df30e38a 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -301,7 +301,7 @@ do { \ " .balign 4\n" \ "4:\n" \ " li %0, %6\n" \ - " jump 2b, %1\n" \ + " jump 3b, %1\n" \ " .previous\n" \ " .section __ex_table,\"a\"\n" \ " .balign " RISCV_SZPTR "\n" \ -- cgit v1.2.3 From fcaef6ae7fa389747aa9a84a349ef24cbe06176c Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Tue, 26 Mar 2019 09:55:54 -0700 Subject: HID: quirks: Fix keyboard + touchpad on Lenovo Miix 630 [ Upstream commit 2bafa1e9625400bec4c840a168d70ba52607a58d ] Similar to commit edfc3722cfef ("HID: quirks: Fix keyboard + touchpad on Toshiba Click Mini not working"), the Lenovo Miix 630 has a combo keyboard/touchpad device with vid:pid of 04F3:0400, which is shared with Elan touchpads. The combo on the Miix 630 has an ACPI id of QTEC0001, which is not claimed by the elan_i2c driver, so key on that similar to what was done for the Toshiba Click Mini. Signed-off-by: Jeffrey Hugo Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin (Microsoft) --- drivers/hid/hid-quirks.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 94088c0ed68a..e24790c988c0 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -744,7 +744,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DEALEXTREAME, USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) }, - { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) }, { HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) }, @@ -1025,6 +1024,10 @@ bool hid_ignore(struct hid_device *hdev) if (hdev->product == 0x0401 && strncmp(hdev->name, "ELAN0800", 8) != 0) return true; + /* Same with product id 0x0400 */ + if (hdev->product == 0x0400 && + strncmp(hdev->name, "QTEC0001", 8) != 0) + return true; break; } -- cgit v1.2.3 From eab19d67ad688e58aa50400e69c523c3882eda9f Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 26 Mar 2019 14:53:49 +0800 Subject: net: hns3: fix compile error [ Upstream commit 669efc76b317b3aa550ffbf0b79d064cb00a5f96 ] Currently, the rules for configuring search paths in Kbuild have changed, this will lead some erros when compiling hns3 with the following command: make O=DIR M=drivers/net/ethernet/hisilicon/hns3 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c:11:10: fatal error: hnae3.h: No such file or directory This patch fix it by adding $(srctree)/ prefix to the serach paths. Signed-off-by: Xi Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index fffe8c1c45d3..0fb61d440d3b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -3,7 +3,7 @@ # Makefile for the HISILICON network device drivers. # -ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGE) += hclge.o hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile index fb93bbd35845..6193f8fa7cf3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile @@ -3,7 +3,7 @@ # Makefile for the HISILICON network device drivers. # -ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o \ No newline at end of file -- cgit v1.2.3 From 5c0323b32e538cd96451c405f0735be9cc42915e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 29 Mar 2019 10:18:00 +0100 Subject: xdp: fix cpumap redirect SKB creation bug [ Upstream commit 676e4a6fe703f2dae699ee9d56f14516f9ada4ea ] We want to avoid leaking pointer info from xdp_frame (that is placed in top of frame) like commit 6dfb970d3dbd ("xdp: avoid leaking info stored in frame data on page reuse"), and followup commit 97e19cce05e5 ("bpf: reserve xdp_frame size in xdp headroom") that reserve this headroom. These changes also affected how cpumap constructed SKBs, as xdpf->headroom size changed, the skb data starting point were in-effect shifted with 32 bytes (sizeof xdp_frame). This was still okay, as the cpumap frame_size calculation also included xdpf->headroom which were reduced by same amount. A bug was introduced in commit 77ea5f4cbe20 ("bpf/cpumap: make sure frame_size for build_skb is aligned if headroom isn't"), where the xdpf->headroom became part of the SKB_DATA_ALIGN rounding up. This round-up to find the frame_size is in principle still correct as it does not exceed the 2048 bytes frame_size (which is max for ixgbe and i40e), but the 32 bytes offset of pkt_data_start puts this over the 2048 bytes limit. This cause skb_shared_info to spill into next frame. It is a little hard to trigger, as the SKB need to use above 15 skb_shinfo->frags[] as far as I calculate. This does happen in practise for TCP streams when skb_try_coalesce() kicks in. KASAN can be used to detect these wrong memory accesses, I've seen: BUG: KASAN: use-after-free in skb_try_coalesce+0x3cb/0x760 BUG: KASAN: wild-memory-access in skb_release_data+0xe2/0x250 Driver veth also construct a SKB from xdp_frame in this way, but is not affected, as it doesn't reserve/deduct the room (used by xdp_frame) from the SKB headroom. Instead is clears the pointers via xdp_scrub_frame(), and allows SKB to use this area. The fix in this patch is to do like veth and instead allow SKB to (re)use the area occupied by xdp_frame, by clearing via xdp_scrub_frame(). (This does kill the idea of the SKB being able to access (mem) info from this area, but I guess it was a bad idea anyhow, and it was already killed by the veth changes.) Fixes: 77ea5f4cbe20 ("bpf/cpumap: make sure frame_size for build_skb is aligned if headroom isn't") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin (Microsoft) --- kernel/bpf/cpumap.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8974b3755670..3c18260403dd 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work) static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { + unsigned int hard_start_headroom; unsigned int frame_size; void *pkt_data_start; struct sk_buff *skb; + /* Part of headroom was reserved to xdpf */ + hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom; + /* build_skb need to place skb_shared_info after SKB end, and * also want to know the memory "truesize". Thus, need to * know the memory frame size backing xdp_buff. @@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * is not at a fixed memory location, with mixed length * packets, which is bad for cache-line hotness. */ - frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) + + frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - pkt_data_start = xdpf->data - xdpf->headroom; + pkt_data_start = xdpf->data - hard_start_headroom; skb = build_skb(pkt_data_start, frame_size); if (!skb) return NULL; - skb_reserve(skb, xdpf->headroom); + skb_reserve(skb, hard_start_headroom); __skb_put(skb, xdpf->len); if (xdpf->metasize) skb_metadata_set(skb, xdpf->metasize); @@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * - RX ring dev queue index (skb_record_rx_queue) */ + /* Allow SKB to reuse area used by xdp_frame */ + xdp_scrub_frame(xdpf); + return skb; } -- cgit v1.2.3 From 21b557cb27e2eadfb84b837f465556252c5376c4 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 21 Mar 2019 15:51:35 -0700 Subject: net/mlx5: E-Switch, Protect from invalid memory access in offload fdb table [ Upstream commit 5c1d260ed10cf08dd7a0299c103ad0a3f9a9f7a1 ] The esw offloads structures share a union with the legacy mode structs. Reset the offloads struct to zero in init to protect from null assumptions made by the legacy mode code. Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d4e6fe5b9300..ce5766a26baa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1402,6 +1402,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); err = esw_create_offloads_fdb_tables(esw, nvports); -- cgit v1.2.3 From 2340d1c4ede82c10cff60145d1c6c9b8add40ea6 Mon Sep 17 00:00:00 2001 From: Omri Kahalon Date: Sun, 24 Feb 2019 16:31:08 +0200 Subject: net/mlx5: E-Switch, Fix esw manager vport indication for more vport commands [ Upstream commit eca4a928585ac08147e5cc8e2111ecbc6279ee31 ] Traditionally, the PF (Physical Function) which resides on vport 0 was the E-switch manager. Since the ECPF (Embedded CPU Physical Function), which resides on vport 0xfffe, was introduced as the E-Switch manager, the assumption that the E-switch manager is on vport 0 is incorrect. Since the eswitch code already uses the actual vport value, all we need is to always set other_vport=1. Signed-off-by: Omri Kahalon Reviewed-by: Max Gurtovoy Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 13c48883ed61..619f96940b65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,8 +81,7 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -110,8 +109,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -- cgit v1.2.3 From 79e55cf86cec524d147e67fecf9e29c23512bd87 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Thu, 28 Mar 2019 13:29:21 +0300 Subject: bonding: show full hw address in sysfs for slave entries [ Upstream commit 18bebc6dd3281955240062655a4df35eef2c46b3 ] Bond expects ethernet hwaddr for its slave, but it can be longer than 6 bytes - infiniband interface for example. # cat /sys/devices//net/ib0/address 80:00:02:08:fe:80:00:00:00:00:00:00:7c:fe:90:03:00:be:5d:e1 # cat /sys/devices//net/ib0/bonding_slave/perm_hwaddr 80:00:02:08:fe:80 So print full hwaddr in sysfs "bonding_slave/perm_hwaddr" as well. Signed-off-by: Konstantin Khorenko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/bonding/bond_sysfs_slave.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 2f120b2ffef0..4985268e2273 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -55,7 +55,9 @@ static SLAVE_ATTR_RO(link_failure_count); static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) { - return sprintf(buf, "%pM\n", slave->perm_hwaddr); + return sprintf(buf, "%*phC\n", + slave->dev->addr_len, + slave->perm_hwaddr); } static SLAVE_ATTR_RO(perm_hwaddr); -- cgit v1.2.3 From 0fa0ac6c6b6219181d35da55898039447a91e5f6 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:35 +0200 Subject: net: stmmac: use correct DMA buffer size in the RX descriptor [ Upstream commit 583e6361414903c5206258a30e5bd88cb03c0254 ] We always program the maximum DMA buffer size into the receive descriptor, although the allocated size may be less. E.g. with the default MTU size we allocate only 1536 bytes. If somebody sends us a bigger frame, then memory may get corrupted. Fix by using exact buffer sizes. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/stmicro/stmmac/descs_com.h | 22 ++++++++++++++-------- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 2 +- .../net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 2 +- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 10 +++++++--- drivers/net/ethernet/stmicro/stmmac/hwif.h | 2 +- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 10 +++++++--- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 7 files changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index 40d6356a7e73..3dfb07a78952 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -29,11 +29,13 @@ /* Specific functions used for Ring mode */ /* Enhanced descriptors */ -static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end, + int bfsize) { - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - << ERDES1_BUFFER2_SIZE_SHIFT) - & ERDES1_BUFFER2_SIZE_MASK); + if (bfsize == BUF_SIZE_16KiB) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB + << ERDES1_BUFFER2_SIZE_SHIFT) + & ERDES1_BUFFER2_SIZE_MASK); if (end) p->des1 |= cpu_to_le32(ERDES1_END_RING); @@ -59,11 +61,15 @@ static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len) } /* Normal descriptors */ -static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end, int bfsize) { - p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1) - << RDES1_BUFFER2_SIZE_SHIFT) - & RDES1_BUFFER2_SIZE_MASK); + if (bfsize >= BUF_SIZE_2KiB) { + int bfsize2; + + bfsize2 = min(bfsize - BUF_SIZE_2KiB + 1, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32((bfsize2 << RDES1_BUFFER2_SIZE_SHIFT) + & RDES1_BUFFER2_SIZE_MASK); + } if (end) p->des1 |= cpu_to_le32(RDES1_END_RING); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 736e29635b77..313a58b68fee 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -296,7 +296,7 @@ exit: } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { dwmac4_set_rx_owner(p, disable_rx_ic); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 1d858fdec997..98fa471da7c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -123,7 +123,7 @@ static int dwxgmac2_get_rx_timestamp_status(void *desc, void *next_desc, } static void dwxgmac2_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { dwxgmac2_set_rx_owner(p, disable_rx_ic); } diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 5ef91a790f9d..e8855e6adb48 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -259,15 +259,19 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_8KiB); + p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); else - ehn_desc_rx_set_on_ring(p, end); + ehn_desc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 92b8944f26e3..5bb00234d961 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -33,7 +33,7 @@ struct dma_extended_desc; struct stmmac_desc_ops { /* DMA RX descriptor ring initialization */ void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode, - int end); + int end, int bfsize); /* DMA TX descriptor ring initialization */ void (*init_tx_desc)(struct dma_desc *p, int mode, int end); /* Invoked by the xmit function to prepare the tx descriptor */ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index de65bb29feba..c55a9815b394 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -135,15 +135,19 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, - int end) + int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); else - ndesc_rx_set_on_ring(p, end); + ndesc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(RDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0bc3632880b5..f765869bd864 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1114,11 +1114,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) if (priv->extend_desc) stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic, priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); else stmmac_init_rx_desc(priv, &rx_q->dma_rx[i], priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); } /** -- cgit v1.2.3 From 9268b0a54bb91fb6ce22ba39cf4843ac68961de1 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:36 +0200 Subject: net: stmmac: ratelimit RX error logs [ Upstream commit 972c9be784e077bc56472c78243e0326e525b689 ] Ratelimit RX error logs. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f765869bd864..0ccf91a08290 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3407,9 +3407,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) * ignored */ if (frame_len > priv->dma_buf_sz) { - netdev_err(priv->dev, - "len %d larger than size (%d)\n", - frame_len, priv->dma_buf_sz); + if (net_ratelimit()) + netdev_err(priv->dev, + "len %d larger than size (%d)\n", + frame_len, priv->dma_buf_sz); priv->dev->stats.rx_length_errors++; break; } @@ -3466,9 +3467,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) } else { skb = rx_q->rx_skbuff[entry]; if (unlikely(!skb)) { - netdev_err(priv->dev, - "%s: Inconsistent Rx chain\n", - priv->dev->name); + if (net_ratelimit()) + netdev_err(priv->dev, + "%s: Inconsistent Rx chain\n", + priv->dev->name); priv->dev->stats.rx_dropped++; break; } -- cgit v1.2.3 From c937eeff033df551153e322520b638e9823a4d29 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:37 +0200 Subject: net: stmmac: don't stop NAPI processing when dropping a packet [ Upstream commit 07b3975352374c3f5ebb4a42ef0b253fe370542d ] Currently, if we drop a packet, we exit from NAPI loop before the budget is consumed. In some situations this will make the RX processing stall e.g. when flood pinging the system with oversized packets, as the errorneous packets are not dropped efficiently. If we drop a packet, we should just continue to the next one as long as the budget allows. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0ccf91a08290..f0e0593e54f3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3328,9 +3328,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_channel *ch = &priv->channel[queue]; - unsigned int entry = rx_q->cur_rx; + unsigned int next_entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; - unsigned int next_entry; unsigned int count = 0; bool xmac; @@ -3348,10 +3347,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true); } while (count < limit) { - int status; + int entry, status; struct dma_desc *p; struct dma_desc *np; + entry = next_entry; + if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); else @@ -3412,7 +3413,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "len %d larger than size (%d)\n", frame_len, priv->dma_buf_sz); priv->dev->stats.rx_length_errors++; - break; + continue; } /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 @@ -3447,7 +3448,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dev_warn(priv->device, "packet dropped\n"); priv->dev->stats.rx_dropped++; - break; + continue; } dma_sync_single_for_cpu(priv->device, @@ -3472,7 +3473,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "%s: Inconsistent Rx chain\n", priv->dev->name); priv->dev->stats.rx_dropped++; - break; + continue; } prefetch(skb->data - NET_IP_ALIGN); rx_q->rx_skbuff[entry] = NULL; @@ -3507,7 +3508,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; } - entry = next_entry; } stmmac_rx_refill(priv, queue); -- cgit v1.2.3 From e6b88961c8886b95e62e9f837882be6abbdc8fe1 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:38 +0200 Subject: net: stmmac: don't overwrite discard_frame status [ Upstream commit 1b746ce8b397e58f9e40ce5c63b7198de6930482 ] If we have error bits set, the discard_frame status will get overwritten by checksum bit checks, which might set the status back to good one. Fix by checking the COE status only if the frame is good. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index e8855e6adb48..c42ef6c729c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -231,9 +231,10 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, * It doesn't match with the information reported into the databook. * At any rate, we need to understand if the CSUM hw computation is ok * and report this info to the upper layers. */ - ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR), - !!(rdes0 & RDES0_FRAME_TYPE), - !!(rdes0 & ERDES0_RX_MAC_ADDR)); + if (likely(ret == good_frame)) + ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR), + !!(rdes0 & RDES0_FRAME_TYPE), + !!(rdes0 & ERDES0_RX_MAC_ADDR)); if (unlikely(rdes0 & RDES0_DRIBBLING)) x->dribbling_bit++; -- cgit v1.2.3 From f3b6acbaff395b2d3ad5704da18552081f0d356b Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:39 +0200 Subject: net: stmmac: fix dropping of multi-descriptor RX frames [ Upstream commit 8ac0c24fe1c256af6644caf3d311029440ec2fbd ] Packets without the last descriptor set should be dropped early. If we receive a frame larger than the DMA buffer, the HW will continue using the next descriptor. Driver mistakes these as individual frames, and sometimes a truncated frame (without the LD set) may look like a valid packet. This fixes a strange issue where the system replies to 4098-byte ping although the MTU/DMA buffer size is set to 4096, and yet at the same time it's logging an oversized packet. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index c42ef6c729c0..5202d6ad7919 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -201,6 +201,11 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, if (unlikely(rdes0 & RDES0_OWN)) return dma_own; + if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) { + stats->rx_length_errors++; + return discard_frame; + } + if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) { if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) { x->rx_desc++; -- cgit v1.2.3 From 02f3f9794f214555ed624252b92e36f3f892ab45 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:40 +0200 Subject: net: stmmac: don't log oversized frames [ Upstream commit 057a0c5642a2ff2db7c421cdcde34294a23bf37b ] This is log is harmful as it can trigger multiple times per packet. Delete it. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index c55a9815b394..b7dd4e3c760d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -91,8 +91,6 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, return dma_own; if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) { - pr_warn("%s: Oversized frame spanned multiple buffers\n", - __func__); stats->rx_length_errors++; return discard_frame; } -- cgit v1.2.3 From 66559d395933540c5ae7fd26aacec1ddbe579a29 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2019 01:39:50 +0000 Subject: jffs2: fix use-after-free on symlink traversal [ Upstream commit 4fdcfab5b5537c21891e22e65996d4d0dd8ab4ca ] free the symlink body after the same RCU delay we have for freeing the struct inode itself, so that traversal during RCU pathwalk wouldn't step into freed memory. Signed-off-by: Al Viro Signed-off-by: Sasha Levin (Microsoft) --- fs/jffs2/readinode.c | 5 ----- fs/jffs2/super.c | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 389ea53ea487..bccfc40b3a74 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -1414,11 +1414,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL); - if (f->target) { - kfree(f->target); - f->target = NULL; - } - fds = f->dents; while(fds) { fd = fds; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index bb6ae387469f..05d892c79339 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -47,7 +47,10 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb) static void jffs2_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + + kfree(f->target); + kmem_cache_free(jffs2_inode_cachep, f); } static void jffs2_destroy_inode(struct inode *inode) -- cgit v1.2.3 From 3ba95a36358796c210f0a902119a697d269ec442 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2019 01:43:37 +0000 Subject: debugfs: fix use-after-free on symlink traversal [ Upstream commit 93b919da64c15b90953f96a536e5e61df896ca57 ] symlink body shouldn't be freed without an RCU delay. Switch debugfs to ->destroy_inode() and use of call_rcu(); free both the inode and symlink body in the callback. Similar to solution for bpf, only here it's even more obvious that ->evict_inode() can be dropped. Signed-off-by: Al Viro Signed-off-by: Sasha Levin (Microsoft) --- fs/debugfs/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 29c68c5d44d5..c4a4fc6f1a95 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -163,19 +163,24 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void debugfs_evict_inode(struct inode *inode) +static void debugfs_i_callback(struct rcu_head *head) { - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); + struct inode *inode = container_of(head, struct inode, i_rcu); if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); + free_inode_nonrcu(inode); +} + +static void debugfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, debugfs_i_callback); } static const struct super_operations debugfs_super_operations = { .statfs = simple_statfs, .remount_fs = debugfs_remount, .show_options = debugfs_show_options, - .evict_inode = debugfs_evict_inode, + .destroy_inode = debugfs_destroy_inode, }; static void debugfs_release_dentry(struct dentry *dentry) -- cgit v1.2.3 From 93a122ae03805e3884fa563cf621a779d342e199 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 23 Feb 2019 12:47:54 +0100 Subject: mfd: twl-core: Disable IRQ while suspended [ Upstream commit 20bb907f7dc82ecc9e135ad7067ac7eb69c81222 ] Since commit 6e2bd956936 ("i2c: omap: Use noirq system sleep pm ops to idle device for suspend") on gta04 we have handle_twl4030_pih() called in situations where pm_runtime_get() in i2c-omap.c returns -EACCES. [ 86.474365] Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done. [ 86.485473] printk: Suspending console(s) (use no_console_suspend to debug) [ 86.555572] Disabling non-boot CPUs ... [ 86.555664] Successfully put all powerdomains to target state [ 86.563720] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563751] twl4030: I2C error -13 reading PIH ISR [ 86.563812] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563812] twl4030: I2C error -13 reading PIH ISR [ 86.563873] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563903] twl4030: I2C error -13 reading PIH ISR This happens when we wakeup via something behing twl4030 (powerbutton or rtc alarm). This goes on for minutes until the system is finally resumed. Disable the irq on suspend and enable it on resume to avoid having i2c access problems when the irq registers are checked. Fixes: 6e2bd956936 ("i2c: omap: Use noirq system sleep pm ops to idle device for suspend") Signed-off-by: Andreas Kemnade Tested-by: Tony Lindgren Signed-off-by: Lee Jones Signed-off-by: Sasha Levin (Microsoft) --- drivers/mfd/twl-core.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 299016bc46d9..104477b512a2 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -1245,6 +1245,28 @@ free: return status; } +static int __maybe_unused twl_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq) + disable_irq(client->irq); + + return 0; +} + +static int __maybe_unused twl_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq) + enable_irq(client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(twl_dev_pm_ops, twl_suspend, twl_resume); + static const struct i2c_device_id twl_ids[] = { { "twl4030", TWL4030_VAUX2 }, /* "Triton 2" */ { "twl5030", 0 }, /* T2 updated */ @@ -1262,6 +1284,7 @@ static const struct i2c_device_id twl_ids[] = { /* One Client Driver , 4 Clients */ static struct i2c_driver twl_driver = { .driver.name = DRIVER_NAME, + .driver.pm = &twl_dev_pm_ops, .id_table = twl_ids, .probe = twl_probe, .remove = twl_remove, -- cgit v1.2.3 From b514ad765acbcd8d2784fff67b9f218b7fa4a33a Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 1 Apr 2019 21:40:36 +0800 Subject: block: use blk_free_flush_queue() to free hctx->fq in blk_mq_init_hctx [ Upstream commit b9a1ff504b9492ad6beb7d5606e0e3365d4d8499 ] kfree() can leak the hctx->fq->flush_rq field. Reviewed-by: Ming Lei Signed-off-by: Shenghui Wang Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin (Microsoft) --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 16f9675c57e6..97eba6d23425 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2341,7 +2341,7 @@ static int blk_mq_init_hctx(struct request_queue *q, return 0; free_fq: - kfree(hctx->fq); + blk_free_flush_queue(hctx->fq); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); -- cgit v1.2.3 From 6651fd9bead916cc6e8dc084337d26779a125185 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 2 Apr 2019 12:26:36 +0200 Subject: rtc: da9063: set uie_unsupported when relevant [ Upstream commit 882c5e552ffd06856de42261460f46e18319d259 ] The DA9063AD doesn't support alarms on any seconds and its granularity is the minute. Set uie_unsupported in that case. Reported-by: Wolfram Sang Reported-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Acked-by: Steve Twiss Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin (Microsoft) --- drivers/rtc/rtc-da9063.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index b4e054c64bad..69b54e5556c0 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -480,6 +480,13 @@ static int da9063_rtc_probe(struct platform_device *pdev) da9063_data_to_tm(data, &rtc->alarm_time, rtc); rtc->rtc_sync = false; + /* + * TODO: some models have alarms on a minute boundary but still support + * real hardware interrupts. Add this once the core supports it. + */ + if (config->rtc_data_start != RTC_SEC) + rtc->rtc_dev->uie_unsupported = 1; + irq_alarm = platform_get_irq_byname(pdev, "ALARM"); ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL, da9063_alarm_event, -- cgit v1.2.3 From 8a5ce8d9825abe474bd46fe031a7b1f4fa669ecc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 2 Apr 2019 09:57:13 -0700 Subject: HID: input: add mapping for Assistant key [ Upstream commit ce856634af8cda3490947df8ac1ef5843e6356af ] According to HUTRR89 usage 0x1cb from the consumer page was assigned to allow launching desktop-aware assistant application, so let's add the mapping. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin (Microsoft) --- drivers/hid/hid-input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 59a5608b8dc0..ff92a7b2fc89 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -995,6 +995,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x1b8: map_key_clear(KEY_VIDEO); break; case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; + case 0x1cb: map_key_clear(KEY_ASSISTANT); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; -- cgit v1.2.3 From 52dd2b2d7f0a7aca9dc6f5edffe7dff87bc69014 Mon Sep 17 00:00:00 2001 From: Louis Taylor Date: Wed, 3 Apr 2019 12:36:20 -0600 Subject: vfio/pci: use correct format characters [ Upstream commit 426b046b748d1f47e096e05bdcc6fb4172791307 ] When compiling with -Wformat, clang emits the following warnings: drivers/vfio/pci/vfio_pci.c:1601:5: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1601:13: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1601:21: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1601:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1605:5: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1605:13: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1605:21: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1605:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ The types of these arguments are unconditionally defined, so this patch updates the format character to the correct ones for unsigned ints. Link: https://github.com/ClangBuiltLinux/linux/issues/378 Signed-off-by: Louis Taylor Reviewed-by: Nick Desaulniers Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin (Microsoft) --- drivers/vfio/pci/vfio_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index ff60bd1ea587..eb8fc8ccffc6 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1597,11 +1597,11 @@ static void __init vfio_pci_fill_ids(void) rc = pci_add_dynid(&vfio_pci_driver, vendor, device, subvendor, subdevice, class, class_mask, 0); if (rc) - pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n", + pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n", vendor, device, subvendor, subdevice, class, class_mask, rc); else - pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n", + pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n", vendor, device, subvendor, subdevice, class, class_mask); } -- cgit v1.2.3 From ebaa9d86f25b6941e7309c9d717576c267b6717d Mon Sep 17 00:00:00 2001 From: Xose Vazquez Perez Date: Sat, 30 Mar 2019 15:43:31 +0100 Subject: scsi: core: add new RDAC LENOVO/DE_Series device [ Upstream commit 1cb1d2c64e812928fe0a40b8f7e74523d0283dbe ] Blacklist "Universal Xport" LUN. It's used for in-band storage array management. Also add model to the rdac dh family. Cc: Martin Wilck Cc: Hannes Reinecke Cc: NetApp RDAC team Cc: Christophe Varoqui Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: SCSI ML Cc: DM ML Signed-off-by: Xose Vazquez Perez Reviewed-by: Martin Wilck Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (Microsoft) --- drivers/scsi/scsi_devinfo.c | 1 + drivers/scsi/scsi_dh.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index c4cbfd07b916..a08ff3bd6310 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -238,6 +238,7 @@ static struct { {"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN}, {"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */ diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 5a58cbf3a75d..c14006ac98f9 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -75,6 +75,7 @@ static const struct scsi_dh_blist scsi_dh_blist[] = { {"NETAPP", "INF-01-00", "rdac", }, {"LSI", "INF-01-00", "rdac", }, {"ENGENIO", "INF-01-00", "rdac", }, + {"LENOVO", "DE_Series", "rdac", }, {NULL, NULL, NULL }, }; -- cgit v1.2.3 From 24d73473cc92e687f200a5590bc3afa8f3452848 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Apr 2019 16:10:52 +0000 Subject: scsi: storvsc: Fix calculation of sub-channel count [ Upstream commit 382e06d11e075a40b4094b6ef809f8d4bcc7ab2a ] When the number of sub-channels offered by Hyper-V is >= the number of CPUs in the VM, calculate the correct number of sub-channels. The current code produces one too many. This scenario arises only when the number of CPUs is artificially restricted (for example, with maxcpus= on the kernel boot line), because Hyper-V normally offers a sub-channel count < number of CPUs. While the current code doesn't break, the extra sub-channel is unbalanced across the CPUs (for example, a total of 5 channels on a VM with 4 CPUs). Signed-off-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Reviewed-by: Long Li Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (Microsoft) --- drivers/scsi/storvsc_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 84380bae20f1..e186743033f4 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -668,13 +668,22 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) { struct device *dev = &device->device; struct storvsc_device *stor_device; - int num_cpus = num_online_cpus(); int num_sc; struct storvsc_cmd_request *request; struct vstor_packet *vstor_packet; int ret, t; - num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns); + /* + * If the number of CPUs is artificially restricted, such as + * with maxcpus=1 on the kernel boot line, Hyper-V could offer + * sub-channels >= the number of CPUs. These sub-channels + * should not be created. The primary channel is already created + * and assigned to one CPU, so check against # CPUs - 1. + */ + num_sc = min((int)(num_online_cpus() - 1), max_chns); + if (!num_sc) + return; + stor_device = get_out_stor_device(device); if (!stor_device) return; -- cgit v1.2.3 From 9bc06b41fd0d7e942ae501762266934b4743d09d Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Tue, 2 Apr 2019 22:12:38 +0800 Subject: arm/mach-at91/pm : fix possible object reference leak [ Upstream commit ba5e60c9b75dec92d4c695b928f69300b17d7686 ] of_find_device_by_node() takes a reference to the struct device when it finds a match via get_device. When returning error we should call put_device. Reviewed-by: Mukesh Ojha Signed-off-by: Peng Hao Signed-off-by: Ludovic Desroches Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/mach-at91/pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 51e808adb00c..2a757dcaa1a5 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -591,13 +591,13 @@ static int __init at91_pm_backup_init(void) np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam"); if (!np) - goto securam_fail; + goto securam_fail_no_ref_dev; pdev = of_find_device_by_node(np); of_node_put(np); if (!pdev) { pr_warn("%s: failed to find securam device!\n", __func__); - goto securam_fail; + goto securam_fail_no_ref_dev; } sram_pool = gen_pool_get(&pdev->dev, NULL); @@ -620,6 +620,8 @@ static int __init at91_pm_backup_init(void) return 0; securam_fail: + put_device(&pdev->dev); +securam_fail_no_ref_dev: iounmap(pm_data.sfrbu); pm_data.sfrbu = NULL; return ret; -- cgit v1.2.3 From 2c52a30e2158ef98a6ade3673e7236d2cde6e429 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Thu, 4 Apr 2019 10:57:44 +0800 Subject: blk-mq: do not reset plug->rq_count before the list is sorted [ Upstream commit bcc816dfe51ab86ca94663c7b225f2d6eb0fddb9 ] We would never be able to sort the list if we first reset plug->rq_count which is used in conditional check later. Fixes: ce5b009cff19 ("block: improve logic around when to sort a plug list") Reviewed-by: Ming Lei Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin (Microsoft) --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 97eba6d23425..5a2585d69c81 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1716,11 +1716,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) unsigned int depth; list_splice_init(&plug->mq_list, &list); - plug->rq_count = 0; if (plug->rq_count > 2 && plug->multiple_queues) list_sort(NULL, &list, plug_rq_cmp); + plug->rq_count = 0; + this_q = NULL; this_hctx = NULL; this_ctx = NULL; -- cgit v1.2.3 From 26ac3d804822b757a6a4f73c0497bc9404f9a8e1 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Mon, 1 Apr 2019 11:55:57 +0800 Subject: arm64: fix wrong check of on_sdei_stack in nmi context [ Upstream commit 1c41860864c8ae0387ef7d44f0000e99cbb2e06d ] When doing unwind_frame() in the context of pseudo nmi (need enable CONFIG_ARM64_PSEUDO_NMI), reaching the bottom of the stack (fp == 0, pc != 0), function on_sdei_stack() will return true while the sdei acpi table is not inited in fact. This will cause a "NULL pointer dereference" oops when going on. Reviewed-by: Julien Thierry Signed-off-by: Wei Li Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin (Microsoft) --- arch/arm64/kernel/sdei.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 5ba4465e44f0..ea94cf8f9dc6 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -94,6 +94,9 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; + if (!low) + return false; + if (sp < low || sp >= high) return false; @@ -111,6 +114,9 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; + if (!low) + return false; + if (sp < low || sp >= high) return false; -- cgit v1.2.3 From f59129993a5300dce02bea7c3dc77b2ee3fb101c Mon Sep 17 00:00:00 2001 From: Liubin Shu Date: Thu, 4 Apr 2019 16:46:42 +0800 Subject: net: hns: fix KASAN: use-after-free in hns_nic_net_xmit_hw() [ Upstream commit 3a39a12ad364a9acd1038ba8da67cd8430f30de4 ] This patch is trying to fix the issue due to: [27237.844750] BUG: KASAN: use-after-free in hns_nic_net_xmit_hw+0x708/0xa18[hns_enet_drv] After hnae_queue_xmit() in hns_nic_net_xmit_hw(), can be interrupted by interruptions, and than call hns_nic_tx_poll_one() to handle the new packets, and free the skb. So, when turn back to hns_nic_net_xmit_hw(), calling skb->len will cause use-after-free. This patch update tx ring statistics in hns_nic_tx_poll_one() to fix the bug. Signed-off-by: Liubin Shu Signed-off-by: Zhen Lei Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 60e7d7ae3787..e5a7c0761dbd 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -376,8 +376,6 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, wmb(); /* commit all data before submit */ assert(skb->queue_mapping < priv->ae_handle->q_num); hnae_queue_xmit(priv->ae_handle->qs[skb->queue_mapping], buf_num); - ring->stats.tx_pkts++; - ring->stats.tx_bytes += skb->len; return NETDEV_TX_OK; @@ -999,6 +997,9 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, /* issue prefetch for next Tx descriptor */ prefetch(&ring->desc_cb[ring->next_to_clean]); } + /* update tx ring statistics. */ + ring->stats.tx_pkts += pkts; + ring->stats.tx_bytes += bytes; NETIF_TX_UNLOCK(ring); -- cgit v1.2.3 From 2cbe0575198107fa58d31aa2e9eb95874d1dc58b Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:43 +0800 Subject: net: hns: Use NAPI_POLL_WEIGHT for hns driver [ Upstream commit acb1ce15a61154aa501891d67ebf79bc9ea26818 ] When the HNS driver loaded, always have an error print: "netif_napi_add() called with weight 256" This is because the kernel checks the NAPI polling weights requested by drivers and it prints an error message if a driver requests a weight bigger than 64. So use NAPI_POLL_WEIGHT to fix it. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index e5a7c0761dbd..4cd86ba1f050 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -29,9 +29,6 @@ #define SERVICE_TIMER_HZ (1 * HZ) -#define NIC_TX_CLEAN_MAX_NUM 256 -#define NIC_RX_CLEAN_MAX_NUM 64 - #define RCB_IRQ_NOT_INITED 0 #define RCB_IRQ_INITED 1 #define HNS_BUFFER_SIZE_2048 2048 @@ -2153,7 +2150,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) hns_nic_tx_fini_pro_v2; netif_napi_add(priv->netdev, &rd->napi, - hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM); + hns_nic_common_poll, NAPI_POLL_WEIGHT); rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED; } for (i = h->q_num; i < h->q_num * 2; i++) { @@ -2166,7 +2163,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) hns_nic_rx_fini_pro_v2; netif_napi_add(priv->netdev, &rd->napi, - hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM); + hns_nic_common_poll, NAPI_POLL_WEIGHT); rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED; } -- cgit v1.2.3 From 759e8256d0013ad451f7201ed3cd01ba6efa70ea Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:44 +0800 Subject: net: hns: Fix probabilistic memory overwrite when HNS driver initialized [ Upstream commit c0b0984426814f3a9251873b689e67d34d8ccd84 ] When reboot the system again and again, may cause a memory overwrite. [ 15.638922] systemd[1]: Reached target Swap. [ 15.667561] tun: Universal TUN/TAP device driver, 1.6 [ 15.676756] Bridge firewalling registered [ 17.344135] Unable to handle kernel paging request at virtual address 0000000200000040 [ 17.352179] Mem abort info: [ 17.355007] ESR = 0x96000004 [ 17.358105] Exception class = DABT (current EL), IL = 32 bits [ 17.364112] SET = 0, FnV = 0 [ 17.367209] EA = 0, S1PTW = 0 [ 17.370393] Data abort info: [ 17.373315] ISV = 0, ISS = 0x00000004 [ 17.377206] CM = 0, WnR = 0 [ 17.380214] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____) [ 17.386926] [0000000200000040] pgd=0000000000000000 [ 17.391878] Internal error: Oops: 96000004 [#1] SMP [ 17.396824] CPU: 23 PID: 95 Comm: kworker/u130:0 Tainted: G E 4.19.25-1.2.78.aarch64 #1 [ 17.414175] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.54 08/16/2018 [ 17.425615] Workqueue: events_unbound async_run_entry_fn [ 17.435151] pstate: 00000005 (nzcv daif -PAN -UAO) [ 17.444139] pc : __mutex_lock.isra.1+0x74/0x540 [ 17.453002] lr : __mutex_lock.isra.1+0x3c/0x540 [ 17.461701] sp : ffff000100d9bb60 [ 17.469146] x29: ffff000100d9bb60 x28: 0000000000000000 [ 17.478547] x27: 0000000000000000 x26: ffff802fb8945000 [ 17.488063] x25: 0000000000000000 x24: ffff802fa32081a8 [ 17.497381] x23: 0000000000000002 x22: ffff801fa2b15220 [ 17.506701] x21: ffff000009809000 x20: ffff802fa23a0888 [ 17.515980] x19: ffff801fa2b15220 x18: 0000000000000000 [ 17.525272] x17: 0000000200000000 x16: 0000000200000000 [ 17.534511] x15: 0000000000000000 x14: 0000000000000000 [ 17.543652] x13: ffff000008d95db8 x12: 000000000000000d [ 17.552780] x11: ffff000008d95d90 x10: 0000000000000b00 [ 17.561819] x9 : ffff000100d9bb90 x8 : ffff802fb89d6560 [ 17.570829] x7 : 0000000000000004 x6 : 00000004a1801d05 [ 17.579839] x5 : 0000000000000000 x4 : 0000000000000000 [ 17.588852] x3 : ffff802fb89d5a00 x2 : 0000000000000000 [ 17.597734] x1 : 0000000200000000 x0 : 0000000200000000 [ 17.606631] Process kworker/u130:0 (pid: 95, stack limit = 0x(____ptrval____)) [ 17.617438] Call trace: [ 17.623349] __mutex_lock.isra.1+0x74/0x540 [ 17.630927] __mutex_lock_slowpath+0x24/0x30 [ 17.638602] mutex_lock+0x50/0x60 [ 17.645295] drain_workqueue+0x34/0x198 [ 17.652623] __sas_drain_work+0x7c/0x168 [ 17.659903] sas_drain_work+0x60/0x68 [ 17.666947] hisi_sas_scan_finished+0x30/0x40 [hisi_sas_main] [ 17.676129] do_scsi_scan_host+0x70/0xb0 [ 17.683534] do_scan_async+0x20/0x228 [ 17.690586] async_run_entry_fn+0x4c/0x1d0 [ 17.697997] process_one_work+0x1b4/0x3f8 [ 17.705296] worker_thread+0x54/0x470 Every time the call trace is not the same, but the overwrite address is always the same: Unable to handle kernel paging request at virtual address 0000000200000040 The root cause is, when write the reg XGMAC_MAC_TX_LF_RF_CONTROL_REG, didn't use the io_base offset. Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c index ba4316910dea..a60f207768fc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -129,7 +129,7 @@ static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv) dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0); dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1); dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0); - dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); + dsaf_write_dev(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); } /** -- cgit v1.2.3 From c43ddd7a21323064d4afeec8336f94fe2a4d685e Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:45 +0800 Subject: net: hns: fix ICMP6 neighbor solicitation messages discard problem [ Upstream commit f058e46855dcbc28edb2ed4736f38a71fd19cadb ] ICMP6 neighbor solicitation messages will be discard by the Hip06 chips, because of not setting forwarding pool. Enable promisc mode has the same problem. This patch fix the wrong forwarding table configs for the multicast vague matching when enable promisc mode, and add forwarding pool for the forwarding table. Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 33 ++++++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index ac55db065f16..f5ff07cb2b72 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2750,6 +2750,17 @@ int hns_dsaf_get_regs_count(void) return DSAF_DUMP_REGS_NUM; } +static int hns_dsaf_get_port_id(u8 port) +{ + if (port < DSAF_SERVICE_NW_NUM) + return port; + + if (port >= DSAF_BASE_INNER_PORT_NUM) + return port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; + + return -EINVAL; +} + static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) { struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80}; @@ -2815,23 +2826,33 @@ static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) memset(&temp_key, 0x0, sizeof(temp_key)); mask_entry.addr[0] = 0x01; hns_dsaf_set_mac_key(dsaf_dev, &mask_key, mask_entry.in_vlan_id, - port, mask_entry.addr); + 0xf, mask_entry.addr); tbl_tcam_mcast.tbl_mcast_item_vld = 1; tbl_tcam_mcast.tbl_mcast_old_en = 0; - if (port < DSAF_SERVICE_NW_NUM) { - mskid = port; - } else if (port >= DSAF_BASE_INNER_PORT_NUM) { - mskid = port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; - } else { + /* set MAC port to handle multicast */ + mskid = hns_dsaf_get_port_id(port); + if (mskid == -EINVAL) { dev_err(dsaf_dev->dev, "%s,pnum(%d)error,key(%#x:%#x)\n", dsaf_dev->ae_dev.name, port, mask_key.high.val, mask_key.low.val); return; } + dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], + mskid % 32, 1); + /* set pool bit map to handle multicast */ + mskid = hns_dsaf_get_port_id(port_num); + if (mskid == -EINVAL) { + dev_err(dsaf_dev->dev, + "%s, pool bit map pnum(%d)error,key(%#x:%#x)\n", + dsaf_dev->ae_dev.name, port_num, + mask_key.high.val, mask_key.low.val); + return; + } dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], mskid % 32, 1); + memcpy(&temp_key, &mask_key, sizeof(mask_key)); hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc, (struct dsaf_tbl_tcam_data *)(&mask_key), -- cgit v1.2.3 From 3e67174547a3dbb3076cd5665d967b65434a4c6f Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:46 +0800 Subject: net: hns: Fix WARNING when remove HNS driver with SMMU enabled [ Upstream commit 8601a99d7c0256b7a7fdd1ab14cf6c1f1dfcadc6 ] When enable SMMU, remove HNS driver will cause a WARNING: [ 141.924177] WARNING: CPU: 36 PID: 2708 at drivers/iommu/dma-iommu.c:443 __iommu_dma_unmap+0xc0/0xc8 [ 141.954673] Modules linked in: hns_enet_drv(-) [ 141.963615] CPU: 36 PID: 2708 Comm: rmmod Tainted: G W 5.0.0-rc1-28723-gb729c57de95c-dirty #32 [ 141.983593] Hardware name: Huawei D05/D05, BIOS Hisilicon D05 UEFI Nemo 1.8 RC0 08/31/2017 [ 142.000244] pstate: 60000005 (nZCv daif -PAN -UAO) [ 142.009886] pc : __iommu_dma_unmap+0xc0/0xc8 [ 142.018476] lr : __iommu_dma_unmap+0xc0/0xc8 [ 142.027066] sp : ffff000013533b90 [ 142.033728] x29: ffff000013533b90 x28: ffff8013e6983600 [ 142.044420] x27: 0000000000000000 x26: 0000000000000000 [ 142.055113] x25: 0000000056000000 x24: 0000000000000015 [ 142.065806] x23: 0000000000000028 x22: ffff8013e66eee68 [ 142.076499] x21: ffff8013db919800 x20: 0000ffffefbff000 [ 142.087192] x19: 0000000000001000 x18: 0000000000000007 [ 142.097885] x17: 000000000000000e x16: 0000000000000001 [ 142.108578] x15: 0000000000000019 x14: 363139343a70616d [ 142.119270] x13: 6e75656761705f67 x12: 0000000000000000 [ 142.129963] x11: 00000000ffffffff x10: 0000000000000006 [ 142.140656] x9 : 1346c1aa88093500 x8 : ffff0000114de4e0 [ 142.151349] x7 : 6662666578303d72 x6 : ffff0000105ffec8 [ 142.162042] x5 : 0000000000000000 x4 : 0000000000000000 [ 142.172734] x3 : 00000000ffffffff x2 : ffff0000114de500 [ 142.183427] x1 : 0000000000000000 x0 : 0000000000000035 [ 142.194120] Call trace: [ 142.199030] __iommu_dma_unmap+0xc0/0xc8 [ 142.206920] iommu_dma_unmap_page+0x20/0x28 [ 142.215335] __iommu_unmap_page+0x40/0x60 [ 142.223399] hnae_unmap_buffer+0x110/0x134 [ 142.231639] hnae_free_desc+0x6c/0x10c [ 142.239177] hnae_fini_ring+0x14/0x34 [ 142.246540] hnae_fini_queue+0x2c/0x40 [ 142.254080] hnae_put_handle+0x38/0xcc [ 142.261619] hns_nic_dev_remove+0x54/0xfc [hns_enet_drv] [ 142.272312] platform_drv_remove+0x24/0x64 [ 142.280552] device_release_driver_internal+0x17c/0x20c [ 142.291070] driver_detach+0x4c/0x90 [ 142.298259] bus_remove_driver+0x5c/0xd8 [ 142.306148] driver_unregister+0x2c/0x54 [ 142.314037] platform_driver_unregister+0x10/0x18 [ 142.323505] hns_nic_dev_driver_exit+0x14/0xf0c [hns_enet_drv] [ 142.335248] __arm64_sys_delete_module+0x214/0x25c [ 142.344891] el0_svc_common+0xb0/0x10c [ 142.352430] el0_svc_handler+0x24/0x80 [ 142.359968] el0_svc+0x8/0x7c0 [ 142.366104] ---[ end trace 60ad1cd58e63c407 ]--- The tx ring buffer map when xmit and unmap when xmit done. So in hnae_init_ring() did not map tx ring buffer, but in hnae_fini_ring() have a unmap operation for tx ring buffer, which is already unmapped when xmit done, than cause this WARNING. The hnae_alloc_buffers() is called in hnae_init_ring(), so the hnae_free_buffers() should be in hnae_fini_ring(), not in hnae_free_desc(). In hnae_fini_ring(), adds a check is_rx_ring() as in hnae_init_ring(). When the ring buffer is tx ring, adds a piece of code to ensure that the tx ring is unmap. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 79d03f8ee7b1..c7fa97a7e1f4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -150,7 +150,6 @@ out_buffer_fail: /* free desc along with its attached buffer */ static void hnae_free_desc(struct hnae_ring *ring) { - hnae_free_buffers(ring); dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr, ring->desc_num * sizeof(ring->desc[0]), ring_to_dma_dir(ring)); @@ -183,6 +182,9 @@ static int hnae_alloc_desc(struct hnae_ring *ring) /* fini ring, also free the buffer for the ring */ static void hnae_fini_ring(struct hnae_ring *ring) { + if (is_rx_ring(ring)) + hnae_free_buffers(ring); + hnae_free_desc(ring); kfree(ring->desc_cb); ring->desc_cb = NULL; -- cgit v1.2.3 From 93e9dabbcdc7b8cd5f6c8e5451963fc1a1771e70 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 3 Apr 2019 17:30:14 +0530 Subject: libcxgb: fix incorrect ppmax calculation [ Upstream commit cc5a726c79158bd307150e8d4176ec79b52001ea ] BITS_TO_LONGS() uses DIV_ROUND_UP() because of this ppmax value can be greater than available per cpu page pods. This patch removes BITS_TO_LONGS() to fix this issue. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (Microsoft) --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c index 74849be5f004..e2919005ead3 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c @@ -354,7 +354,10 @@ static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, ppmax = max; /* pool size must be multiple of unsigned long */ - bmap = BITS_TO_LONGS(ppmax); + bmap = ppmax / BITS_PER_TYPE(unsigned long); + if (!bmap) + return NULL; + ppmax = (bmap * sizeof(unsigned long)) << 3; alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; @@ -402,6 +405,10 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, if (reserve_factor) { ppmax_pool = ppmax / reserve_factor; pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max); + if (!pool) { + ppmax_pool = 0; + reserve_factor = 0; + } pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n", ndev->name, ppmax, ppmax_pool, pool_index_max); -- cgit v1.2.3 From 271ea551cfdff55860f8628611d6538dec7dc307 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 25 Mar 2019 11:47:31 -0700 Subject: KVM: SVM: prevent DBG_DECRYPT and DBG_ENCRYPT overflow [ Upstream commit b86bc2858b389255cd44555ce4b1e427b2b770c0 ] This ensures that the address and length provided to DBG_DECRYPT and DBG_ENCRYPT do not cause an overflow. At the same time, pass the actual number of pages pinned in memory to sev_unpin_memory() as a cleanup. Reported-by: Cfir Cohen Signed-off-by: David Rientjes Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin (Microsoft) --- arch/x86/kvm/svm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e544cec812f9..2a07e43ee666 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6815,7 +6815,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) struct page **src_p, **dst_p; struct kvm_sev_dbg debug; unsigned long n; - int ret, size; + unsigned int size; + int ret; if (!sev_guest(kvm)) return -ENOTTY; @@ -6823,6 +6824,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) return -EFAULT; + if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) + return -EINVAL; + if (!debug.dst_uaddr) + return -EINVAL; + vaddr = debug.src_uaddr; size = debug.len; vaddr_end = vaddr + size; @@ -6873,8 +6879,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) dst_vaddr, len, &argp->error); - sev_unpin_memory(kvm, src_p, 1); - sev_unpin_memory(kvm, dst_p, 1); + sev_unpin_memory(kvm, src_p, n); + sev_unpin_memory(kvm, dst_p, n); if (ret) goto err; -- cgit v1.2.3 From 37436fba416a9754e53e46236542414fbb5796dc Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 5 Apr 2019 18:38:49 -0700 Subject: kmemleak: powerpc: skip scanning holes in the .bss section [ Upstream commit 298a32b132087550d3fa80641ca58323c5dfd4d9 ] Commit 2d4f567103ff ("KVM: PPC: Introduce kvm_tmp framework") adds kvm_tmp[] into the .bss section and then free the rest of unused spaces back to the page allocator. kernel_init kvm_guest_init kvm_free_tmp free_reserved_area free_unref_page free_unref_page_prepare With DEBUG_PAGEALLOC=y, it will unmap those pages from kernel. As the result, kmemleak scan will trigger a panic when it scans the .bss section with unmapped pages. This patch creates dedicated kmemleak objects for the .data, .bss and potentially .data..ro_after_init sections to allow partial freeing via the kmemleak_free_part() in the powerpc kvm_free_tmp() function. Link: http://lkml.kernel.org/r/20190321171917.62049-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Reported-by: Qian Cai Acked-by: Michael Ellerman (powerpc) Tested-by: Qian Cai Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Avi Kivity Cc: Paolo Bonzini Cc: Radim Krcmar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin (Microsoft) --- arch/powerpc/kernel/kvm.c | 7 +++++++ mm/kmemleak.c | 16 +++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 683b5b3805bd..cd381e2291df 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -712,6 +713,12 @@ static void kvm_use_magic_page(void) static __init void kvm_free_tmp(void) { + /* + * Inform kmemleak about the hole in the .bss section since the + * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y. + */ + kmemleak_free_part(&kvm_tmp[kvm_tmp_index], + ARRAY_SIZE(kvm_tmp) - kvm_tmp_index); free_reserved_area(&kvm_tmp[kvm_tmp_index], &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL); } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 707fa5579f66..6c318f5ac234 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1529,11 +1529,6 @@ static void kmemleak_scan(void) } rcu_read_unlock(); - /* data/bss scanning */ - scan_large_block(_sdata, _edata); - scan_large_block(__bss_start, __bss_stop); - scan_large_block(__start_ro_after_init, __end_ro_after_init); - #ifdef CONFIG_SMP /* per-cpu sections scanning */ for_each_possible_cpu(i) @@ -2071,6 +2066,17 @@ void __init kmemleak_init(void) } local_irq_restore(flags); + /* register the data/bss sections */ + create_object((unsigned long)_sdata, _edata - _sdata, + KMEMLEAK_GREY, GFP_ATOMIC); + create_object((unsigned long)__bss_start, __bss_stop - __bss_start, + KMEMLEAK_GREY, GFP_ATOMIC); + /* only register .data..ro_after_init if not within .data */ + if (__start_ro_after_init < _sdata || __end_ro_after_init > _edata) + create_object((unsigned long)__start_ro_after_init, + __end_ro_after_init - __start_ro_after_init, + KMEMLEAK_GREY, GFP_ATOMIC); + /* * This is the point where tracking allocations is safe. Automatic * scanning is started during the late initcall. Add the early logged -- cgit v1.2.3 From 695ece893cb734885ab7e8689ec81905a1c8c7cf Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 5 Apr 2019 18:39:06 -0700 Subject: hugetlbfs: fix memory leak for resv_map [ Upstream commit 58b6e5e8f1addd44583d61b0a03c0f5519527e35 ] When mknod is used to create a block special file in hugetlbfs, it will allocate an inode and kmalloc a 'struct resv_map' via resv_map_alloc(). inode->i_mapping->private_data will point the newly allocated resv_map. However, when the device special file is opened bd_acquire() will set inode->i_mapping to bd_inode->i_mapping. Thus the pointer to the allocated resv_map is lost and the structure is leaked. Programs to reproduce: mount -t hugetlbfs nodev hugetlbfs mknod hugetlbfs/dev b 0 0 exec 30<> hugetlbfs/dev umount hugetlbfs/ resv_map structures are only needed for inodes which can have associated page allocations. To fix the leak, only allocate resv_map for those inodes which could possibly be associated with page allocations. Link: http://lkml.kernel.org/r/20190401213101.16476-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Andrew Morton Reported-by: Yufen Yu Suggested-by: Yufen Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin (Microsoft) --- fs/hugetlbfs/inode.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a7fa037b876b..a3a3d256fb0e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -741,11 +741,17 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev) { struct inode *inode; - struct resv_map *resv_map; + struct resv_map *resv_map = NULL; - resv_map = resv_map_alloc(); - if (!resv_map) - return NULL; + /* + * Reserve maps are only needed for inodes that can have associated + * page allocations. + */ + if (S_ISREG(mode) || S_ISLNK(mode)) { + resv_map = resv_map_alloc(); + if (!resv_map) + return NULL; + } inode = new_inode(sb); if (inode) { @@ -780,8 +786,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, break; } lockdep_annotate_inode_mutex_key(inode); - } else - kref_put(&resv_map->refs, resv_map_release); + } else { + if (resv_map) + kref_put(&resv_map->refs, resv_map_release); + } return inode; } -- cgit v1.2.3 From b174065805b55300d9d4e6ae6865c7b0838cc0f4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 5 Apr 2019 18:39:30 -0700 Subject: sh: fix multiple function definition build errors [ Upstream commit acaf892ecbf5be7710ae05a61fd43c668f68ad95 ] Many of the sh CPU-types have their own plat_irq_setup() and arch_init_clk_ops() functions, so these same (empty) functions in arch/sh/boards/of-generic.c are not needed and cause build errors. If there is some case where these empty functions are needed, they can be retained by marking them as "__weak" while at the same time making builds that do not need them succeed. Fixes these build errors: arch/sh/boards/of-generic.o: In function `plat_irq_setup': (.init.text+0x134): multiple definition of `plat_irq_setup' arch/sh/kernel/cpu/sh2/setup-sh7619.o:(.init.text+0x30): first defined here arch/sh/boards/of-generic.o: In function `arch_init_clk_ops': (.init.text+0x118): multiple definition of `arch_init_clk_ops' arch/sh/kernel/cpu/sh2/clock-sh7619.o:(.init.text+0x0): first defined here Link: http://lkml.kernel.org/r/9ee4e0c5-f100-86a2-bd4d-1d3287ceab31@infradead.org Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Takashi Iwai Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin (Microsoft) --- arch/sh/boards/of-generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index 958f46da3a79..d91065e81a4e 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -164,10 +164,10 @@ static struct sh_machine_vector __initmv sh_of_generic_mv = { struct sh_clk_ops; -void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx) +void __init __weak arch_init_clk_ops(struct sh_clk_ops **ops, int idx) { } -void __init plat_irq_setup(void) +void __init __weak plat_irq_setup(void) { } -- cgit v1.2.3 From e91fcd8326c6c432aa428ad92b92b122469f92f5 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Fri, 5 Apr 2019 17:42:45 -0400 Subject: null_blk: prevent crash from bad home_node value [ Upstream commit 7ff684a683d777c4956fce93e60accbab2bd7696 ] At module load, if the selected home_node value is greater than the available numa nodes, the system will crash in __alloc_pages_nodemask() due to a bad paging request. Prevent this user error crash by detecting the bad value, logging an error, and setting g_home_node back to the default of NUMA_NO_NODE. Signed-off-by: John Pittman Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin (Microsoft) --- drivers/block/null_blk_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 62c9654b9ce8..fd7a9be54595 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1749,6 +1749,11 @@ static int __init null_init(void) return -EINVAL; } + if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { + pr_err("null_blk: invalid home_node value\n"); + g_home_node = NUMA_NO_NODE; + } + if (g_queue_mode == NULL_Q_RQ) { pr_err("null_blk: legacy IO path no longer available\n"); return -EINVAL; -- cgit v1.2.3 From 6f014ec022f93b64e1eb366c11706e6396e9e18a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 19 Feb 2019 08:49:56 -0800 Subject: xsysace: Fix error handling in ace_setup [ Upstream commit 47b16820c490149c2923e8474048f2c6e7557cab ] If xace hardware reports a bad version number, the error handling code in ace_setup() calls put_disk(), followed by queue cleanup. However, since the disk data structure has the queue pointer set, put_disk() also cleans and releases the queue. This results in blk_cleanup_queue() accessing an already released data structure, which in turn may result in a crash such as the following. [ 10.681671] BUG: Kernel NULL pointer dereference at 0x00000040 [ 10.681826] Faulting instruction address: 0xc0431480 [ 10.682072] Oops: Kernel access of bad area, sig: 11 [#1] [ 10.682251] BE PAGE_SIZE=4K PREEMPT Xilinx Virtex440 [ 10.682387] Modules linked in: [ 10.682528] CPU: 0 PID: 1 Comm: swapper Tainted: G W 5.0.0-rc6-next-20190218+ #2 [ 10.682733] NIP: c0431480 LR: c043147c CTR: c0422ad8 [ 10.682863] REGS: cf82fbe0 TRAP: 0300 Tainted: G W (5.0.0-rc6-next-20190218+) [ 10.683065] MSR: 00029000 CR: 22000222 XER: 00000000 [ 10.683236] DEAR: 00000040 ESR: 00000000 [ 10.683236] GPR00: c043147c cf82fc90 cf82ccc0 00000000 00000000 00000000 00000002 00000000 [ 10.683236] GPR08: 00000000 00000000 c04310bc 00000000 22000222 00000000 c0002c54 00000000 [ 10.683236] GPR16: 00000000 00000001 c09aa39c c09021b0 c09021dc 00000007 c0a68c08 00000000 [ 10.683236] GPR24: 00000001 ced6d400 ced6dcf0 c0815d9c 00000000 00000000 00000000 cedf0800 [ 10.684331] NIP [c0431480] blk_mq_run_hw_queue+0x28/0x114 [ 10.684473] LR [c043147c] blk_mq_run_hw_queue+0x24/0x114 [ 10.684602] Call Trace: [ 10.684671] [cf82fc90] [c043147c] blk_mq_run_hw_queue+0x24/0x114 (unreliable) [ 10.684854] [cf82fcc0] [c04315bc] blk_mq_run_hw_queues+0x50/0x7c [ 10.685002] [cf82fce0] [c0422b24] blk_set_queue_dying+0x30/0x68 [ 10.685154] [cf82fcf0] [c0423ec0] blk_cleanup_queue+0x34/0x14c [ 10.685306] [cf82fd10] [c054d73c] ace_probe+0x3dc/0x508 [ 10.685445] [cf82fd50] [c052d740] platform_drv_probe+0x4c/0xb8 [ 10.685592] [cf82fd70] [c052abb0] really_probe+0x20c/0x32c [ 10.685728] [cf82fda0] [c052ae58] driver_probe_device+0x68/0x464 [ 10.685877] [cf82fdc0] [c052b500] device_driver_attach+0xb4/0xe4 [ 10.686024] [cf82fde0] [c052b5dc] __driver_attach+0xac/0xfc [ 10.686161] [cf82fe00] [c0528428] bus_for_each_dev+0x80/0xc0 [ 10.686314] [cf82fe30] [c0529b3c] bus_add_driver+0x144/0x234 [ 10.686457] [cf82fe50] [c052c46c] driver_register+0x88/0x15c [ 10.686610] [cf82fe60] [c09de288] ace_init+0x4c/0xac [ 10.686742] [cf82fe80] [c0002730] do_one_initcall+0xac/0x330 [ 10.686888] [cf82fee0] [c09aafd0] kernel_init_freeable+0x34c/0x478 [ 10.687043] [cf82ff30] [c0002c6c] kernel_init+0x18/0x114 [ 10.687188] [cf82ff40] [c000f2f0] ret_from_kernel_thread+0x14/0x1c [ 10.687349] Instruction dump: [ 10.687435] 3863ffd4 4bfffd70 9421ffd0 7c0802a6 93c10028 7c9e2378 93e1002c 38810008 [ 10.687637] 7c7f1b78 90010034 4bfffc25 813f008c <81290040> 75290100 4182002c 80810008 [ 10.688056] ---[ end trace 13c9ff51d41b9d40 ]--- Fix the problem by setting the disk queue pointer to NULL before calling put_disk(). A more comprehensive fix might be to rearrange the code to check the hardware version before initializing data structures, but I don't know if this would have undesirable side effects, and it would increase the complexity of backporting the fix to older kernels. Fixes: 74489a91dd43a ("Add support for Xilinx SystemACE CompactFlash interface") Acked-by: Michal Simek Signed-off-by: Guenter Roeck Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin (Microsoft) --- drivers/block/xsysace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 87ccef4bd69e..32a21b8d1d85 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -1090,6 +1090,8 @@ static int ace_setup(struct ace_device *ace) return 0; err_read: + /* prevent double queue cleanup */ + ace->gd->queue = NULL; put_disk(ace->gd); err_alloc_disk: blk_cleanup_queue(ace->queue); -- cgit v1.2.3 From e42ae9c9b8640bc05a8ea1e223c7aae17eef3ef2 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 26 Mar 2019 22:20:43 +0000 Subject: fs: stream_open - opener for stream-like files so that read and write can run simultaneously without deadlock [ Upstream commit 10dce8af34226d90fa56746a934f8da5dcdba3df ] Commit 9c225f2655e3 ("vfs: atomic f_pos accesses as per POSIX") added locking for file.f_pos access and in particular made concurrent read and write not possible - now both those functions take f_pos lock for the whole run, and so if e.g. a read is blocked waiting for data, write will deadlock waiting for that read to complete. This caused regression for stream-like files where previously read and write could run simultaneously, but after that patch could not do so anymore. See e.g. commit 581d21a2d02a ("xenbus: fix deadlock on writes to /proc/xen/xenbus") which fixes such regression for particular case of /proc/xen/xenbus. The patch that added f_pos lock in 2014 did so to guarantee POSIX thread safety for read/write/lseek and added the locking to file descriptors of all regular files. In 2014 that thread-safety problem was not new as it was already discussed earlier in 2006. However even though 2006'th version of Linus's patch was adding f_pos locking "only for files that are marked seekable with FMODE_LSEEK (thus avoiding the stream-like objects like pipes and sockets)", the 2014 version - the one that actually made it into the tree as 9c225f2655e3 - is doing so irregardless of whether a file is seekable or not. See https://lore.kernel.org/lkml/53022DB1.4070805@gmail.com/ https://lwn.net/Articles/180387 https://lwn.net/Articles/180396 for historic context. The reason that it did so is, probably, that there are many files that are marked non-seekable, but e.g. their read implementation actually depends on knowing current position to correctly handle the read. Some examples: kernel/power/user.c snapshot_read fs/debugfs/file.c u32_array_read fs/fuse/control.c fuse_conn_waiting_read + ... drivers/hwmon/asus_atk0110.c atk_debugfs_ggrp_read arch/s390/hypfs/inode.c hypfs_read_iter ... Despite that, many nonseekable_open users implement read and write with pure stream semantics - they don't depend on passed ppos at all. And for those cases where read could wait for something inside, it creates a situation similar to xenbus - the write could be never made to go until read is done, and read is waiting for some, potentially external, event, for potentially unbounded time -> deadlock. Besides xenbus, there are 14 such places in the kernel that I've found with semantic patch (see below): drivers/xen/evtchn.c:667:8-24: ERROR: evtchn_fops: .read() can deadlock .write() drivers/isdn/capi/capi.c:963:8-24: ERROR: capi_fops: .read() can deadlock .write() drivers/input/evdev.c:527:1-17: ERROR: evdev_fops: .read() can deadlock .write() drivers/char/pcmcia/cm4000_cs.c:1685:7-23: ERROR: cm4000_fops: .read() can deadlock .write() net/rfkill/core.c:1146:8-24: ERROR: rfkill_fops: .read() can deadlock .write() drivers/s390/char/fs3270.c:488:1-17: ERROR: fs3270_fops: .read() can deadlock .write() drivers/usb/misc/ldusb.c:310:1-17: ERROR: ld_usb_fops: .read() can deadlock .write() drivers/hid/uhid.c:635:1-17: ERROR: uhid_fops: .read() can deadlock .write() net/batman-adv/icmp_socket.c:80:1-17: ERROR: batadv_fops: .read() can deadlock .write() drivers/media/rc/lirc_dev.c:198:1-17: ERROR: lirc_fops: .read() can deadlock .write() drivers/leds/uleds.c:77:1-17: ERROR: uleds_fops: .read() can deadlock .write() drivers/input/misc/uinput.c:400:1-17: ERROR: uinput_fops: .read() can deadlock .write() drivers/infiniband/core/user_mad.c:985:7-23: ERROR: umad_fops: .read() can deadlock .write() drivers/gnss/core.c:45:1-17: ERROR: gnss_fops: .read() can deadlock .write() In addition to the cases above another regression caused by f_pos locking is that now FUSE filesystems that implement open with FOPEN_NONSEEKABLE flag, can no longer implement bidirectional stream-like files - for the same reason as above e.g. read can deadlock write locking on file.f_pos in the kernel. FUSE's FOPEN_NONSEEKABLE was added in 2008 in a7c1b990f715 ("fuse: implement nonseekable open") to support OSSPD. OSSPD implements /dev/dsp in userspace with FOPEN_NONSEEKABLE flag, with corresponding read and write routines not depending on current position at all, and with both read and write being potentially blocking operations: See https://github.com/libfuse/osspd https://lwn.net/Articles/308445 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1406 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1438-L1477 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1479-L1510 Corresponding libfuse example/test also describes FOPEN_NONSEEKABLE as "somewhat pipe-like files ..." with read handler not using offset. However that test implements only read without write and cannot exercise the deadlock scenario: https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L124-L131 https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L146-L163 https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L209-L216 I've actually hit the read vs write deadlock for real while implementing my FUSE filesystem where there is /head/watch file, for which open creates separate bidirectional socket-like stream in between filesystem and its user with both read and write being later performed simultaneously. And there it is semantically not easy to split the stream into two separate read-only and write-only channels: https://lab.nexedi.com/kirr/wendelin.core/blob/f13aa600/wcfs/wcfs.go#L88-169 Let's fix this regression. The plan is: 1. We can't change nonseekable_open to include &~FMODE_ATOMIC_POS - doing so would break many in-kernel nonseekable_open users which actually use ppos in read/write handlers. 2. Add stream_open() to kernel to open stream-like non-seekable file descriptors. Read and write on such file descriptors would never use nor change ppos. And with that property on stream-like files read and write will be running without taking f_pos lock - i.e. read and write could be running simultaneously. 3. With semantic patch search and convert to stream_open all in-kernel nonseekable_open users for which read and write actually do not depend on ppos and where there is no other methods in file_operations which assume @offset access. 4. Add FOPEN_STREAM to fs/fuse/ and open in-kernel file-descriptors via steam_open if that bit is present in filesystem open reply. It was tempting to change fs/fuse/ open handler to use stream_open instead of nonseekable_open on just FOPEN_NONSEEKABLE flags, but grepping through Debian codesearch shows users of FOPEN_NONSEEKABLE, and in particular GVFS which actually uses offset in its read and write handlers https://codesearch.debian.net/search?q=-%3Enonseekable+%3D https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1080 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1247-1346 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1399-1481 so if we would do such a change it will break a real user. 5. Add stream_open and FOPEN_STREAM handling to stable kernels starting from v3.14+ (the kernel where 9c225f2655 first appeared). This will allow to patch OSSPD and other FUSE filesystems that provide stream-like files to return FOPEN_STREAM | FOPEN_NONSEEKABLE in their open handler and this way avoid the deadlock on all kernel versions. This should work because fs/fuse/ ignores unknown open flags returned from a filesystem and so passing FOPEN_STREAM to a kernel that is not aware of this flag cannot hurt. In turn the kernel that is not aware of FOPEN_STREAM will be < v3.14 where just FOPEN_NONSEEKABLE is sufficient to implement streams without read vs write deadlock. This patch adds stream_open, converts /proc/xen/xenbus to it and adds semantic patch to automatically locate in-kernel places that are either required to be converted due to read vs write deadlock, or that are just safe to be converted because read and write do not use ppos and there are no other funky methods in file_operations. Regarding semantic patch I've verified each generated change manually - that it is correct to convert - and each other nonseekable_open instance left - that it is either not correct to convert there, or that it is not converted due to current stream_open.cocci limitations. The script also does not convert files that should be valid to convert, but that currently have .llseek = noop_llseek or generic_file_llseek for unknown reason despite file being opened with nonseekable_open (e.g. drivers/input/mousedev.c) Cc: Michael Kerrisk Cc: Yongzhi Pan Cc: Jonathan Corbet Cc: David Vrabel Cc: Juergen Gross Cc: Miklos Szeredi Cc: Tejun Heo Cc: Kirill Tkhai Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Julia Lawall Cc: Nikolaus Rath Cc: Han-Wen Nienhuys Signed-off-by: Kirill Smelkov Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin (Microsoft) --- drivers/xen/xenbus/xenbus_dev_frontend.c | 4 +- fs/open.c | 18 ++ fs/read_write.c | 5 +- include/linux/fs.h | 4 + scripts/coccinelle/api/stream_open.cocci | 363 +++++++++++++++++++++++++++++++ 5 files changed, 389 insertions(+), 5 deletions(-) create mode 100644 scripts/coccinelle/api/stream_open.cocci diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index c3e201025ef0..0782ff3c2273 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -622,9 +622,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) if (xen_store_evtchn == 0) return -ENOENT; - nonseekable_open(inode, filp); - - filp->f_mode &= ~FMODE_ATOMIC_POS; /* cdev-style semantics */ + stream_open(inode, filp); u = kzalloc(sizeof(*u), GFP_KERNEL); if (u == NULL) diff --git a/fs/open.c b/fs/open.c index f1c2f855fd43..a00350018a47 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1215,3 +1215,21 @@ int nonseekable_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL(nonseekable_open); + +/* + * stream_open is used by subsystems that want stream-like file descriptors. + * Such file descriptors are not seekable and don't have notion of position + * (file.f_pos is always 0). Contrary to file descriptors of other regular + * files, .read() and .write() can run simultaneously. + * + * stream_open never fails and is marked to return int so that it could be + * directly used as file_operations.open . + */ +int stream_open(struct inode *inode, struct file *filp) +{ + filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); + filp->f_mode |= FMODE_STREAM; + return 0; +} + +EXPORT_SYMBOL(stream_open); diff --git a/fs/read_write.c b/fs/read_write.c index 27b69b85d49f..3d3194e32201 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -560,12 +560,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ static inline loff_t file_pos_read(struct file *file) { - return file->f_pos; + return file->f_mode & FMODE_STREAM ? 0 : file->f_pos; } static inline void file_pos_write(struct file *file, loff_t pos) { - file->f_pos = pos; + if ((file->f_mode & FMODE_STREAM) == 0) + file->f_pos = pos; } ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) diff --git a/include/linux/fs.h b/include/linux/fs.h index fd423fec8d83..09ce2646c78a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -153,6 +153,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_OPENED ((__force fmode_t)0x80000) #define FMODE_CREATED ((__force fmode_t)0x100000) +/* File is stream-like */ +#define FMODE_STREAM ((__force fmode_t)0x200000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) @@ -3074,6 +3077,7 @@ extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); +extern int stream_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci new file mode 100644 index 000000000000..350145da7669 --- /dev/null +++ b/scripts/coccinelle/api/stream_open.cocci @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-2.0 +// Author: Kirill Smelkov (kirr@nexedi.com) +// +// Search for stream-like files that are using nonseekable_open and convert +// them to stream_open. A stream-like file is a file that does not use ppos in +// its read and write. Rationale for the conversion is to avoid deadlock in +// between read and write. + +virtual report +virtual patch +virtual explain // explain decisions in the patch (SPFLAGS="-D explain") + +// stream-like reader & writer - ones that do not depend on f_pos. +@ stream_reader @ +identifier readstream, ppos; +identifier f, buf, len; +type loff_t; +@@ + ssize_t readstream(struct file *f, char *buf, size_t len, loff_t *ppos) + { + ... when != ppos + } + +@ stream_writer @ +identifier writestream, ppos; +identifier f, buf, len; +type loff_t; +@@ + ssize_t writestream(struct file *f, const char *buf, size_t len, loff_t *ppos) + { + ... when != ppos + } + + +// a function that blocks +@ blocks @ +identifier block_f; +identifier wait_event =~ "^wait_event_.*"; +@@ + block_f(...) { + ... when exists + wait_event(...) + ... when exists + } + +// stream_reader that can block inside. +// +// XXX wait_* can be called not directly from current function (e.g. func -> f -> g -> wait()) +// XXX currently reader_blocks supports only direct and 1-level indirect cases. +@ reader_blocks_direct @ +identifier stream_reader.readstream; +identifier wait_event =~ "^wait_event_.*"; +@@ + readstream(...) + { + ... when exists + wait_event(...) + ... when exists + } + +@ reader_blocks_1 @ +identifier stream_reader.readstream; +identifier blocks.block_f; +@@ + readstream(...) + { + ... when exists + block_f(...) + ... when exists + } + +@ reader_blocks depends on reader_blocks_direct || reader_blocks_1 @ +identifier stream_reader.readstream; +@@ + readstream(...) { + ... + } + + +// file_operations + whether they have _any_ .read, .write, .llseek ... at all. +// +// XXX add support for file_operations xxx[N] = ... (sound/core/pcm_native.c) +@ fops0 @ +identifier fops; +@@ + struct file_operations fops = { + ... + }; + +@ has_read @ +identifier fops0.fops; +identifier read_f; +@@ + struct file_operations fops = { + .read = read_f, + }; + +@ has_read_iter @ +identifier fops0.fops; +identifier read_iter_f; +@@ + struct file_operations fops = { + .read_iter = read_iter_f, + }; + +@ has_write @ +identifier fops0.fops; +identifier write_f; +@@ + struct file_operations fops = { + .write = write_f, + }; + +@ has_write_iter @ +identifier fops0.fops; +identifier write_iter_f; +@@ + struct file_operations fops = { + .write_iter = write_iter_f, + }; + +@ has_llseek @ +identifier fops0.fops; +identifier llseek_f; +@@ + struct file_operations fops = { + .llseek = llseek_f, + }; + +@ has_no_llseek @ +identifier fops0.fops; +@@ + struct file_operations fops = { + .llseek = no_llseek, + }; + +@ has_mmap @ +identifier fops0.fops; +identifier mmap_f; +@@ + struct file_operations fops = { + .mmap = mmap_f, + }; + +@ has_copy_file_range @ +identifier fops0.fops; +identifier copy_file_range_f; +@@ + struct file_operations fops = { + .copy_file_range = copy_file_range_f, + }; + +@ has_remap_file_range @ +identifier fops0.fops; +identifier remap_file_range_f; +@@ + struct file_operations fops = { + .remap_file_range = remap_file_range_f, + }; + +@ has_splice_read @ +identifier fops0.fops; +identifier splice_read_f; +@@ + struct file_operations fops = { + .splice_read = splice_read_f, + }; + +@ has_splice_write @ +identifier fops0.fops; +identifier splice_write_f; +@@ + struct file_operations fops = { + .splice_write = splice_write_f, + }; + + +// file_operations that is candidate for stream_open conversion - it does not +// use mmap and other methods that assume @offset access to file. +// +// XXX for simplicity require no .{read/write}_iter and no .splice_{read/write} for now. +// XXX maybe_steam.fops cannot be used in other rules - it gives "bad rule maybe_stream or bad variable fops". +@ maybe_stream depends on (!has_llseek || has_no_llseek) && !has_mmap && !has_copy_file_range && !has_remap_file_range && !has_read_iter && !has_write_iter && !has_splice_read && !has_splice_write @ +identifier fops0.fops; +@@ + struct file_operations fops = { + }; + + +// ---- conversions ---- + +// XXX .open = nonseekable_open -> .open = stream_open +// XXX .open = func -> openfunc -> nonseekable_open + +// read & write +// +// if both are used in the same file_operations together with an opener - +// under that conditions we can use stream_open instead of nonseekable_open. +@ fops_rw depends on maybe_stream @ +identifier fops0.fops, openfunc; +identifier stream_reader.readstream; +identifier stream_writer.writestream; +@@ + struct file_operations fops = { + .open = openfunc, + .read = readstream, + .write = writestream, + }; + +@ report_rw depends on report @ +identifier fops_rw.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report && reader_blocks @ +fops << fops0.fops; +p << report_rw.p1; +@@ +coccilib.report.print_report(p[0], + "ERROR: %s: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix." % (fops,)) + +@ script:python depends on report && !reader_blocks @ +fops << fops0.fops; +p << report_rw.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + + +@ explain_rw_deadlocked depends on explain && reader_blocks @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read & write (was deadlock) */ + ...> + } + + +@ explain_rw_nodeadlock depends on explain && !reader_blocks @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read & write (no direct deadlock) */ + ...> + } + +@ patch_rw depends on patch @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// read, but not write +@ fops_r depends on maybe_stream && !has_write @ +identifier fops0.fops, openfunc; +identifier stream_reader.readstream; +@@ + struct file_operations fops = { + .open = openfunc, + .read = readstream, + }; + +@ report_r depends on report @ +identifier fops_r.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report @ +fops << fops0.fops; +p << report_r.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .read() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + +@ explain_r depends on explain @ +identifier fops_r.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read only */ + ...> + } + +@ patch_r depends on patch @ +identifier fops_r.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// write, but not read +@ fops_w depends on maybe_stream && !has_read @ +identifier fops0.fops, openfunc; +identifier stream_writer.writestream; +@@ + struct file_operations fops = { + .open = openfunc, + .write = writestream, + }; + +@ report_w depends on report @ +identifier fops_w.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report @ +fops << fops0.fops; +p << report_w.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .write() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + +@ explain_w depends on explain @ +identifier fops_w.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* write only */ + ...> + } + +@ patch_w depends on patch @ +identifier fops_w.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// no read, no write - don't change anything -- cgit v1.2.3 From f78b5205ea57db248fe39d224578c09cf75157e9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 16:50:42 +0100 Subject: ARM: orion: don't use using 64-bit DMA masks [ Upstream commit cd92d74d67c811dc22544430b9ac3029f5bd64c5 ] clang warns about statically defined DMA masks from the DMA_BIT_MASK macro with length 64: arch/arm/plat-orion/common.c:625:29: error: shift count >= width of type [-Werror,-Wshift-count-overflow] .coherent_dma_mask = DMA_BIT_MASK(64), ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) The ones in orion shouldn't really be 64 bit masks, so changing them to what the driver can support avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/plat-orion/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index a2399fd66e97..1e970873439c 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -622,7 +622,7 @@ static struct platform_device orion_xor0_shared = { .resource = orion_xor0_shared_resources, .dev = { .dma_mask = &orion_xor_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &orion_xor0_pdata, }, }; @@ -683,7 +683,7 @@ static struct platform_device orion_xor1_shared = { .resource = orion_xor1_shared_resources, .dev = { .dma_mask = &orion_xor_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &orion_xor1_pdata, }, }; -- cgit v1.2.3 From 0b01d66f6d5724a40a41793f7b4d2fd963d06cee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 16:50:43 +0100 Subject: ARM: iop: don't use using 64-bit DMA masks [ Upstream commit 2125801ccce19249708ca3245d48998e70569ab8 ] clang warns about statically defined DMA masks from the DMA_BIT_MASK macro with length 64: arch/arm/mach-iop13xx/setup.c:303:35: error: shift count >= width of type [-Werror,-Wshift-count-overflow] static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64); ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) ^ ~~~ The ones in iop shouldn't really be 64 bit masks, so changing them to what the driver can support avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin (Microsoft) --- arch/arm/mach-iop13xx/setup.c | 8 ++++---- arch/arm/mach-iop13xx/tpmi.c | 10 +++++----- arch/arm/plat-iop/adma.c | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 53c316f7301e..fe4932fda01d 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -300,7 +300,7 @@ static struct resource iop13xx_adma_2_resources[] = { } }; -static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64); +static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32); static struct iop_adma_platform_data iop13xx_adma_0_data = { .hw_id = 0, .pool_size = PAGE_SIZE, @@ -324,7 +324,7 @@ static struct platform_device iop13xx_adma_0_channel = { .resource = iop13xx_adma_0_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_0_data, }, }; @@ -336,7 +336,7 @@ static struct platform_device iop13xx_adma_1_channel = { .resource = iop13xx_adma_1_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_1_data, }, }; @@ -348,7 +348,7 @@ static struct platform_device iop13xx_adma_2_channel = { .resource = iop13xx_adma_2_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_2_data, }, }; diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c index db511ec2b1df..116feb6b261e 100644 --- a/arch/arm/mach-iop13xx/tpmi.c +++ b/arch/arm/mach-iop13xx/tpmi.c @@ -152,7 +152,7 @@ static struct resource iop13xx_tpmi_3_resources[] = { } }; -u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64); +u64 iop13xx_tpmi_mask = DMA_BIT_MASK(32); static struct platform_device iop13xx_tpmi_0_device = { .name = "iop-tpmi", .id = 0, @@ -160,7 +160,7 @@ static struct platform_device iop13xx_tpmi_0_device = { .resource = iop13xx_tpmi_0_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -171,7 +171,7 @@ static struct platform_device iop13xx_tpmi_1_device = { .resource = iop13xx_tpmi_1_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -182,7 +182,7 @@ static struct platform_device iop13xx_tpmi_2_device = { .resource = iop13xx_tpmi_2_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -193,7 +193,7 @@ static struct platform_device iop13xx_tpmi_3_device = { .resource = iop13xx_tpmi_3_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index a4d1f8de3b5b..d9612221e484 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c @@ -143,7 +143,7 @@ struct platform_device iop3xx_dma_0_channel = { .resource = iop3xx_dma_0_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_dma_0_data, }, }; @@ -155,7 +155,7 @@ struct platform_device iop3xx_dma_1_channel = { .resource = iop3xx_dma_1_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_dma_1_data, }, }; @@ -167,7 +167,7 @@ struct platform_device iop3xx_aau_channel = { .resource = iop3xx_aau_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_aau_data, }, }; -- cgit v1.2.3 From 542fb915e79e6072ef2e252cf2d901ef8d432775 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 2 May 2019 15:29:47 +0000 Subject: perf/x86/amd: Update generic hardware cache events for Family 17h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0e3b74e26280f2cf8753717a950b97d424da6046 upstream. Add a new amd_hw_cache_event_ids_f17h assignment structure set for AMD families 17h and above, since a lot has changed. Specifically: L1 Data Cache The data cache access counter remains the same on Family 17h. For DC misses, PMCx041's definition changes with Family 17h, so instead we use the L2 cache accesses from L1 data cache misses counter (PMCx060,umask=0xc8). For DC hardware prefetch events, Family 17h breaks compatibility for PMCx067 "Data Prefetcher", so instead, we use PMCx05a "Hardware Prefetch DC Fills." L1 Instruction Cache PMCs 0x80 and 0x81 (32-byte IC fetches and misses) are backward compatible on Family 17h. For prefetches, we remove the erroneous PMCx04B assignment which counts how many software data cache prefetch load instructions were dispatched. LL - Last Level Cache Removing PMCs 7D, 7E, and 7F assignments, as they do not exist on Family 17h, where the last level cache is L3. L3 counters can be accessed using the existing AMD Uncore driver. Data TLB On Intel machines, data TLB accesses ("dTLB-loads") are assigned to counters that count load/store instructions retired. This is inconsistent with instruction TLB accesses, where Intel implementations report iTLB misses that hit in the STLB. Ideally, dTLB-loads would count higher level dTLB misses that hit in lower level TLBs, and dTLB-load-misses would report those that also missed in those lower-level TLBs, therefore causing a page table walk. That would be consistent with instruction TLB operation, remove the redundancy between dTLB-loads and L1-dcache-loads, and prevent perf from producing artificially low percentage ratios, i.e. the "0.01%" below: 42,550,869 L1-dcache-loads 41,591,860 dTLB-loads 4,802 dTLB-load-misses # 0.01% of all dTLB cache hits 7,283,682 L1-dcache-stores 7,912,392 dTLB-stores 310 dTLB-store-misses On AMD Families prior to 17h, the "Data Cache Accesses" counter is used, which is slightly better than load/store instructions retired, but still counts in terms of individual load/store operations instead of TLB operations. So, for AMD Families 17h and higher, this patch assigns "dTLB-loads" to a counter for L1 dTLB misses that hit in the L2 dTLB, and "dTLB-load-misses" to a counter for L1 DTLB misses that caused L2 DTLB misses and therefore also caused page table walks. This results in a much more accurate view of data TLB performance: 60,961,781 L1-dcache-loads 4,601 dTLB-loads 963 dTLB-load-misses # 20.93% of all dTLB cache hits Note that for all AMD families, data loads and stores are combined in a single accesses counter, so no 'L1-dcache-stores' are reported separately, and stores are counted with loads in 'L1-dcache-loads'. Also note that the "% of all dTLB cache hits" string is misleading because (a) "dTLB cache": although TLBs can be considered caches for page tables, in this context, it can be misinterpreted as data cache hits because the figures are similar (at least on Intel), and (b) not all those loads (technically accesses) technically "hit" at that hardware level. "% of all dTLB accesses" would be more clear/accurate. Instruction TLB On Intel machines, 'iTLB-loads' measure iTLB misses that hit in the STLB, and 'iTLB-load-misses' measure iTLB misses that also missed in the STLB and completed a page table walk. For AMD Family 17h and above, for 'iTLB-loads' we replace the erroneous instruction cache fetches counter with PMCx084 "L1 ITLB Miss, L2 ITLB Hit". For 'iTLB-load-misses' we still use PMCx085 "L1 ITLB Miss, L2 ITLB Miss", but set a 0xff umask because without it the event does not get counted. Branch Predictor (BPU) PMCs 0xc2 and 0xc3 continue to be valid across all AMD Families. Node Level Events Family 17h does not have a PMCx0e9 counter, and corresponding counters have not been made available publicly, so for now, we mark them as unsupported for Families 17h and above. Reference: "Open-Source Register Reference For AMD Family 17h Processors Models 00h-2Fh" Released 7/17/2018, Publication #56255, Revision 3.03: https://www.amd.com/system/files/TechDocs/56255_OSRR.pdf [ mingo: tidied up the line breaks. ] Signed-off-by: Kim Phillips Cc: # v4.9+ Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Janakarajan Natarajan Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Pu Wen Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Thomas Lendacky Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org Fixes: e40ed1542dd7 ("perf/x86: Add perf support for AMD family-17h processors") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/core.c | 111 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index d45f3fbd232e..f15441b07dad 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -116,6 +116,110 @@ static __initconst const u64 amd_hw_cache_event_ids }, }; +static __initconst const u64 amd_hw_cache_event_ids_f17h + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */ + [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */ + [C(RESULT_MISS)] = 0, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */ + [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */ + [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */ + [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */ + [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +}; + /* * AMD Performance Monitor K7 and later, up to and including Family 16h: */ @@ -865,9 +969,10 @@ __init int amd_pmu_init(void) x86_pmu.amd_nb_constraints = 0; } - /* Events are common for all AMDs */ - memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); + if (boot_cpu_data.x86 >= 0x17) + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids)); + else + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); return 0; } -- cgit v1.2.3 From 4b96517d7335c2bbed60432713e6d575f3d221f1 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 9 Apr 2019 11:49:17 -0700 Subject: Bluetooth: btusb: request wake pin with NOAUTOEN commit 771acc7e4a6e5dba779cb1a7fd851a164bc81033 upstream. Badly-designed systems might have (for example) active-high wake pins that default to high (e.g., because of external pull ups) until they have an active firmware which starts driving it low. This can cause an interrupt storm in the time between request_irq() and disable_irq(). We don't support shared interrupts here, so let's just pre-configure the interrupt to avoid auto-enabling it. Fixes: fd913ef7ce61 ("Bluetooth: btusb: Add out-of-band wakeup support") Fixes: 5364a0b4f4be ("arm64: dts: rockchip: move QCA6174A wakeup pin into its USB node") Signed-off-by: Brian Norris Reviewed-by: Matthias Kaehlcke Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 4761499db9ee..470ee68555d9 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2885,6 +2885,7 @@ static int btusb_config_oob_wake(struct hci_dev *hdev) return 0; } + irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_irq(&hdev->dev, irq, btusb_oob_wake_handler, 0, "OOB Wake-on-BT", data); if (ret) { @@ -2899,7 +2900,6 @@ static int btusb_config_oob_wake(struct hci_dev *hdev) } data->oob_wake_irq = irq; - disable_irq(irq); bt_dev_info(hdev, "OOB Wake-on-BT configured at IRQ %u", irq); return 0; } -- cgit v1.2.3 From c319d45742d25ecf34bf1fb2c51f304b08b03b2d Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 15 Feb 2019 07:19:35 +0800 Subject: Bluetooth: mediatek: fix up an error path to restore bdev->tx_state commit 77f328dbc6cf42f22c691a164958a5452142a542 upstream. Restore bdev->tx_state with clearing bit BTMTKUART_TX_WAIT_VND_EVT when there is an error on waiting for the corresponding event. Fixes: 7237c4c9ec92 ("Bluetooth: mediatek: Add protocol support for MediaTek serial devices") Signed-off-by: Sean Wang Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btmtkuart.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index 4593baff2bc9..19eecf198321 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -115,11 +115,13 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev, u8 op, u8 flag, u16 plen, TASK_INTERRUPTIBLE, HCI_INIT_TIMEOUT); if (err == -EINTR) { bt_dev_err(hdev, "Execution of wmt command interrupted"); + clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state); return err; } if (err) { bt_dev_err(hdev, "Execution of wmt command timed out"); + clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state); return -ETIMEDOUT; } -- cgit v1.2.3 From ad93be454503e385e757d16e61b1a0f161419094 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 4 Jan 2019 09:49:46 -0700 Subject: clk: qcom: Add missing freq for usb30_master_clk on 8998 commit 0c8ff62504e3a667387e87889a259632c3199a86 upstream. The usb30_master_clk supports a 60Mhz frequency, but that is missing from the table of supported frequencies. Add it. Fixes: b5f5f525c547 (clk: qcom: Add MSM8998 Global Clock Control (GCC) driver) Signed-off-by: Jeffrey Hugo Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/gcc-msm8998.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/gcc-msm8998.c b/drivers/clk/qcom/gcc-msm8998.c index 1b779396e04f..42de947173f8 100644 --- a/drivers/clk/qcom/gcc-msm8998.c +++ b/drivers/clk/qcom/gcc-msm8998.c @@ -1112,6 +1112,7 @@ static struct clk_rcg2 ufs_axi_clk_src = { static const struct freq_tbl ftbl_usb30_master_clk_src[] = { F(19200000, P_XO, 1, 0, 0), + F(60000000, P_GPLL0_OUT_MAIN, 10, 0, 0), F(120000000, P_GPLL0_OUT_MAIN, 5, 0, 0), F(150000000, P_GPLL0_OUT_MAIN, 4, 0, 0), { } -- cgit v1.2.3 From 08b450e6826c27e4c6cc7cf158ff9f930182711c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 12 Feb 2019 19:39:27 -0800 Subject: usb: dwc3: Reset num_trbs after skipping commit c7152763f02e05567da27462b2277a554e507c89 upstream. Currently req->num_trbs is not reset after the TRBs are skipped and processed from the cancelled list. The gadget driver may reuse the request with an invalid req->num_trbs, and DWC3 will incorrectly skip trbs. To fix this, simply reset req->num_trbs to 0 after skipping through all of them. Fixes: c3acd5901414 ("usb: dwc3: gadget: use num_trbs when skipping TRBs on ->dequeue()") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8d1dbe36db92..9f941cdb0691 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1506,6 +1506,8 @@ static void dwc3_gadget_ep_skip_trbs(struct dwc3_ep *dep, struct dwc3_request *r trb->ctrl &= ~DWC3_TRB_CTRL_HWO; dwc3_ep_inc_deq(dep); } + + req->num_trbs = 0; } static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) -- cgit v1.2.3 From 9f929fd72c8d622e2ae8e0cde1b98228a38b65b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fertic Date: Tue, 11 Dec 2018 17:55:00 -0700 Subject: staging: iio: adt7316: allow adt751x to use internal vref for all dacs commit 10bfe7cc1739c22f0aa296b39e53f61e9e3f4d99 upstream. With adt7516/7/9, internal vref is available for dacs a and b, dacs c and d, or all dacs. The driver doesn't currently support internal vref for all dacs. Change the else if to an if so both bits are checked rather than just one or the other. Signed-off-by: Jeremy Fertic Fixes: 35f6b6b86ede ("staging: iio: new ADT7316/7/8 and ADT7516/7/9 driver") Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/addac/adt7316.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/addac/adt7316.c b/drivers/staging/iio/addac/adt7316.c index 7839d869d25d..f85a7a550728 100644 --- a/drivers/staging/iio/addac/adt7316.c +++ b/drivers/staging/iio/addac/adt7316.c @@ -1079,7 +1079,7 @@ static ssize_t adt7316_store_DAC_internal_Vref(struct device *dev, ldac_config = chip->ldac_config & (~ADT7516_DAC_IN_VREF_MASK); if (data & 0x1) ldac_config |= ADT7516_DAC_AB_IN_VREF; - else if (data & 0x2) + if (data & 0x2) ldac_config |= ADT7516_DAC_CD_IN_VREF; } else { ret = kstrtou8(buf, 16, &data); -- cgit v1.2.3 From 5502a9c8e28880924da43f6d36522742614358f6 Mon Sep 17 00:00:00 2001 From: Jeremy Fertic Date: Sat, 22 Dec 2018 21:57:42 -0700 Subject: staging: iio: adt7316: fix the dac read calculation commit 45130fb030aec26ac28b4bb23344901df3ec3b7f upstream. The calculation of the current dac value is using the wrong bits of the dac lsb register. Create two macros to shift the lsb register value into lsb position, depending on whether the dac is 10 or 12 bit. Initialize data to 0 so, with an 8 bit dac, the msb register value can be bitwise ORed with data. Fixes: 35f6b6b86ede ("staging: iio: new ADT7316/7/8 and ADT7516/7/9 driver") Signed-off-by: Jeremy Fertic Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/addac/adt7316.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/staging/iio/addac/adt7316.c b/drivers/staging/iio/addac/adt7316.c index f85a7a550728..0c737eee54ee 100644 --- a/drivers/staging/iio/addac/adt7316.c +++ b/drivers/staging/iio/addac/adt7316.c @@ -47,6 +47,8 @@ #define ADT7516_MSB_AIN3 0xA #define ADT7516_MSB_AIN4 0xB #define ADT7316_DA_DATA_BASE 0x10 +#define ADT7316_DA_10_BIT_LSB_SHIFT 6 +#define ADT7316_DA_12_BIT_LSB_SHIFT 4 #define ADT7316_DA_MSB_DATA_REGS 4 #define ADT7316_LSB_DAC_A 0x10 #define ADT7316_MSB_DAC_A 0x11 @@ -1403,7 +1405,7 @@ static IIO_DEVICE_ATTR(ex_analog_temp_offset, 0644, static ssize_t adt7316_show_DAC(struct adt7316_chip_info *chip, int channel, char *buf) { - u16 data; + u16 data = 0; u8 msb, lsb, offset; int ret; @@ -1428,7 +1430,11 @@ static ssize_t adt7316_show_DAC(struct adt7316_chip_info *chip, if (ret) return -EIO; - data = (msb << offset) + (lsb & ((1 << offset) - 1)); + if (chip->dac_bits == 12) + data = lsb >> ADT7316_DA_12_BIT_LSB_SHIFT; + else if (chip->dac_bits == 10) + data = lsb >> ADT7316_DA_10_BIT_LSB_SHIFT; + data |= msb << offset; return sprintf(buf, "%d\n", data); } -- cgit v1.2.3 From 85dbcda147f41cbf9be95864dae0daa25f66473c Mon Sep 17 00:00:00 2001 From: Jeremy Fertic Date: Sat, 22 Dec 2018 21:57:41 -0700 Subject: staging: iio: adt7316: fix handling of dac high resolution option commit 76b7fe8d6c4daf4db672eb953c892c6f6572a282 upstream. The adt7316/7 and adt7516/7 have the option to output voltage proportional to temperature on dac a and/or dac b. The default dac resolution in this mode is 8 bits with the dac high resolution option enabling 10 bits. None of these settings affect dacs c and d. Remove the "1 (12 bits)" output from the show function since that is not an option for this mode. Return "1 (10 bits)" if the device is one of the above mentioned chips and the dac high resolution mode is enabled. In the store function, the driver currently allows the user to write to the ADT7316_DA_HIGH_RESOLUTION bit regardless of the device in use. Add a check to return an error in the case of an adt7318 or adt7519. Remove the else statement that clears the ADT7316_DA_HIGH_RESOLUTION bit. Instead, clear it before conditionally enabling it, depending on user input. This matches the typical pattern in the driver when an attribute is a boolean. Fixes: 35f6b6b86ede ("staging: iio: new ADT7316/7/8 and ADT7516/7/9 driver") Signed-off-by: Jeremy Fertic Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/addac/adt7316.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/staging/iio/addac/adt7316.c b/drivers/staging/iio/addac/adt7316.c index 0c737eee54ee..b84360ebb57a 100644 --- a/drivers/staging/iio/addac/adt7316.c +++ b/drivers/staging/iio/addac/adt7316.c @@ -634,9 +634,7 @@ static ssize_t adt7316_show_da_high_resolution(struct device *dev, struct adt7316_chip_info *chip = iio_priv(dev_info); if (chip->config3 & ADT7316_DA_HIGH_RESOLUTION) { - if (chip->id == ID_ADT7316 || chip->id == ID_ADT7516) - return sprintf(buf, "1 (12 bits)\n"); - if (chip->id == ID_ADT7317 || chip->id == ID_ADT7517) + if (chip->id != ID_ADT7318 && chip->id != ID_ADT7519) return sprintf(buf, "1 (10 bits)\n"); } @@ -653,10 +651,12 @@ static ssize_t adt7316_store_da_high_resolution(struct device *dev, u8 config3; int ret; + if (chip->id == ID_ADT7318 || chip->id == ID_ADT7519) + return -EPERM; + + config3 = chip->config3 & (~ADT7316_DA_HIGH_RESOLUTION); if (buf[0] == '1') - config3 = chip->config3 | ADT7316_DA_HIGH_RESOLUTION; - else - config3 = chip->config3 & (~ADT7316_DA_HIGH_RESOLUTION); + config3 |= ADT7316_DA_HIGH_RESOLUTION; ret = chip->bus.write(chip->bus.client, ADT7316_CONFIG3, config3); if (ret) -- cgit v1.2.3 From 15e4df25a6b7d8b3f396d7dbc1885343affa2539 Mon Sep 17 00:00:00 2001 From: Jeremy Fertic Date: Sat, 22 Dec 2018 21:57:43 -0700 Subject: staging: iio: adt7316: fix the dac write calculation commit 78accaea117c1ae878774974fab91ac4a0b0e2b0 upstream. The lsb calculation is not masking the correct bits from the user input. Subtract 1 from (1 << offset) to correctly set up the mask to be applied to user input. The lsb register stores its value starting at the bit 7 position. adt7316_store_DAC() currently assumes the value is at the other end of the register. Shift the lsb value before storing it in a new variable lsb_reg, and write this variable to the lsb register. Fixes: 35f6b6b86ede ("staging: iio: new ADT7316/7/8 and ADT7516/7/9 driver") Signed-off-by: Jeremy Fertic Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/addac/adt7316.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/staging/iio/addac/adt7316.c b/drivers/staging/iio/addac/adt7316.c index b84360ebb57a..ecbb29e2153e 100644 --- a/drivers/staging/iio/addac/adt7316.c +++ b/drivers/staging/iio/addac/adt7316.c @@ -1442,7 +1442,7 @@ static ssize_t adt7316_show_DAC(struct adt7316_chip_info *chip, static ssize_t adt7316_store_DAC(struct adt7316_chip_info *chip, int channel, const char *buf, size_t len) { - u8 msb, lsb, offset; + u8 msb, lsb, lsb_reg, offset; u16 data; int ret; @@ -1460,9 +1460,13 @@ static ssize_t adt7316_store_DAC(struct adt7316_chip_info *chip, return -EINVAL; if (chip->dac_bits > 8) { - lsb = data & (1 << offset); + lsb = data & ((1 << offset) - 1); + if (chip->dac_bits == 12) + lsb_reg = lsb << ADT7316_DA_12_BIT_LSB_SHIFT; + else + lsb_reg = lsb << ADT7316_DA_10_BIT_LSB_SHIFT; ret = chip->bus.write(chip->bus.client, - ADT7316_DA_DATA_BASE + channel * 2, lsb); + ADT7316_DA_DATA_BASE + channel * 2, lsb_reg); if (ret) return -EIO; } -- cgit v1.2.3 From 0d700633ead433e91707cb654b0e0c1f0951ba1e Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 25 Jan 2019 22:22:38 +0800 Subject: scsi: hisi_sas: Fix to only call scsi_get_prot_op() for non-NULL scsi_cmnd commit e1ba0b0b44512c5a209526c09ea3eb7d256b6951 upstream. A NULL-pointer dereference was introduced for TMF SSP commands from the upstreaming reworking. Fix this by relocating the scsi_get_prot_op() callsite. Fixes: d6a9000b81be ("scsi: hisi_sas: Add support for DIF feature for v2 hw") Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index e0570fd8466e..45e52dd870c8 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1033,8 +1033,8 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba, struct sas_ssp_task *ssp_task = &task->ssp_task; struct scsi_cmnd *scsi_cmnd = ssp_task->cmd; struct hisi_sas_tmf_task *tmf = slot->tmf; - unsigned char prot_op = scsi_get_prot_op(scsi_cmnd); int has_data = 0, priority = !!tmf; + unsigned char prot_op; u8 *buf_cmd; u32 dw1 = 0, dw2 = 0, len = 0; @@ -1049,6 +1049,7 @@ static void prep_ssp_v3_hw(struct hisi_hba *hisi_hba, dw1 |= 2 << CMD_HDR_FRAME_TYPE_OFF; dw1 |= DIR_NO_DATA << CMD_HDR_DIR_OFF; } else { + prot_op = scsi_get_prot_op(scsi_cmnd); dw1 |= 1 << CMD_HDR_FRAME_TYPE_OFF; switch (scsi_cmnd->sc_data_direction) { case DMA_TO_DEVICE: -- cgit v1.2.3 From 2647c26b20fe9dde62ea27bd2eb9cdaef85cc3b7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Jan 2019 10:34:51 -0800 Subject: scsi: RDMA/srpt: Fix a credit leak for aborted commands commit 40ca8757291ca7a8775498112d320205b2a2e571 upstream. Make sure that the next time a response is sent to the initiator that the credit it had allocated for the aborted request gets freed. Cc: Doug Ledford Cc: Jason Gunthorpe Cc: Nicholas Bellinger Cc: Mike Christie Cc: Hannes Reinecke Cc: Christoph Hellwig Fixes: 131e6abc674e ("target: Add TFO->abort_task for aborted task resources release") # v3.15 Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srpt/ib_srpt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index e9c336cff8f5..f367f3db7ff8 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2887,8 +2887,19 @@ static void srpt_queue_tm_rsp(struct se_cmd *cmd) srpt_queue_response(cmd); } +/* + * This function is called for aborted commands if no response is sent to the + * initiator. Make sure that the credits freed by aborting a command are + * returned to the initiator the next time a response is sent by incrementing + * ch->req_lim_delta. + */ static void srpt_aborted_task(struct se_cmd *cmd) { + struct srpt_send_ioctx *ioctx = container_of(cmd, + struct srpt_send_ioctx, cmd); + struct srpt_rdma_ch *ch = ioctx->ch; + + atomic_inc(&ch->req_lim_delta); } static int srpt_queue_status(struct se_cmd *cmd) -- cgit v1.2.3 From bd567d69186fa0eadfce72fe062a74aac9ec0a17 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 30 Dec 2018 00:00:22 +0100 Subject: ASoC: Intel: bytcr_rt5651: Revert "Fix DMIC map headsetmic mapping" commit aee48a9ffa5a128bf4e433c57c39e015ea5b0208 upstream. Commit 37c7401e8c1f ("ASoC: Intel: bytcr_rt5651: Fix DMIC map headsetmic mapping"), changed the headsetmic mapping from IN3P to IN2P, this was based on the observation that all bytcr_rt5651 devices I have access to (7 devices) where all using IN3P for the headsetmic. This was an attempt to unifify / simplify the mapping, but it was wrong. None of those devices was actually using a digital internal mic. Now I've access to a Point of View TAB-P1006W-232 (v1.0) tabler, which does use a DMIC and it does have its headsetmic connected to IN2P, showing that the original mapping was correct, so this commit reverts the change changing the mapping back to IN2P. Fixes: 37c7401e8c1f ("ASoC: Intel: bytcr_rt5651: Fix DMIC map ... mapping") Acked-by: Pierre-Louis Bossart Signed-off-by: Hans de Goede Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/boards/bytcr_rt5651.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index e528995668b7..0ed844f2ad01 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -266,7 +266,7 @@ static const struct snd_soc_dapm_route byt_rt5651_audio_map[] = { static const struct snd_soc_dapm_route byt_rt5651_intmic_dmic_map[] = { {"DMIC L1", NULL, "Internal Mic"}, {"DMIC R1", NULL, "Internal Mic"}, - {"IN3P", NULL, "Headset Mic"}, + {"IN2P", NULL, "Headset Mic"}, }; static const struct snd_soc_dapm_route byt_rt5651_intmic_in1_map[] = { -- cgit v1.2.3 From e8a7a853180d04b5e4390d77db67a72bdebe8742 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Mon, 25 Feb 2019 12:14:20 +0100 Subject: ASoC: rsnd: gen: fix SSI9 4/5/6/7 busif related register address commit 8af6c521cc236534093f9e744cfa004314bfe5ae upstream. Currently each SSI unit 's busif mode/adinr/dalign address is registered by: (in busif4 case) RSND_GEN_M_REG(SSI_BUSIF4_MODE, 0x500, 0x80) RSND_GEN_M_REG(SSI_BUSIF4_ADINR,0x504, 0x80) RSND_GEN_M_REG(SSI_BUSIF4_DALIGN, 0x508, 0x80) But according to user manual 41.1.4 Register Configuration ssi9 4/5/6/7 busif mode/adinr/dalign register address ( SSI9-[4/5/6/7]_BUSIF_[MODE/ADINR/DALIGN] ) are out of this rule. This patch registers ssi9 4/5/6/7 mode/adinr/dalign register as single register, and access these registers in case of SSI9 BUSIF 4/5/6/7. Fixes: commit 8c9d75033340 ("ASoC: rsnd: ssiu: Support BUSIF other than BUSIF0") Signed-off-by: Jiada Wang Signed-off-by: Timo Wischer Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/gen.c | 24 ++++++++++++++++++++++++ sound/soc/sh/rcar/rsnd.h | 27 +++++++++++++++++++++++++++ sound/soc/sh/rcar/ssiu.c | 24 +++++++++++------------- 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c index 7cda60188f41..af19010b9d88 100644 --- a/sound/soc/sh/rcar/gen.c +++ b/sound/soc/sh/rcar/gen.c @@ -255,6 +255,30 @@ static int rsnd_gen2_probe(struct rsnd_priv *priv) RSND_GEN_M_REG(SSI_MODE, 0xc, 0x80), RSND_GEN_M_REG(SSI_CTRL, 0x10, 0x80), RSND_GEN_M_REG(SSI_INT_ENABLE, 0x18, 0x80), + RSND_GEN_S_REG(SSI9_BUSIF0_MODE, 0x48c), + RSND_GEN_S_REG(SSI9_BUSIF0_ADINR, 0x484), + RSND_GEN_S_REG(SSI9_BUSIF0_DALIGN, 0x488), + RSND_GEN_S_REG(SSI9_BUSIF1_MODE, 0x4a0), + RSND_GEN_S_REG(SSI9_BUSIF1_ADINR, 0x4a4), + RSND_GEN_S_REG(SSI9_BUSIF1_DALIGN, 0x4a8), + RSND_GEN_S_REG(SSI9_BUSIF2_MODE, 0x4c0), + RSND_GEN_S_REG(SSI9_BUSIF2_ADINR, 0x4c4), + RSND_GEN_S_REG(SSI9_BUSIF2_DALIGN, 0x4c8), + RSND_GEN_S_REG(SSI9_BUSIF3_MODE, 0x4e0), + RSND_GEN_S_REG(SSI9_BUSIF3_ADINR, 0x4e4), + RSND_GEN_S_REG(SSI9_BUSIF3_DALIGN, 0x4e8), + RSND_GEN_S_REG(SSI9_BUSIF4_MODE, 0xd80), + RSND_GEN_S_REG(SSI9_BUSIF4_ADINR, 0xd84), + RSND_GEN_S_REG(SSI9_BUSIF4_DALIGN, 0xd88), + RSND_GEN_S_REG(SSI9_BUSIF5_MODE, 0xda0), + RSND_GEN_S_REG(SSI9_BUSIF5_ADINR, 0xda4), + RSND_GEN_S_REG(SSI9_BUSIF5_DALIGN, 0xda8), + RSND_GEN_S_REG(SSI9_BUSIF6_MODE, 0xdc0), + RSND_GEN_S_REG(SSI9_BUSIF6_ADINR, 0xdc4), + RSND_GEN_S_REG(SSI9_BUSIF6_DALIGN, 0xdc8), + RSND_GEN_S_REG(SSI9_BUSIF7_MODE, 0xde0), + RSND_GEN_S_REG(SSI9_BUSIF7_ADINR, 0xde4), + RSND_GEN_S_REG(SSI9_BUSIF7_DALIGN, 0xde8), }; static const struct rsnd_regmap_field_conf conf_scu[] = { diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 605e4b934982..90625c57847b 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -191,6 +191,30 @@ enum rsnd_reg { SSI_SYS_STATUS7, HDMI0_SEL, HDMI1_SEL, + SSI9_BUSIF0_MODE, + SSI9_BUSIF1_MODE, + SSI9_BUSIF2_MODE, + SSI9_BUSIF3_MODE, + SSI9_BUSIF4_MODE, + SSI9_BUSIF5_MODE, + SSI9_BUSIF6_MODE, + SSI9_BUSIF7_MODE, + SSI9_BUSIF0_ADINR, + SSI9_BUSIF1_ADINR, + SSI9_BUSIF2_ADINR, + SSI9_BUSIF3_ADINR, + SSI9_BUSIF4_ADINR, + SSI9_BUSIF5_ADINR, + SSI9_BUSIF6_ADINR, + SSI9_BUSIF7_ADINR, + SSI9_BUSIF0_DALIGN, + SSI9_BUSIF1_DALIGN, + SSI9_BUSIF2_DALIGN, + SSI9_BUSIF3_DALIGN, + SSI9_BUSIF4_DALIGN, + SSI9_BUSIF5_DALIGN, + SSI9_BUSIF6_DALIGN, + SSI9_BUSIF7_DALIGN, /* SSI */ SSICR, @@ -209,6 +233,9 @@ enum rsnd_reg { #define SSI_BUSIF_MODE(i) (SSI_BUSIF0_MODE + (i)) #define SSI_BUSIF_ADINR(i) (SSI_BUSIF0_ADINR + (i)) #define SSI_BUSIF_DALIGN(i) (SSI_BUSIF0_DALIGN + (i)) +#define SSI9_BUSIF_MODE(i) (SSI9_BUSIF0_MODE + (i)) +#define SSI9_BUSIF_ADINR(i) (SSI9_BUSIF0_ADINR + (i)) +#define SSI9_BUSIF_DALIGN(i) (SSI9_BUSIF0_DALIGN + (i)) #define SSI_SYS_STATUS(i) (SSI_SYS_STATUS0 + (i)) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index c74991dd18ab..2347f3404c06 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -181,28 +181,26 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, if (rsnd_ssi_use_busif(io)) { int id = rsnd_mod_id(mod); int busif = rsnd_mod_id_sub(mod); + enum rsnd_reg adinr_reg, mode_reg, dalign_reg; - /* - * FIXME - * - * We can't support SSI9-4/5/6/7, because its address is - * out of calculation rule - */ if ((id == 9) && (busif >= 4)) { - struct device *dev = rsnd_priv_to_dev(priv); - - dev_err(dev, "This driver doesn't support SSI%d-%d, so far", - id, busif); + adinr_reg = SSI9_BUSIF_ADINR(busif); + mode_reg = SSI9_BUSIF_MODE(busif); + dalign_reg = SSI9_BUSIF_DALIGN(busif); + } else { + adinr_reg = SSI_BUSIF_ADINR(busif); + mode_reg = SSI_BUSIF_MODE(busif); + dalign_reg = SSI_BUSIF_DALIGN(busif); } - rsnd_mod_write(mod, SSI_BUSIF_ADINR(busif), + rsnd_mod_write(mod, adinr_reg, rsnd_get_adinr_bit(mod, io) | (rsnd_io_is_play(io) ? rsnd_runtime_channel_after_ctu(io) : rsnd_runtime_channel_original(io))); - rsnd_mod_write(mod, SSI_BUSIF_MODE(busif), + rsnd_mod_write(mod, mode_reg, rsnd_get_busif_shift(io, mod) | 1); - rsnd_mod_write(mod, SSI_BUSIF_DALIGN(busif), + rsnd_mod_write(mod, dalign_reg, rsnd_get_dalign(mod, io)); } -- cgit v1.2.3 From 2ed9e9e89a3734364c432db9a8e3e5b823f32791 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 13 Feb 2019 15:04:56 +0800 Subject: ASoC: sunxi: sun50i-codec-analog: Rename hpvcc regulator supply to cpvdd commit 5fd812e6f5ae0376134234ceb70e8de541ccb10d upstream. The A64 datasheet lists the supply rail for the headphone amp's charge pump as "CPVDD". cpvdd-supply is the name of the property for this power rail specified in the device tree bindings. "HPVCC" was the name used in the A33 datasheet for the same function. Rename the supply so it matches the datasheet, bindings, and the subject from the original commit. Fixes: ca0412a05756 ("ASoC: sunxi: sun50i-codec-analog: Add support for cpvdd regulator supply") Signed-off-by: Chen-Yu Tsai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sunxi/sun50i-codec-analog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sunxi/sun50i-codec-analog.c b/sound/soc/sunxi/sun50i-codec-analog.c index df1fed0aa001..d105c90c3706 100644 --- a/sound/soc/sunxi/sun50i-codec-analog.c +++ b/sound/soc/sunxi/sun50i-codec-analog.c @@ -274,7 +274,7 @@ static const struct snd_soc_dapm_widget sun50i_a64_codec_widgets[] = { * stream widgets at the card level. */ - SND_SOC_DAPM_REGULATOR_SUPPLY("hpvcc", 0, 0), + SND_SOC_DAPM_REGULATOR_SUPPLY("cpvdd", 0, 0), SND_SOC_DAPM_MUX("Headphone Source Playback Route", SND_SOC_NOPM, 0, 0, sun50i_codec_hp_src), SND_SOC_DAPM_OUT_DRV("Headphone Amp", SUN50I_ADDA_HP_CTRL, @@ -362,7 +362,7 @@ static const struct snd_soc_dapm_route sun50i_a64_codec_routes[] = { { "Headphone Source Playback Route", "Mixer", "Left Mixer" }, { "Headphone Source Playback Route", "Mixer", "Right Mixer" }, { "Headphone Amp", NULL, "Headphone Source Playback Route" }, - { "Headphone Amp", NULL, "hpvcc" }, + { "Headphone Amp", NULL, "cpvdd" }, { "HP", NULL, "Headphone Amp" }, /* Microphone Routes */ -- cgit v1.2.3 From 934955968fa611706f3f2f06cdbda856b2813420 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:04 +0000 Subject: ASoC: wm_adsp: Correct handling of compressed streams that restart commit 639e5eb3c7d67e407f2a71fccd95323751398f6f upstream. Previously support was added to allow streams to be stopped and started again without the DSP being power cycled and this was done by clearing the buffer state in trigger start. Another supported use-case is using the DSP for a trigger event then opening the compressed stream later to receive the audio, unfortunately clearing the buffer state in trigger start destroys the data received from such a trigger. Correct this issue by moving the call to wm_adsp_buffer_clear to be in trigger stop instead. Fixes: 61fc060c40e6 ("ASoC: wm_adsp: Support streams which can start/stop with DSP active") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm_adsp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 1dd291cebe67..8219e8f0630e 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3443,8 +3443,6 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) } } - wm_adsp_buffer_clear(compr->buf); - /* Trigger the IRQ at one fragment of data */ ret = wm_adsp_buffer_write(compr->buf, HOST_BUFFER_FIELD(high_water_mark), @@ -3456,6 +3454,7 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) } break; case SNDRV_PCM_TRIGGER_STOP: + wm_adsp_buffer_clear(compr->buf); break; default: ret = -EINVAL; -- cgit v1.2.3 From 258604e0a15f25f764901e68a5a22b645666f4d8 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 1 Apr 2019 15:03:54 +0200 Subject: ASoC: dpcm: skip missing substream while applying symmetry commit 6246f283d5e02ac757bd8d9bacde8fdc54c4582d upstream. If for any reason, the backend does not have the requested substream (like capture on a playback only backend), the BE will be skipped in dpcm_be_dai_startup(). However, dpcm_apply_symmetry() does not skip those BE and will dereference the be_substream (NULL) pointer anyway. Like in dpcm_be_dai_startup(), just skip those BE. Fixes: 906c7d690c3b ("ASoC: dpcm: Apply symmetry for DPCM") Signed-off-by: Jerome Brunet Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-pcm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 03f36e534050..0c1dd6bd67ab 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1895,10 +1895,15 @@ static int dpcm_apply_symmetry(struct snd_pcm_substream *fe_substream, struct snd_soc_pcm_runtime *be = dpcm->be; struct snd_pcm_substream *be_substream = snd_soc_dpcm_get_substream(be, stream); - struct snd_soc_pcm_runtime *rtd = be_substream->private_data; + struct snd_soc_pcm_runtime *rtd; struct snd_soc_dai *codec_dai; int i; + /* A backend may not have the requested substream */ + if (!be_substream) + continue; + + rtd = be_substream->private_data; if (rtd->dai_link->be_hw_params_fixup) continue; -- cgit v1.2.3 From 6b4045b37b3275d29e6e3e32569dfb590599e73d Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Fri, 5 Apr 2019 11:19:11 +0200 Subject: ASoC: stm32: fix sai driver name initialisation commit 17d3069ccf06970e2db3f7cbf4335f207524279e upstream. This patch fixes the sai driver structure overwriting which results in a cpu dai name equal NULL. Fixes: 3e086ed ("ASoC: stm32: add SAI driver") Signed-off-by: Arnaud Pouliquen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/stm/stm32_sai_sub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index d4825700b63f..29a131e0569e 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1394,7 +1394,6 @@ static int stm32_sai_sub_dais_init(struct platform_device *pdev, if (!sai->cpu_dai_drv) return -ENOMEM; - sai->cpu_dai_drv->name = dev_name(&pdev->dev); if (STM_SAI_IS_PLAYBACK(sai)) { memcpy(sai->cpu_dai_drv, &stm32_sai_playback_dai, sizeof(stm32_sai_playback_dai)); @@ -1404,6 +1403,7 @@ static int stm32_sai_sub_dais_init(struct platform_device *pdev, sizeof(stm32_sai_capture_dai)); sai->cpu_dai_drv->capture.stream_name = sai->cpu_dai_drv->name; } + sai->cpu_dai_drv->name = dev_name(&pdev->dev); return 0; } -- cgit v1.2.3 From af0aafff4715050f7a3fbdd12708d47bd573ae64 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Jan 2019 07:40:52 -0800 Subject: KVM: VMX: Save RSI to an unused output in the vCPU-run asm blob commit f3689e3f17f064fd4cd5f0cb01ae2395c94f39d9 upstream. RSI is clobbered by the vCPU-run asm blob, but it's not marked as such, probably because GCC doesn't let you mark inputs as clobbered. "Save" RSI to a dummy output so that GCC recognizes it as being clobbered. Fixes: 773e8a0425c9 ("x86/kvm: use Enlightened VMCS when running on Hyper-V") Reviewed-by: Jim Mattson Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 72f1d17711f2..da6fdd5434a1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6465,7 +6465,7 @@ static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) "xor %%edi, %%edi \n\t" "xor %%ebp, %%ebp \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - : ASM_CALL_CONSTRAINT + : ASM_CALL_CONSTRAINT, "=S"((int){0}) : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), [launched]"i"(offsetof(struct vcpu_vmx, __launched)), [fail]"i"(offsetof(struct vcpu_vmx, fail)), -- cgit v1.2.3 From 774e8693399af434ab81a816bce047cb4b710e93 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Jan 2019 07:40:55 -0800 Subject: KVM: nVMX: Remove a rogue "rax" clobber from nested_vmx_check_vmentry_hw() commit 9ce0a07a6f49822238fd4357c02e0dba060a43cc upstream. RAX is not touched by nested_vmx_check_vmentry_hw(), directly or indirectly (e.g. vmx_vmenter()). Remove it from the clobber list. Fixes: 52017608da33 ("KVM: nVMX: add option to perform early consistency checks via H/W") Reviewed-by: Jim Mattson Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a4bcac94392c..8f8c42b04875 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2793,7 +2793,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) [fail]"i"(offsetof(struct vcpu_vmx, fail)), [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), [wordsize]"i"(sizeof(ulong)) - : "rax", "cc", "memory" + : "cc", "memory" ); preempt_enable(); -- cgit v1.2.3 From 99678e9cf282353b9b2938c7271e551b34a76ac9 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 31 Jan 2019 11:26:39 +0800 Subject: kvm: vmx: Fix typos in vmentry/vmexit control setting commit d92935979adba274b1099e67b7f713f6d8413121 upstream. Previously, 'commit f99e3daf94ff ("KVM: x86: Add Intel PT virtualization work mode")' work mode' offered framework to support Intel PT virtualization. However, the patch has some typos in vmx_vmentry_ctrl() and vmx_vmexit_ctrl(), e.g. used wrong flags and wrong variable, which will cause the VM entry failure later. Fixes: 'commit f99e3daf94ff ("KVM: x86: Add Intel PT virtualization work mode")' Signed-off-by: Yu Zhang Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 1abae731c3e4..b26cb680ba38 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -444,7 +444,8 @@ static inline u32 vmx_vmentry_ctrl(void) { u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; if (pt_mode == PT_MODE_SYSTEM) - vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL); + vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | + VM_ENTRY_LOAD_IA32_RTIT_CTL); /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ return vmentry_ctrl & ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); @@ -454,9 +455,10 @@ static inline u32 vmx_vmexit_ctrl(void) { u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; if (pt_mode == PT_MODE_SYSTEM) - vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL); + vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | + VM_EXIT_CLEAR_IA32_RTIT_CTL); /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ - return vmcs_config.vmexit_ctrl & + return vmexit_ctrl & ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); } -- cgit v1.2.3 From 8f2b774cbf81603a8146fb2e2d92dc88001fafec Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 25 Apr 2019 19:01:09 -0700 Subject: KVM: lapic: Check for in-kernel LAPIC before deferencing apic pointer commit b904cb8dff824b79233e82c078837627ebd52717 upstream. ...to avoid dereferencing a null pointer when querying the per-vCPU timer advance. Fixes: 39497d7660d98 ("KVM: lapic: Track lapic timer advance per vCPU") Reported-by: syzbot+f7e65445a40d3e0e4ebf@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 3 --- arch/x86/kvm/x86.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4ae142eea96a..3339697de6e5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1504,9 +1504,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; u64 guest_tsc, tsc_deadline, ns; - if (!lapic_in_kernel(vcpu)) - return; - if (apic->lapic_timer.expired_tscdeadline == 0) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 613dd3c89b94..3eeb7183fc09 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7886,7 +7886,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } trace_kvm_entry(vcpu->vcpu_id); - if (vcpu->arch.apic->lapic_timer.timer_advance_ns) + if (lapic_in_kernel(vcpu) && + vcpu->arch.apic->lapic_timer.timer_advance_ns) wait_lapic_expire(vcpu); guest_enter_irqoff(); -- cgit v1.2.3 From 7a7b0c0ee92fa305b445102e9457884c04d02e31 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 1 Feb 2019 13:02:27 +0530 Subject: platform/x86: intel_pmc_core: Fix PCH IP name commit d6827015e671cd17871c9b7a0fabe06c044f7470 upstream. For Cannonlake and Icelake, the IP name for Res_6 should be SPF i.e. South Port F. No functional change is intended other than just renaming the IP appropriately. Cc: "David E. Box" Cc: Srinivas Pandruvada Fixes: 291101f6a735 ("platform/x86: intel_pmc_core: Add CannonLake PCH support") Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel_pmc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c index a9cbe5be277b..af6f7aece2e7 100644 --- a/drivers/platform/x86/intel_pmc_core.c +++ b/drivers/platform/x86/intel_pmc_core.c @@ -205,7 +205,7 @@ static const struct pmc_bit_map cnp_pfear_map[] = { {"CNVI", BIT(3)}, {"UFS0", BIT(4)}, {"EMMC", BIT(5)}, - {"Res_6", BIT(6)}, + {"SPF", BIT(6)}, {"SBR6", BIT(7)}, {"SBR7", BIT(0)}, -- cgit v1.2.3 From 2f86122bd369e11ebdc4136a56bda0f5484e9b0d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 1 Feb 2019 13:02:25 +0530 Subject: platform/x86: intel_pmc_core: Handle CFL regmap properly commit e50af8332785355de3cb40d9f5e8c45dbfc86f53 upstream. Only Coffeelake should use Cannonlake regmap other than Cannonlake platform. This allows Coffeelake special handling only when there is no matching PCI device and default reg map selected as per CPUID is for Sunrisepoint PCH. This change is needed to enable support for newer SoCs such as Icelake. Cc: "David E. Box" Cc: Srinivas Pandruvada Fixes: 661405bd817b ("platform/x86: intel_pmc_core: Special case for Coffeelake") Acked-by: "David E. Box" Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel_pmc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c index af6f7aece2e7..e88e183914af 100644 --- a/drivers/platform/x86/intel_pmc_core.c +++ b/drivers/platform/x86/intel_pmc_core.c @@ -802,7 +802,7 @@ static int __init pmc_core_probe(void) * Sunrisepoint PCH regmap can't be used. Use Cannonlake PCH regmap * in this case. */ - if (!pci_dev_present(pmc_pci_ids)) + if (pmcdev->map == &spt_reg_map && !pci_dev_present(pmc_pci_ids)) pmcdev->map = &cnp_reg_map; if (lpit_read_residency_count_address(&slp_s0_addr)) -- cgit v1.2.3 From 4cbfe6e7366c0270ac7a979bea3fe514d8e9dfcc Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Sat, 2 Feb 2019 11:09:42 +0200 Subject: IB/core: Unregister notifier before freeing MAD security commit d60667fc398ed34b3c7456b020481c55c760e503 upstream. If the notifier runs after the security context is freed an access of freed memory can occur. Fixes: 47a2b338fe63 ("IB/core: Enforce security on management datagrams") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/security.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1efadbccf394..402449d4a888 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -727,9 +727,10 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent) if (!rdma_protocol_ib(agent->device, agent->port_num)) return; - security_ib_free_security(agent->security); if (agent->lsm_nb_reg) unregister_lsm_notifier(&agent->lsm_nb); + + security_ib_free_security(agent->security); } int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) -- cgit v1.2.3 From ae691c80904e4ea04c1b8e64d9f1e6cddc123209 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Sat, 2 Feb 2019 11:09:43 +0200 Subject: IB/core: Fix potential memory leak while creating MAD agents commit 6e88e672b69f0e627acdae74a527b730ea224b6b upstream. If the MAD agents isn't allowed to manage the subnet, or fails to register for the LSM notifier, the security context is leaked. Free the context in these cases. Fixes: 47a2b338fe63 ("IB/core: Enforce security on management datagrams") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Reported-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/security.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 402449d4a888..7662e9347238 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -710,16 +710,20 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent, dev_name(&agent->device->dev), agent->port_num); if (ret) - return ret; + goto free_security; agent->lsm_nb.notifier_call = ib_mad_agent_security_change; ret = register_lsm_notifier(&agent->lsm_nb); if (ret) - return ret; + goto free_security; agent->smp_allowed = true; agent->lsm_nb_reg = true; return 0; + +free_security: + security_ib_free_security(agent->security); + return ret; } void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent) -- cgit v1.2.3 From a3d147d5826007736ddec61a685ed9f6bac49b59 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Tue, 22 Jan 2019 09:02:05 +0200 Subject: IB/core: Destroy QP if XRC QP fails commit 535005ca8e5e71918d64074032f4b9d4fef8981e upstream. The open-coded variant missed destroy of SELinux created QP, reuse already existing ib_detroy_qp() call and use this opportunity to clean ib_create_qp() from double prints and unclear exit paths. Reported-by: Parav Pandit Fixes: d291f1a65232 ("IB/core: Enforce PKey security on QPs") Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/verbs.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ac011836bb54..3220fb42ecce 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1106,8 +1106,8 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, } EXPORT_SYMBOL(ib_open_qp); -static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, - struct ib_qp_init_attr *qp_init_attr) +static struct ib_qp *create_xrc_qp(struct ib_qp *qp, + struct ib_qp_init_attr *qp_init_attr) { struct ib_qp *real_qp = qp; @@ -1122,10 +1122,10 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, qp_init_attr->qp_context); - if (!IS_ERR(qp)) - __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); - else - real_qp->device->ops.destroy_qp(real_qp); + if (IS_ERR(qp)) + return qp; + + __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); return qp; } @@ -1156,10 +1156,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, return qp; ret = ib_create_qp_security(qp, device); - if (ret) { - ib_destroy_qp(qp); - return ERR_PTR(ret); - } + if (ret) + goto err; qp->real_qp = qp; qp->qp_type = qp_init_attr->qp_type; @@ -1172,8 +1170,15 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, INIT_LIST_HEAD(&qp->sig_mrs); qp->port = 0; - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) - return ib_create_xrc_qp(qp, qp_init_attr); + if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { + struct ib_qp *xrc_qp = create_xrc_qp(qp, qp_init_attr); + + if (IS_ERR(xrc_qp)) { + ret = PTR_ERR(xrc_qp); + goto err; + } + return xrc_qp; + } qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; @@ -1200,11 +1205,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) { ret = rdma_rw_init_mrs(qp, qp_init_attr); - if (ret) { - pr_err("failed to init MR pool ret= %d\n", ret); - ib_destroy_qp(qp); - return ERR_PTR(ret); - } + if (ret) + goto err; } /* @@ -1217,6 +1219,11 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, device->attrs.max_sge_rd); return qp; + +err: + ib_destroy_qp(qp); + return ERR_PTR(ret); + } EXPORT_SYMBOL(ib_create_qp); -- cgit v1.2.3 From 290d72e8c40472121d1212500bb1ebb40e29e7e0 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 3 Apr 2019 15:14:44 -0700 Subject: Input: snvs_pwrkey - initialize necessary driver data before enabling IRQ commit bf2a7ca39fd3ab47ef71c621a7ee69d1813b1f97 upstream. SNVS IRQ is requested before necessary driver data initialized, if there is a pending IRQ during driver probe phase, kernel NULL pointer panic will occur in IRQ handler. To avoid such scenario, just initialize necessary driver data before enabling IRQ. This patch is inspired by NXP's internal kernel tree. Fixes: d3dc6e232215 ("input: keyboard: imx: add snvs power key driver") Signed-off-by: Anson Huang Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/snvs_pwrkey.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c index effb63205d3d..4c67cf30a5d9 100644 --- a/drivers/input/keyboard/snvs_pwrkey.c +++ b/drivers/input/keyboard/snvs_pwrkey.c @@ -148,6 +148,9 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; } + pdata->input = input; + platform_set_drvdata(pdev, pdata); + error = devm_request_irq(&pdev->dev, pdata->irq, imx_snvs_pwrkey_interrupt, 0, pdev->name, pdev); @@ -163,9 +166,6 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; } - pdata->input = input; - platform_set_drvdata(pdev, pdata); - device_init_wakeup(&pdev->dev, pdata->wakeup); return 0; -- cgit v1.2.3 From 92cd6ae804615d0f415191319315ca6bbf4651d6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 5 Feb 2019 14:40:40 -0800 Subject: Input: stmfts - acknowledge that setting brightness is a blocking call commit 937c4e552fd1174784045684740edfcea536159d upstream. We need to turn regulators on and off when switching brightness, and that may block, therefore we have to set stmfts_brightness_set() as LED's brightness_set_blocking() method. Fixes: 78bcac7b2ae1 ("Input: add support for the STMicroelectronics FingerTip touchscreen") Acked-by: Andi Shyti Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/stmfts.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c index 704e99046916..b6f95f20f924 100644 --- a/drivers/input/touchscreen/stmfts.c +++ b/drivers/input/touchscreen/stmfts.c @@ -106,27 +106,29 @@ struct stmfts_data { bool running; }; -static void stmfts_brightness_set(struct led_classdev *led_cdev, +static int stmfts_brightness_set(struct led_classdev *led_cdev, enum led_brightness value) { struct stmfts_data *sdata = container_of(led_cdev, struct stmfts_data, led_cdev); int err; - if (value == sdata->led_status || !sdata->ledvdd) - return; - - if (!value) { - regulator_disable(sdata->ledvdd); - } else { - err = regulator_enable(sdata->ledvdd); - if (err) - dev_warn(&sdata->client->dev, - "failed to disable ledvdd regulator: %d\n", - err); + if (value != sdata->led_status && sdata->ledvdd) { + if (!value) { + regulator_disable(sdata->ledvdd); + } else { + err = regulator_enable(sdata->ledvdd); + if (err) { + dev_warn(&sdata->client->dev, + "failed to disable ledvdd regulator: %d\n", + err); + return err; + } + } + sdata->led_status = value; } - sdata->led_status = value; + return 0; } static enum led_brightness stmfts_brightness_get(struct led_classdev *led_cdev) @@ -608,7 +610,7 @@ static int stmfts_enable_led(struct stmfts_data *sdata) sdata->led_cdev.name = STMFTS_DEV_NAME; sdata->led_cdev.max_brightness = LED_ON; sdata->led_cdev.brightness = LED_OFF; - sdata->led_cdev.brightness_set = stmfts_brightness_set; + sdata->led_cdev.brightness_set_blocking = stmfts_brightness_set; sdata->led_cdev.brightness_get = stmfts_brightness_get; err = devm_led_classdev_register(&sdata->client->dev, &sdata->led_cdev); -- cgit v1.2.3 From a3769982eceeb4a0ff4970210c6cdb55252e28fb Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Sat, 23 Feb 2019 03:18:25 +0000 Subject: gpio: mxc: add check to return defer probe if clock tree NOT ready commit a329bbe707cee2cf8c660890ef2ad0d00ec7e8a3 upstream. On i.MX8MQ platform, clock driver uses platform driver model and it is probed after GPIO driver, so when GPIO driver fails to get clock, it should check the error type to decide whether to return defer probe or just ignore the clock operation. Fixes: 2808801aab8a ("gpio: mxc: add clock operation") Signed-off-by: Anson Huang Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-mxc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index 2d1dfa1e0745..e86e61dda4b7 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -438,8 +438,11 @@ static int mxc_gpio_probe(struct platform_device *pdev) /* the controller clock is optional */ port->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(port->clk)) + if (IS_ERR(port->clk)) { + if (PTR_ERR(port->clk) == -EPROBE_DEFER) + return -EPROBE_DEFER; port->clk = NULL; + } err = clk_prepare_enable(port->clk); if (err) { -- cgit v1.2.3 From b7e8c254560a057561644b28074be697336f1ec6 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 12 Dec 2018 10:10:55 -0500 Subject: selinux: avoid silent denials in permissive mode under RCU walk commit 3a28cff3bd4bf43f02be0c4e7933aebf3dc8197e upstream. commit 0dc1ba24f7fff6 ("SELINUX: Make selinux cache VFS RCU walks safe") results in no audit messages at all if in permissive mode because the cache is updated during the rcu walk and thus no denial occurs on the subsequent ref walk. Fix this by not updating the cache when performing a non-blocking permission check. This only affects search and symlink read checks during rcu walk. Fixes: 0dc1ba24f7fff6 ("SELINUX: Make selinux cache VFS RCU walks safe") Reported-by: BMK Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/avc.c | 23 +++++++++++++++++++++-- security/selinux/hooks.c | 4 +++- security/selinux/include/avc.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 635e5c1e3e48..5de18a6d5c3f 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -838,6 +838,7 @@ out: * @ssid,@tsid,@tclass : identifier of an AVC entry * @seqno : sequence number when decision was made * @xpd: extended_perms_decision to be added to the node + * @flags: the AVC_* flags, e.g. AVC_NONBLOCKING, AVC_EXTENDED_PERMS, or 0. * * if a valid AVC entry doesn't exist,this function returns -ENOENT. * if kmalloc() called internal returns NULL, this function returns -ENOMEM. @@ -856,6 +857,23 @@ static int avc_update_node(struct selinux_avc *avc, struct hlist_head *head; spinlock_t *lock; + /* + * If we are in a non-blocking code path, e.g. VFS RCU walk, + * then we must not add permissions to a cache entry + * because we cannot safely audit the denial. Otherwise, + * during the subsequent blocking retry (e.g. VFS ref walk), we + * will find the permissions already granted in the cache entry + * and won't audit anything at all, leading to silent denials in + * permissive mode that only appear when in enforcing mode. + * + * See the corresponding handling in slow_avc_audit(), and the + * logic in selinux_inode_follow_link and selinux_inode_permission + * for the VFS MAY_NOT_BLOCK flag, which is transliterated into + * AVC_NONBLOCKING for avc_has_perm_noaudit(). + */ + if (flags & AVC_NONBLOCKING) + return 0; + node = avc_alloc_node(avc); if (!node) { rc = -ENOMEM; @@ -1115,7 +1133,7 @@ decision: * @tsid: target security identifier * @tclass: target security class * @requested: requested permissions, interpreted based on @tclass - * @flags: AVC_STRICT or 0 + * @flags: AVC_STRICT, AVC_NONBLOCKING, or 0 * @avd: access vector decisions * * Check the AVC to determine whether the @requested permissions are granted @@ -1199,7 +1217,8 @@ int avc_has_perm_flags(struct selinux_state *state, struct av_decision avd; int rc, rc2; - rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, 0, + rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, + (flags & MAY_NOT_BLOCK) ? AVC_NONBLOCKING : 0, &avd); rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 07b11b5aaf1f..95cb58c17224 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2985,7 +2985,9 @@ static int selinux_inode_permission(struct inode *inode, int mask) return PTR_ERR(isec); rc = avc_has_perm_noaudit(&selinux_state, - sid, isec->sid, isec->sclass, perms, 0, &avd); + sid, isec->sid, isec->sclass, perms, + (flags & MAY_NOT_BLOCK) ? AVC_NONBLOCKING : 0, + &avd); audited = avc_audit_required(perms, &avd, rc, from_access ? FILE__AUDIT_ACCESS : 0, &denied); diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index ef899bcfd2cb..74ea50977c20 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -142,6 +142,7 @@ static inline int avc_audit(struct selinux_state *state, #define AVC_STRICT 1 /* Ignore permissive mode. */ #define AVC_EXTENDED_PERMS 2 /* update extended permissions */ +#define AVC_NONBLOCKING 4 /* non blocking */ int avc_has_perm_noaudit(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, -- cgit v1.2.3 From 420e89889f62af93cc2fde1ccdaf6b767318be28 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 21 Dec 2018 21:18:52 +0100 Subject: selinux: never allow relabeling on context mounts commit a83d6ddaebe541570291205cb538e35ad4ff94f9 upstream. In the SECURITY_FS_USE_MNTPOINT case we never want to allow relabeling files/directories, so we should never set the SBLABEL_MNT flag. The 'special handling' in selinux_is_sblabel_mnt() is only intended for when the behavior is set to SECURITY_FS_USE_GENFS. While there, make the logic in selinux_is_sblabel_mnt() more explicit and add a BUILD_BUG_ON() to make sure that introducing a new SECURITY_FS_USE_* forces a review of the logic. Fixes: d5f3a5f6e7e7 ("selinux: add security in-core xattr support for pstore and debugfs") Signed-off-by: Ondrej Mosnacek Reviewed-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 95cb58c17224..b005283f0090 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -534,16 +534,10 @@ static int may_context_mount_inode_relabel(u32 sid, return rc; } -static int selinux_is_sblabel_mnt(struct super_block *sb) +static int selinux_is_genfs_special_handling(struct super_block *sb) { - struct superblock_security_struct *sbsec = sb->s_security; - - return sbsec->behavior == SECURITY_FS_USE_XATTR || - sbsec->behavior == SECURITY_FS_USE_TRANS || - sbsec->behavior == SECURITY_FS_USE_TASK || - sbsec->behavior == SECURITY_FS_USE_NATIVE || - /* Special handling. Genfs but also in-core setxattr handler */ - !strcmp(sb->s_type->name, "sysfs") || + /* Special handling. Genfs but also in-core setxattr handler */ + return !strcmp(sb->s_type->name, "sysfs") || !strcmp(sb->s_type->name, "pstore") || !strcmp(sb->s_type->name, "debugfs") || !strcmp(sb->s_type->name, "tracefs") || @@ -553,6 +547,34 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) !strcmp(sb->s_type->name, "cgroup2"))); } +static int selinux_is_sblabel_mnt(struct super_block *sb) +{ + struct superblock_security_struct *sbsec = sb->s_security; + + /* + * IMPORTANT: Double-check logic in this function when adding a new + * SECURITY_FS_USE_* definition! + */ + BUILD_BUG_ON(SECURITY_FS_USE_MAX != 7); + + switch (sbsec->behavior) { + case SECURITY_FS_USE_XATTR: + case SECURITY_FS_USE_TRANS: + case SECURITY_FS_USE_TASK: + case SECURITY_FS_USE_NATIVE: + return 1; + + case SECURITY_FS_USE_GENFS: + return selinux_is_genfs_special_handling(sb); + + /* Never allow relabeling on context mounts */ + case SECURITY_FS_USE_MNTPOINT: + case SECURITY_FS_USE_NONE: + default: + return 0; + } +} + static int sb_finish_set_opts(struct super_block *sb) { struct superblock_security_struct *sbsec = sb->s_security; -- cgit v1.2.3 From 0f92374871e4f2c926866a680d6e79d8f8686343 Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Sat, 9 Feb 2019 15:01:38 +0100 Subject: mac80211: Honor SW_CRYPTO_CONTROL for unicast keys in AP VLAN mode commit 78ad2341521d5ea96cb936244ed4c4c4ef9ec13b upstream. Restore SW_CRYPTO_CONTROL operation on AP_VLAN interfaces for unicast keys, the original override was intended to be done for group keys as those are treated specially by mac80211 and would always have been rejected. Now the situation is that AP_VLAN support must be enabled by the driver if it can support it (meaning it can support software crypto GTK TX). Thus, also simplify the code - if we get here with AP_VLAN and non- pairwise key, software crypto must be used (driver doesn't know about the interface) and can be used (driver must've advertised AP_VLAN if it also uses SW_CRYPTO_CONTROL). Fixes: db3bdcb9c3ff ("mac80211: allow AP_VLAN operation on crypto controlled devices") Signed-off-by: Alexander Wetzel [rewrite commit message] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/key.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 4700718e010f..37e372896230 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -167,8 +167,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) * The driver doesn't know anything about VLAN interfaces. * Hence, don't send GTKs for VLAN interfaces to the driver. */ - if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) + if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + ret = 1; goto out_unsupported; + } } ret = drv_set_key(key->local, SET_KEY, sdata, @@ -213,11 +215,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) /* all of these we can do in software - if driver can */ if (ret == 1) return 0; - if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return 0; + if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) return -EINVAL; - } return 0; default: return -EINVAL; -- cgit v1.2.3 From aafdf8479a7f09c988f38c426530456aabf73dd7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 26 Feb 2019 10:09:35 +0530 Subject: powerpc/mm/hash: Handle mmap_min_addr correctly in get_unmapped_area topdown search commit 3b4d07d2674f6b4a9281031f99d1f7efd325b16d upstream. When doing top-down search the low_limit is not PAGE_SIZE but rather max(PAGE_SIZE, mmap_min_addr). This handle cases in which mmap_min_addr > PAGE_SIZE. Fixes: fba2369e6ceb ("mm: use vm_unmapped_area() on powerpc architecture") Reviewed-by: Laurent Dufour Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/slice.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 06898c13901d..aec91dbcdc0b 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -377,6 +378,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long addr, found, prev; struct vm_unmapped_area_info info; + unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr); info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; @@ -393,7 +395,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, if (high_limit > DEFAULT_MAP_WINDOW) addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW; - while (addr > PAGE_SIZE) { + while (addr > min_addr) { info.high_limit = addr; if (!slice_scan_available(addr - 1, available, 0, &addr)) continue; @@ -405,8 +407,8 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * Check if we need to reduce the range, or if we can * extend it to cover the previous available slice. */ - if (addr < PAGE_SIZE) - addr = PAGE_SIZE; + if (addr < min_addr) + addr = min_addr; else if (slice_scan_available(addr - 1, available, 0, &prev)) { addr = prev; goto prev_slice; @@ -528,7 +530,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, addr = _ALIGN_UP(addr, page_size); slice_dbg(" aligned addr=%lx\n", addr); /* Ignore hint if it's too large or overlaps a VMA */ - if (addr > high_limit - len || + if (addr > high_limit - len || addr < mmap_min_addr || !slice_area_is_free(mm, addr, len)) addr = 0; } -- cgit v1.2.3 From 9e5b35b444789f402b6f251dd2e51eeb16e7dfe0 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Feb 2019 12:59:40 -0800 Subject: x86/mce: Improve error message when kernel cannot recover, p2 commit 41f035a86b5b72a4f947c38e94239d20d595352a upstream. In c7d606f560e4 ("x86/mce: Improve error message when kernel cannot recover") a case was added for a machine check caused by a DATA access to poison memory from the kernel. A case should have been added also for an uncorrectable error during an instruction fetch in the kernel. Add that extra case so the error message now reads: mce: [Hardware Error]: Machine check: Instruction fetch error in kernel Fixes: c7d606f560e4 ("x86/mce: Improve error message when kernel cannot recover") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Pu Wen Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190225205940.15226-1-tony.luck@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mce/severity.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index dc3e26e905a3..65201e180fe0 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -165,6 +165,11 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), KERNEL ), + MCESEV( + PANIC, "Instruction fetch error in kernel", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), + KERNEL + ), #endif MCESEV( PANIC, "Action required: unknown MCACOD", -- cgit v1.2.3 From ad5ef880a0e38162adea1b4c50597a395402814f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20M=C3=BCller?= Date: Mon, 8 Apr 2019 15:33:54 +0200 Subject: clk: x86: Add system specific quirk to mark clocks as critical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7c2e07130090ae001a97a6b65597830d6815e93e upstream. Since commit 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL"), the pmc_plt_clocks of the Bay Trail SoC are unconditionally gated off. Unfortunately this will break systems where these clocks are used for external purposes beyond the kernel's knowledge. Fix it by implementing a system specific quirk to mark the necessary pmc_plt_clks as critical. Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Signed-off-by: David Müller Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/x86/clk-pmc-atom.c | 14 +++++++++++--- drivers/platform/x86/pmc_atom.c | 21 +++++++++++++++++++++ include/linux/platform_data/x86/clk-pmc-atom.h | 3 +++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c index d977193842df..19174835693b 100644 --- a/drivers/clk/x86/clk-pmc-atom.c +++ b/drivers/clk/x86/clk-pmc-atom.c @@ -165,7 +165,7 @@ static const struct clk_ops plt_clk_ops = { }; static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id, - void __iomem *base, + const struct pmc_clk_data *pmc_data, const char **parent_names, int num_parents) { @@ -184,9 +184,17 @@ static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id, init.num_parents = num_parents; pclk->hw.init = &init; - pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE; + pclk->reg = pmc_data->base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE; spin_lock_init(&pclk->lock); + /* + * On some systems, the pmc_plt_clocks already enabled by the + * firmware are being marked as critical to avoid them being + * gated by the clock framework. + */ + if (pmc_data->critical && plt_clk_is_enabled(&pclk->hw)) + init.flags |= CLK_IS_CRITICAL; + ret = devm_clk_hw_register(&pdev->dev, &pclk->hw); if (ret) { pclk = ERR_PTR(ret); @@ -332,7 +340,7 @@ static int plt_clk_probe(struct platform_device *pdev) return PTR_ERR(parent_names); for (i = 0; i < PMC_CLK_NUM; i++) { - data->clks[i] = plt_clk_register(pdev, i, pmc_data->base, + data->clks[i] = plt_clk_register(pdev, i, pmc_data, parent_names, data->nparents); if (IS_ERR(data->clks[i])) { err = PTR_ERR(data->clks[i]); diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 8f018b3f3cd4..eaec2d306481 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -391,11 +392,27 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc) } #endif /* CONFIG_DEBUG_FS */ +/* + * Some systems need one or more of their pmc_plt_clks to be + * marked as critical. + */ +static const struct dmi_system_id critclk_systems[] __initconst = { + { + .ident = "MPL CEC1x", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"), + DMI_MATCH(DMI_PRODUCT_NAME, "CEC10 Family"), + }, + }, + { /*sentinel*/ } +}; + static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap, const struct pmc_data *pmc_data) { struct platform_device *clkdev; struct pmc_clk_data *clk_data; + const struct dmi_system_id *d = dmi_first_match(critclk_systems); clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); if (!clk_data) @@ -403,6 +420,10 @@ static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap, clk_data->base = pmc_regmap; /* offset is added by client */ clk_data->clks = pmc_data->clks; + if (d) { + clk_data->critical = true; + pr_info("%s critclks quirk enabled\n", d->ident); + } clkdev = platform_device_register_data(&pdev->dev, "clk-pmc-atom", PLATFORM_DEVID_NONE, diff --git a/include/linux/platform_data/x86/clk-pmc-atom.h b/include/linux/platform_data/x86/clk-pmc-atom.h index 3ab892208343..7a37ac27d0fb 100644 --- a/include/linux/platform_data/x86/clk-pmc-atom.h +++ b/include/linux/platform_data/x86/clk-pmc-atom.h @@ -35,10 +35,13 @@ struct pmc_clk { * * @base: PMC clock register base offset * @clks: pointer to set of registered clocks, typically 0..5 + * @critical: flag to indicate if firmware enabled pmc_plt_clks + * should be marked as critial or not */ struct pmc_clk_data { void __iomem *base; const struct pmc_clk *clks; + bool critical; }; #endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */ -- cgit v1.2.3 From 245a1a555e337bfd06d35786d56660fcae164e36 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 4 Apr 2019 10:03:13 +0800 Subject: x86/mm/KASLR: Fix the size of the direct mapping section commit ec3937107ab43f3e8b2bc9dad95710043c462ff7 upstream. kernel_randomize_memory() uses __PHYSICAL_MASK_SHIFT to calculate the maximum amount of system RAM supported. The size of the direct mapping section is obtained from the smaller one of the below two values: (actual system RAM size + padding size) vs (max system RAM size supported) This calculation is wrong since commit b83ce5ee9147 ("x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52"). In it, __PHYSICAL_MASK_SHIFT was changed to be 52, regardless of whether the kernel is using 4-level or 5-level page tables. Thus, it will always use 4 PB as the maximum amount of system RAM, even in 4-level paging mode where it should actually be 64 TB. Thus, the size of the direct mapping section will always be the sum of the actual system RAM size plus the padding size. Even when the amount of system RAM is 64 TB, the following layout will still be used. Obviously KALSR will be weakened significantly. |____|_______actual RAM_______|_padding_|______the rest_______| 0 64TB ~120TB Instead, it should be like this: |____|_______actual RAM_______|_________the rest______________| 0 64TB ~120TB The size of padding region is controlled by CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING, which is 10 TB by default. The above issue only exists when CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING is set to a non-zero value, which is the case when CONFIG_MEMORY_HOTPLUG is enabled. Otherwise, using __PHYSICAL_MASK_SHIFT doesn't affect KASLR. Fix it by replacing __PHYSICAL_MASK_SHIFT with MAX_PHYSMEM_BITS. [ bp: Massage commit message. ] Fixes: b83ce5ee9147 ("x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52") Signed-off-by: Baoquan He Signed-off-by: Borislav Petkov Reviewed-by: Thomas Garnier Acked-by: Kirill A. Shutemov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Kees Cook Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: frank.ramsay@hpe.com Cc: herbert@gondor.apana.org.au Cc: kirill@shutemov.name Cc: mike.travis@hpe.com Cc: thgarnie@google.com Cc: x86-ml Cc: yamada.masahiro@socionext.com Link: https://lkml.kernel.org/r/20190417083536.GE7065@MiWiFi-R3L-srv Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 3f452ffed7e9..d669c5e797e0 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void) if (!kaslr_memory_enabled()) return; - kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); + kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT); kaslr_regions[1].size_tb = VMALLOC_SIZE_TB; /* -- cgit v1.2.3 From 8ecf2c08aee9a076ee46277e1248052089b426c9 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 23 Apr 2019 12:58:11 -0400 Subject: x86/mm: Fix a crash with kmemleak_scan() commit 0d02113b31b2017dd349ec9df2314e798a90fa6e upstream. The first kmemleak_scan() call after boot would trigger the crash below because this callpath: kernel_init free_initmem mem_encrypt_free_decrypted_mem free_init_pages unmaps memory inside the .bss when DEBUG_PAGEALLOC=y. kmemleak_init() will register the .data/.bss sections and then kmemleak_scan() will scan those addresses and dereference them looking for pointer references. If free_init_pages() frees and unmaps pages in those sections, kmemleak_scan() will crash if referencing one of those addresses: BUG: unable to handle kernel paging request at ffffffffbd402000 CPU: 12 PID: 325 Comm: kmemleak Not tainted 5.1.0-rc4+ #4 RIP: 0010:scan_block Call Trace: scan_gray_list kmemleak_scan kmemleak_scan_thread kthread ret_from_fork Since kmemleak_free_part() is tolerant to unknown objects (not tracked by kmemleak), it is fine to call it from free_init_pages() even if not all address ranges passed to this function are known to kmemleak. [ bp: Massage. ] Fixes: b3f0907c71e0 ("x86/mm: Add .bss..decrypted section to hold shared variables") Signed-off-by: Qian Cai Signed-off-by: Borislav Petkov Reviewed-by: Catalin Marinas Cc: Andy Lutomirski Cc: Brijesh Singh Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190423165811.36699-1-cai@lca.pw Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f905a2371080..8dacdb96899e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) if (debug_pagealloc_enabled()) { pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n", begin, end - 1); + /* + * Inform kmemleak about the hole in the memory since the + * corresponding pages will be unmapped. + */ + kmemleak_free_part((void *)begin, end - begin); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); } else { /* -- cgit v1.2.3 From b304ba5ca081051a6c015ec2acffb19d27aa979b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Apr 2019 10:03:35 +0200 Subject: x86/mm/tlb: Revert "x86/mm: Align TLB invalidation info" commit 780e0106d468a2962b16b52fdf42898f2639e0a0 upstream. Revert the following commit: 515ab7c41306: ("x86/mm: Align TLB invalidation info") I found out (the hard way) that under some .config options (notably L1_CACHE_SHIFT=7) and compiler combinations this on-stack alignment leads to a 320 byte stack usage, which then triggers a KASAN stack warning elsewhere. Using 320 bytes of stack space for a 40 byte structure is ludicrous and clearly not right. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Acked-by: Nadav Amit Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 515ab7c41306 ("x86/mm: Align TLB invalidation info") Link: http://lkml.kernel.org/r/20190416080335.GM7905@worktop.programming.kicks-ass.net [ Minor changelog edits. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 999d6d8f0bef..9a49335e717a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -731,7 +731,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, { int cpu; - struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = { + struct flush_tlb_info info = { .mm = mm, .stride_shift = stride_shift, .freed_tables = freed_tables, -- cgit v1.2.3 From 8259fd1be8d1e080cd0a85360f14bc9de0b2d363 Mon Sep 17 00:00:00 2001 From: Nicolas Le Bayon Date: Wed, 6 Mar 2019 15:12:16 +0000 Subject: i2c: i2c-stm32f7: Fix SDADEL minimum formula commit c86da50cfd840edf223a242580913692acddbcf6 upstream. It conforms with Reference Manual I2C timing section. Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Nicolas Le Bayon Signed-off-by: Bich Hemon Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-stm32f7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 13e1213561d4..4284fc991cfd 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -432,7 +432,7 @@ static int stm32f7_i2c_compute_timing(struct stm32f7_i2c_dev *i2c_dev, STM32F7_I2C_ANALOG_FILTER_DELAY_MAX : 0); dnf_delay = setup->dnf * i2cclk; - sdadel_min = setup->fall_time - i2c_specs[setup->speed].hddat_min - + sdadel_min = i2c_specs[setup->speed].hddat_min + setup->fall_time - af_delay_min - (setup->dnf + 3) * i2cclk; sdadel_max = i2c_specs[setup->speed].vddat_max - setup->rise_time - -- cgit v1.2.3 From 387b15d5f8edd2a88739314031c040b4cba03cf5 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 29 Dec 2017 07:22:26 -0500 Subject: media: v4l2: i2c: ov7670: Fix PLL bypass register values commit 61da76beef1e4f0b6ba7be4f8d0cf0dac7ce1f55 upstream. The following commits: commit f6dd927f34d6 ("[media] media: ov7670: calculate framerate properly for ov7675") commit 04ee6d92047e ("[media] media: ov7670: add possibility to bypass pll for ov7675") introduced the ability to bypass PLL multiplier and use input clock (xvclk) as pixel clock output frequency for ov7675 sensor. PLL is bypassed using register DBLV[7:6], according to ov7670 and ov7675 sensor manuals. Macros used to set DBLV register seem wrong in the driver, as their values do not match what reported in the datasheet. Fix by changing DBLV_* macros to use bits [7:6] and set bits [3:0] to default 0x0a reserved value (according to datasheets). While at there, remove a write to DBLV register in "ov7675_set_framerate()" that over-writes the previous one to the same register that takes "info->pll_bypass" flag into account instead of setting PLL multiplier to 4x unconditionally. And, while at there, since "info->pll_bypass" is only used in set/get_framerate() functions used by ov7675 only, it is not necessary to check for the device id at probe time to make sure that when using ov7670 "info->pll_bypass" is set to false. Fixes: f6dd927f34d6 ("[media] media: ov7670: calculate framerate properly for ov7675") Signed-off-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ov7670.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c index a70a6ff7b36e..4939a83b50e4 100644 --- a/drivers/media/i2c/ov7670.c +++ b/drivers/media/i2c/ov7670.c @@ -160,10 +160,10 @@ MODULE_PARM_DESC(debug, "Debug level (0-1)"); #define REG_GFIX 0x69 /* Fix gain control */ #define REG_DBLV 0x6b /* PLL control an debugging */ -#define DBLV_BYPASS 0x00 /* Bypass PLL */ -#define DBLV_X4 0x01 /* clock x4 */ -#define DBLV_X6 0x10 /* clock x6 */ -#define DBLV_X8 0x11 /* clock x8 */ +#define DBLV_BYPASS 0x0a /* Bypass PLL */ +#define DBLV_X4 0x4a /* clock x4 */ +#define DBLV_X6 0x8a /* clock x6 */ +#define DBLV_X8 0xca /* clock x8 */ #define REG_SCALING_XSC 0x70 /* Test pattern and horizontal scale factor */ #define TEST_PATTTERN_0 0x80 @@ -863,7 +863,7 @@ static int ov7675_set_framerate(struct v4l2_subdev *sd, if (ret < 0) return ret; - return ov7670_write(sd, REG_DBLV, DBLV_X4); + return 0; } static void ov7670_get_framerate_legacy(struct v4l2_subdev *sd, @@ -1801,11 +1801,7 @@ static int ov7670_probe(struct i2c_client *client, if (config->clock_speed) info->clock_speed = config->clock_speed; - /* - * It should be allowed for ov7670 too when it is migrated to - * the new frame rate formula. - */ - if (config->pll_bypass && id->driver_data != MODEL_OV7670) + if (config->pll_bypass) info->pll_bypass = true; if (config->pclk_hb_disable) -- cgit v1.2.3 From ec031ab048703ee0602948bb2bd7820464df3e9a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 2 Apr 2019 13:49:14 +0100 Subject: ASoC: wm_adsp: Check for buffer in trigger stop commit 43d147be5738a9ed6cfb25c285ac50d6dd5793be upstream. Trigger stop can be called in situations where trigger start failed and as such it can't be assumed the buffer is already attached to the compressed stream or a NULL pointer may be dereferenced. Fixes: 639e5eb3c7d6 ("ASoC: wm_adsp: Correct handling of compressed streams that restart") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Cc: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm_adsp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 8219e8f0630e..0600e4404f90 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3454,7 +3454,8 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) } break; case SNDRV_PCM_TRIGGER_STOP: - wm_adsp_buffer_clear(compr->buf); + if (wm_adsp_compr_attached(compr)) + wm_adsp_buffer_clear(compr->buf); break; default: ret = -EINVAL; -- cgit v1.2.3 From f4572f615a4f6d4e7f746176c223c9d2caa316e2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Apr 2019 17:50:48 -0700 Subject: mm/kmemleak.c: fix unused-function warning commit dce5b0bdeec61bdbee56121ceb1d014151d5cab1 upstream. The only references outside of the #ifdef have been removed, so now we get a warning in non-SMP configurations: mm/kmemleak.c:1404:13: error: unused function 'scan_large_block' [-Werror,-Wunused-function] Add a new #ifdef around it. Link: http://lkml.kernel.org/r/20190416123148.3502045-1-arnd@arndb.de Fixes: 298a32b13208 ("kmemleak: powerpc: skip scanning holes in the .bss section") Signed-off-by: Arnd Bergmann Acked-by: Catalin Marinas Cc: Vincent Whitchurch Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 6c318f5ac234..2e435b8142e5 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1401,6 +1401,7 @@ static void scan_block(void *_start, void *_end, /* * Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency. */ +#ifdef CONFIG_SMP static void scan_large_block(void *start, void *end) { void *next; @@ -1412,6 +1413,7 @@ static void scan_large_block(void *start, void *end) cond_resched(); } } +#endif /* * Scan a memory block corresponding to a kmemleak_object. A condition is -- cgit v1.2.3 From 274ede3e1a5fb3d0fd33acafb08993e95972c51f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 May 2019 07:23:00 +0200 Subject: Linux 5.0.14 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 51a819544505..5ce29665eeed 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 13 +SUBLEVEL = 14 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3 From 634424f6337386fd17c782070d4b8402d264d9c7 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 17 Apr 2019 09:49:39 +0800 Subject: net: stmmac: Use bfsize1 in ndesc_init_rx_desc commit f87db4dbd52f2f8a170a2b51cb0926221ca7c9e2 upstream. gcc warn this: drivers/net/ethernet/stmicro/stmmac/norm_desc.c: In function ndesc_init_rx_desc: drivers/net/ethernet/stmicro/stmmac/norm_desc.c:138:6: warning: variable 'bfsize1' set but not used [-Wunused-but-set-variable] Like enh_desc_init_rx_desc, we should use bfsize1 in ndesc_init_rx_desc to calculate 'p->des1' Fixes: 583e63614149 ("net: stmmac: use correct DMA buffer size in the RX descriptor") Signed-off-by: YueHaibing Reviewed-by: Aaro Koskinen Signed-off-by: David S. Miller Cc: Nobuhiro Iwamatsu Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index b7dd4e3c760d..6d690678c20e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -140,7 +140,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, p->des0 |= cpu_to_le32(RDES0_OWN); bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); - p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK); + p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); -- cgit v1.2.3 From 7c03a3534d24924ae6f87ea90b36fc73c4fd7c9a Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 12 Apr 2019 23:34:45 +0000 Subject: Drivers: hv: vmbus: Remove the undesired put_cpu_ptr() in hv_synic_cleanup() commit a0033bd1eae4650b69be07c17cb87393da584563 upstream. With CONFIG_DEBUG_PREEMPT=y, the put_cpu_ptr() triggers an underflow warning in preempt_count_sub(). Fixes: 37cdd991fac8 ("vmbus: put related per-cpu variable together") Cc: stable@vger.kernel.org Cc: Stephen Hemminger Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin (Microsoft) Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 632d25674e7f..45653029ee18 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -408,7 +408,6 @@ int hv_synic_cleanup(unsigned int cpu) clockevents_unbind_device(hv_cpu->clk_evt, cpu); hv_ce_shutdown(hv_cpu->clk_evt); - put_cpu_ptr(hv_cpu); } hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); -- cgit v1.2.3 From 9fe5b8e9d4c25d30c84703c409f2c62b074c9b2e Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 6 May 2019 13:45:26 +0300 Subject: ubsan: Fix nasty -Wbuiltin-declaration-mismatch GCC-9 warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f0996bc2978e02d2ea898101462b960f6119b18f upstream. Building lib/ubsan.c with gcc-9 results in a ton of nasty warnings like this one: lib/ubsan.c warning: conflicting types for built-in function ‘__ubsan_handle_negate_overflow’; expected ‘void(void *, void *)’ [-Wbuiltin-declaration-mismatch] The kernel's declarations of __ubsan_handle_*() often uses 'unsigned long' types in parameters while GCC these parameters as 'void *' types, hence the mismatch. Fix this by using 'void *' to match GCC's declarations. Reported-by: Linus Torvalds Signed-off-by: Andrey Ryabinin Fixes: c6d308534aef ("UBSAN: run-time undefined behavior sanity checker") Cc: Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/ubsan.c | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index e4162f59a81c..1e9e2ab25539 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -86,11 +86,13 @@ static bool is_inline_int(struct type_descriptor *type) return bits <= inline_bits; } -static s_max get_signed_val(struct type_descriptor *type, unsigned long val) +static s_max get_signed_val(struct type_descriptor *type, void *val) { if (is_inline_int(type)) { unsigned extra_bits = sizeof(s_max)*8 - type_bit_width(type); - return ((s_max)val) << extra_bits >> extra_bits; + unsigned long ulong_val = (unsigned long)val; + + return ((s_max)ulong_val) << extra_bits >> extra_bits; } if (type_bit_width(type) == 64) @@ -99,15 +101,15 @@ static s_max get_signed_val(struct type_descriptor *type, unsigned long val) return *(s_max *)val; } -static bool val_is_negative(struct type_descriptor *type, unsigned long val) +static bool val_is_negative(struct type_descriptor *type, void *val) { return type_is_signed(type) && get_signed_val(type, val) < 0; } -static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val) +static u_max get_unsigned_val(struct type_descriptor *type, void *val) { if (is_inline_int(type)) - return val; + return (unsigned long)val; if (type_bit_width(type) == 64) return *(u64 *)val; @@ -116,7 +118,7 @@ static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val) } static void val_to_string(char *str, size_t size, struct type_descriptor *type, - unsigned long value) + void *value) { if (type_is_int(type)) { if (type_bit_width(type) == 128) { @@ -163,8 +165,8 @@ static void ubsan_epilogue(unsigned long *flags) current->in_ubsan--; } -static void handle_overflow(struct overflow_data *data, unsigned long lhs, - unsigned long rhs, char op) +static void handle_overflow(struct overflow_data *data, void *lhs, + void *rhs, char op) { struct type_descriptor *type = data->type; @@ -191,8 +193,7 @@ static void handle_overflow(struct overflow_data *data, unsigned long lhs, } void __ubsan_handle_add_overflow(struct overflow_data *data, - unsigned long lhs, - unsigned long rhs) + void *lhs, void *rhs) { handle_overflow(data, lhs, rhs, '+'); @@ -200,23 +201,21 @@ void __ubsan_handle_add_overflow(struct overflow_data *data, EXPORT_SYMBOL(__ubsan_handle_add_overflow); void __ubsan_handle_sub_overflow(struct overflow_data *data, - unsigned long lhs, - unsigned long rhs) + void *lhs, void *rhs) { handle_overflow(data, lhs, rhs, '-'); } EXPORT_SYMBOL(__ubsan_handle_sub_overflow); void __ubsan_handle_mul_overflow(struct overflow_data *data, - unsigned long lhs, - unsigned long rhs) + void *lhs, void *rhs) { handle_overflow(data, lhs, rhs, '*'); } EXPORT_SYMBOL(__ubsan_handle_mul_overflow); void __ubsan_handle_negate_overflow(struct overflow_data *data, - unsigned long old_val) + void *old_val) { unsigned long flags; char old_val_str[VALUE_LENGTH]; @@ -237,8 +236,7 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow); void __ubsan_handle_divrem_overflow(struct overflow_data *data, - unsigned long lhs, - unsigned long rhs) + void *lhs, void *rhs) { unsigned long flags; char rhs_val_str[VALUE_LENGTH]; @@ -323,7 +321,7 @@ static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, } void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, - unsigned long ptr) + void *ptr) { struct type_mismatch_data_common common_data = { .location = &data->location, @@ -332,12 +330,12 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, .type_check_kind = data->type_check_kind }; - ubsan_type_mismatch_common(&common_data, ptr); + ubsan_type_mismatch_common(&common_data, (unsigned long)ptr); } EXPORT_SYMBOL(__ubsan_handle_type_mismatch); void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, - unsigned long ptr) + void *ptr) { struct type_mismatch_data_common common_data = { @@ -347,12 +345,12 @@ void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, .type_check_kind = data->type_check_kind }; - ubsan_type_mismatch_common(&common_data, ptr); + ubsan_type_mismatch_common(&common_data, (unsigned long)ptr); } EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data, - unsigned long bound) + void *bound) { unsigned long flags; char bound_str[VALUE_LENGTH]; @@ -369,8 +367,7 @@ void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data, } EXPORT_SYMBOL(__ubsan_handle_vla_bound_not_positive); -void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, - unsigned long index) +void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index) { unsigned long flags; char index_str[VALUE_LENGTH]; @@ -388,7 +385,7 @@ void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, EXPORT_SYMBOL(__ubsan_handle_out_of_bounds); void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, - unsigned long lhs, unsigned long rhs) + void *lhs, void *rhs) { unsigned long flags; struct type_descriptor *rhs_type = data->rhs_type; @@ -439,7 +436,7 @@ void __ubsan_handle_builtin_unreachable(struct unreachable_data *data) EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable); void __ubsan_handle_load_invalid_value(struct invalid_value_data *data, - unsigned long val) + void *val) { unsigned long flags; char val_str[VALUE_LENGTH]; -- cgit v1.2.3 From 9cccac4ee35f398142a277e57f23abeb7a84acb1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Apr 2019 08:53:30 +0200 Subject: staging: greybus: power_supply: fix prop-descriptor request size commit 47830c1127ef166af787caf2f871f23089610a7f upstream. Since moving the message buffers off the stack, the dynamically allocated get-prop-descriptor request buffer is incorrectly sized due to using the pointer rather than request-struct size when creating the operation. Fortunately, the pointer size is always larger than this one-byte request, but this could still cause trouble on the remote end due to the unexpected message size. Fixes: 9d15134d067e ("greybus: power_supply: rework get descriptors") Cc: stable # 4.9 Cc: Rui Miguel Silva Signed-off-by: Johan Hovold Reviewed-by: Rui Miguel Silva Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/power_supply.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/greybus/power_supply.c b/drivers/staging/greybus/power_supply.c index 0529e5628c24..ae5c0285a942 100644 --- a/drivers/staging/greybus/power_supply.c +++ b/drivers/staging/greybus/power_supply.c @@ -520,7 +520,7 @@ static int gb_power_supply_prop_descriptors_get(struct gb_power_supply *gbpsy) op = gb_operation_create(connection, GB_POWER_SUPPLY_TYPE_GET_PROP_DESCRIPTORS, - sizeof(req), sizeof(*resp) + props_count * + sizeof(*req), sizeof(*resp) + props_count * sizeof(struct gb_power_supply_props_desc), GFP_KERNEL); if (!op) -- cgit v1.2.3 From 3f57fef02fa7977ac217b72f382ea5d13a0817e0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 7 Apr 2019 21:58:43 +0900 Subject: staging: wilc1000: Avoid GFP_KERNEL allocation from atomic context. commit ae26aa844679cdf660e12c7055f958cb90889eb6 upstream. Since wilc_set_multicast_list() is called with dev->addr_list_lock spinlock held, we can't use GFP_KERNEL memory allocation. Signed-off-by: Tetsuo Handa Fixes: e624c58cf8eb ("staging: wilc1000: refactor code to avoid use of wilc_set_multicast_list global") Cc: Ajay Singh Reviewed-by: Adham Abozaeid Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/linux_wlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c index 5e5149c9a92d..2448805315c5 100644 --- a/drivers/staging/wilc1000/linux_wlan.c +++ b/drivers/staging/wilc1000/linux_wlan.c @@ -816,7 +816,7 @@ static void wilc_set_multicast_list(struct net_device *dev) return; } - mc_list = kmalloc_array(dev->mc.count, ETH_ALEN, GFP_KERNEL); + mc_list = kmalloc_array(dev->mc.count, ETH_ALEN, GFP_ATOMIC); if (!mc_list) return; -- cgit v1.2.3 From 2dbcc037de1a25b78dcb1d570ef401c7b8bdd0c1 Mon Sep 17 00:00:00 2001 From: Suresh Udipi Date: Wed, 24 Apr 2019 21:23:43 +0200 Subject: staging: most: cdev: fix chrdev_region leak in mod_exit commit af708900e9a48c0aa46070c8a8cdf0608a1d2025 upstream. It looks like v4.18-rc1 commit [0] which upstreams mld-1.8.0 commit [1] missed to fix the memory leak in mod_exit function. Do it now. [0] aba258b7310167 ("staging: most: cdev: fix chrdev_region leak") [1] https://github.com/microchip-ais/linux/commit/a2d8f7ae7ea381 ("staging: most: cdev: fix leak for chrdev_region") Signed-off-by: Suresh Udipi Signed-off-by: Eugeniu Rosca Acked-by: Christian Gromm Fixes: aba258b73101 ("staging: most: cdev: fix chrdev_region leak") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/most/cdev/cdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/most/cdev/cdev.c b/drivers/staging/most/cdev/cdev.c index ea64aabda94e..67ac51c8fc5b 100644 --- a/drivers/staging/most/cdev/cdev.c +++ b/drivers/staging/most/cdev/cdev.c @@ -546,7 +546,7 @@ static void __exit mod_exit(void) destroy_cdev(c); destroy_channel(c); } - unregister_chrdev_region(comp.devno, 1); + unregister_chrdev_region(comp.devno, CHRDEV_REGION_SIZE); ida_destroy(&comp.minor_id); class_destroy(comp.class); } -- cgit v1.2.3 From ce3a072f275a0b7214dae646d092521fef77d375 Mon Sep 17 00:00:00 2001 From: Christian Gromm Date: Tue, 30 Apr 2019 14:07:48 +0200 Subject: staging: most: sound: pass correct device when creating a sound card commit 98592c1faca82a9024a64e4ecead68b19f81c299 upstream. This patch fixes the usage of the wrong struct device when calling function snd_card_new. Reported-by: Eugeniu Rosca Signed-off-by: Christian Gromm Fixes: 69c90cf1b2fa ("staging: most: sound: call snd_card_new with struct device") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/most/sound/sound.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/most/sound/sound.c b/drivers/staging/most/sound/sound.c index 79ab3a78c5ec..1e6f47cfe42c 100644 --- a/drivers/staging/most/sound/sound.c +++ b/drivers/staging/most/sound/sound.c @@ -622,7 +622,7 @@ static int audio_probe_channel(struct most_interface *iface, int channel_id, INIT_LIST_HEAD(&adpt->dev_list); iface->priv = adpt; list_add_tail(&adpt->list, &adpt_list); - ret = snd_card_new(&iface->dev, -1, "INIC", THIS_MODULE, + ret = snd_card_new(iface->driver_dev, -1, "INIC", THIS_MODULE, sizeof(*channel), &adpt->card); if (ret < 0) goto err_free_adpt; -- cgit v1.2.3 From 045c73ba325da86124b71679d0d684baccee1cf2 Mon Sep 17 00:00:00 2001 From: Philipp Puschmann Date: Wed, 27 Feb 2019 16:17:33 +0100 Subject: ASoC: tlv320aic3x: fix reset gpio reference counting [ Upstream commit 82ad759143ed77673db0d93d53c1cde7b99917ee ] This patch fixes a bug that prevents freeing the reset gpio on unloading the module. aic3x_i2c_probe is called when loading the module and it calls list_add with a probably uninitialized list entry aic3x->list (next = prev = NULL)). So even if list_del is called it does nothing and in the end the gpio_reset is not freed. Then a repeated module probing fails silently because gpio_request fails. When moving INIT_LIST_HEAD to aic3x_i2c_probe we also have to move list_del to aic3x_i2c_remove because aic3x_remove may be called multiple times without aic3x_i2c_remove being called which leads to a NULL pointer dereference. Signed-off-by: Philipp Puschmann Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tlv320aic3x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 6aa0edf8c5ef..cea3ebecdb12 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1609,7 +1609,6 @@ static int aic3x_probe(struct snd_soc_component *component) struct aic3x_priv *aic3x = snd_soc_component_get_drvdata(component); int ret, i; - INIT_LIST_HEAD(&aic3x->list); aic3x->component = component; for (i = 0; i < ARRAY_SIZE(aic3x->supplies); i++) { @@ -1692,7 +1691,6 @@ static void aic3x_remove(struct snd_soc_component *component) struct aic3x_priv *aic3x = snd_soc_component_get_drvdata(component); int i; - list_del(&aic3x->list); for (i = 0; i < ARRAY_SIZE(aic3x->supplies); i++) regulator_unregister_notifier(aic3x->supplies[i].consumer, &aic3x->disable_nb[i].nb); @@ -1890,6 +1888,7 @@ static int aic3x_i2c_probe(struct i2c_client *i2c, if (ret != 0) goto err_gpio; + INIT_LIST_HEAD(&aic3x->list); list_add(&aic3x->list, &reset_list); return 0; @@ -1906,6 +1905,8 @@ static int aic3x_i2c_remove(struct i2c_client *client) { struct aic3x_priv *aic3x = i2c_get_clientdata(client); + list_del(&aic3x->list); + if (gpio_is_valid(aic3x->gpio_reset) && !aic3x_is_shared_reset(aic3x)) { gpio_set_value(aic3x->gpio_reset, 0); -- cgit v1.2.3 From aba1a357cd907acff358ea3740ab21f287ea3d5a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Feb 2019 15:30:34 +0000 Subject: ASoC: hdmi-codec: fix S/PDIF DAI [ Upstream commit 2e95f984aae4cf0608d0ba2189c756f2bd50b44a ] When using the S/PDIF DAI, there is no requirement to call snd_soc_dai_set_fmt() as there is no DAI format definition that defines S/PDIF. In any case, S/PDIF does not have separate clocks, this is embedded into the data stream. Consequently, when attempting to use TDA998x in S/PDIF mode, the attempt to configure TDA998x via the hw_params callback fails as the hdmi_codec_daifmt is left initialised to zero. Since the S/PDIF DAI will only be used by S/PDIF, prepare the hdmi_codec_daifmt structure for this format. Signed-off-by: Russell King Reviewed-by: Jyri Sarha Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/hdmi-codec.c | 118 +++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index e5b6769b9797..d5f73c837281 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -529,73 +529,71 @@ static int hdmi_codec_set_fmt(struct snd_soc_dai *dai, { struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai); struct hdmi_codec_daifmt cf = { 0 }; - int ret = 0; dev_dbg(dai->dev, "%s()\n", __func__); - if (dai->id == DAI_ID_SPDIF) { - cf.fmt = HDMI_SPDIF; - } else { - switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { - case SND_SOC_DAIFMT_CBM_CFM: - cf.bit_clk_master = 1; - cf.frame_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBS_CFM: - cf.frame_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBM_CFS: - cf.bit_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBS_CFS: - break; - default: - return -EINVAL; - } + if (dai->id == DAI_ID_SPDIF) + return 0; + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + cf.bit_clk_master = 1; + cf.frame_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBS_CFM: + cf.frame_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBM_CFS: + cf.bit_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_NB_NF: - break; - case SND_SOC_DAIFMT_NB_IF: - cf.frame_clk_inv = 1; - break; - case SND_SOC_DAIFMT_IB_NF: - cf.bit_clk_inv = 1; - break; - case SND_SOC_DAIFMT_IB_IF: - cf.frame_clk_inv = 1; - cf.bit_clk_inv = 1; - break; - } + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_NB_IF: + cf.frame_clk_inv = 1; + break; + case SND_SOC_DAIFMT_IB_NF: + cf.bit_clk_inv = 1; + break; + case SND_SOC_DAIFMT_IB_IF: + cf.frame_clk_inv = 1; + cf.bit_clk_inv = 1; + break; + } - switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { - case SND_SOC_DAIFMT_I2S: - cf.fmt = HDMI_I2S; - break; - case SND_SOC_DAIFMT_DSP_A: - cf.fmt = HDMI_DSP_A; - break; - case SND_SOC_DAIFMT_DSP_B: - cf.fmt = HDMI_DSP_B; - break; - case SND_SOC_DAIFMT_RIGHT_J: - cf.fmt = HDMI_RIGHT_J; - break; - case SND_SOC_DAIFMT_LEFT_J: - cf.fmt = HDMI_LEFT_J; - break; - case SND_SOC_DAIFMT_AC97: - cf.fmt = HDMI_AC97; - break; - default: - dev_err(dai->dev, "Invalid DAI interface format\n"); - return -EINVAL; - } + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + cf.fmt = HDMI_I2S; + break; + case SND_SOC_DAIFMT_DSP_A: + cf.fmt = HDMI_DSP_A; + break; + case SND_SOC_DAIFMT_DSP_B: + cf.fmt = HDMI_DSP_B; + break; + case SND_SOC_DAIFMT_RIGHT_J: + cf.fmt = HDMI_RIGHT_J; + break; + case SND_SOC_DAIFMT_LEFT_J: + cf.fmt = HDMI_LEFT_J; + break; + case SND_SOC_DAIFMT_AC97: + cf.fmt = HDMI_AC97; + break; + default: + dev_err(dai->dev, "Invalid DAI interface format\n"); + return -EINVAL; } hcp->daifmt[dai->id] = cf; - return ret; + return 0; } static int hdmi_codec_digital_mute(struct snd_soc_dai *dai, int mute) @@ -792,8 +790,10 @@ static int hdmi_codec_probe(struct platform_device *pdev) i++; } - if (hcd->spdif) + if (hcd->spdif) { hcp->daidrv[i] = hdmi_spdif_dai; + hcp->daifmt[DAI_ID_SPDIF].fmt = HDMI_SPDIF; + } dev_set_drvdata(dev, hcp); -- cgit v1.2.3 From 27162c8fdfb9086a59b0c4a10a1f1afd02826826 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:21 +0100 Subject: ASoC: stm32: sai: fix iec958 controls indexation [ Upstream commit 5f8a1000c3e630c3ac06f1d664eeaa755bce8823 ] Allow indexation of sai iec958 controls according to device id. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/stm/stm32_sai_sub.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 29a131e0569e..aaacec5b442d 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -100,7 +100,7 @@ * @slot_mask: rx or tx active slots mask. set at init or at runtime * @data_size: PCM data width. corresponds to PCM substream width. * @spdif_frm_cnt: S/PDIF playback frame counter - * @snd_aes_iec958: iec958 data + * @iec958: iec958 data * @ctrl_lock: control lock */ struct stm32_sai_sub_data { @@ -1070,11 +1070,12 @@ static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *cpu_dai) { struct stm32_sai_sub_data *sai = dev_get_drvdata(cpu_dai->dev); + struct snd_kcontrol_new knew = iec958_ctls; if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { dev_dbg(&sai->pdev->dev, "%s: register iec controls", __func__); - return snd_ctl_add(rtd->pcm->card, - snd_ctl_new1(&iec958_ctls, sai)); + knew.device = rtd->pcm->device; + return snd_ctl_add(rtd->pcm->card, snd_ctl_new1(&knew, sai)); } return 0; -- cgit v1.2.3 From b329de3769b0120c3152a57deb3a87a283f160bf Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:22 +0100 Subject: ASoC: stm32: sai: fix exposed capabilities in spdif mode [ Upstream commit b8468192971807c43a80d6e2c41f83141cb7b211 ] Change capabilities exposed in SAI S/PDIF mode, to match actually supported formats. In S/PDIF mode only 32 bits stereo is supported. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/stm/stm32_sai_sub.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index aaacec5b442d..1653cea936cb 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -682,6 +682,14 @@ static int stm32_sai_startup(struct snd_pcm_substream *substream, sai->substream = substream; + if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { + snd_pcm_hw_constraint_mask64(substream->runtime, + SNDRV_PCM_HW_PARAM_FORMAT, + SNDRV_PCM_FMTBIT_S32_LE); + snd_pcm_hw_constraint_single(substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, 2); + } + ret = clk_prepare_enable(sai->sai_ck); if (ret < 0) { dev_err(cpu_dai->dev, "Failed to enable clock: %d\n", ret); -- cgit v1.2.3 From 6872cfa57c4289344f8d80a03e33ee56d6c8c8b0 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:23 +0100 Subject: ASoC: stm32: sai: fix race condition in irq handler [ Upstream commit 26f98e82dd49b7c3cc5ef0edd882aa732a62b672 ] When snd_pcm_stop_xrun() is called in interrupt routine, substream context may have already been released. Add protection on substream context. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/stm/stm32_sai_sub.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 1653cea936cb..bc69e68191ad 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -102,6 +102,7 @@ * @spdif_frm_cnt: S/PDIF playback frame counter * @iec958: iec958 data * @ctrl_lock: control lock + * @irq_lock: prevent race condition with IRQ */ struct stm32_sai_sub_data { struct platform_device *pdev; @@ -133,6 +134,7 @@ struct stm32_sai_sub_data { unsigned int spdif_frm_cnt; struct snd_aes_iec958 iec958; struct mutex ctrl_lock; /* protect resources accessed by controls */ + spinlock_t irq_lock; /* used to prevent race condition with IRQ */ }; enum stm32_sai_fifo_th { @@ -474,8 +476,10 @@ static irqreturn_t stm32_sai_isr(int irq, void *devid) status = SNDRV_PCM_STATE_XRUN; } - if (status != SNDRV_PCM_STATE_RUNNING) + spin_lock(&sai->irq_lock); + if (status != SNDRV_PCM_STATE_RUNNING && sai->substream) snd_pcm_stop_xrun(sai->substream); + spin_unlock(&sai->irq_lock); return IRQ_HANDLED; } @@ -679,8 +683,11 @@ static int stm32_sai_startup(struct snd_pcm_substream *substream, { struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); int imr, cr2, ret; + unsigned long flags; + spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = substream; + spin_unlock_irqrestore(&sai->irq_lock, flags); if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { snd_pcm_hw_constraint_mask64(substream->runtime, @@ -1061,6 +1068,7 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + unsigned long flags; regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, SAI_XIMR_MASK, 0); @@ -1071,7 +1079,9 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, clk_rate_exclusive_put(sai->sai_mclk); + spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = NULL; + spin_unlock_irqrestore(&sai->irq_lock, flags); } static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd, @@ -1435,6 +1445,7 @@ static int stm32_sai_sub_probe(struct platform_device *pdev) sai->pdev = pdev; mutex_init(&sai->ctrl_lock); + spin_lock_init(&sai->irq_lock); platform_set_drvdata(pdev, sai); sai->pdata = dev_get_drvdata(pdev->dev.parent); -- cgit v1.2.3 From 30d948ac01d98b65fb0f1bb9774e7f60cdfef107 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:57 +0800 Subject: ASoC:soc-pcm:fix a codec fixup issue in TDM case [ Upstream commit 570f18b6a8d1f0e60e8caf30e66161b6438dcc91 ] On HDaudio platforms, if playback is started when capture is working, there is no audible output. This can be root-caused to the use of the rx|tx_mask to store an HDaudio stream tag. If capture is stared before playback, rx_mask would be non-zero on HDaudio platform, then the channel number of playback, which is in the same codec dai with the capture, would be changed by soc_pcm_codec_params_fixup based on the tx_mask at first, then overwritten by this function based on rx_mask at last. According to the author of tx|rx_mask, tx_mask is for playback and rx_mask is for capture. And stream direction is checked at all other references of tx|rx_mask in ASoC, so here should be an error. This patch checks stream direction for tx|rx_mask for fixup function. This issue would affect not only HDaudio+ASoC, but also I2S codecs if the channel number based on rx_mask is not equal to the one for tx_mask. It could be rarely reproduecd because most drivers in kernel set the same channel number to tx|rx_mask or rx_mask is zero. Tested on all platforms using stream_tag & HDaudio and intel I2S platforms. Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 0c1dd6bd67ab..fcdeffddbe46 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -954,10 +954,13 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, codec_params = *params; /* fixup params based on TDM slot masks */ - if (codec_dai->tx_mask) + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && + codec_dai->tx_mask) soc_pcm_codec_params_fixup(&codec_params, codec_dai->tx_mask); - if (codec_dai->rx_mask) + + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE && + codec_dai->rx_mask) soc_pcm_codec_params_fixup(&codec_params, codec_dai->rx_mask); -- cgit v1.2.3 From ad8af1f8d26eef102d5c784fe04e56c1662072f1 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:58 +0800 Subject: ASoC:hdac_hda:use correct format to setup hda codec [ Upstream commit 03d0aa4d4fddce4a5d865d819a4d98bfc3d451e6 ] The current implementation of the hdac_hda codec results in zero-valued samples on capture and noise with headset playback when SOF is used on platforms with an on-board HDaudio codec. This is root-caused to SOF using be_hw_params_fixup, and the prepare() call using invalid runtime fields to determine the format. This patch moves the format handling to the hw_params() callback, as done already for hdac_hdmi, to make sure the fixed-up information is taken into account but keeps the codec initialization in prepare() as the stream_tag is only available at that time. Moving everything in the prepare() callback is possible but the code is less elegant so this two-step solution was chosen. The solution was tested with the SST driver with no regressions, and all the issues with SOF playback and capture are solved. Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/hdac_hda.c | 53 +++++++++++++++++++++++++++++++++------------ sound/soc/codecs/hdac_hda.h | 1 + 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c index ffecdaaa8cf2..f889d94c8e3c 100644 --- a/sound/soc/codecs/hdac_hda.c +++ b/sound/soc/codecs/hdac_hda.c @@ -38,6 +38,9 @@ static void hdac_hda_dai_close(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); +static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai); static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, @@ -50,6 +53,7 @@ static const struct snd_soc_dai_ops hdac_hda_dai_ops = { .startup = hdac_hda_dai_open, .shutdown = hdac_hda_dai_close, .prepare = hdac_hda_dai_prepare, + .hw_params = hdac_hda_dai_hw_params, .hw_free = hdac_hda_dai_hw_free, .set_tdm_slot = hdac_hda_dai_set_tdm_slot, }; @@ -139,6 +143,39 @@ static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, return 0; } +static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct hdac_hda_priv *hda_pvt; + unsigned int format_val; + unsigned int maxbps; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + maxbps = dai->driver->playback.sig_bits; + else + maxbps = dai->driver->capture.sig_bits; + + hda_pvt = snd_soc_component_get_drvdata(component); + format_val = snd_hdac_calc_stream_format(params_rate(params), + params_channels(params), + params_format(params), + maxbps, + 0); + if (!format_val) { + dev_err(dai->dev, + "invalid format_val, rate=%d, ch=%d, format=%d, maxbps=%d\n", + params_rate(params), params_channels(params), + params_format(params), maxbps); + + return -EINVAL; + } + + hda_pvt->pcm[dai->id].format_val[substream->stream] = format_val; + return 0; +} + static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -162,10 +199,9 @@ static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_component *component = dai->component; + struct hda_pcm_stream *hda_stream; struct hdac_hda_priv *hda_pvt; - struct snd_pcm_runtime *runtime = substream->runtime; struct hdac_device *hdev; - struct hda_pcm_stream *hda_stream; unsigned int format_val; struct hda_pcm *pcm; unsigned int stream; @@ -179,19 +215,8 @@ static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, hda_stream = &pcm->stream[substream->stream]; - format_val = snd_hdac_calc_stream_format(runtime->rate, - runtime->channels, - runtime->format, - hda_stream->maxbps, - 0); - if (!format_val) { - dev_err(&hdev->dev, - "invalid format_val, rate=%d, ch=%d, format=%d\n", - runtime->rate, runtime->channels, runtime->format); - return -EINVAL; - } - stream = hda_pvt->pcm[dai->id].stream_tag[substream->stream]; + format_val = hda_pvt->pcm[dai->id].format_val[substream->stream]; ret = snd_hda_codec_prepare(&hda_pvt->codec, hda_stream, stream, format_val, substream); diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h index e444ef593360..6b1bd4f428e7 100644 --- a/sound/soc/codecs/hdac_hda.h +++ b/sound/soc/codecs/hdac_hda.h @@ -8,6 +8,7 @@ struct hdac_hda_pcm { int stream_tag[2]; + unsigned int format_val[2]; }; struct hdac_hda_priv { -- cgit v1.2.3 From 71ec072682ff024ef57fa46fc9e8d63bf0b4bc7a Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:59 +0800 Subject: ASoC:intel:skl:fix a simultaneous playback & capture issue on hda platform [ Upstream commit c899df3e9b0bf7b76e642aed1a214582ea7012d5 ] If playback and capture are enabled concurrently, when the capture stops the output becomes inaudile. The playback application will become stuck and underrun after a timeout. This is caused by mistaken use of the stream_id, which should only be set for playback and not for capture Tested on Apollolake and Kabylake with SST driver. Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/skylake/skl-pcm.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 557f80c0bfe5..5cd308d622f6 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -181,6 +181,7 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params) struct hdac_stream *hstream; struct hdac_ext_stream *stream; struct hdac_ext_link *link; + unsigned char stream_tag; hstream = snd_hdac_get_stream(bus, params->stream, params->link_dma_id + 1); @@ -199,10 +200,13 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params) snd_hdac_ext_link_stream_setup(stream, format_val); - list_for_each_entry(link, &bus->hlink_list, list) { - if (link->index == params->link_index) - snd_hdac_ext_link_set_stream_id(link, - hstream->stream_tag); + stream_tag = hstream->stream_tag; + if (stream->hstream.direction == SNDRV_PCM_STREAM_PLAYBACK) { + list_for_each_entry(link, &bus->hlink_list, list) { + if (link->index == params->link_index) + snd_hdac_ext_link_set_stream_id(link, + stream_tag); + } } stream->link_prepared = 1; @@ -645,6 +649,7 @@ static int skl_link_hw_free(struct snd_pcm_substream *substream, struct hdac_ext_stream *link_dev = snd_soc_dai_get_dma_data(dai, substream); struct hdac_ext_link *link; + unsigned char stream_tag; dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name); @@ -654,7 +659,11 @@ static int skl_link_hw_free(struct snd_pcm_substream *substream, if (!link) return -EINVAL; - snd_hdac_ext_link_clear_stream_id(link, hdac_stream(link_dev)->stream_tag); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + stream_tag = hdac_stream(link_dev)->stream_tag; + snd_hdac_ext_link_clear_stream_id(link, stream_tag); + } + snd_hdac_ext_stream_release(link_dev, HDAC_EXT_STREAM_TYPE_LINK); return 0; } -- cgit v1.2.3 From f4f4303c6d54d7d7238776ff9afba7fbaeddf384 Mon Sep 17 00:00:00 2001 From: KaiChieh Chuang Date: Fri, 8 Mar 2019 13:05:53 +0800 Subject: ASoC: dpcm: prevent snd_soc_dpcm use after free [ Upstream commit a9764869779081e8bf24da07ac040e8f3efcf13a ] The dpcm get from fe_clients/be_clients may be free before use Add a spin lock at snd_soc_card level, to protect the dpcm instance. The lock may be used in atomic context, so use spin lock. Use irq spin lock version, since the lock may be used in interrupts. possible race condition between void dpcm_be_disconnect( ... list_del(&dpcm->list_be); list_del(&dpcm->list_fe); kfree(dpcm); ... and for_each_dpcm_fe() for_each_dpcm_be*() race condition example Thread 1: snd_soc_dapm_mixer_update_power() -> soc_dpcm_runtime_update() -> dpcm_be_disconnect() -> kfree(dpcm); Thread 2: dpcm_fe_dai_trigger() -> dpcm_be_dai_trigger() -> snd_soc_dpcm_can_be_free_stop() -> if (dpcm->fe == fe) Excpetion Scenario: two FE link to same BE FE1 -> BE FE2 -> Thread 1: switch of mixer between FE2 -> BE Thread 2: pcm_stop FE1 Exception: Unable to handle kernel paging request at virtual address dead0000000000e0 pc=<> [] dpcm_be_dai_trigger+0x29c/0x47c sound/soc/soc-pcm.c:3226 if (dpcm->fe == fe) lr=<> [] dpcm_fe_dai_do_trigger+0x94/0x26c Backtrace: [] notify_die+0x68/0xb8 [] die+0x118/0x2a8 [] __do_kernel_fault+0x13c/0x14c [] do_translation_fault+0x64/0xa0 [] do_mem_abort+0x4c/0xd0 [] el1_da+0x24/0x40 [] dpcm_be_dai_trigger+0x29c/0x47c [] dpcm_fe_dai_do_trigger+0x94/0x26c [] dpcm_fe_dai_trigger+0x3c/0x44 [] snd_pcm_do_stop+0x50/0x5c [] snd_pcm_action+0xb4/0x13c [] snd_pcm_drop+0xa0/0x128 [] snd_pcm_common_ioctl+0x9d8/0x30f0 [] snd_pcm_ioctl_compat+0x29c/0x2f14 [] compat_SyS_ioctl+0x128/0x244 [] el0_svc_naked+0x34/0x38 [] 0xffffffffffffffff Signed-off-by: KaiChieh Chuang Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 1 + sound/soc/soc-pcm.c | 40 +++++++++++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index e665f111b0d2..fa82d6215328 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1043,6 +1043,8 @@ struct snd_soc_card { struct mutex mutex; struct mutex dapm_mutex; + spinlock_t dpcm_lock; + bool instantiated; bool topology_shortname_created; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 50617db05c46..416c371fa01a 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2790,6 +2790,7 @@ int snd_soc_register_card(struct snd_soc_card *card) card->instantiated = 0; mutex_init(&card->mutex); mutex_init(&card->dapm_mutex); + spin_lock_init(&card->dpcm_lock); return snd_soc_bind_card(card); } diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index fcdeffddbe46..22946493a11f 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1212,6 +1212,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be, int stream) { struct snd_soc_dpcm *dpcm; + unsigned long flags; /* only add new dpcms */ for_each_dpcm_be(fe, stream, dpcm) { @@ -1227,8 +1228,10 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, dpcm->fe = fe; be->dpcm[stream].runtime = fe->dpcm[stream].runtime; dpcm->state = SND_SOC_DPCM_LINK_STATE_NEW; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_add(&dpcm->list_be, &fe->dpcm[stream].be_clients); list_add(&dpcm->list_fe, &be->dpcm[stream].fe_clients); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); dev_dbg(fe->dev, "connected new DPCM %s path %s %s %s\n", stream ? "capture" : "playback", fe->dai_link->name, @@ -1274,6 +1277,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm, *d; + unsigned long flags; for_each_dpcm_be_safe(fe, stream, dpcm, d) { dev_dbg(fe->dev, "ASoC: BE %s disconnect check for %s\n", @@ -1293,8 +1297,10 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) #ifdef CONFIG_DEBUG_FS debugfs_remove(dpcm->debugfs_state); #endif + spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_del(&dpcm->list_be); list_del(&dpcm->list_fe); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); kfree(dpcm); } } @@ -1546,10 +1552,13 @@ int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) dpcm->be->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); } static void dpcm_be_dai_startup_unwind(struct snd_soc_pcm_runtime *fe, @@ -2575,6 +2584,7 @@ static int dpcm_run_update_startup(struct snd_soc_pcm_runtime *fe, int stream) struct snd_soc_dpcm *dpcm; enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream]; int ret; + unsigned long flags; dev_dbg(fe->dev, "ASoC: runtime %s open on FE %s\n", stream ? "capture" : "playback", fe->dai_link->name); @@ -2644,11 +2654,13 @@ close: dpcm_be_dai_shutdown(fe, stream); disconnect: /* disconnect any non started BEs */ + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); return ret; } @@ -3224,7 +3236,10 @@ int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe, { struct snd_soc_dpcm *dpcm; int state; + int ret = 1; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) @@ -3233,12 +3248,15 @@ int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe, state = dpcm->fe->dpcm[stream].state; if (state == SND_SOC_DPCM_STATE_START || state == SND_SOC_DPCM_STATE_PAUSED || - state == SND_SOC_DPCM_STATE_SUSPEND) - return 0; + state == SND_SOC_DPCM_STATE_SUSPEND) { + ret = 0; + break; + } } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); /* it's safe to free/stop this BE DAI */ - return 1; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_free_stop); @@ -3251,7 +3269,10 @@ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe, { struct snd_soc_dpcm *dpcm; int state; + int ret = 1; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) @@ -3261,12 +3282,15 @@ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe, if (state == SND_SOC_DPCM_STATE_START || state == SND_SOC_DPCM_STATE_PAUSED || state == SND_SOC_DPCM_STATE_SUSPEND || - state == SND_SOC_DPCM_STATE_PREPARE) - return 0; + state == SND_SOC_DPCM_STATE_PREPARE) { + ret = 0; + break; + } } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); /* it's safe to change hw_params */ - return 1; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_params); @@ -3305,6 +3329,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_pcm_hw_params *params = &fe->dpcm[stream].hw_params; struct snd_soc_dpcm *dpcm; ssize_t offset = 0; + unsigned long flags; /* FE state */ offset += snprintf(buf + offset, size - offset, @@ -3332,6 +3357,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, goto out; } + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; @@ -3352,7 +3378,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_channels(params), params_rate(params)); } - + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); out: return offset; } -- cgit v1.2.3 From 6c4a8ae4baa6393b6adba0ac3c514cbaa1479a98 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Mon, 11 Mar 2019 09:36:45 +0800 Subject: ASoC: nau8824: fix the issue of the widget with prefix name [ Upstream commit 844a4a362dbec166b44d6b9b3dd45b08cb273703 ] The driver has two issues when machine add prefix name for codec. (1)The stream name of DAI can't find the AIF widgets. (2)The drivr can enable/disalbe the MICBIAS and SAR widgets. The patch will fix these issues caused by prefixed name added. Signed-off-by: John Hsu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/nau8824.c | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 468d5143e2c4..663a208c2f78 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -681,8 +681,8 @@ static const struct snd_soc_dapm_widget nau8824_dapm_widgets[] = { SND_SOC_DAPM_ADC("ADCR", NULL, NAU8824_REG_ANALOG_ADC_2, NAU8824_ADCR_EN_SFT, 0), - SND_SOC_DAPM_AIF_OUT("AIFTX", "HiFi Capture", 0, SND_SOC_NOPM, 0, 0), - SND_SOC_DAPM_AIF_IN("AIFRX", "HiFi Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("AIFTX", "Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("AIFRX", "Playback", 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_DAC("DACL", NULL, NAU8824_REG_RDAC, NAU8824_DACL_EN_SFT, 0), @@ -831,6 +831,36 @@ static void nau8824_int_status_clear_all(struct regmap *regmap) } } +static void nau8824_dapm_disable_pin(struct nau8824 *nau8824, const char *pin) +{ + struct snd_soc_dapm_context *dapm = nau8824->dapm; + const char *prefix = dapm->component->name_prefix; + char prefixed_pin[80]; + + if (prefix) { + snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", + prefix, pin); + snd_soc_dapm_disable_pin(dapm, prefixed_pin); + } else { + snd_soc_dapm_disable_pin(dapm, pin); + } +} + +static void nau8824_dapm_enable_pin(struct nau8824 *nau8824, const char *pin) +{ + struct snd_soc_dapm_context *dapm = nau8824->dapm; + const char *prefix = dapm->component->name_prefix; + char prefixed_pin[80]; + + if (prefix) { + snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", + prefix, pin); + snd_soc_dapm_force_enable_pin(dapm, prefixed_pin); + } else { + snd_soc_dapm_force_enable_pin(dapm, pin); + } +} + static void nau8824_eject_jack(struct nau8824 *nau8824) { struct snd_soc_dapm_context *dapm = nau8824->dapm; @@ -839,8 +869,8 @@ static void nau8824_eject_jack(struct nau8824 *nau8824) /* Clear all interruption status */ nau8824_int_status_clear_all(regmap); - snd_soc_dapm_disable_pin(dapm, "SAR"); - snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + nau8824_dapm_disable_pin(nau8824, "SAR"); + nau8824_dapm_disable_pin(nau8824, "MICBIAS"); snd_soc_dapm_sync(dapm); /* Enable the insertion interruption, disable the ejection @@ -870,8 +900,8 @@ static void nau8824_jdet_work(struct work_struct *work) struct regmap *regmap = nau8824->regmap; int adc_value, event = 0, event_mask = 0; - snd_soc_dapm_force_enable_pin(dapm, "MICBIAS"); - snd_soc_dapm_force_enable_pin(dapm, "SAR"); + nau8824_dapm_enable_pin(nau8824, "MICBIAS"); + nau8824_dapm_enable_pin(nau8824, "SAR"); snd_soc_dapm_sync(dapm); msleep(100); @@ -882,8 +912,8 @@ static void nau8824_jdet_work(struct work_struct *work) if (adc_value < HEADSET_SARADC_THD) { event |= SND_JACK_HEADPHONE; - snd_soc_dapm_disable_pin(dapm, "SAR"); - snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + nau8824_dapm_disable_pin(nau8824, "SAR"); + nau8824_dapm_disable_pin(nau8824, "MICBIAS"); snd_soc_dapm_sync(dapm); } else { event |= SND_JACK_HEADSET; -- cgit v1.2.3 From b44509a152a3237eec2bc68da945f2f4676938b5 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Wed, 13 Mar 2019 16:23:44 +0800 Subject: ASoC: nau8810: fix the issue of widget with prefixed name [ Upstream commit 54d1cf78b0f4ba348a7c7fb8b7d0708d71b6cc8a ] The driver changes the stream name of DAC and ADC to avoid the issue of widget with prefixed name. When the machine adds prefixed name for codec, the stream name of DAI may not find the widgets. Signed-off-by: John Hsu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/nau8810.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/nau8810.c b/sound/soc/codecs/nau8810.c index bfd74b86c9d2..645aa0794123 100644 --- a/sound/soc/codecs/nau8810.c +++ b/sound/soc/codecs/nau8810.c @@ -411,9 +411,9 @@ static const struct snd_soc_dapm_widget nau8810_dapm_widgets[] = { SND_SOC_DAPM_MIXER("Mono Mixer", NAU8810_REG_POWER3, NAU8810_MOUTMX_EN_SFT, 0, &nau8810_mono_mixer_controls[0], ARRAY_SIZE(nau8810_mono_mixer_controls)), - SND_SOC_DAPM_DAC("DAC", "HiFi Playback", NAU8810_REG_POWER3, + SND_SOC_DAPM_DAC("DAC", "Playback", NAU8810_REG_POWER3, NAU8810_DAC_EN_SFT, 0), - SND_SOC_DAPM_ADC("ADC", "HiFi Capture", NAU8810_REG_POWER2, + SND_SOC_DAPM_ADC("ADC", "Capture", NAU8810_REG_POWER2, NAU8810_ADC_EN_SFT, 0), SND_SOC_DAPM_PGA("SpkN Out", NAU8810_REG_POWER3, NAU8810_NSPK_EN_SFT, 0, NULL, 0), -- cgit v1.2.3 From 3f60f8813be9eda7ec480deeb4c7007f0439ee44 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 12 Mar 2019 18:40:06 +0100 Subject: ASoC: samsung: odroid: Fix clock configuration for 44100 sample rate [ Upstream commit 2b13bee3884926cba22061efa75bd315e871de24 ] After commit fbeec965b8d1c ("ASoC: samsung: odroid: Fix 32000 sample rate handling") the audio root clock frequency is configured improperly for 44100 sample rate. Due to clock rate rounding it's 20070401 Hz instead of 22579000 Hz. This results in a too low value of the PSR clock divider in the CPU DAI driver and too fast actual sample rate for fs=44100. E.g. 1 kHz tone has actual 1780 Hz frequency (1 kHz * 20070401/22579000 * 2). Fix this by increasing the correction passed to clk_set_rate() to take into account inaccuracy of the EPLL frequency properly. Fixes: fbeec965b8d1c ("ASoC: samsung: odroid: Fix 32000 sample rate handling") Reported-by: JaeChul Lee Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/samsung/odroid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c index e7b371b07230..45c6d7396785 100644 --- a/sound/soc/samsung/odroid.c +++ b/sound/soc/samsung/odroid.c @@ -64,11 +64,11 @@ static int odroid_card_hw_params(struct snd_pcm_substream *substream, return ret; /* - * We add 1 to the rclk_freq value in order to avoid too low clock + * We add 2 to the rclk_freq value in order to avoid too low clock * frequency values due to the EPLL output frequency not being exact * multiple of the audio sampling rate. */ - rclk_freq = params_rate(params) * rfs + 1; + rclk_freq = params_rate(params) * rfs + 2; ret = clk_set_rate(priv->sclk_i2s, rclk_freq); if (ret < 0) -- cgit v1.2.3 From 8425db6714401ad79c846199b4d1f4e96a5fd292 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 8 Mar 2019 11:36:08 +0800 Subject: ASoC: rt5682: Check JD status when system resume [ Upstream commit 4834d7070c85a5fb69637265dbbb05d13043280c ] The IRQ function may not work when system suspend. We remove snd_soc_dapm_force_enable_pin function call to make sure the bias off when idle and run into suspend/resume function. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index a9b91bcfcc09..49ff5e52db58 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -904,13 +904,20 @@ static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) { struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); - struct snd_soc_dapm_context *dapm = - snd_soc_component_get_dapm(component); unsigned int val, count; if (jack_insert) { - snd_soc_dapm_force_enable_pin(dapm, "CBJ Power"); - snd_soc_dapm_sync(dapm); + + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, + RT5682_PWR_VREF2, RT5682_PWR_VREF2); + snd_soc_component_update_bits(component, + RT5682_PWR_ANLG_1, RT5682_PWR_FV2, 0); + usleep_range(15000, 20000); + snd_soc_component_update_bits(component, + RT5682_PWR_ANLG_1, RT5682_PWR_FV2, RT5682_PWR_FV2); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, + RT5682_PWR_CBJ, RT5682_PWR_CBJ); + snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH); @@ -938,8 +945,10 @@ static int rt5682_headset_detect(struct snd_soc_component *component, rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); - snd_soc_dapm_disable_pin(dapm, "CBJ Power"); - snd_soc_dapm_sync(dapm); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, + RT5682_PWR_VREF2, 0); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, + RT5682_PWR_CBJ, 0); rt5682->jack_type = 0; } @@ -1585,8 +1594,6 @@ static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = { 0, NULL, 0), SND_SOC_DAPM_SUPPLY("Vref1", RT5682_PWR_ANLG_1, RT5682_PWR_VREF1_BIT, 0, rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU), - SND_SOC_DAPM_SUPPLY("Vref2", RT5682_PWR_ANLG_1, RT5682_PWR_VREF2_BIT, 0, - rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU), /* ASRC */ SND_SOC_DAPM_SUPPLY_S("DAC STO1 ASRC", 1, RT5682_PLL_TRACK_1, @@ -1621,9 +1628,6 @@ static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = { SND_SOC_DAPM_PGA("BST1 CBJ", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY("CBJ Power", RT5682_PWR_ANLG_3, - RT5682_PWR_CBJ_BIT, 0, NULL, 0), - /* REC Mixer */ SND_SOC_DAPM_MIXER("RECMIX1L", SND_SOC_NOPM, 0, 0, rt5682_rec1_l_mix, ARRAY_SIZE(rt5682_rec1_l_mix)), @@ -1786,17 +1790,13 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = { /*Vref*/ {"MICBIAS1", NULL, "Vref1"}, - {"MICBIAS1", NULL, "Vref2"}, {"MICBIAS2", NULL, "Vref1"}, - {"MICBIAS2", NULL, "Vref2"}, {"CLKDET SYS", NULL, "CLKDET"}, {"IN1P", NULL, "LDO2"}, {"BST1 CBJ", NULL, "IN1P"}, - {"BST1 CBJ", NULL, "CBJ Power"}, - {"CBJ Power", NULL, "Vref2"}, {"RECMIX1L", "CBJ Switch", "BST1 CBJ"}, {"RECMIX1L", NULL, "RECMIX1L Power"}, @@ -1906,9 +1906,7 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = { {"HP Amp", NULL, "Capless"}, {"HP Amp", NULL, "Charge Pump"}, {"HP Amp", NULL, "CLKDET SYS"}, - {"HP Amp", NULL, "CBJ Power"}, {"HP Amp", NULL, "Vref1"}, - {"HP Amp", NULL, "Vref2"}, {"HPOL Playback", "Switch", "HP Amp"}, {"HPOR Playback", "Switch", "HP Amp"}, {"HPOL", NULL, "HPOL Playback"}, @@ -2357,6 +2355,8 @@ static int rt5682_resume(struct snd_soc_component *component) regcache_cache_only(rt5682->regmap, false); regcache_sync(rt5682->regmap); + rt5682_irq(0, rt5682); + return 0; } #else -- cgit v1.2.3 From b2cb6f8f307b6224d1b93d2dac3a1daaf67b3fbb Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 18 Mar 2019 15:17:13 +0800 Subject: ASoC: rt5682: fix jack type detection issue [ Upstream commit 675212bfb23394514b7f68ebf3954ba936281ccc ] The jack type detection needs the main bias power of analog. The modification makes sure the main bias power on/off while jack plug/unplug. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 49ff5e52db58..9331c13d2017 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -909,7 +909,8 @@ static int rt5682_headset_detect(struct snd_soc_component *component, if (jack_insert) { snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, - RT5682_PWR_VREF2, RT5682_PWR_VREF2); + RT5682_PWR_VREF2 | RT5682_PWR_MB, + RT5682_PWR_VREF2 | RT5682_PWR_MB); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_FV2, 0); usleep_range(15000, 20000); @@ -946,7 +947,7 @@ static int rt5682_headset_detect(struct snd_soc_component *component, snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, - RT5682_PWR_VREF2, 0); + RT5682_PWR_VREF2 | RT5682_PWR_MB, 0); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, RT5682_PWR_CBJ, 0); @@ -2295,16 +2296,13 @@ static int rt5682_set_bias_level(struct snd_soc_component *component, switch (level) { case SND_SOC_BIAS_PREPARE: regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB | RT5682_PWR_BG, - RT5682_PWR_MB | RT5682_PWR_BG); + RT5682_PWR_BG, RT5682_PWR_BG); regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO); break; case SND_SOC_BIAS_STANDBY: - regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB, RT5682_PWR_MB); regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL, RT5682_DIG_GATE_CTRL); break; @@ -2312,7 +2310,7 @@ static int rt5682_set_bias_level(struct snd_soc_component *component, regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO, 0); regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB | RT5682_PWR_BG, 0); + RT5682_PWR_BG, 0); break; default: -- cgit v1.2.3 From 9fb991d9cf503e5b24de33e1b9fbda8a016e76ae Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 18 Mar 2019 15:17:42 +0800 Subject: ASoC: rt5682: recording has no sound after booting [ Upstream commit 1c5b6a27e432e4fe170a924c8b41012271496a4c ] If ASRC turns on, HW will use clk_dac as the reference clock whether recording or playback. Both of clk_dac and clk_adc should set proper clock while using ASRC. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 9331c13d2017..72ef2a0f6387 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -1202,7 +1202,7 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); - int ref, val, reg, sft, mask, idx = -EINVAL; + int ref, val, reg, idx = -EINVAL; static const int div_f[] = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48}; static const int div_o[] = {1, 2, 4, 6, 8, 12, 16, 24, 32, 48}; @@ -1216,15 +1216,10 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, idx = rt5682_div_sel(rt5682, ref, div_f, ARRAY_SIZE(div_f)); - if (w->shift == RT5682_PWR_ADC_S1F_BIT) { + if (w->shift == RT5682_PWR_ADC_S1F_BIT) reg = RT5682_PLL_TRACK_3; - sft = RT5682_ADC_OSR_SFT; - mask = RT5682_ADC_OSR_MASK; - } else { + else reg = RT5682_PLL_TRACK_2; - sft = RT5682_DAC_OSR_SFT; - mask = RT5682_DAC_OSR_MASK; - } snd_soc_component_update_bits(component, reg, RT5682_FILTER_CLK_DIV_MASK, idx << RT5682_FILTER_CLK_DIV_SFT); @@ -1236,7 +1231,8 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, } snd_soc_component_update_bits(component, RT5682_ADDA_CLK_1, - mask, idx << sft); + RT5682_ADC_OSR_MASK | RT5682_DAC_OSR_MASK, + (idx << RT5682_ADC_OSR_SFT) | (idx << RT5682_DAC_OSR_SFT)); return 0; } -- cgit v1.2.3 From b2b47cecd2ce58e2acd86ade3e0a7f027b0b097e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:06 +0000 Subject: ASoC: wm_adsp: Add locking to wm_adsp2_bus_error [ Upstream commit a2225a6d155fcb247fe4c6d87f7c91807462966d ] Best to lock across handling the bus error to ensure the DSP doesn't change power state as we are reading the status registers. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 0600e4404f90..eb5b1be77c47 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3821,11 +3821,13 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) struct regmap *regmap = dsp->regmap; int ret = 0; + mutex_lock(&dsp->pwr_lock); + ret = regmap_read(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, &val); if (ret) { adsp_err(dsp, "Failed to read Region Lock Ctrl register: %d\n", ret); - return IRQ_HANDLED; + goto error; } if (val & ADSP2_WDT_TIMEOUT_STS_MASK) { @@ -3844,7 +3846,7 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) adsp_err(dsp, "Failed to read Bus Err Addr register: %d\n", ret); - return IRQ_HANDLED; + goto error; } adsp_err(dsp, "bus error address = 0x%x\n", @@ -3857,7 +3859,7 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) adsp_err(dsp, "Failed to read Pmem Xmem Err Addr register: %d\n", ret); - return IRQ_HANDLED; + goto error; } adsp_err(dsp, "xmem error address = 0x%x\n", @@ -3870,6 +3872,9 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) regmap_update_bits(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, ADSP2_CTRL_ERR_EINT, ADSP2_CTRL_ERR_EINT); +error: + mutex_unlock(&dsp->pwr_lock); + return IRQ_HANDLED; } EXPORT_SYMBOL_GPL(wm_adsp2_bus_error); -- cgit v1.2.3 From 5bff91d2a18faaa1f29d3056cd49bf1988813ac1 Mon Sep 17 00:00:00 2001 From: Maxime Jourdan Date: Tue, 19 Mar 2019 11:25:37 +0100 Subject: clk: meson-gxbb: round the vdec dividers to closest [ Upstream commit 9b70c697e87286ade406e6a02091757307dd4b7c ] We want the video decoder clocks to always round to closest. While the muxes are already using CLK_MUX_ROUND_CLOSEST, the corresponding CLK_DIVIDER_ROUND_CLOSEST was forgotten for the dividers. Fix this by adding the flag to the two vdec dividers. Fixes: a565242eb9fc ("clk: meson: gxbb: add the video decoder clocks") Signed-off-by: Maxime Jourdan Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lkml.kernel.org/r/20190319102537.2043-1-mjourdan@baylibre.com Signed-off-by: Sasha Levin --- drivers/clk/meson/gxbb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 65f2599e5243..08824b2cd142 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -2213,6 +2213,7 @@ static struct clk_regmap gxbb_vdec_1_div = { .offset = HHI_VDEC_CLK_CNTL, .shift = 0, .width = 7, + .flags = CLK_DIVIDER_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "vdec_1_div", @@ -2258,6 +2259,7 @@ static struct clk_regmap gxbb_vdec_hevc_div = { .offset = HHI_VDEC2_CLK_CNTL, .shift = 16, .width = 7, + .flags = CLK_DIVIDER_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "vdec_hevc_div", -- cgit v1.2.3 From 9baa2f8ab75862dc604459da363624a0cc9c741c Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 4 Mar 2019 15:52:43 +0100 Subject: ASoC: stm32: dfsdm: manage multiple prepare [ Upstream commit 19441e35a43b616ea6afad91ed0d9e77268d8f6a ] The DFSDM must be stopped when a new setting is applied. restart systematically DFSDM on multiple prepare calls, to apply changes. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/stm/stm32_adfsdm.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index 706ff005234f..71d341b732a4 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -37,6 +38,8 @@ struct stm32_adfsdm_priv { /* PCM buffer */ unsigned char *pcm_buff; unsigned int pos; + + struct mutex lock; /* protect against race condition on iio state */ }; static const struct snd_pcm_hardware stm32_adfsdm_pcm_hw = { @@ -62,10 +65,12 @@ static void stm32_adfsdm_shutdown(struct snd_pcm_substream *substream, { struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); + mutex_lock(&priv->lock); if (priv->iio_active) { iio_channel_stop_all_cb(priv->iio_cb); priv->iio_active = false; } + mutex_unlock(&priv->lock); } static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, @@ -74,13 +79,19 @@ static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); int ret; + mutex_lock(&priv->lock); + if (priv->iio_active) { + iio_channel_stop_all_cb(priv->iio_cb); + priv->iio_active = false; + } + ret = iio_write_channel_attribute(priv->iio_ch, substream->runtime->rate, 0, IIO_CHAN_INFO_SAMP_FREQ); if (ret < 0) { dev_err(dai->dev, "%s: Failed to set %d sampling rate\n", __func__, substream->runtime->rate); - return ret; + goto out; } if (!priv->iio_active) { @@ -92,6 +103,9 @@ static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, __func__, ret); } +out: + mutex_unlock(&priv->lock); + return ret; } @@ -298,6 +312,7 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) priv->dev = &pdev->dev; priv->dai_drv = stm32_adfsdm_dai; + mutex_init(&priv->lock); dev_set_drvdata(&pdev->dev, priv); -- cgit v1.2.3 From 1be14f5da0b1df357a0c84c832951b9c16f3b5e4 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 4 Mar 2019 15:52:44 +0100 Subject: ASoC: stm32: dfsdm: fix debugfs warnings on entry creation [ Upstream commit c47255b61129857b74b0d86eaf59335348be05e0 ] Register platform component with a prefix, to avoid warnings on debugfs entries creation, due to component name redundancy. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/stm/stm32_adfsdm.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index 71d341b732a4..24948b95eb19 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -304,6 +304,7 @@ MODULE_DEVICE_TABLE(of, stm32_adfsdm_of_match); static int stm32_adfsdm_probe(struct platform_device *pdev) { struct stm32_adfsdm_priv *priv; + struct snd_soc_component *component; int ret; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); @@ -331,9 +332,15 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) if (IS_ERR(priv->iio_cb)) return PTR_ERR(priv->iio_cb); - ret = devm_snd_soc_register_component(&pdev->dev, - &stm32_adfsdm_soc_platform, - NULL, 0); + component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL); + if (!component) + return -ENOMEM; +#ifdef CONFIG_DEBUG_FS + component->debugfs_prefix = "pcm"; +#endif + + ret = snd_soc_add_component(&pdev->dev, component, + &stm32_adfsdm_soc_platform, NULL, 0); if (ret < 0) dev_err(&pdev->dev, "%s: Failed to register PCM platform\n", __func__); @@ -341,12 +348,20 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) return ret; } +static int stm32_adfsdm_remove(struct platform_device *pdev) +{ + snd_soc_unregister_component(&pdev->dev); + + return 0; +} + static struct platform_driver stm32_adfsdm_driver = { .driver = { .name = STM32_ADFSDM_DRV_NAME, .of_match_table = stm32_adfsdm_of_match, }, .probe = stm32_adfsdm_probe, + .remove = stm32_adfsdm_remove, }; module_platform_driver(stm32_adfsdm_driver); -- cgit v1.2.3 From 15d326f9548bfc3c944ad1d7d4e20f6d1c2ea92e Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 20 Mar 2019 22:41:56 +0100 Subject: ASoC: cs4270: Set auto-increment bit for register writes [ Upstream commit f0f2338a9cfaf71db895fa989ea7234e8a9b471d ] The CS4270 does not by default increment the register address on consecutive writes. During normal operation it doesn't matter as all register accesses are done individually. At resume time after suspend, however, the regcache code gathers the biggest possible block of registers to sync and sends them one on one go. To fix this, set the INCR bit in all cases. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs4270.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 33d74f163bd7..793a14d58667 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -642,6 +642,7 @@ static const struct regmap_config cs4270_regmap = { .reg_defaults = cs4270_reg_defaults, .num_reg_defaults = ARRAY_SIZE(cs4270_reg_defaults), .cache_type = REGCACHE_RBTREE, + .write_flag_mask = CS4270_I2C_INCR, .readable_reg = cs4270_reg_is_readable, .volatile_reg = cs4270_reg_is_volatile, -- cgit v1.2.3 From 08aa8be65b52923d8b5adf0f89af0d5b4c87665b Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Fri, 22 Mar 2019 18:00:09 +0530 Subject: ASoC: dapm: Fix NULL pointer dereference in snd_soc_dapm_free_kcontrol [ Upstream commit cacea3a90e211f0c111975535508d446a4a928d2 ] w_text_param can be NULL and it is being dereferenced without checking. Add the missing sanity check to prevent NULL pointer dereference. Signed-off-by: Pankaj Bharadiya Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-dapm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 20bad755888b..08ab5fef75dc 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3840,6 +3840,10 @@ snd_soc_dapm_free_kcontrol(struct snd_soc_card *card, int count; devm_kfree(card->dev, (void *)*private_value); + + if (!w_param_text) + return; + for (count = 0 ; count < num_params; count++) devm_kfree(card->dev, (void *)w_param_text[count]); devm_kfree(card->dev, w_param_text); -- cgit v1.2.3 From a2fdb5d19477f6ad8f1161e2d0f3ed2e7698f6d1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Mar 2019 08:14:37 -0700 Subject: drm/omap: hdmi4_cec: Fix CEC clock handling for PM [ Upstream commit 36a1da15b5df493241b0011d2185fdd724ac1ed1 ] If CONFIG_OMAP4_DSS_HDMI_CEC is enabled in .config, deeper SoC idle states are blocked because the CEC clock gets always enabled on init. Let's fix the issue by moving the CEC clock handling to happen later in hdmi_cec_adap_enable() as suggested by Hans Verkuil . This way the CEC clock gets only enabled when needed. This can be tested by doing cec-ctl --playback to enable the CEC, and doing cec-ctl --clear to disable it. Let's also fix the typo for "divider" in the comments while at it. Fixes: 8d7f934df8d8 ("omapdrm: hdmi4_cec: add OMAP4 HDMI CEC support") Suggested-by: Hans Verkuil Cc: Hans Verkuil Cc: Jyri Sarha Cc: Laurent Pinchart Signed-off-by: Tony Lindgren Reviewed-by: Hans Verkuil Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190326151438.32414-1-tony@atomide.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index 340383150fb9..ebf9c96d43ee 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -175,6 +175,7 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) REG_FLD_MOD(core->base, HDMI_CORE_SYS_INTR_UNMASK4, 0, 3, 3); hdmi_wp_clear_irqenable(core->wp, HDMI_IRQ_CORE); hdmi_wp_set_irqstatus(core->wp, HDMI_IRQ_CORE); + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); hdmi4_core_disable(core); return 0; } @@ -182,16 +183,24 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) if (err) return err; + /* + * Initialize CEC clock divider: CEC needs 2MHz clock hence + * set the divider to 24 to get 48/24=2MHz clock + */ + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0x18, 5, 0); + /* Clear TX FIFO */ if (!hdmi_cec_clear_tx_fifo(adap)) { pr_err("cec-%s: could not clear TX FIFO\n", adap->name); - return -EIO; + err = -EIO; + goto err_disable_clk; } /* Clear RX FIFO */ if (!hdmi_cec_clear_rx_fifo(adap)) { pr_err("cec-%s: could not clear RX FIFO\n", adap->name); - return -EIO; + err = -EIO; + goto err_disable_clk; } /* Clear CEC interrupts */ @@ -236,6 +245,12 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, temp); } return 0; + +err_disable_clk: + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); + hdmi4_core_disable(core); + + return err; } static int hdmi_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr) @@ -333,11 +348,8 @@ int hdmi4_cec_init(struct platform_device *pdev, struct hdmi_core_data *core, return ret; core->wp = wp; - /* - * Initialize CEC clock divider: CEC needs 2MHz clock hence - * set the devider to 24 to get 48/24=2MHz clock - */ - REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0x18, 5, 0); + /* Disable clock initially, hdmi_cec_adap_enable() manages it */ + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); ret = cec_register_adapter(core->adap, &pdev->dev); if (ret < 0) { -- cgit v1.2.3 From 1f9f22f6c7d6c4e81eea78693355e85d373530b8 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:29 -0700 Subject: IB/hfi1: Clear the IOWAIT pending bits when QP is put into error state [ Upstream commit 93b289b9aff66eca7575b09f36f5abbeca8e6167 ] When a QP is put into error state, it may be waiting for send engine resources. In this case, the QP will be removed from the send engine's waiting list, but its IOWAIT pending bits are not cleared. This will normally not have any major impact as the QP is being destroyed. However, the QP still needs to wind down its operations, such as draining the send queue by scheduling the send engine. Clearing the pending bits will avoid any potential complications. In addition, if the QP will eventually hang, clearing the pending bits can help debugging by presenting a consistent picture if the user dumps the qp_stats. This patch clears a QP's IOWAIT_PENDING_IB and IO_PENDING_TID bits in priv->s_iowait.flags in this case. Fixes: 5da0fc9dbf89 ("IB/hfi1: Prepare resource waits for dual leg") Reviewed-by: Mike Marciniszyn Reviewed-by: Alex Estrin Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 5866f358ea04..df8e812804b3 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -834,6 +834,8 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB); + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID); list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; rvt_put_qp(qp); -- cgit v1.2.3 From a3270ed433898fac2410d48f9f5c0690a82d3c15 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:39 -0700 Subject: IB/hfi1: Eliminate opcode tests on mr deref [ Upstream commit a8639a79e85c18c16c10089edd589c7948f19bbd ] When an old ack_queue entry is used to store an incoming request, it may need to clean up the old entry if it is still referencing the MR. Originally only RDMA READ request needed to reference MR on the responder side and therefore the opcode was tested when cleaning up the old entry. The introduction of tid rdma specific operations in the ack_queue makes the specific opcode tests wrong. Multiple opcodes (RDMA READ, TID RDMA READ, and TID RDMA WRITE) may need MR ref cleanup. Remove the opcode specific tests associated with the ack_queue. Fixes: f48ad614c100 ("IB/hfi1: Move driver out of staging") Signed-off-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/rc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index be603f35d7e4..cfde43b1df96 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2302,7 +2302,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } @@ -2376,7 +2376,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } -- cgit v1.2.3 From 59188acd0c7d0787b6938057f79a49f3ba5aa4ec Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:49 -0700 Subject: IB/hfi1: Fix the allocation of RSM table [ Upstream commit d0294344470e6b52d097aa7369173f32d11f2f52 ] The receive side mapping (RSM) on hfi1 hardware is a special matching mechanism to direct an incoming packet to a given hardware receive context. It has 4 instances of matching capabilities (RSM0 - RSM3) that share the same RSM table (RMT). The RMT has a total of 256 entries, each of which points to a receive context. Currently, three instances of RSM have been used: 1. RSM0 by QOS; 2. RSM1 by PSM FECN; 3. RSM2 by VNIC. Each RSM instance should reserve enough entries in RMT to function properly. Since both PSM and VNIC could allocate any receive context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts, PSM FECN must reserve enough RMT entries to cover the entire receive context index range (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt) instead of only the user receive contexts allocated for PSM (dd->num_user_contexts). Consequently, the sizing of dd->num_user_contexts in set_up_context_variables is incorrect. Fixes: 2280740f01ae ("IB/hfi1: Virtual Network Interface Controller (VNIC) HW support") Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J. Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/chip.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b443642eac02..0ae05e9249b3 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13219,7 +13219,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int total_contexts; int ret; unsigned ngroups; - int qos_rmt_count; + int rmt_count; int user_rmt_reduced; u32 n_usr_ctxts; u32 send_contexts = chip_send_contexts(dd); @@ -13281,10 +13281,20 @@ static int set_up_context_variables(struct hfi1_devdata *dd) n_usr_ctxts = rcv_contexts - total_contexts; } - /* each user context requires an entry in the RMT */ - qos_rmt_count = qos_rmt_entries(dd, NULL, NULL); - if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { - user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count; + /* + * The RMT entries are currently allocated as shown below: + * 1. QOS (0 to 128 entries); + * 2. FECN for PSM (num_user_contexts + num_vnic_contexts); + * 3. VNIC (num_vnic_contexts). + * It should be noted that PSM FECN oversubscribe num_vnic_contexts + * entries of RMT because both VNIC and PSM could allocate any receive + * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, + * and PSM FECN must reserve an RMT entry for each possible PSM receive + * context. + */ + rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { + user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; dd_dev_err(dd, "RMT size is reducing the number of user receive contexts from %u to %d\n", n_usr_ctxts, @@ -14272,9 +14282,11 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, u64 reg; int i, idx, regoff, regidx; u8 offset; + u32 total_cnt; /* there needs to be enough room in the map table */ - if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) { + total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; + if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) { dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); return; } @@ -14328,7 +14340,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, /* add rule 1 */ add_rsm_rule(dd, RSM_INS_FECN, &rrd); - rmt->used += dd->num_user_contexts; + rmt->used += total_cnt; } /* Initialize RSM for VNIC */ -- cgit v1.2.3 From e904a8b148953ea5c8cf6330c6c857fa2eb81ac2 Mon Sep 17 00:00:00 2001 From: Chong Qiao Date: Thu, 28 Mar 2019 07:08:01 +0800 Subject: MIPS: KGDB: fix kgdb support for SMP platforms. [ Upstream commit ab8a6d821179ab9bea1a9179f535ccba6330c1ed ] KGDB_call_nmi_hook is called by other cpu through smp call. MIPS smp call is processed in ipi irq handler and regs is saved in handle_int. So kgdb_call_nmi_hook get regs by get_irq_regs and regs will be passed to kgdb_cpu_enter. Signed-off-by: Chong Qiao Reviewed-by: Douglas Anderson Acked-by: Daniel Thompson Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: Will Deacon Cc: Christophe Leroy Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: QiaoChong Signed-off-by: Sasha Levin --- arch/mips/kernel/kgdb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 149100e1bc7c..90f37626100f 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -33,6 +33,7 @@ #include #include #include +#include static struct hard_trap_info { unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */ @@ -214,7 +215,7 @@ void kgdb_call_nmi_hook(void *ignored) old_fs = get_fs(); set_fs(get_ds()); - kgdb_nmicallback(raw_smp_processor_id(), NULL); + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); set_fs(old_fs); } -- cgit v1.2.3 From 693d72f920e70ca5496b137ef3210f0500a974d2 Mon Sep 17 00:00:00 2001 From: Annaliese McDermond Date: Sat, 30 Mar 2019 09:02:02 -0700 Subject: ASoC: tlv320aic32x4: Fix Common Pins [ Upstream commit c63adb28f6d913310430f14c69f0a2ea55eed0cc ] The common pins were mistakenly not added to the DAPM graph. Adding these pins will allow valid graphs to be created. Signed-off-by: Annaliese McDermond Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tlv320aic32x4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index f03195d2ab2e..45d9f4a09044 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -462,6 +462,8 @@ static const struct snd_soc_dapm_widget aic32x4_dapm_widgets[] = { SND_SOC_DAPM_INPUT("IN2_R"), SND_SOC_DAPM_INPUT("IN3_L"), SND_SOC_DAPM_INPUT("IN3_R"), + SND_SOC_DAPM_INPUT("CM_L"), + SND_SOC_DAPM_INPUT("CM_R"), }; static const struct snd_soc_dapm_route aic32x4_dapm_routes[] = { -- cgit v1.2.3 From 83f3ed3b4bdebdf9eb83de5e58f6eef49effa4c3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Mar 2019 17:31:30 +0300 Subject: drm/mediatek: Fix an error code in mtk_hdmi_dt_parse_pdata() [ Upstream commit 2d85978341e6a32e7443d9f28639da254d53f400 ] We don't want to overwrite "ret", it already holds the correct error code. The "regmap" variable might be a valid pointer as this point. Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support") Signed-off-by: Dan Carpenter Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 862f3ec22131..c910850d2077 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1479,7 +1479,6 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, if (IS_ERR(regmap)) ret = PTR_ERR(regmap); if (ret) { - ret = PTR_ERR(regmap); dev_err(dev, "Failed to get system configuration registers: %d\n", ret); -- cgit v1.2.3 From 9bd3e66587f5de2fa9136430115b8e7c99928002 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 6 Mar 2019 11:50:48 -0800 Subject: perf/x86/intel: Fix handling of wakeup_events for multi-entry PEBS [ Upstream commit 583feb08e7f7ac9d533b446882eb3a54737a6dbb ] When an event is programmed with attr.wakeup_events=N (N>0), it means the caller is interested in getting a user level notification after N samples have been recorded in the kernel sampling buffer. With precise events on Intel processors, the kernel uses PEBS. The kernel tries minimize sampling overhead by verifying if the event configuration is compatible with multi-entry PEBS mode. If so, the kernel is notified only when the buffer has reached its threshold. Other PEBS operates in single-entry mode, the kenrel is notified for each PEBS sample. The problem is that the current implementation look at frequency mode and event sample_type but ignores the wakeup_events field. Thus, it may not be possible to receive a notification after each precise event. This patch fixes this problem by disabling multi-entry PEBS if wakeup_events is non-zero. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link: https://lkml.kernel.org/r/20190306195048.189514-1-eranian@google.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 470d7daa915d..77353555d491 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3184,7 +3184,7 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { - if (!event->attr.freq) { + if (!(event->attr.freq || event->attr.wakeup_events)) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event))) -- cgit v1.2.3 From bae9b6b98342914af31a7b296066eb5ba587abf6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Mar 2019 13:38:49 +0100 Subject: perf/x86/intel: Initialize TFA MSR [ Upstream commit d7262457e35dbe239659e62654e56f8ddb814bed ] Stephane reported that the TFA MSR is not initialized by the kernel, but the TFA bit could set by firmware or as a leftover from a kexec, which makes the state inconsistent. Reported-by: Stephane Eranian Tested-by: Nelson DSouza Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: tonyj@suse.com Link: https://lkml.kernel.org/r/20190321123849.GN6521@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/events/intel/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 77353555d491..71fb8b7b2954 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3563,6 +3563,12 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->lbr_sel = NULL; + if (x86_pmu.flags & PMU_FL_TFA) { + WARN_ON_ONCE(cpuc->tfa_shadow); + cpuc->tfa_shadow = ~0ULL; + intel_set_tfa(cpuc, false); + } + if (x86_pmu.version > 1) flip_smm_bit(&x86_pmu.attr_freeze_on_smi); -- cgit v1.2.3 From 2e94d4e8f2b9ea26e933b8cd1fef5b4b7a3f23db Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 29 Mar 2019 22:46:49 +0100 Subject: linux/kernel.h: Use parentheses around argument in u64_to_user_ptr() [ Upstream commit a0fe2c6479aab5723239b315ef1b552673f434a3 ] Use parentheses around uses of the argument in u64_to_user_ptr() to ensure that the cast doesn't apply to part of the argument. There are existing uses of the macro of the form u64_to_user_ptr(A + B) which expands to (void __user *)(uintptr_t)A + B (the cast applies to the first operand of the addition, the addition is a pointer addition). This happens to still work as intended, the semantic difference doesn't cause a difference in behavior. But I want to use u64_to_user_ptr() with a ternary operator in the argument, like so: u64_to_user_ptr(A ? B : C) This currently doesn't work as intended. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Mukesh Ojha Cc: Andrei Vagin Cc: Andrew Morton Cc: Dan Carpenter Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jani Nikula Cc: Kees Cook Cc: Masahiro Yamada Cc: NeilBrown Cc: Peter Zijlstra Cc: Qiaowei Ren Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190329214652.258477-1-jannh@google.com Signed-off-by: Sasha Levin --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 8f0e68e250a7..fd827b240059 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -73,8 +73,8 @@ #define u64_to_user_ptr(x) ( \ { \ - typecheck(u64, x); \ - (void __user *)(uintptr_t)x; \ + typecheck(u64, (x)); \ + (void __user *)(uintptr_t)(x); \ } \ ) -- cgit v1.2.3 From 9680a806201d763b9f662b21f28570df7df56d88 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 4 Apr 2019 10:31:14 +0800 Subject: iov_iter: Fix build error without CONFIG_CRYPTO [ Upstream commit 27fad74a5a77fe2e1f876db7bf27efcf2ec304b2 ] If CONFIG_CRYPTO is not set or set to m, gcc building warn this: lib/iov_iter.o: In function `hash_and_copy_to_iter': iov_iter.c:(.text+0x9129): undefined reference to `crypto_stats_get' iov_iter.c:(.text+0x9152): undefined reference to `crypto_stats_ahash_update' Reported-by: Hulk Robot Fixes: d05f443554b3 ("iov_iter: introduce hash_and_copy_to_iter helper") Suggested-by: Al Viro Signed-off-by: YueHaibing Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- lib/iov_iter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index be4bd627caf0..a0d1cd88f903 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1515,6 +1515,7 @@ EXPORT_SYMBOL(csum_and_copy_to_iter); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i) { +#ifdef CONFIG_CRYPTO struct ahash_request *hash = hashp; struct scatterlist sg; size_t copied; @@ -1524,6 +1525,9 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, ahash_request_set_crypt(hash, &sg, NULL, copied); crypto_ahash_update(hash); return copied; +#else + return 0; +#endif } EXPORT_SYMBOL(hash_and_copy_to_iter); -- cgit v1.2.3 From dfa9efe42df2dabeb34e755338f0af9efcc0ed30 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 3 Apr 2019 20:22:42 -0700 Subject: xtensa: fix initialization of pt_regs::syscall in start_thread [ Upstream commit 2663147dc7465cb29040a05cc4286fdd839978b5 ] New pt_regs should indicate that there's no syscall, not that there's syscall #0. While at it wrap macro body in do/while and parenthesize macro arguments. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/include/asm/processor.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index f7dd895b2353..0c14018d1c26 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -187,15 +187,18 @@ struct thread_struct { /* Clearing a0 terminates the backtrace. */ #define start_thread(regs, new_pc, new_sp) \ - memset(regs, 0, sizeof(*regs)); \ - regs->pc = new_pc; \ - regs->ps = USER_PS_VALUE; \ - regs->areg[1] = new_sp; \ - regs->areg[0] = 0; \ - regs->wmask = 1; \ - regs->depc = 0; \ - regs->windowbase = 0; \ - regs->windowstart = 1; + do { \ + memset((regs), 0, sizeof(*(regs))); \ + (regs)->pc = (new_pc); \ + (regs)->ps = USER_PS_VALUE; \ + (regs)->areg[1] = (new_sp); \ + (regs)->areg[0] = 0; \ + (regs)->wmask = 1; \ + (regs)->depc = 0; \ + (regs)->windowbase = 0; \ + (regs)->windowstart = 1; \ + (regs)->syscall = NO_SYSCALL; \ + } while (0) /* Forward declaration */ struct task_struct; -- cgit v1.2.3 From 2f0ec100032e9f23b7561ec6f3b5c8a9806a7100 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Wed, 3 Apr 2019 21:40:45 +0800 Subject: ASoC: rockchip: pdm: fix regmap_ops hang issue [ Upstream commit c85064435fe7a216ec0f0238ef2b8f7cd850a450 ] This is because set_fmt ops maybe called when PD is off, and in such case, regmap_ops will lead system hang. enale PD before doing regmap_ops. Signed-off-by: Sugar Zhang Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/rockchip/rockchip_pdm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c index 400e29edb1c9..8a2e3bbce3a1 100644 --- a/sound/soc/rockchip/rockchip_pdm.c +++ b/sound/soc/rockchip/rockchip_pdm.c @@ -208,7 +208,9 @@ static int rockchip_pdm_set_fmt(struct snd_soc_dai *cpu_dai, return -EINVAL; } + pm_runtime_get_sync(cpu_dai->dev); regmap_update_bits(pdm->regmap, PDM_CLK_CTRL, mask, val); + pm_runtime_put(cpu_dai->dev); return 0; } -- cgit v1.2.3 From 255063992678e5249faf2db344aff965ccf8d6b7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Apr 2019 12:30:32 -0500 Subject: drm/amdkfd: Add picasso pci id [ Upstream commit e7ad88553aa1d48e950ca9a4934d246c1bee4be4 ] Picasso is a new raven variant. Reviewed-by: Kent Russell Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8be9677c0c07..cf9a49f49d3a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -320,6 +320,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9876, &carrizo_device_info }, /* Carrizo */ { 0x9877, &carrizo_device_info }, /* Carrizo */ { 0x15DD, &raven_device_info }, /* Raven */ + { 0x15D8, &raven_device_info }, /* Raven */ #endif { 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */ -- cgit v1.2.3 From f528dbeec01762b0381703be2ce5f31c58be263f Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Mon, 1 Apr 2019 16:09:34 -0400 Subject: drm/amdgpu: Adjust IB test timeout for XGMI configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d4162c61e253177936fcfe6c29f7b224d9a1efb8 ] On XGMI configuration the ib test may take longer to finish Signed-off-by: shaoyunl Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index c48207b377bc..b82c5fca217b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -35,6 +35,7 @@ #include "amdgpu_trace.h" #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000) +#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000) /* * IB @@ -344,6 +345,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) * cost waiting for it coming back under RUNTIME only */ tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; + } else if (adev->gmc.xgmi.hive_id) { + tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT; } for (i = 0; i < adev->num_rings; ++i) { -- cgit v1.2.3 From 4b5f2b0ce17cf960925f94fcfe9cd1cb452ee580 Mon Sep 17 00:00:00 2001 From: wentalou Date: Tue, 2 Apr 2019 17:13:05 +0800 Subject: drm/amdgpu: amdgpu_device_recover_vram always failed if only one node in shadow_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1712fb1a2f6829150032ac76eb0e39b82a549cfb ] amdgpu_bo_restore_shadow would assign zero to r if succeeded. r would remain zero if there is only one node in shadow_list. current code would always return failure when r <= 0. restart the timeout for each wait was a rather problematic bug as well. The value of tmo SHOULD be changed, otherwise we wait tmo jiffies on each loop. Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7ff3a28fc903..d55dd570a702 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3158,11 +3158,16 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) break; if (fence) { - r = dma_fence_wait_timeout(fence, false, tmo); + tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); fence = next; - if (r <= 0) + if (tmo == 0) { + r = -ETIMEDOUT; break; + } else if (tmo < 0) { + r = tmo; + break; + } } else { fence = next; } @@ -3173,8 +3178,8 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); - if (r <= 0 || tmo <= 0) { - DRM_ERROR("recover vram bo from shadow failed\n"); + if (r < 0 || tmo <= 0) { + DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); return -EIO; } -- cgit v1.2.3 From c577757d294e4878fa462accdd86e461f72c5190 Mon Sep 17 00:00:00 2001 From: tiancyin Date: Mon, 1 Apr 2019 10:15:31 +0800 Subject: drm/amd/display: fix cursor black issue [ Upstream commit c1cefe115d1cdc460014483319d440b2f0d07c68 ] [Why] the member sdr_white_level of struct dc_cursor_attributes was not initialized, then the random value result that dcn10_set_cursor_sdr_white_level() set error hw_scale value 0x20D9(normal value is 0x3c00), this cause the black cursor issue. [how] just initilize the obj of struct dc_cursor_attributes to zero to avoid the random value. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Tianci Yin Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 83c8a0407537..84ee77786944 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4455,6 +4455,7 @@ static void handle_cursor_update(struct drm_plane *plane, amdgpu_crtc->cursor_width = plane->state->crtc_w; amdgpu_crtc->cursor_height = plane->state->crtc_h; + memset(&attributes, 0, sizeof(attributes)); attributes.address.high_part = upper_32_bits(address); attributes.address.low_part = lower_32_bits(address); attributes.width = plane->state->crtc_w; -- cgit v1.2.3 From fa42760cf27664c9949098e6346a80ef4987bc66 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 4 Apr 2019 17:27:20 +0100 Subject: ASoC: cs35l35: Disable regulators on driver removal [ Upstream commit 47c4cc08cb5b34e93ab337b924c5ede77ca3c936 ] The chips main power supplies VA and VP are enabled during probe but then never disabled, this will cause warnings from the regulator framework on driver removal. Fix this by adding a remove callback and disabling the supplies, whilst doing so follow best practice and put the chip back into reset as well. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs35l35.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/cs35l35.c b/sound/soc/codecs/cs35l35.c index 9f4a59871cee..c71696146c5e 100644 --- a/sound/soc/codecs/cs35l35.c +++ b/sound/soc/codecs/cs35l35.c @@ -1635,6 +1635,16 @@ err: return ret; } +static int cs35l35_i2c_remove(struct i2c_client *i2c_client) +{ + struct cs35l35_private *cs35l35 = i2c_get_clientdata(i2c_client); + + regulator_bulk_disable(cs35l35->num_supplies, cs35l35->supplies); + gpiod_set_value_cansleep(cs35l35->reset_gpio, 0); + + return 0; +} + static const struct of_device_id cs35l35_of_match[] = { {.compatible = "cirrus,cs35l35"}, {}, @@ -1655,6 +1665,7 @@ static struct i2c_driver cs35l35_i2c_driver = { }, .id_table = cs35l35_id, .probe = cs35l35_i2c_probe, + .remove = cs35l35_i2c_remove, }; module_i2c_driver(cs35l35_i2c_driver); -- cgit v1.2.3 From ad74ab443e3031cd57a3d9771184803d5969f2be Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 4 Apr 2019 12:17:35 -0500 Subject: objtool: Add rewind_stack_do_exit() to the noreturn list [ Upstream commit 4fa5ecda2bf96be7464eb406df8aba9d89260227 ] This fixes the following warning seen on GCC 7.3: arch/x86/kernel/dumpstack.o: warning: objtool: oops_end() falls through to next function show_regs() Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/3418ebf5a5a9f6ed7e80954c741c0b904b67b5dc.1554398240.git.jpoimboe@redhat.com Signed-off-by: Sasha Levin --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5dde107083c6..479196aeb409 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -165,6 +165,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "fortify_panic", "usercopy_abort", "machine_real_restart", + "rewind_stack_do_exit", }; if (func->bind == STB_WEAK) -- cgit v1.2.3 From 09c5ad16c22c55616c3ab960b260603a8f5e2a5e Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 6 Apr 2019 18:59:01 -0400 Subject: slab: fix a crash by reading /proc/slab_allocators [ Upstream commit fcf88917dd435c6a4cb2830cb086ee58605a1d85 ] The commit 510ded33e075 ("slab: implement slab_root_caches list") changes the name of the list node within "struct kmem_cache" from "list" to "root_caches_node", but leaks_show() still use the "list" which causes a crash when reading /proc/slab_allocators. You need to have CONFIG_SLAB=y and CONFIG_MEMCG=y to see the problem, because without MEMCG all slab caches are root caches, and the "list" node happens to be the right one. Fixes: 510ded33e075 ("slab: implement slab_root_caches list") Signed-off-by: Qian Cai Reviewed-by: Tobin C. Harding Cc: Tejun Heo Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/slab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 2f2aa8eaf7d9..188c4b65255d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4297,7 +4297,8 @@ static void show_symbol(struct seq_file *m, unsigned long address) static int leaks_show(struct seq_file *m, void *p) { - struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); + struct kmem_cache *cachep = list_entry(p, struct kmem_cache, + root_caches_node); struct page *page; struct kmem_cache_node *n; const char *name; -- cgit v1.2.3 From e0696fe3c10fa1193d03449c4163326df2032d59 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 6 Apr 2019 01:30:48 +0200 Subject: drm/sun4i: tcon top: Fix NULL/invalid pointer dereference in sun8i_tcon_top_un/bind [ Upstream commit 1a07a94b47b1f528f39c3e6187b5eaf02efe44ea ] There are two problems here: 1. Not all clk_data->hws[] need to be initialized, depending on various configured quirks. This leads to NULL ptr deref in clk_hw_unregister_gate() in sun8i_tcon_top_unbind() 2. If there is error when registering the clk_data->hws[], err_unregister_gates error path will try to unregister IS_ERR()=true (invalid) pointer. For problem (1) I have this stack trace: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: clk_hw_unregister+0x8/0x18 clk_hw_unregister_gate+0x14/0x28 sun8i_tcon_top_unbind+0x2c/0x60 component_unbind.isra.4+0x2c/0x50 component_bind_all+0x1d4/0x230 sun4i_drv_bind+0xc4/0x1a0 try_to_bring_up_master+0x164/0x1c0 __component_add+0xa0/0x168 component_add+0x10/0x18 sun8i_dw_hdmi_probe+0x18/0x20 platform_drv_probe+0x3c/0x70 really_probe+0xcc/0x278 driver_probe_device+0x34/0xa8 Problem (2) was identified by head scratching. Signed-off-by: Ondrej Jirman Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190405233048.3823-1-megous@megous.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun8i_tcon_top.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c index fc36e0c10a37..b1e7c76e9c17 100644 --- a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c +++ b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c @@ -227,7 +227,7 @@ static int sun8i_tcon_top_bind(struct device *dev, struct device *master, err_unregister_gates: for (i = 0; i < CLK_NUM; i++) - if (clk_data->hws[i]) + if (!IS_ERR_OR_NULL(clk_data->hws[i])) clk_hw_unregister_gate(clk_data->hws[i]); clk_disable_unprepare(tcon_top->bus); err_assert_reset: @@ -245,7 +245,8 @@ static void sun8i_tcon_top_unbind(struct device *dev, struct device *master, of_clk_del_provider(dev->of_node); for (i = 0; i < CLK_NUM; i++) - clk_hw_unregister_gate(clk_data->hws[i]); + if (clk_data->hws[i]) + clk_hw_unregister_gate(clk_data->hws[i]); clk_disable_unprepare(tcon_top->bus); reset_control_assert(tcon_top->rst); -- cgit v1.2.3 From 3fa40c30fe4c691915302b28365a54909a67af3b Mon Sep 17 00:00:00 2001 From: Longpeng Date: Sat, 9 Mar 2019 15:17:40 +0800 Subject: virtio_pci: fix a NULL pointer reference in vp_del_vqs [ Upstream commit 6a8aae68c87349dbbcd46eac380bc43cdb98a13b ] If the msix_affinity_masks is alloced failed, then we'll try to free some resources in vp_free_vectors() that may access it directly. We met the following stack in our production: [ 29.296767] BUG: unable to handle kernel NULL pointer dereference at (null) [ 29.311151] IP: [] vp_free_vectors+0x6a/0x150 [virtio_pci] [ 29.324787] PGD 0 [ 29.333224] Oops: 0000 [#1] SMP [...] [ 29.425175] RIP: 0010:[] [] vp_free_vectors+0x6a/0x150 [virtio_pci] [ 29.441405] RSP: 0018:ffff9a55c2dcfa10 EFLAGS: 00010206 [ 29.453491] RAX: 0000000000000000 RBX: ffff9a55c322c400 RCX: 0000000000000000 [ 29.467488] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9a55c322c400 [ 29.481461] RBP: ffff9a55c2dcfa20 R08: 0000000000000000 R09: ffffc1b6806ff020 [ 29.495427] R10: 0000000000000e95 R11: 0000000000aaaaaa R12: 0000000000000000 [ 29.509414] R13: 0000000000010000 R14: ffff9a55bd2d9e98 R15: ffff9a55c322c400 [ 29.523407] FS: 00007fdcba69f8c0(0000) GS:ffff9a55c2840000(0000) knlGS:0000000000000000 [ 29.538472] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 29.551621] CR2: 0000000000000000 CR3: 000000003ce52000 CR4: 00000000003607a0 [ 29.565886] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 29.580055] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 29.594122] Call Trace: [ 29.603446] [] vp_request_msix_vectors+0xe2/0x260 [virtio_pci] [ 29.618017] [] vp_try_to_find_vqs+0x95/0x3b0 [virtio_pci] [ 29.632152] [] vp_find_vqs+0x37/0xb0 [virtio_pci] [ 29.645582] [] init_vq+0x153/0x260 [virtio_blk] [ 29.658831] [] virtblk_probe+0xe8/0x87f [virtio_blk] [...] Cc: Gonglei Signed-off-by: Longpeng Signed-off-by: Michael S. Tsirkin Reviewed-by: Gonglei Signed-off-by: Sasha Levin --- drivers/virtio/virtio_pci_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index d0584c040c60..7a0398bb84f7 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -255,9 +255,11 @@ void vp_del_vqs(struct virtio_device *vdev) for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); - for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + if (vp_dev->msix_affinity_masks) { + for (i = 0; i < vp_dev->msix_vectors; i++) + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + } if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ -- cgit v1.2.3 From f4d87f9b027a6d962e10b5ff91d8cfd83f834be5 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 3 Apr 2019 16:52:54 +0300 Subject: RDMA/vmw_pvrdma: Fix memory leak on pvrdma_pci_remove [ Upstream commit ea7a5c706fa49273cf6d1d9def053ecb50db2076 ] Make sure to free the DSR on pvrdma_pci_remove() to avoid the memory leak. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Signed-off-by: Kamal Heib Acked-by: Adit Ranadive Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 39c37b6fd715..76b8dda40edd 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1125,6 +1125,8 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); pvrdma_page_dir_cleanup(dev, &dev->async_pdir); pvrdma_free_slots(dev); + dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, + dev->dsrbase); iounmap(dev->regs); kfree(dev->sgid_tbl); -- cgit v1.2.3 From 6fb3aa5d730782a0d8195cb7fb365883c0175e6f Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sun, 7 Apr 2019 13:23:38 +0800 Subject: RDMA/hns: Fix bug that caused srq creation to fail [ Upstream commit 4772e03d239484f3461e33c79d721c8ea03f7416 ] Due to the incorrect use of the seg and obj information, the position of the mtt is calculated incorrectly, and the free space of the page is not enough to store the entire mtt, resulting in access to the next page. This patch fixes this problem. Unable to handle kernel paging request at virtual address ffff00006e3cd000 ... Call trace: hns_roce_write_mtt+0x154/0x2f0 [hns_roce] hns_roce_buf_write_mtt+0xa8/0xd8 [hns_roce] hns_roce_create_srq+0x74c/0x808 [hns_roce] ib_create_srq+0x28/0xc8 Fixes: 0203b14c4f32 ("RDMA/hns: Unify the calculation for hem index in hip08") Signed-off-by: chenglang Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hem.c | 6 ++++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 4cdbcafa5915..cae23364cfea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -763,6 +763,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk; dma_offset = offset = idx_offset * table->obj_size; } else { + u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */ + hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); /* mtt mhop */ i = mhop.l0_idx; @@ -774,8 +776,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, hem_idx = i; hem = table->hem[hem_idx]; - dma_offset = offset = (obj & (table->num_obj - 1)) * - table->obj_size % mhop.bt_chunk_size; + dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size % + mhop.bt_chunk_size; if (mhop.hop_num == 2) dma_offset = offset = 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index ee5991bd4171..dd4bb0ec6113 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -746,7 +746,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table; dma_addr_t dma_handle; __le64 *mtts; - u32 s = start_index * sizeof(u64); u32 bt_page_size; u32 i; @@ -780,7 +779,8 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, return -EINVAL; mtts = hns_roce_table_find(hr_dev, table, - mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, + mtt->first_seg + + start_index / HNS_ROCE_MTT_ENTRY_PER_SEG, &dma_handle); if (!mtts) return -ENOMEM; -- cgit v1.2.3 From 353392e5b9a56f59fd9b12e3d51446b7ef7f5119 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 22 Oct 2018 16:43:57 -0700 Subject: KEYS: trusted: fix -Wvarags warning [ Upstream commit be24b37e22c20cbaa891971616784dd0f35211e8 ] Fixes the warning reported by Clang: security/keys/trusted.c:146:17: warning: passing an object that undergoes default argument promotion to 'va_start' has undefined behavior [-Wvarargs] va_start(argp, h3); ^ security/keys/trusted.c:126:37: note: parameter of type 'unsigned char' is declared here unsigned char *h2, unsigned char h3, ...) ^ Specifically, it seems that both the C90 (4.8.1.1) and C11 (7.16.1.4) standards explicitly call this out as undefined behavior: The parameter parmN is the identifier of the rightmost parameter in the variable parameter list in the function definition (the one just before the ...). If the parameter parmN is declared with ... or with a type that is not compatible with the type that results after application of the default argument promotions, the behavior is undefined. Link: https://github.com/ClangBuiltLinux/linux/issues/41 Link: https://www.eskimo.com/~scs/cclass/int/sx11c.html Suggested-by: David Laight Suggested-by: Denis Kenzior Suggested-by: James Bottomley Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris Signed-off-by: Sasha Levin --- include/keys/trusted.h | 2 +- security/keys/trusted.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/keys/trusted.h b/include/keys/trusted.h index adbcb6817826..0071298b9b28 100644 --- a/include/keys/trusted.h +++ b/include/keys/trusted.h @@ -38,7 +38,7 @@ enum { int TSS_authhmac(unsigned char *digest, const unsigned char *key, unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned char h3, ...); + unsigned char *h2, unsigned int h3, ...); int TSS_checkhmac1(unsigned char *buffer, const uint32_t command, const unsigned char *ononce, diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 4d98f4f87236..94d2b28c7c22 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -123,7 +123,7 @@ out: */ int TSS_authhmac(unsigned char *digest, const unsigned char *key, unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned char h3, ...) + unsigned char *h2, unsigned int h3, ...) { unsigned char paramdigest[SHA1_DIGEST_SIZE]; struct sdesc *sdesc; @@ -139,7 +139,7 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key, return PTR_ERR(sdesc); } - c = h3; + c = !!h3; ret = crypto_shash_init(&sdesc->shash); if (ret < 0) goto out; -- cgit v1.2.3 From 3051b6a1a14aebd6f3172caf1f4286de6ced2ac3 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 5 Apr 2019 20:39:13 +0530 Subject: scsi: csiostor: fix missing data copy in csio_scsi_err_handler() [ Upstream commit 5c2442fd78998af60e13aba506d103f7f43f8701 ] If scsi cmd sglist is not suitable for DDP then csiostor driver uses preallocated buffers for DDP, because of this data copy is required from DDP buffer to scsi cmd sglist before calling ->scsi_done(). Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/csiostor/csio_scsi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index bc5547a62c00..c54c6cd504c4 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1713,8 +1713,11 @@ csio_scsi_err_handler(struct csio_hw *hw, struct csio_ioreq *req) } out: - if (req->nsge > 0) + if (req->nsge > 0) { scsi_dma_unmap(cmnd); + if (req->dcopy && (host_status == DID_OK)) + host_status = csio_scsi_copy_to_sgl(hw, req); + } cmnd->result = (((host_status) << 16) | scsi_status); cmnd->scsi_done(cmnd); -- cgit v1.2.3 From a873474c769aab1f8e43544a9a26d434dba7d0d4 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 4 Apr 2019 00:04:09 +0800 Subject: drm/mediatek: fix possible object reference leak [ Upstream commit 2ae2c3316fb77dcf64275d011596b60104c45426 ] The call to of_parse_phandle returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: drivers/gpu/drm/mediatek/mtk_hdmi.c:1521:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1509, but without a corresponding object release within this function. drivers/gpu/drm/mediatek/mtk_hdmi.c:1524:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1509, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: CK Hu Cc: Philipp Zabel Cc: David Airlie Cc: Daniel Vetter Cc: Matthias Brugger Cc: dri-devel@lists.freedesktop.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index c910850d2077..a687fe3e1d6c 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1514,6 +1514,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, of_node_put(remote); hdmi->ddc_adpt = of_find_i2c_adapter_by_node(i2c_np); + of_node_put(i2c_np); if (!hdmi->ddc_adpt) { dev_err(dev, "Failed to get ddc i2c adapter by node\n"); return -EINVAL; -- cgit v1.2.3 From 4b112e5e6af9cf6d91e19d7329f74928f03bc0a4 Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:04 +0800 Subject: drm/mediatek: fix the rate and divder of hdmi phy for MT2701 [ Upstream commit 0c24613cda163dedfa229afc8eff6072e57fac8d ] Due to a clerical error,there is one zero less for 12800000. Fix it for 128000000 Fixes: 0fc721b2968e ("drm/mediatek: add hdmi driver for MT2701 and MT7623") Signed-off-by: Wangyan Wang Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index fcc42dc6ea7f..0746fc887706 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -116,8 +116,8 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, if (rate <= 64000000) pos_div = 3; - else if (rate <= 12800000) - pos_div = 1; + else if (rate <= 128000000) + pos_div = 2; else pos_div = 1; -- cgit v1.2.3 From 273ed6c20cb537697f39995b5105c802d7034ac2 Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:06 +0800 Subject: drm/mediatek: make implementation of recalc_rate() for MT2701 hdmi phy [ Upstream commit 321b628e6f5a3af999f75eadd373adbcb8b4cb1f ] Recalculate the rate of this clock, by querying hardware to make implementation of recalc_rate() to match the definition. Signed-off-by: Wangyan Wang Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 8 ------ drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 2 -- drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 38 +++++++++++++++++++++++--- drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 8 ++++++ 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index 4ef9c57ffd44..efc400ebbb90 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -29,14 +29,6 @@ long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, return rate; } -unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); - - return hdmi_phy->pll_rate; -} - void mtk_hdmi_phy_clear_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 bits) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index f39b1fc66612..71430691ffe4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -50,8 +50,6 @@ void mtk_hdmi_phy_mask(struct mtk_hdmi_phy *hdmi_phy, u32 offset, struct mtk_hdmi_phy *to_mtk_hdmi_phy(struct clk_hw *hw); long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate); -unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate); extern struct platform_driver mtk_hdmi_phy_driver; extern struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf; diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index 0746fc887706..feb6a7ed63d1 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -79,7 +79,6 @@ static int mtk_hdmi_pll_prepare(struct clk_hw *hw) mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); usleep_range(80, 100); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); - mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); @@ -94,7 +93,6 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); usleep_range(80, 100); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); @@ -123,6 +121,7 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON6, RG_HTPLL_PREDIV_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON6, RG_HTPLL_POSDIV_MASK); + mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (0x1 << RG_HTPLL_IC), RG_HTPLL_IC_MASK); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (0x1 << RG_HTPLL_IR), @@ -154,6 +153,39 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } +static unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + unsigned long out_rate, val; + + val = (readl(hdmi_phy->regs + HDMI_CON6) + & RG_HTPLL_PREDIV_MASK) >> RG_HTPLL_PREDIV; + switch (val) { + case 0x00: + out_rate = parent_rate; + break; + case 0x01: + out_rate = parent_rate / 2; + break; + default: + out_rate = parent_rate / 4; + break; + } + + val = (readl(hdmi_phy->regs + HDMI_CON6) + & RG_HTPLL_FBKDIV_MASK) >> RG_HTPLL_FBKDIV; + out_rate *= (val + 1) * 2; + val = (readl(hdmi_phy->regs + HDMI_CON2) + & RG_HDMITX_TX_POSDIV_MASK); + out_rate >>= (val >> RG_HDMITX_TX_POSDIV); + + if (readl(hdmi_phy->regs + HDMI_CON2) & RG_HDMITX_EN_TX_POSDIV) + out_rate /= 5; + + return out_rate; +} + static const struct clk_ops mtk_hdmi_phy_pll_ops = { .prepare = mtk_hdmi_pll_prepare, .unprepare = mtk_hdmi_pll_unprepare, @@ -174,7 +206,6 @@ static void mtk_hdmi_phy_enable_tmds(struct mtk_hdmi_phy *hdmi_phy) mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); usleep_range(80, 100); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); - mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); @@ -186,7 +217,6 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); usleep_range(80, 100); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index ed5916b27658..83662a208491 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -285,6 +285,14 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } +static unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + + return hdmi_phy->pll_rate; +} + static const struct clk_ops mtk_hdmi_phy_pll_ops = { .prepare = mtk_hdmi_pll_prepare, .unprepare = mtk_hdmi_pll_unprepare, -- cgit v1.2.3 From 5b82d95ac6fe79da1ec70e68f21973ef8444ef8f Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:03 +0800 Subject: drm/mediatek: remove flag CLK_SET_RATE_PARENT for MT2701 hdmi phy [ Upstream commit 827abdd024207146822f66ba3ba74867135866b9 ] This is the first step to make MT2701 hdmi stable. The parent rate of hdmi phy had set by DPI driver. We should not set or change the parent rate of MT2701 hdmi phy, as a result we should remove the flags of "CLK_SET_RATE_PARENT" from the clock of MT2701 hdmi phy. Signed-off-by: Wangyan Wang Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 13 +++++-------- drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 1 + drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 1 + drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index efc400ebbb90..08b029772c5a 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -102,13 +102,11 @@ mtk_hdmi_phy_dev_get_ops(const struct mtk_hdmi_phy *hdmi_phy) return NULL; } -static void mtk_hdmi_phy_clk_get_ops(struct mtk_hdmi_phy *hdmi_phy, - const struct clk_ops **ops) +static void mtk_hdmi_phy_clk_get_data(struct mtk_hdmi_phy *hdmi_phy, + struct clk_init_data *clk_init) { - if (hdmi_phy && hdmi_phy->conf && hdmi_phy->conf->hdmi_phy_clk_ops) - *ops = hdmi_phy->conf->hdmi_phy_clk_ops; - else - dev_err(hdmi_phy->dev, "Failed to get clk ops of phy\n"); + clk_init->flags = hdmi_phy->conf->flags; + clk_init->ops = hdmi_phy->conf->hdmi_phy_clk_ops; } static int mtk_hdmi_phy_probe(struct platform_device *pdev) @@ -121,7 +119,6 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev) struct clk_init_data clk_init = { .num_parents = 1, .parent_names = (const char * const *)&ref_clk_name, - .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, }; struct phy *phy; @@ -159,7 +156,7 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev) hdmi_phy->dev = dev; hdmi_phy->conf = (struct mtk_hdmi_phy_conf *)of_device_get_match_data(dev); - mtk_hdmi_phy_clk_get_ops(hdmi_phy, &clk_init.ops); + mtk_hdmi_phy_clk_get_data(hdmi_phy, &clk_init); hdmi_phy->pll_hw.init = &clk_init; hdmi_phy->pll = devm_clk_register(dev, &hdmi_phy->pll_hw); if (IS_ERR(hdmi_phy->pll)) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index 71430691ffe4..d28b8d5ed2b4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -21,6 +21,7 @@ struct mtk_hdmi_phy; struct mtk_hdmi_phy_conf { bool tz_disabled; + unsigned long flags; const struct clk_ops *hdmi_phy_clk_ops; void (*hdmi_phy_enable_tmds)(struct mtk_hdmi_phy *hdmi_phy); void (*hdmi_phy_disable_tmds)(struct mtk_hdmi_phy *hdmi_phy); diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index feb6a7ed63d1..31f3175f032b 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -232,6 +232,7 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) struct mtk_hdmi_phy_conf mtk_hdmi_phy_2701_conf = { .tz_disabled = true, + .flags = CLK_SET_RATE_GATE, .hdmi_phy_clk_ops = &mtk_hdmi_phy_pll_ops, .hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds, .hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds, diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 83662a208491..37f9503d7643 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -317,6 +317,7 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) } struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf = { + .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, .hdmi_phy_clk_ops = &mtk_hdmi_phy_pll_ops, .hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds, .hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds, -- cgit v1.2.3 From 718254750661b06cff47158e6528e9361b7c874f Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:05 +0800 Subject: drm/mediatek: using new factor for tvdpll for MT2701 hdmi phy [ Upstream commit 8eeb3946feeb00486ac0909e2309da87db8988a5 ] This is the second step to make MT2701 HDMI stable. The factor depends on the divider of DPI in MT2701, therefore, we should fix this factor to the right and new one. Test: search ok Signed-off-by: Wangyan Wang Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_dpi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 62a9d47df948..9160c55769f8 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -662,13 +662,11 @@ static unsigned int mt8173_calculate_factor(int clock) static unsigned int mt2701_calculate_factor(int clock) { if (clock <= 64000) - return 16; - else if (clock <= 128000) - return 8; - else if (clock <= 256000) return 4; - else + else if (clock <= 128000) return 2; + else + return 1; } static const struct mtk_dpi_conf mt8173_conf = { -- cgit v1.2.3 From c79f5a7a3559e444cfca8557b48ea0c19ccc6eb1 Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:07 +0800 Subject: drm/mediatek: no change parent rate in round_rate() for MT2701 hdmi phy [ Upstream commit 9ee76098a1b8ae21cccac641b735ee4d3a77bccf ] This is the third step to make MT2701 HDMI stable. We should not change the rate of parent for hdmi phy when doing round_rate for this clock. The parent clock of hdmi phy must be the same as it. We change it when doing set_rate only. Signed-off-by: Wangyan Wang Signed-off-by: CK Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 14 -------------- drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 2 -- drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 6 ++++++ drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 14 ++++++++++++++ 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index 08b029772c5a..5223498502c4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -15,20 +15,6 @@ static const struct phy_ops mtk_hdmi_phy_dev_ops = { .owner = THIS_MODULE, }; -long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); - - hdmi_phy->pll_rate = rate; - if (rate <= 74250000) - *parent_rate = rate; - else - *parent_rate = rate / 2; - - return rate; -} - void mtk_hdmi_phy_clear_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 bits) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index d28b8d5ed2b4..2d8b3182470d 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -49,8 +49,6 @@ void mtk_hdmi_phy_set_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, void mtk_hdmi_phy_mask(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 val, u32 mask); struct mtk_hdmi_phy *to_mtk_hdmi_phy(struct clk_hw *hw); -long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate); extern struct platform_driver mtk_hdmi_phy_driver; extern struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf; diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index 31f3175f032b..d3cc4022e988 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -106,6 +106,12 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) usleep_range(80, 100); } +static long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + return rate; +} + static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 37f9503d7643..47f8a2951682 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -199,6 +199,20 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) usleep_range(100, 150); } +static long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + + hdmi_phy->pll_rate = rate; + if (rate <= 74250000) + *parent_rate = rate; + else + *parent_rate = rate / 2; + + return rate; +} + static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { -- cgit v1.2.3 From 60ec4c3d39be4d21f4df076521a48cfe8cbad674 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 8 Apr 2019 17:08:58 +0800 Subject: ASoC: Intel: kbl: fix wrong number of channels [ Upstream commit d6ba3f815bc5f3c4249d15c8bc5fbb012651b4a4 ] Fix wrong setting on number of channels. The context wants to set constraint to 2 channels instead of 4. Signed-off-by: Tzung-Bi Shih Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 7044d8c2b187..879f14257a3e 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -405,7 +405,7 @@ static const struct snd_pcm_hw_constraint_list constraints_dmic_channels = { }; static const unsigned int dmic_2ch[] = { - 4, + 2, }; static const struct snd_pcm_hw_constraint_list constraints_dmic_2ch = { -- cgit v1.2.3 From 7c7450aa98387a97ccd9ce5a6c02d648382010d9 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 10 Apr 2019 10:08:36 +0200 Subject: ASoC: stm32: sai: fix master clock management [ Upstream commit e37c2deafe7058cf7989c4c47bbf1140cc867d89 ] When master clock is used, master clock rate is set exclusively. Parent clocks of master clock cannot be changed after a call to clk_set_rate_exclusive(). So the parent clock of SAI kernel clock must be set before. Ensure also that exclusive rate operations are balanced in STM32 SAI driver. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/stm/stm32_sai_sub.c | 64 +++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index bc69e68191ad..1cf9df4b6f11 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -70,6 +70,7 @@ #define SAI_IEC60958_STATUS_BYTES 24 #define SAI_MCLK_NAME_LEN 32 +#define SAI_RATE_11K 11025 /** * struct stm32_sai_sub_data - private data of SAI sub block (block A or B) @@ -309,6 +310,25 @@ static int stm32_sai_set_clk_div(struct stm32_sai_sub_data *sai, return ret; } +static int stm32_sai_set_parent_clock(struct stm32_sai_sub_data *sai, + unsigned int rate) +{ + struct platform_device *pdev = sai->pdev; + struct clk *parent_clk = sai->pdata->clk_x8k; + int ret; + + if (!(rate % SAI_RATE_11K)) + parent_clk = sai->pdata->clk_x11k; + + ret = clk_set_parent(sai->sai_ck, parent_clk); + if (ret) + dev_err(&pdev->dev, " Error %d setting sai_ck parent clock. %s", + ret, ret == -EBUSY ? + "Active stream rates conflict\n" : "\n"); + + return ret; +} + static long stm32_sai_mclk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) { @@ -490,25 +510,29 @@ static int stm32_sai_set_sysclk(struct snd_soc_dai *cpu_dai, struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); int ret; - if (dir == SND_SOC_CLOCK_OUT) { + if (dir == SND_SOC_CLOCK_OUT && sai->sai_mclk) { ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV, (unsigned int)~SAI_XCR1_NODIV); if (ret < 0) return ret; - dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq); - sai->mclk_rate = freq; + /* If master clock is used, set parent clock now */ + ret = stm32_sai_set_parent_clock(sai, freq); + if (ret) + return ret; - if (sai->sai_mclk) { - ret = clk_set_rate_exclusive(sai->sai_mclk, - sai->mclk_rate); - if (ret) { - dev_err(cpu_dai->dev, - "Could not set mclk rate\n"); - return ret; - } + ret = clk_set_rate_exclusive(sai->sai_mclk, freq); + if (ret) { + dev_err(cpu_dai->dev, + ret == -EBUSY ? + "Active streams have incompatible rates" : + "Could not set mclk rate\n"); + return ret; } + + dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq); + sai->mclk_rate = freq; } return 0; @@ -916,11 +940,13 @@ static int stm32_sai_configure_clock(struct snd_soc_dai *cpu_dai, int cr1, mask, div = 0; int sai_clk_rate, mclk_ratio, den; unsigned int rate = params_rate(params); + int ret; - if (!(rate % 11025)) - clk_set_parent(sai->sai_ck, sai->pdata->clk_x11k); - else - clk_set_parent(sai->sai_ck, sai->pdata->clk_x8k); + if (!sai->sai_mclk) { + ret = stm32_sai_set_parent_clock(sai, rate); + if (ret) + return ret; + } sai_clk_rate = clk_get_rate(sai->sai_ck); if (STM_SAI_IS_F4(sai->pdata)) { @@ -1075,9 +1101,13 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV, SAI_XCR1_NODIV); - clk_disable_unprepare(sai->sai_ck); + /* Release mclk rate only if rate was actually set */ + if (sai->mclk_rate) { + clk_rate_exclusive_put(sai->sai_mclk); + sai->mclk_rate = 0; + } - clk_rate_exclusive_put(sai->sai_mclk); + clk_disable_unprepare(sai->sai_ck); spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = NULL; -- cgit v1.2.3 From 677713b1254fa4f71eec95725088e4322adfaabf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Apr 2019 12:49:55 +0200 Subject: ALSA: hda: Fix racy display power access [ Upstream commit d7a181da2dfa3190487c446042ba01e07d851c74 ] snd_hdac_display_power() doesn't handle the concurrent calls carefully enough, and it may lead to the doubly get_power or put_power calls, when a runtime PM and an async work get called in racy way. This patch addresses it by reusing the bus->lock mutex that has been used for protecting the link state change in ext bus code, so that it can protect against racy display state changes. The initialization of bus->lock was moved from snd_hdac_ext_bus_init() to snd_hdac_bus_init() as well accordingly. Testcase: igt/i915_pm_rpm/module-reload #glk-dsi Reported-by: Chris Wilson Reviewed-by: Chris Wilson Cc: Imre Deak Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/hda/ext/hdac_ext_bus.c | 1 - sound/hda/hdac_bus.c | 1 + sound/hda/hdac_component.c | 6 +++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c index 9c37d9af3023..ec7715c6b0c0 100644 --- a/sound/hda/ext/hdac_ext_bus.c +++ b/sound/hda/ext/hdac_ext_bus.c @@ -107,7 +107,6 @@ int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev, INIT_LIST_HEAD(&bus->hlink_list); bus->idx = idx++; - mutex_init(&bus->lock); bus->cmd_dma_state = true; return 0; diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c index 012305177f68..ad8eee08013f 100644 --- a/sound/hda/hdac_bus.c +++ b/sound/hda/hdac_bus.c @@ -38,6 +38,7 @@ int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev, INIT_WORK(&bus->unsol_work, snd_hdac_bus_process_unsol_events); spin_lock_init(&bus->reg_lock); mutex_init(&bus->cmd_mutex); + mutex_init(&bus->lock); bus->irq = -1; return 0; } diff --git a/sound/hda/hdac_component.c b/sound/hda/hdac_component.c index a6d37b9d6413..6b5caee61c6e 100644 --- a/sound/hda/hdac_component.c +++ b/sound/hda/hdac_component.c @@ -69,13 +69,15 @@ void snd_hdac_display_power(struct hdac_bus *bus, unsigned int idx, bool enable) dev_dbg(bus->dev, "display power %s\n", enable ? "enable" : "disable"); + + mutex_lock(&bus->lock); if (enable) set_bit(idx, &bus->display_power_status); else clear_bit(idx, &bus->display_power_status); if (!acomp || !acomp->ops) - return; + goto unlock; if (bus->display_power_status) { if (!bus->display_power_active) { @@ -92,6 +94,8 @@ void snd_hdac_display_power(struct hdac_bus *bus, unsigned int idx, bool enable) bus->display_power_active = false; } } + unlock: + mutex_unlock(&bus->lock); } EXPORT_SYMBOL_GPL(snd_hdac_display_power); -- cgit v1.2.3 From e81f9ca291ace55959c9ea4ece0bbde7ae9500b9 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 27 Mar 2019 18:36:34 +0800 Subject: virtio-blk: limit number of hw queues by nr_cpu_ids [ Upstream commit bf348f9b78d413e75bb079462751a1d86b6de36c ] When tag_set->nr_maps is 1, the block layer limits the number of hw queues by nr_cpu_ids. No matter how many hw queues are used by virtio-blk, as it has (tag_set->nr_maps == 1), it can use at most nr_cpu_ids hw queues. In addition, specifically for pci scenario, when the 'num-queues' specified by qemu is more than maxcpus, virtio-blk would not be able to allocate more than maxcpus vectors in order to have a vector for each queue. As a result, it falls back into MSI-X with one vector for config and one shared for queues. Considering above reasons, this patch limits the number of hw queues used by virtio-blk by nr_cpu_ids. Reviewed-by: Stefan Hajnoczi Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/virtio_blk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b16a887bbd02..29bede887237 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -513,6 +513,8 @@ static int init_vq(struct virtio_blk *vblk) if (err) num_vqs = 1; + num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); + vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); if (!vblk->vqs) return -ENOMEM; -- cgit v1.2.3 From e62732d12bd97ec7a122299356c23e4948db11bc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Apr 2019 06:31:21 +0800 Subject: blk-mq: introduce blk_mq_complete_request_sync() [ Upstream commit 1b8f21b74c3c9c82fce5a751d7aefb7cc0b8d33d ] In NVMe's error handler, follows the typical steps of tearing down hardware for recovering controller: 1) stop blk_mq hw queues 2) stop the real hw queues 3) cancel in-flight requests via blk_mq_tagset_busy_iter(tags, cancel_request, ...) cancel_request(): mark the request as abort blk_mq_complete_request(req); 4) destroy real hw queues However, there may be race between #3 and #4, because blk_mq_complete_request() may run q->mq_ops->complete(rq) remotelly and asynchronously, and ->complete(rq) may be run after #4. This patch introduces blk_mq_complete_request_sync() for fixing the above race. Cc: Sagi Grimberg Cc: Bart Van Assche Cc: James Smart Cc: linux-nvme@lists.infradead.org Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-mq.c | 7 +++++++ include/linux/blk-mq.h | 1 + 2 files changed, 8 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 5a2585d69c81..6930c82ab75f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -657,6 +657,13 @@ bool blk_mq_complete_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_complete_request); +void blk_mq_complete_request_sync(struct request *rq) +{ + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); + rq->q->mq_ops->complete(rq); +} +EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync); + int blk_mq_request_started(struct request *rq) { return blk_mq_rq_state(rq) != MQ_RQ_IDLE; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0e030f5f76b6..7e092bdac27f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -306,6 +306,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); bool blk_mq_complete_request(struct request *rq); +void blk_mq_complete_request_sync(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio); bool blk_mq_queue_stopped(struct request_queue *q); -- cgit v1.2.3 From 407bb38bf3f7abf8ee78c247d057b2cea6422bc1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Apr 2019 06:31:22 +0800 Subject: nvme: cancel request synchronously [ Upstream commit eb3afb75b57c28599af0dfa03a99579d410749e9 ] nvme_cancel_request() is used in error handler, and it is always reliable to cancel request synchronously, and avoids possible race in which request may be completed after real hw queue is destroyed. One issue is reported by our customer on NVMe RDMA, in which freed ib queue pair may be used in nvme_rdma_complete_rq(). Cc: Sagi Grimberg Cc: Bart Van Assche Cc: James Smart Cc: linux-nvme@lists.infradead.org Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6a9dd68c0f4f..4c4413ad3ceb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -291,7 +291,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) "Cancelling I/O %d", req->tag); nvme_req(req)->status = NVME_SC_ABORT_REQ; - blk_mq_complete_request(req); + blk_mq_complete_request_sync(req); return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); -- cgit v1.2.3 From ea359038ab73da3007f46c6845272bdb43d21482 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 8 Apr 2019 11:15:19 -0700 Subject: nvme-fc: correct csn initialization and increments on error [ Upstream commit 67f471b6ed3b09033c4ac77ea03f92afdb1989fe ] This patch fixes a long-standing bug that initialized the FC-NVME cmnd iu CSN value to 1. Early FC-NVME specs had the connection starting with CSN=1. By the time the spec reached approval, the language had changed to state a connection should start with CSN=0. This patch corrects the initialization value for FC-NVME connections. Additionally, in reviewing the transport, the CSN value is assigned to the new IU early in the start routine. It's possible that a later dma map request may fail, causing the command to never be sent to the controller. Change the location of the assignment so that it is immediately prior to calling the lldd. Add a comment block to explain the impacts if the lldd were to additionally fail sending the command. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Ewan D. Milne Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c37d5bbd72ab..8625b73d94bf 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1857,7 +1857,7 @@ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx) memset(queue, 0, sizeof(*queue)); queue->ctrl = ctrl; queue->qnum = idx; - atomic_set(&queue->csn, 1); + atomic_set(&queue->csn, 0); queue->dev = ctrl->dev; if (idx > 0) @@ -1899,7 +1899,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) */ queue->connection_id = 0; - atomic_set(&queue->csn, 1); + atomic_set(&queue->csn, 0); } static void @@ -2195,7 +2195,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, { struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; - u32 csn; int ret, opstate; /* @@ -2210,8 +2209,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, /* format the FC-NVME CMD IU and fcp_req */ cmdiu->connection_id = cpu_to_be64(queue->connection_id); - csn = atomic_inc_return(&queue->csn); - cmdiu->csn = cpu_to_be32(csn); cmdiu->data_len = cpu_to_be32(data_len); switch (io_dir) { case NVMEFC_FCP_WRITE: @@ -2269,11 +2266,24 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (!(op->flags & FCOP_FLAGS_AEN)) blk_mq_start_request(op->rq); + cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn)); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, &ctrl->rport->remoteport, queue->lldd_handle, &op->fcp_req); if (ret) { + /* + * If the lld fails to send the command is there an issue with + * the csn value? If the command that fails is the Connect, + * no - as the connection won't be live. If it is a command + * post-connect, it's possible a gap in csn may be created. + * Does this matter? As Linux initiators don't send fused + * commands, no. The gap would exist, but as there's nothing + * that depends on csn order to be delivered on the target + * side, it shouldn't hurt. It would be difficult for a + * target to even detect the csn gap as it has no idea when the + * cmd with the csn was supposed to arrive. + */ opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); -- cgit v1.2.3 From e6f2733f48cb0f11d30813b48965f21cae7cb8c6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 9 Apr 2019 10:03:59 -0600 Subject: nvmet: fix discover log page when offsets are used [ Upstream commit d808b7f759b50acf0784ce6230ffa63e12ef465d ] The nvme target hadn't been taking the Get Log Page offset parameter into consideration, and so has been returning corrupted log pages when offsets are used. Since many tools, including nvme-cli, split the log request to 4k, we've been breaking discovery log responses when more than 3 subsystems exist. Fix the returned data by internally generating the entire discovery log page and copying only the requested bytes into the user buffer. The command log page offset type has been modified to a native __le64 to make it easier to extract the value from a command. Signed-off-by: Keith Busch Tested-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/admin-cmd.c | 5 +++ drivers/nvme/target/discovery.c | 68 +++++++++++++++++++++++++++-------------- drivers/nvme/target/nvmet.h | 1 + include/linux/nvme.h | 9 ++++-- 4 files changed, 58 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 11baeb14c388..8fdae510c5ac 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -32,6 +32,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd) return len; } +u64 nvmet_get_log_page_offset(struct nvme_command *cmd) +{ + return le64_to_cpu(cmd->get_log_page.lpo); +} + static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) { nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len)); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index d2cb71a0b419..389c1a90197d 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -139,54 +139,76 @@ static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); } +static size_t discovery_log_entries(struct nvmet_req *req) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + struct nvmet_subsys_link *p; + struct nvmet_port *r; + size_t entries = 0; + + list_for_each_entry(p, &req->port->subsystems, entry) { + if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) + continue; + entries++; + } + list_for_each_entry(r, &req->port->referrals, entry) + entries++; + return entries; +} + static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) { const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry); struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmf_disc_rsp_page_hdr *hdr; + u64 offset = nvmet_get_log_page_offset(req->cmd); size_t data_len = nvmet_get_log_page_len(req->cmd); - size_t alloc_len = max(data_len, sizeof(*hdr)); - int residual_len = data_len - sizeof(*hdr); + size_t alloc_len; struct nvmet_subsys_link *p; struct nvmet_port *r; u32 numrec = 0; u16 status = 0; + void *buffer; + + /* Spec requires dword aligned offsets */ + if (offset & 0x3) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + goto out; + } /* * Make sure we're passing at least a buffer of response header size. * If host provided data len is less than the header size, only the * number of bytes requested by host will be sent to host. */ - hdr = kzalloc(alloc_len, GFP_KERNEL); - if (!hdr) { + down_read(&nvmet_config_sem); + alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req); + buffer = kzalloc(alloc_len, GFP_KERNEL); + if (!buffer) { + up_read(&nvmet_config_sem); status = NVME_SC_INTERNAL; goto out; } - down_read(&nvmet_config_sem); + hdr = buffer; list_for_each_entry(p, &req->port->subsystems, entry) { + char traddr[NVMF_TRADDR_SIZE]; + if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) continue; - if (residual_len >= entry_size) { - char traddr[NVMF_TRADDR_SIZE]; - - nvmet_set_disc_traddr(req, req->port, traddr); - nvmet_format_discovery_entry(hdr, req->port, - p->subsys->subsysnqn, traddr, - NVME_NQN_NVME, numrec); - residual_len -= entry_size; - } + + nvmet_set_disc_traddr(req, req->port, traddr); + nvmet_format_discovery_entry(hdr, req->port, + p->subsys->subsysnqn, traddr, + NVME_NQN_NVME, numrec); numrec++; } list_for_each_entry(r, &req->port->referrals, entry) { - if (residual_len >= entry_size) { - nvmet_format_discovery_entry(hdr, r, - NVME_DISC_SUBSYS_NAME, - r->disc_addr.traddr, - NVME_NQN_DISC, numrec); - residual_len -= entry_size; - } + nvmet_format_discovery_entry(hdr, r, + NVME_DISC_SUBSYS_NAME, + r->disc_addr.traddr, + NVME_NQN_DISC, numrec); numrec++; } @@ -198,8 +220,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) up_read(&nvmet_config_sem); - status = nvmet_copy_to_sgl(req, 0, hdr, data_len); - kfree(hdr); + status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len); + kfree(buffer); out: nvmet_req_complete(req, status); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 3e4719fdba85..d253c45c1aa6 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -436,6 +436,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len); u32 nvmet_get_log_page_len(struct nvme_command *cmd); +u64 nvmet_get_log_page_offset(struct nvme_command *cmd); extern struct list_head *nvmet_ports; void nvmet_port_disc_changed(struct nvmet_port *port, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bbcc83886899..7ba0368f16e6 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -975,8 +975,13 @@ struct nvme_get_log_page_command { __le16 numdl; __le16 numdu; __u16 rsvd11; - __le32 lpol; - __le32 lpou; + union { + struct { + __le32 lpol; + __le32 lpou; + }; + __le64 lpo; + }; __u32 rsvd14[2]; }; -- cgit v1.2.3 From a3aa7cab0fc24395c428275c8b994c98502f75fa Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 11 Apr 2019 10:22:43 -0700 Subject: platform/x86: pmc_atom: Drop __initconst on dmi table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b995dcca7cf12f208cfd95fd9d5768dca7cccec7 ] It's used by probe and that isn't an init function. Drop this so that we don't get a section mismatch. Reported-by: kbuild test robot Cc: David Müller Cc: Hans de Goede Cc: Andy Shevchenko Fixes: 7c2e07130090 ("clk: x86: Add system specific quirk to mark clocks as critical") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/platform/x86/pmc_atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index eaec2d306481..c7039f52ad51 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -396,7 +396,7 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc) * Some systems need one or more of their pmc_plt_clks to be * marked as critical. */ -static const struct dmi_system_id critclk_systems[] __initconst = { +static const struct dmi_system_id critclk_systems[] = { { .ident = "MPL CEC1x", .matches = { -- cgit v1.2.3 From a5f62d2c15a2014ffbac47fce46364fd01e4dbf3 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 11 Apr 2019 14:34:18 -0400 Subject: NFSv4.1 fix incorrect return value in copy_file_range [ Upstream commit 0769663b4f580566ef6cdf366f3073dbe8022c39 ] According to the NFSv4.2 spec if the input and output file is the same file, operation should fail with EINVAL. However, linux copy_file_range() system call has no such restrictions. Therefore, in such case let's return EOPNOTSUPP and allow VFS to fallback to doing do_splice_direct(). Also when copy_file_range is called on an NFSv4.0 or 4.1 mount (ie., a server that doesn't support COPY functionality), we also need to return EOPNOTSUPP and fallback to a regular copy. Fixes xfstest generic/075, generic/091, generic/112, generic/263 for all NFSv4.x versions. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs42proc.c | 3 --- fs/nfs/nfs4file.c | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index fed06fd9998d..94f98e190e63 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -329,9 +329,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, }; ssize_t err, err2; - if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) - return -EOPNOTSUPP; - src_lock = nfs_get_lock_context(nfs_file_open_context(src)); if (IS_ERR(src_lock)) return PTR_ERR(src_lock); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 45b2322e092d..00d17198ee12 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,8 +133,10 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { + if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) + return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) - return -EINVAL; + return -EOPNOTSUPP; return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } -- cgit v1.2.3 From 6610c1785f70c385d719d231fc4517db62a8ab65 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Apr 2019 15:03:00 +0200 Subject: perf/core: Fix perf_event_disable_inatomic() race [ Upstream commit 1d54ad944074010609562da5c89e4f5df2f4e5db ] Thomas-Mich Richter reported he triggered a WARN()ing from event_function_local() on his s390. The problem boils down to: CPU-A CPU-B perf_event_overflow() perf_event_disable_inatomic() @pending_disable = 1 irq_work_queue(); sched-out event_sched_out() @pending_disable = 0 sched-in perf_event_overflow() perf_event_disable_inatomic() @pending_disable = 1; irq_work_queue(); // FAILS irq_work_run() perf_pending_event() if (@pending_disable) perf_event_disable_local(); // WHOOPS The problem exists in generic, but s390 is particularly sensitive because it doesn't implement arch_irq_work_raise(), nor does it call irq_work_run() from it's PMU interrupt handler (nor would that be sufficient in this case, because s390 also generates perf_event_overflow() from pmu::stop). Add to that the fact that s390 is a virtual architecture and (virtual) CPU-A can stall long enough for the above race to happen, even if it would self-IPI. Adding a irq_work_sync() to event_sched_in() would work for all hardare PMUs that properly use irq_work_run() but fails for software PMUs. Instead encode the CPU number in @pending_disable, such that we can tell which CPU requested the disable. This then allows us to detect the above scenario and even redirect the IPI to make up for the failed queue. Reported-by: Thomas-Mich Richter Tested-by: Thomas Richter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Kees Cook Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 52 +++++++++++++++++++++++++++++++++++++-------- kernel/events/ring_buffer.c | 4 ++-- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 2e2305a81047..124e1e3d06b9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2007,8 +2007,8 @@ event_sched_out(struct perf_event *event, event->pmu->del(event, 0); event->oncpu = -1; - if (event->pending_disable) { - event->pending_disable = 0; + if (READ_ONCE(event->pending_disable) >= 0) { + WRITE_ONCE(event->pending_disable, -1); state = PERF_EVENT_STATE_OFF; } perf_event_set_state(event, state); @@ -2196,7 +2196,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable); void perf_event_disable_inatomic(struct perf_event *event) { - event->pending_disable = 1; + WRITE_ONCE(event->pending_disable, smp_processor_id()); + /* can fail, see perf_pending_event_disable() */ irq_work_queue(&event->pending); } @@ -5803,10 +5804,45 @@ void perf_event_wakeup(struct perf_event *event) } } +static void perf_pending_event_disable(struct perf_event *event) +{ + int cpu = READ_ONCE(event->pending_disable); + + if (cpu < 0) + return; + + if (cpu == smp_processor_id()) { + WRITE_ONCE(event->pending_disable, -1); + perf_event_disable_local(event); + return; + } + + /* + * CPU-A CPU-B + * + * perf_event_disable_inatomic() + * @pending_disable = CPU-A; + * irq_work_queue(); + * + * sched-out + * @pending_disable = -1; + * + * sched-in + * perf_event_disable_inatomic() + * @pending_disable = CPU-B; + * irq_work_queue(); // FAILS + * + * irq_work_run() + * perf_pending_event() + * + * But the event runs on CPU-B and wants disabling there. + */ + irq_work_queue_on(&event->pending, cpu); +} + static void perf_pending_event(struct irq_work *entry) { - struct perf_event *event = container_of(entry, - struct perf_event, pending); + struct perf_event *event = container_of(entry, struct perf_event, pending); int rctx; rctx = perf_swevent_get_recursion_context(); @@ -5815,10 +5851,7 @@ static void perf_pending_event(struct irq_work *entry) * and we won't recurse 'further'. */ - if (event->pending_disable) { - event->pending_disable = 0; - perf_event_disable_local(event); - } + perf_pending_event_disable(event); if (event->pending_wakeup) { event->pending_wakeup = 0; @@ -9998,6 +10031,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, init_waitqueue_head(&event->waitq); + event->pending_disable = -1; init_irq_work(&event->pending, perf_pending_event); mutex_init(&event->mmap_mutex); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index dbd7656b4f73..a5fc56a654fd 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -393,7 +393,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * store that will be enabled on successful return */ if (!handle->size) { /* A, matches D */ - event->pending_disable = 1; + event->pending_disable = smp_processor_id(); perf_output_wakeup(handle); local_set(&rb->aux_nest, 0); goto err_put; @@ -478,7 +478,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) if (wakeup) { if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) - handle->event->pending_disable = 1; + handle->event->pending_disable = smp_processor_id(); perf_output_wakeup(handle); } -- cgit v1.2.3 From b5dbb40581836391155197ae62de2e1ba3abb058 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Apr 2019 12:50:31 +0200 Subject: iommu/amd: Set exclusion range correctly [ Upstream commit 3c677d206210f53a4be972211066c0f1cd47fe12 ] The exlcusion range limit register needs to contain the base-address of the last page that is part of the range, as bits 0-11 of this register are treated as 0xfff by the hardware for comparisons. So correctly set the exclusion range in the hardware to the last page which is _in_ the range. Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO space') Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 84fa5b22371e..e8a2efe0afce 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -358,7 +358,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) static void iommu_set_exclusion_range(struct amd_iommu *iommu) { u64 start = iommu->exclusion_start & PAGE_MASK; - u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK; u64 entry; if (!iommu->exclusion_start) -- cgit v1.2.3 From 70a44a01f8a4853100856bdb65a733772a201bd8 Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Sun, 24 Mar 2019 07:57:04 -0700 Subject: genirq: Prevent use-after-free and work list corruption [ Upstream commit 59c39840f5abf4a71e1810a8da71aaccd6c17d26 ] When irq_set_affinity_notifier() replaces the notifier, then the reference count on the old notifier is dropped which causes it to be freed. But nothing ensures that the old notifier is not longer queued in the work list. If it is queued this results in a use after free and possibly in work list corruption. Ensure that the work is canceled before the reference is dropped. Signed-off-by: Prasad Sodagudi Signed-off-by: Thomas Gleixner Cc: marc.zyngier@arm.com Link: https://lkml.kernel.org/r/1553439424-6529-1-git-send-email-psodagud@codeaurora.org Signed-off-by: Sasha Levin --- kernel/irq/manage.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 84b54a17b95d..df557ec20a6f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -356,8 +356,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) desc->affinity_notify = notify; raw_spin_unlock_irqrestore(&desc->lock, flags); - if (old_notify) + if (old_notify) { + cancel_work_sync(&old_notify->work); kref_put(&old_notify->kref, old_notify->release); + } return 0; } -- cgit v1.2.3 From 9092861ce665d19f1f2a9fc435f420bc9cf74b17 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Wed, 24 Apr 2019 17:00:57 +0200 Subject: usb: dwc3: Allow building USB_DWC3_QCOM without EXTCON commit 77a4946516fe488b6a33390de6d749f934a243ba upstream. Keep EXTCON support optional, as some platforms do not need it. Do the same for USB_DWC3_OMAP while we're at it. Fixes: 3def4031b3e3f ("usb: dwc3: add EXTCON dependency for qcom") Signed-off-by: Marc Gonzalez Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig index 1a0404fda596..5d22f4bf2a9f 100644 --- a/drivers/usb/dwc3/Kconfig +++ b/drivers/usb/dwc3/Kconfig @@ -52,7 +52,8 @@ comment "Platform Glue Driver Support" config USB_DWC3_OMAP tristate "Texas Instruments OMAP5 and similar Platforms" - depends on EXTCON && (ARCH_OMAP2PLUS || COMPILE_TEST) + depends on ARCH_OMAP2PLUS || COMPILE_TEST + depends on EXTCON || !EXTCON depends on OF default USB_DWC3 help @@ -113,7 +114,8 @@ config USB_DWC3_ST config USB_DWC3_QCOM tristate "Qualcomm Platform" - depends on EXTCON && (ARCH_QCOM || COMPILE_TEST) + depends on ARCH_QCOM || COMPILE_TEST + depends on EXTCON || !EXTCON depends on OF default USB_DWC3 help -- cgit v1.2.3 From 083a8f69962a27ad381658a16d5f2f279e51e554 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 25 Apr 2019 13:55:23 -0700 Subject: usb: dwc3: Fix default lpm_nyet_threshold value commit 8d791929b2fbdf7734c1596d808e55cb457f4562 upstream. The max possible value for DCTL.LPM_NYET_THRES is 15 and not 255. Change the default value to 15. Cc: stable@vger.kernel.org Fixes: 80caf7d21adc ("usb: dwc3: add lpm erratum support") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index a1b126f90261..f944cea4056b 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1218,7 +1218,7 @@ static void dwc3_get_properties(struct dwc3 *dwc) u8 tx_max_burst_prd; /* default to highest possible threshold */ - lpm_nyet_threshold = 0xff; + lpm_nyet_threshold = 0xf; /* default to -3.5dB de-emphasis */ tx_de_emphasis = 1; -- cgit v1.2.3 From 5b1c70f36832900f8090d065a5b2046f5eeb1f9f Mon Sep 17 00:00:00 2001 From: "Ji-Ze Hong (Peter Hong)" Date: Tue, 30 Apr 2019 09:22:29 +0800 Subject: USB: serial: f81232: fix interrupt worker not stop commit 804dbee1e49774918339c1e5a87400988c0819e8 upstream. The F81232 will use interrupt worker to handle MSR change. This patch will fix the issue that interrupt work should stop in close() and suspend(). This also fixes line-status events being disabled after a suspend cycle until the port is re-opened. Signed-off-by: Ji-Ze Hong (Peter Hong) [ johan: amend commit message ] Fixes: 87fe5adcd8de ("USB: f81232: implement read IIR/MSR with endpoint") Cc: stable # 4.1 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/f81232.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c index 0dcdcb4b2cde..dee6f2caf9b5 100644 --- a/drivers/usb/serial/f81232.c +++ b/drivers/usb/serial/f81232.c @@ -556,9 +556,12 @@ static int f81232_open(struct tty_struct *tty, struct usb_serial_port *port) static void f81232_close(struct usb_serial_port *port) { + struct f81232_private *port_priv = usb_get_serial_port_data(port); + f81232_port_disable(port); usb_serial_generic_close(port); usb_kill_urb(port->interrupt_in_urb); + flush_work(&port_priv->interrupt_work); } static void f81232_dtr_rts(struct usb_serial_port *port, int on) @@ -632,6 +635,40 @@ static int f81232_port_remove(struct usb_serial_port *port) return 0; } +static int f81232_suspend(struct usb_serial *serial, pm_message_t message) +{ + struct usb_serial_port *port = serial->port[0]; + struct f81232_private *port_priv = usb_get_serial_port_data(port); + int i; + + for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) + usb_kill_urb(port->read_urbs[i]); + + usb_kill_urb(port->interrupt_in_urb); + + if (port_priv) + flush_work(&port_priv->interrupt_work); + + return 0; +} + +static int f81232_resume(struct usb_serial *serial) +{ + struct usb_serial_port *port = serial->port[0]; + int result; + + if (tty_port_initialized(&port->port)) { + result = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); + if (result) { + dev_err(&port->dev, "submit interrupt urb failed: %d\n", + result); + return result; + } + } + + return usb_serial_generic_resume(serial); +} + static struct usb_serial_driver f81232_device = { .driver = { .owner = THIS_MODULE, @@ -655,6 +692,8 @@ static struct usb_serial_driver f81232_device = { .read_int_callback = f81232_read_int_callback, .port_probe = f81232_port_probe, .port_remove = f81232_port_remove, + .suspend = f81232_suspend, + .resume = f81232_resume, }; static struct usb_serial_driver * const serial_drivers[] = { -- cgit v1.2.3 From bba2feefcacd3cbdb7b9570debc5cbf80b944944 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 25 Apr 2019 18:05:39 +0200 Subject: USB: cdc-acm: fix unthrottle races commit 764478f41130f1b8d8057575b89e69980a0f600d upstream. Fix two long-standing bugs which could potentially lead to memory corruption or leave the port throttled until it is reopened (on weakly ordered systems), respectively, when read-URB completion races with unthrottle(). First, the URB must not be marked as free before processing is complete to prevent it from being submitted by unthrottle() on another CPU. CPU 1 CPU 2 ================ ================ complete() unthrottle() process_urb(); smp_mb__before_atomic(); set_bit(i, free); if (test_and_clear_bit(i, free)) submit_urb(); Second, the URB must be marked as free before checking the throttled flag to prevent unthrottle() on another CPU from failing to observe that the URB needs to be submitted if complete() sees that the throttled flag is set. CPU 1 CPU 2 ================ ================ complete() unthrottle() set_bit(i, free); throttled = 0; smp_mb__after_atomic(); smp_mb(); if (throttled) if (test_and_clear_bit(i, free)) return; submit_urb(); Note that test_and_clear_bit() only implies barriers when the test is successful. To handle the case where the URB is still in use an explicit barrier needs to be added to unthrottle() for the second race condition. Also note that the first race was fixed by 36e59e0d70d6 ("cdc-acm: fix race between callback and unthrottle") back in 2015, but the bug was reintroduced a year later. Fixes: 1aba579f3cf5 ("cdc-acm: handle read pipe errors") Fixes: 088c64f81284 ("USB: cdc-acm: re-write read processing") Signed-off-by: Johan Hovold Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index ec666eb4b7b4..c03aa8550980 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -470,12 +470,12 @@ static void acm_read_bulk_callback(struct urb *urb) struct acm *acm = rb->instance; unsigned long flags; int status = urb->status; + bool stopped = false; + bool stalled = false; dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n", rb->index, urb->actual_length, status); - set_bit(rb->index, &acm->read_urbs_free); - if (!acm->dev) { dev_dbg(&acm->data->dev, "%s - disconnected\n", __func__); return; @@ -488,15 +488,16 @@ static void acm_read_bulk_callback(struct urb *urb) break; case -EPIPE: set_bit(EVENT_RX_STALL, &acm->flags); - schedule_work(&acm->work); - return; + stalled = true; + break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&acm->data->dev, "%s - urb shutting down with status: %d\n", __func__, status); - return; + stopped = true; + break; default: dev_dbg(&acm->data->dev, "%s - nonzero urb status received: %d\n", @@ -505,10 +506,24 @@ static void acm_read_bulk_callback(struct urb *urb) } /* - * Unthrottle may run on another CPU which needs to see events - * in the same order. Submission has an implict barrier + * Make sure URB processing is done before marking as free to avoid + * racing with unthrottle() on another CPU. Matches the barriers + * implied by the test_and_clear_bit() in acm_submit_read_urb(). */ smp_mb__before_atomic(); + set_bit(rb->index, &acm->read_urbs_free); + /* + * Make sure URB is marked as free before checking the throttled flag + * to avoid racing with unthrottle() on another CPU. Matches the + * smp_mb() in unthrottle(). + */ + smp_mb__after_atomic(); + + if (stopped || stalled) { + if (stalled) + schedule_work(&acm->work); + return; + } /* throttle device if requested by tty */ spin_lock_irqsave(&acm->read_lock, flags); @@ -842,6 +857,9 @@ static void acm_tty_unthrottle(struct tty_struct *tty) acm->throttle_req = 0; spin_unlock_irq(&acm->read_lock); + /* Matches the smp_mb__after_atomic() in acm_read_bulk_callback(). */ + smp_mb(); + if (was_throttled) acm_submit_read_urbs(acm, GFP_KERNEL); } -- cgit v1.2.3 From 704eaf49399fe5998140c93b9af8acc73b79a813 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Apr 2019 13:19:25 -0400 Subject: usb-storage: Set virt_boundary_mask to avoid SG overflows commit 747668dbc061b3e62bc1982767a3a1f9815fcf0e upstream. The USB subsystem has always had an unusual requirement for its scatter-gather transfers: Each element in the scatterlist (except the last one) must have a length divisible by the bulk maxpacket size. This is a particular issue for USB mass storage, which uses SG lists created by the block layer rather than setting up its own. So far we have scraped by okay because most devices have a logical block size of 512 bytes or larger, and the bulk maxpacket sizes for USB 2 and below are all <= 512. However, USB 3 has a bulk maxpacket size of 1024. Since the xhci-hcd driver includes native SG support, this hasn't mattered much. But now people are trying to use USB-3 mass storage devices with USBIP, and the vhci-hcd driver currently does not have full SG support. The result is an overflow error, when the driver attempts to implement an SG transfer of 63 512-byte blocks as a single 3584-byte (7 blocks) transfer followed by seven 4096-byte (8 blocks) transfers. The device instead sends 31 1024-byte packets followed by a 512-byte packet, and this overruns the first SG buffer. Ideally this would be fixed by adding better SG support to vhci-hcd. But for now it appears we can work around the problem by asking the block layer to respect the maxpacket limitation, through the use of the virt_boundary_mask. Signed-off-by: Alan Stern Reported-by: Seth Bollinger Tested-by: Seth Bollinger CC: Ming Lei Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index a73ea495d5a7..59190d88fa9f 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -65,6 +65,7 @@ static const char* host_info(struct Scsi_Host *host) static int slave_alloc (struct scsi_device *sdev) { struct us_data *us = host_to_us(sdev->host); + int maxp; /* * Set the INQUIRY transfer length to 36. We don't use any of @@ -74,20 +75,17 @@ static int slave_alloc (struct scsi_device *sdev) sdev->inquiry_len = 36; /* - * USB has unusual DMA-alignment requirements: Although the - * starting address of each scatter-gather element doesn't matter, - * the length of each element except the last must be divisible - * by the Bulk maxpacket value. There's currently no way to - * express this by block-layer constraints, so we'll cop out - * and simply require addresses to be aligned at 512-byte - * boundaries. This is okay since most block I/O involves - * hardware sectors that are multiples of 512 bytes in length, - * and since host controllers up through USB 2.0 have maxpacket - * values no larger than 512. - * - * But it doesn't suffice for Wireless USB, where Bulk maxpacket - * values can be as large as 2048. To make that work properly - * will require changes to the block layer. + * USB has unusual scatter-gather requirements: the length of each + * scatterlist element except the last must be divisible by the + * Bulk maxpacket value. Fortunately this value is always a + * power of 2. Inform the block layer about this requirement. + */ + maxp = usb_maxpacket(us->pusb_dev, us->recv_bulk_pipe, 0); + blk_queue_virt_boundary(sdev->request_queue, maxp - 1); + + /* + * Some host controllers may have alignment requirements. + * We'll play it safe by requiring 512-byte alignment always. */ blk_queue_update_dma_alignment(sdev->request_queue, (512 - 1)); -- cgit v1.2.3 From 284af27884325c5260dbfd29672a46cc5bf0556c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 17 Apr 2019 10:35:36 +0300 Subject: intel_th: pci: Add Comet Lake support commit e60e9a4b231a20a199d7a61caadc48693c30d695 upstream. This adds support for Intel TH on Comet Lake. Signed-off-by: Alexander Shishkin Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 1cf6290d6435..70f2cb90adc5 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -165,6 +165,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x34a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Comet Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x02a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; -- cgit v1.2.3 From 573a935bfb4fdd35fe8ca60170322c19ff0fe305 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 16 Apr 2019 16:49:27 -0700 Subject: iio: adc: qcom-spmi-adc5: Fix of-based module autoloading commit 447ccb4e0834a9f9f0dd5643e421c7f1a1649e6a upstream. The of_device_id table needs to be registered as module alias in order for automatic module loading to pick the kernel module based on the DeviceTree compatible. So add MODULE_DEVICE_TABLE() to make this happen. Fixes: e13d757279bb ("iio: adc: Add QCOM SPMI PMIC5 ADC driver") Cc: stable@vger.kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/qcom-spmi-adc5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/qcom-spmi-adc5.c b/drivers/iio/adc/qcom-spmi-adc5.c index 6a866cc187f7..21fdcde77883 100644 --- a/drivers/iio/adc/qcom-spmi-adc5.c +++ b/drivers/iio/adc/qcom-spmi-adc5.c @@ -664,6 +664,7 @@ static const struct of_device_id adc5_match_table[] = { }, { } }; +MODULE_DEVICE_TABLE(of, adc5_match_table); static int adc5_get_dt_data(struct adc5_chip *adc, struct device_node *node) { -- cgit v1.2.3 From 61ae16c4586b34ba00edd5d4357eb37a9bec08ef Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 8 Mar 2019 17:47:10 +0100 Subject: cpufreq: armada-37xx: fix frequency calculation for opp commit 8db82563451f976597ab7b282ec655e4390a4088 upstream. The frequency calculation was based on the current(max) frequency of the CPU. However for low frequency, the value used was already the parent frequency divided by a factor of 2. Instead of using this frequency, this fix directly get the frequency from the parent clock. Fixes: 92ce45fb875d ("cpufreq: Add DVFS support for Armada 37xx") Cc: Reported-by: Christian Neubert Signed-off-by: Gregory CLEMENT Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/armada-37xx-cpufreq.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 75491fc841a6..0df16eb1eb3c 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -359,11 +359,11 @@ static int __init armada37xx_cpufreq_driver_init(void) struct armada_37xx_dvfs *dvfs; struct platform_device *pdev; unsigned long freq; - unsigned int cur_frequency; + unsigned int cur_frequency, base_frequency; struct regmap *nb_pm_base, *avs_base; struct device *cpu_dev; int load_lvl, ret; - struct clk *clk; + struct clk *clk, *parent; nb_pm_base = syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm"); @@ -399,6 +399,22 @@ static int __init armada37xx_cpufreq_driver_init(void) return PTR_ERR(clk); } + parent = clk_get_parent(clk); + if (IS_ERR(parent)) { + dev_err(cpu_dev, "Cannot get parent clock for CPU0\n"); + clk_put(clk); + return PTR_ERR(parent); + } + + /* Get parent CPU frequency */ + base_frequency = clk_get_rate(parent); + + if (!base_frequency) { + dev_err(cpu_dev, "Failed to get parent clock rate for CPU\n"); + clk_put(clk); + return -EINVAL; + } + /* Get nominal (current) CPU frequency */ cur_frequency = clk_get_rate(clk); if (!cur_frequency) { @@ -431,7 +447,7 @@ static int __init armada37xx_cpufreq_driver_init(void) for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000; - freq = cur_frequency / dvfs->divider[load_lvl]; + freq = base_frequency / dvfs->divider[load_lvl]; ret = dev_pm_opp_add(cpu_dev, freq, u_volt); if (ret) goto remove_opp; -- cgit v1.2.3 From 34ebc8ad2ea4eb4f8b981fa403391a6030721f54 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 18 Apr 2019 13:39:33 +0200 Subject: ACPI / LPSS: Use acpi_lpss_* instead of acpi_subsys_* functions for hibernate commit c8afd03486c26accdda4846e5561aa3f8e862a9d upstream. Commit 48402cee6889 ("ACPI / LPSS: Resume BYT/CHT I2C controllers from resume_noirq") makes acpi_lpss_{suspend_late,resume_early}() bail early on BYT/CHT as resume_from_noirq is set. This means that on resume from hibernate dw_i2c_plat_resume() doesn't get called by the restore_early callback, acpi_lpss_resume_early(). Instead it should be called by the restore_noirq callback matching how things are done when resume_from_noirq is set and we are doing a regular resume. Change the restore_noirq callback to acpi_lpss_resume_noirq so that dw_i2c_plat_resume() gets properly called when resume_from_noirq is set and we are resuming from hibernate. Likewise also change the poweroff_noirq callback so that dw_i2c_plat_suspend gets called properly. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202139 Fixes: 48402cee6889 ("ACPI / LPSS: Resume BYT/CHT I2C controllers from resume_noirq") Reported-by: Kai-Heng Feng Signed-off-by: Hans de Goede Cc: 4.20+ # 4.20+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_lpss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 5f94c35d165f..a2d8c03b1e24 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -1142,8 +1142,8 @@ static struct dev_pm_domain acpi_lpss_pm_domain = { .thaw_noirq = acpi_subsys_thaw_noirq, .poweroff = acpi_subsys_suspend, .poweroff_late = acpi_lpss_suspend_late, - .poweroff_noirq = acpi_subsys_suspend_noirq, - .restore_noirq = acpi_subsys_resume_noirq, + .poweroff_noirq = acpi_lpss_suspend_noirq, + .restore_noirq = acpi_lpss_resume_noirq, .restore_early = acpi_lpss_resume_early, #endif .runtime_suspend = acpi_lpss_runtime_suspend, -- cgit v1.2.3 From 185e58d16ac81155bb3c3460788f698049d15b8e Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 30 Apr 2019 09:59:37 -0500 Subject: soc: sunxi: Fix missing dependency on REGMAP_MMIO commit a84014e1db35d8e7af09878d0b4bf30804fb17d5 upstream. When enabling ARCH_SUNXI from allnoconfig, SUNXI_SRAM is enabled, but not REGMAP_MMIO, so the kernel fails to link with an undefined reference to __devm_regmap_init_mmio_clk. Select REGMAP_MMIO, as suggested in drivers/base/regmap/Kconfig. This creates the following dependency loop: drivers/of/Kconfig:68: symbol OF_IRQ depends on IRQ_DOMAIN kernel/irq/Kconfig:63: symbol IRQ_DOMAIN is selected by REGMAP drivers/base/regmap/Kconfig:7: symbol REGMAP default is visible depending on REGMAP_MMIO drivers/base/regmap/Kconfig:39: symbol REGMAP_MMIO is selected by SUNXI_SRAM drivers/soc/sunxi/Kconfig:4: symbol SUNXI_SRAM is selected by USB_MUSB_SUNXI drivers/usb/musb/Kconfig:63: symbol USB_MUSB_SUNXI depends on GENERIC_PHY drivers/phy/Kconfig:7: symbol GENERIC_PHY is selected by PHY_BCM_NS_USB3 drivers/phy/broadcom/Kconfig:29: symbol PHY_BCM_NS_USB3 depends on MDIO_BUS drivers/net/phy/Kconfig:12: symbol MDIO_BUS default is visible depending on PHYLIB drivers/net/phy/Kconfig:181: symbol PHYLIB is selected by ARC_EMAC_CORE drivers/net/ethernet/arc/Kconfig:18: symbol ARC_EMAC_CORE is selected by ARC_EMAC drivers/net/ethernet/arc/Kconfig:24: symbol ARC_EMAC depends on OF_IRQ To fix the circular dependency, make USB_MUSB_SUNXI select GENERIC_PHY instead of depending on it. This matches the use of GENERIC_PHY by all but two other drivers. Cc: # 4.19 Fixes: 5828729bebbb ("soc: sunxi: export a regmap for EMAC clock reg on A64") Signed-off-by: Samuel Holland Acked-by: Maxime Ripard Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/soc/sunxi/Kconfig | 1 + drivers/usb/musb/Kconfig | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/sunxi/Kconfig b/drivers/soc/sunxi/Kconfig index 353b07e40176..e84eb4e59f58 100644 --- a/drivers/soc/sunxi/Kconfig +++ b/drivers/soc/sunxi/Kconfig @@ -4,6 +4,7 @@ config SUNXI_SRAM bool default ARCH_SUNXI + select REGMAP_MMIO help Say y here to enable the SRAM controller support. This device is responsible on mapping the SRAM in the sunXi SoCs diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index ad08895e78f9..c3dae7d5cb6e 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -66,7 +66,7 @@ config USB_MUSB_SUNXI depends on NOP_USB_XCEIV depends on PHY_SUN4I_USB depends on EXTCON - depends on GENERIC_PHY + select GENERIC_PHY select SUNXI_SRAM config USB_MUSB_DAVINCI -- cgit v1.2.3 From 45076c8e403a275185a089a20574fb772134f7c1 Mon Sep 17 00:00:00 2001 From: Silvio Cesare Date: Thu, 21 Mar 2019 09:44:32 -0700 Subject: scsi: lpfc: change snprintf to scnprintf for possible overflow commit e7f7b6f38a44697428f5a2e7c606de028df2b0e3 upstream. Change snprintf to scnprintf. There are generally two cases where using snprintf causes problems. 1) Uses of size += snprintf(buf, SIZE - size, fmt, ...) In this case, if snprintf would have written more characters than what the buffer size (SIZE) is, then size will end up larger than SIZE. In later uses of snprintf, SIZE - size will result in a negative number, leading to problems. Note that size might already be too large by using size = snprintf before the code reaches a case of size += snprintf. 2) If size is ultimately used as a length parameter for a copy back to user space, then it will potentially allow for a buffer overflow and information disclosure when size is greater than SIZE. When the size is used to index the buffer directly, we can have memory corruption. This also means when size = snprintf... is used, it may also cause problems since size may become large. Copying to userspace is mitigated by the HARDENED_USERCOPY kernel configuration. The solution to these issues is to use scnprintf which returns the number of characters actually written to the buffer, so the size variable will never exceed SIZE. Signed-off-by: Silvio Cesare Signed-off-by: Willy Tarreau Signed-off-by: James Smart Cc: Dick Kennedy Cc: Dan Carpenter Cc: Kees Cook Cc: Will Deacon Cc: Greg KH Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_attr.c | 192 +++++++++-------- drivers/scsi/lpfc/lpfc_ct.c | 12 +- drivers/scsi/lpfc/lpfc_debugfs.c | 455 ++++++++++++++++++++------------------- drivers/scsi/lpfc/lpfc_debugfs.h | 6 +- 4 files changed, 336 insertions(+), 329 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4bae72cbf3f6..45b26a8eb61e 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -117,7 +117,7 @@ static ssize_t lpfc_drvr_version_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, LPFC_MODULE_DESC "\n"); + return scnprintf(buf, PAGE_SIZE, LPFC_MODULE_DESC "\n"); } /** @@ -137,9 +137,9 @@ lpfc_enable_fip_show(struct device *dev, struct device_attribute *attr, struct lpfc_hba *phba = vport->phba; if (phba->hba_flag & HBA_FIP_SUPPORT) - return snprintf(buf, PAGE_SIZE, "1\n"); + return scnprintf(buf, PAGE_SIZE, "1\n"); else - return snprintf(buf, PAGE_SIZE, "0\n"); + return scnprintf(buf, PAGE_SIZE, "0\n"); } static ssize_t @@ -517,14 +517,15 @@ lpfc_bg_info_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - if (phba->cfg_enable_bg) + if (phba->cfg_enable_bg) { if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) - return snprintf(buf, PAGE_SIZE, "BlockGuard Enabled\n"); + return scnprintf(buf, PAGE_SIZE, + "BlockGuard Enabled\n"); else - return snprintf(buf, PAGE_SIZE, + return scnprintf(buf, PAGE_SIZE, "BlockGuard Not Supported\n"); - else - return snprintf(buf, PAGE_SIZE, + } else + return scnprintf(buf, PAGE_SIZE, "BlockGuard Disabled\n"); } @@ -536,7 +537,7 @@ lpfc_bg_guard_err_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%llu\n", + return scnprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)phba->bg_guard_err_cnt); } @@ -548,7 +549,7 @@ lpfc_bg_apptag_err_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%llu\n", + return scnprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)phba->bg_apptag_err_cnt); } @@ -560,7 +561,7 @@ lpfc_bg_reftag_err_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%llu\n", + return scnprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)phba->bg_reftag_err_cnt); } @@ -578,7 +579,7 @@ lpfc_info_show(struct device *dev, struct device_attribute *attr, { struct Scsi_Host *host = class_to_shost(dev); - return snprintf(buf, PAGE_SIZE, "%s\n",lpfc_info(host)); + return scnprintf(buf, PAGE_SIZE, "%s\n", lpfc_info(host)); } /** @@ -597,7 +598,7 @@ lpfc_serialnum_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%s\n",phba->SerialNumber); + return scnprintf(buf, PAGE_SIZE, "%s\n", phba->SerialNumber); } /** @@ -619,7 +620,7 @@ lpfc_temp_sensor_show(struct device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%d\n",phba->temp_sensor_support); + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->temp_sensor_support); } /** @@ -638,7 +639,7 @@ lpfc_modeldesc_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelDesc); + return scnprintf(buf, PAGE_SIZE, "%s\n", phba->ModelDesc); } /** @@ -657,7 +658,7 @@ lpfc_modelname_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelName); + return scnprintf(buf, PAGE_SIZE, "%s\n", phba->ModelName); } /** @@ -676,7 +677,7 @@ lpfc_programtype_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%s\n",phba->ProgramType); + return scnprintf(buf, PAGE_SIZE, "%s\n", phba->ProgramType); } /** @@ -694,7 +695,7 @@ lpfc_mlomgmt_show(struct device *dev, struct device_attribute *attr, char *buf) struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%d\n", + return scnprintf(buf, PAGE_SIZE, "%d\n", (phba->sli.sli_flag & LPFC_MENLO_MAINT)); } @@ -714,7 +715,7 @@ lpfc_vportnum_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%s\n",phba->Port); + return scnprintf(buf, PAGE_SIZE, "%s\n", phba->Port); } /** @@ -742,10 +743,10 @@ lpfc_fwrev_show(struct device *dev, struct device_attribute *attr, sli_family = phba->sli4_hba.pc_sli4_params.sli_family; if (phba->sli_rev < LPFC_SLI_REV4) - len = snprintf(buf, PAGE_SIZE, "%s, sli-%d\n", + len = scnprintf(buf, PAGE_SIZE, "%s, sli-%d\n", fwrev, phba->sli_rev); else - len = snprintf(buf, PAGE_SIZE, "%s, sli-%d:%d:%x\n", + len = scnprintf(buf, PAGE_SIZE, "%s, sli-%d:%d:%x\n", fwrev, phba->sli_rev, if_type, sli_family); return len; @@ -769,7 +770,7 @@ lpfc_hdw_show(struct device *dev, struct device_attribute *attr, char *buf) lpfc_vpd_t *vp = &phba->vpd; lpfc_jedec_to_ascii(vp->rev.biuRev, hdw); - return snprintf(buf, PAGE_SIZE, "%s\n", hdw); + return scnprintf(buf, PAGE_SIZE, "%s\n", hdw); } /** @@ -790,10 +791,11 @@ lpfc_option_rom_version_show(struct device *dev, struct device_attribute *attr, char fwrev[FW_REV_STR_SIZE]; if (phba->sli_rev < LPFC_SLI_REV4) - return snprintf(buf, PAGE_SIZE, "%s\n", phba->OptionROMVersion); + return scnprintf(buf, PAGE_SIZE, "%s\n", + phba->OptionROMVersion); lpfc_decode_firmware_rev(phba, fwrev, 1); - return snprintf(buf, PAGE_SIZE, "%s\n", fwrev); + return scnprintf(buf, PAGE_SIZE, "%s\n", fwrev); } /** @@ -824,20 +826,20 @@ lpfc_link_state_show(struct device *dev, struct device_attribute *attr, case LPFC_LINK_DOWN: case LPFC_HBA_ERROR: if (phba->hba_flag & LINK_DISABLED) - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Link Down - User disabled\n"); else - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Link Down\n"); break; case LPFC_LINK_UP: case LPFC_CLEAR_LA: case LPFC_HBA_READY: - len += snprintf(buf + len, PAGE_SIZE-len, "Link Up - "); + len += scnprintf(buf + len, PAGE_SIZE-len, "Link Up - "); switch (vport->port_state) { case LPFC_LOCAL_CFG_LINK: - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Configuring Link\n"); break; case LPFC_FDISC: @@ -847,38 +849,40 @@ lpfc_link_state_show(struct device *dev, struct device_attribute *attr, case LPFC_NS_QRY: case LPFC_BUILD_DISC_LIST: case LPFC_DISC_AUTH: - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "Discovery\n"); break; case LPFC_VPORT_READY: - len += snprintf(buf + len, PAGE_SIZE - len, "Ready\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, + "Ready\n"); break; case LPFC_VPORT_FAILED: - len += snprintf(buf + len, PAGE_SIZE - len, "Failed\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, + "Failed\n"); break; case LPFC_VPORT_UNKNOWN: - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "Unknown\n"); break; } if (phba->sli.sli_flag & LPFC_MENLO_MAINT) - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, " Menlo Maint Mode\n"); else if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) { if (vport->fc_flag & FC_PUBLIC_LOOP) - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, " Public Loop\n"); else - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, " Private Loop\n"); } else { if (vport->fc_flag & FC_FABRIC) - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, " Fabric\n"); else - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, " Point-2-Point\n"); } } @@ -890,28 +894,28 @@ lpfc_link_state_show(struct device *dev, struct device_attribute *attr, struct lpfc_trunk_link link = phba->trunk_link; if (bf_get(lpfc_conf_trunk_port0, &phba->sli4_hba)) - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "Trunk port 0: Link %s %s\n", (link.link0.state == LPFC_LINK_UP) ? "Up" : "Down. ", trunk_errmsg[link.link0.fault]); if (bf_get(lpfc_conf_trunk_port1, &phba->sli4_hba)) - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "Trunk port 1: Link %s %s\n", (link.link1.state == LPFC_LINK_UP) ? "Up" : "Down. ", trunk_errmsg[link.link1.fault]); if (bf_get(lpfc_conf_trunk_port2, &phba->sli4_hba)) - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "Trunk port 2: Link %s %s\n", (link.link2.state == LPFC_LINK_UP) ? "Up" : "Down. ", trunk_errmsg[link.link2.fault]); if (bf_get(lpfc_conf_trunk_port3, &phba->sli4_hba)) - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "Trunk port 3: Link %s %s\n", (link.link3.state == LPFC_LINK_UP) ? "Up" : "Down. ", @@ -939,15 +943,15 @@ lpfc_sli4_protocol_show(struct device *dev, struct device_attribute *attr, struct lpfc_hba *phba = vport->phba; if (phba->sli_rev < LPFC_SLI_REV4) - return snprintf(buf, PAGE_SIZE, "fc\n"); + return scnprintf(buf, PAGE_SIZE, "fc\n"); if (phba->sli4_hba.lnk_info.lnk_dv == LPFC_LNK_DAT_VAL) { if (phba->sli4_hba.lnk_info.lnk_tp == LPFC_LNK_TYPE_GE) - return snprintf(buf, PAGE_SIZE, "fcoe\n"); + return scnprintf(buf, PAGE_SIZE, "fcoe\n"); if (phba->sli4_hba.lnk_info.lnk_tp == LPFC_LNK_TYPE_FC) - return snprintf(buf, PAGE_SIZE, "fc\n"); + return scnprintf(buf, PAGE_SIZE, "fc\n"); } - return snprintf(buf, PAGE_SIZE, "unknown\n"); + return scnprintf(buf, PAGE_SIZE, "unknown\n"); } /** @@ -967,7 +971,7 @@ lpfc_oas_supported_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%d\n", + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->sli4_hba.pc_sli4_params.oas_supported); } @@ -1025,7 +1029,7 @@ lpfc_num_discovered_ports_show(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; - return snprintf(buf, PAGE_SIZE, "%d\n", + return scnprintf(buf, PAGE_SIZE, "%d\n", vport->fc_map_cnt + vport->fc_unmap_cnt); } @@ -1539,7 +1543,7 @@ lpfc_nport_evt_cnt_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt); + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt); } int @@ -1628,7 +1632,7 @@ lpfc_board_mode_show(struct device *dev, struct device_attribute *attr, else state = "online"; - return snprintf(buf, PAGE_SIZE, "%s\n", state); + return scnprintf(buf, PAGE_SIZE, "%s\n", state); } /** @@ -1854,8 +1858,8 @@ lpfc_max_rpi_show(struct device *dev, struct device_attribute *attr, uint32_t cnt; if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, NULL, NULL, NULL)) - return snprintf(buf, PAGE_SIZE, "%d\n", cnt); - return snprintf(buf, PAGE_SIZE, "Unknown\n"); + return scnprintf(buf, PAGE_SIZE, "%d\n", cnt); + return scnprintf(buf, PAGE_SIZE, "Unknown\n"); } /** @@ -1882,8 +1886,8 @@ lpfc_used_rpi_show(struct device *dev, struct device_attribute *attr, uint32_t cnt, acnt; if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, &acnt, NULL, NULL)) - return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); - return snprintf(buf, PAGE_SIZE, "Unknown\n"); + return scnprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); + return scnprintf(buf, PAGE_SIZE, "Unknown\n"); } /** @@ -1910,8 +1914,8 @@ lpfc_max_xri_show(struct device *dev, struct device_attribute *attr, uint32_t cnt; if (lpfc_get_hba_info(phba, &cnt, NULL, NULL, NULL, NULL, NULL)) - return snprintf(buf, PAGE_SIZE, "%d\n", cnt); - return snprintf(buf, PAGE_SIZE, "Unknown\n"); + return scnprintf(buf, PAGE_SIZE, "%d\n", cnt); + return scnprintf(buf, PAGE_SIZE, "Unknown\n"); } /** @@ -1938,8 +1942,8 @@ lpfc_used_xri_show(struct device *dev, struct device_attribute *attr, uint32_t cnt, acnt; if (lpfc_get_hba_info(phba, &cnt, &acnt, NULL, NULL, NULL, NULL)) - return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); - return snprintf(buf, PAGE_SIZE, "Unknown\n"); + return scnprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); + return scnprintf(buf, PAGE_SIZE, "Unknown\n"); } /** @@ -1966,8 +1970,8 @@ lpfc_max_vpi_show(struct device *dev, struct device_attribute *attr, uint32_t cnt; if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, NULL)) - return snprintf(buf, PAGE_SIZE, "%d\n", cnt); - return snprintf(buf, PAGE_SIZE, "Unknown\n"); + return scnprintf(buf, PAGE_SIZE, "%d\n", cnt); + return scnprintf(buf, PAGE_SIZE, "Unknown\n"); } /** @@ -1994,8 +1998,8 @@ lpfc_used_vpi_show(struct device *dev, struct device_attribute *attr, uint32_t cnt, acnt; if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, &acnt)) - return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); - return snprintf(buf, PAGE_SIZE, "Unknown\n"); + return scnprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); + return scnprintf(buf, PAGE_SIZE, "Unknown\n"); } /** @@ -2020,10 +2024,10 @@ lpfc_npiv_info_show(struct device *dev, struct device_attribute *attr, struct lpfc_hba *phba = vport->phba; if (!(phba->max_vpi)) - return snprintf(buf, PAGE_SIZE, "NPIV Not Supported\n"); + return scnprintf(buf, PAGE_SIZE, "NPIV Not Supported\n"); if (vport->port_type == LPFC_PHYSICAL_PORT) - return snprintf(buf, PAGE_SIZE, "NPIV Physical\n"); - return snprintf(buf, PAGE_SIZE, "NPIV Virtual (VPI %d)\n", vport->vpi); + return scnprintf(buf, PAGE_SIZE, "NPIV Physical\n"); + return scnprintf(buf, PAGE_SIZE, "NPIV Virtual (VPI %d)\n", vport->vpi); } /** @@ -2045,7 +2049,7 @@ lpfc_poll_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%#x\n", phba->cfg_poll); + return scnprintf(buf, PAGE_SIZE, "%#x\n", phba->cfg_poll); } /** @@ -2149,7 +2153,7 @@ lpfc_fips_level_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%d\n", phba->fips_level); + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->fips_level); } /** @@ -2168,7 +2172,7 @@ lpfc_fips_rev_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%d\n", phba->fips_spec_rev); + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->fips_spec_rev); } /** @@ -2187,7 +2191,7 @@ lpfc_dss_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "%s - %sOperational\n", + return scnprintf(buf, PAGE_SIZE, "%s - %sOperational\n", (phba->cfg_enable_dss) ? "Enabled" : "Disabled", (phba->sli3_options & LPFC_SLI3_DSS_ENABLED) ? "" : "Not "); @@ -2216,7 +2220,7 @@ lpfc_sriov_hw_max_virtfn_show(struct device *dev, uint16_t max_nr_virtfn; max_nr_virtfn = lpfc_sli_sriov_nr_virtfn_get(phba); - return snprintf(buf, PAGE_SIZE, "%d\n", max_nr_virtfn); + return scnprintf(buf, PAGE_SIZE, "%d\n", max_nr_virtfn); } static inline bool lpfc_rangecheck(uint val, uint min, uint max) @@ -2276,7 +2280,7 @@ lpfc_##attr##_show(struct device *dev, struct device_attribute *attr, \ struct Scsi_Host *shost = class_to_shost(dev);\ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ struct lpfc_hba *phba = vport->phba;\ - return snprintf(buf, PAGE_SIZE, "%d\n",\ + return scnprintf(buf, PAGE_SIZE, "%d\n",\ phba->cfg_##attr);\ } @@ -2304,7 +2308,7 @@ lpfc_##attr##_show(struct device *dev, struct device_attribute *attr, \ struct lpfc_hba *phba = vport->phba;\ uint val = 0;\ val = phba->cfg_##attr;\ - return snprintf(buf, PAGE_SIZE, "%#x\n",\ + return scnprintf(buf, PAGE_SIZE, "%#x\n",\ phba->cfg_##attr);\ } @@ -2440,7 +2444,7 @@ lpfc_##attr##_show(struct device *dev, struct device_attribute *attr, \ { \ struct Scsi_Host *shost = class_to_shost(dev);\ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ - return snprintf(buf, PAGE_SIZE, "%d\n", vport->cfg_##attr);\ + return scnprintf(buf, PAGE_SIZE, "%d\n", vport->cfg_##attr);\ } /** @@ -2465,7 +2469,7 @@ lpfc_##attr##_show(struct device *dev, struct device_attribute *attr, \ { \ struct Scsi_Host *shost = class_to_shost(dev);\ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ - return snprintf(buf, PAGE_SIZE, "%#x\n", vport->cfg_##attr);\ + return scnprintf(buf, PAGE_SIZE, "%#x\n", vport->cfg_##attr);\ } /** @@ -2736,7 +2740,7 @@ lpfc_soft_wwpn_show(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; - return snprintf(buf, PAGE_SIZE, "0x%llx\n", + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)phba->cfg_soft_wwpn); } @@ -2833,7 +2837,7 @@ lpfc_soft_wwnn_show(struct device *dev, struct device_attribute *attr, { struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; - return snprintf(buf, PAGE_SIZE, "0x%llx\n", + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)phba->cfg_soft_wwnn); } @@ -2899,7 +2903,7 @@ lpfc_oas_tgt_show(struct device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; - return snprintf(buf, PAGE_SIZE, "0x%llx\n", + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", wwn_to_u64(phba->cfg_oas_tgt_wwpn)); } @@ -2967,7 +2971,7 @@ lpfc_oas_priority_show(struct device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; - return snprintf(buf, PAGE_SIZE, "%d\n", phba->cfg_oas_priority); + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->cfg_oas_priority); } /** @@ -3030,7 +3034,7 @@ lpfc_oas_vpt_show(struct device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; - return snprintf(buf, PAGE_SIZE, "0x%llx\n", + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", wwn_to_u64(phba->cfg_oas_vpt_wwpn)); } @@ -3101,7 +3105,7 @@ lpfc_oas_lun_state_show(struct device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; - return snprintf(buf, PAGE_SIZE, "%d\n", phba->cfg_oas_lun_state); + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->cfg_oas_lun_state); } /** @@ -3165,7 +3169,7 @@ lpfc_oas_lun_status_show(struct device *dev, struct device_attribute *attr, if (!(phba->cfg_oas_flags & OAS_LUN_VALID)) return -EFAULT; - return snprintf(buf, PAGE_SIZE, "%d\n", phba->cfg_oas_lun_status); + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->cfg_oas_lun_status); } static DEVICE_ATTR(lpfc_xlane_lun_status, S_IRUGO, lpfc_oas_lun_status_show, NULL); @@ -3317,7 +3321,7 @@ lpfc_oas_lun_show(struct device *dev, struct device_attribute *attr, if (oas_lun != NOT_OAS_ENABLED_LUN) phba->cfg_oas_flags |= OAS_LUN_VALID; - len += snprintf(buf + len, PAGE_SIZE-len, "0x%llx", oas_lun); + len += scnprintf(buf + len, PAGE_SIZE-len, "0x%llx", oas_lun); return len; } @@ -3451,7 +3455,7 @@ lpfc_iocb_hw_show(struct device *dev, struct device_attribute *attr, char *buf) struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *) shost->hostdata)->phba; - return snprintf(buf, PAGE_SIZE, "%d\n", phba->iocb_max); + return scnprintf(buf, PAGE_SIZE, "%d\n", phba->iocb_max); } static DEVICE_ATTR(iocb_hw, S_IRUGO, @@ -3463,7 +3467,7 @@ lpfc_txq_hw_show(struct device *dev, struct device_attribute *attr, char *buf) struct lpfc_hba *phba = ((struct lpfc_vport *) shost->hostdata)->phba; struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba); - return snprintf(buf, PAGE_SIZE, "%d\n", + return scnprintf(buf, PAGE_SIZE, "%d\n", pring ? pring->txq_max : 0); } @@ -3477,7 +3481,7 @@ lpfc_txcmplq_hw_show(struct device *dev, struct device_attribute *attr, struct lpfc_hba *phba = ((struct lpfc_vport *) shost->hostdata)->phba; struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba); - return snprintf(buf, PAGE_SIZE, "%d\n", + return scnprintf(buf, PAGE_SIZE, "%d\n", pring ? pring->txcmplq_max : 0); } @@ -3513,7 +3517,7 @@ lpfc_nodev_tmo_show(struct device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; - return snprintf(buf, PAGE_SIZE, "%d\n", vport->cfg_devloss_tmo); + return scnprintf(buf, PAGE_SIZE, "%d\n", vport->cfg_devloss_tmo); } /** @@ -5016,19 +5020,19 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, switch (phba->cfg_fcp_cpu_map) { case 0: - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "fcp_cpu_map: No mapping (%d)\n", phba->cfg_fcp_cpu_map); return len; case 1: - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "fcp_cpu_map: HBA centric mapping (%d): " "%d online CPUs\n", phba->cfg_fcp_cpu_map, phba->sli4_hba.num_online_cpu); break; case 2: - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "fcp_cpu_map: Driver centric mapping (%d): " "%d online CPUs\n", phba->cfg_fcp_cpu_map, @@ -5041,14 +5045,14 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, /* margin should fit in this and the truncated message */ if (cpup->irq == LPFC_VECTOR_MAP_EMPTY) - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "CPU %02d io_chan %02d " "physid %d coreid %d\n", phba->sli4_hba.curr_disp_cpu, cpup->channel_id, cpup->phys_id, cpup->core_id); else - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "CPU %02d io_chan %02d " "physid %d coreid %d IRQ %d\n", phba->sli4_hba.curr_disp_cpu, @@ -5061,7 +5065,7 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, if (phba->sli4_hba.curr_disp_cpu < phba->sli4_hba.num_present_cpu && (len >= (PAGE_SIZE - 64))) { - len += snprintf(buf + len, PAGE_SIZE-len, "more...\n"); + len += scnprintf(buf + len, PAGE_SIZE-len, "more...\n"); break; } } @@ -5586,10 +5590,10 @@ lpfc_sg_seg_cnt_show(struct device *dev, struct device_attribute *attr, struct lpfc_hba *phba = vport->phba; int len; - len = snprintf(buf, PAGE_SIZE, "SGL sz: %d total SGEs: %d\n", + len = scnprintf(buf, PAGE_SIZE, "SGL sz: %d total SGEs: %d\n", phba->cfg_sg_dma_buf_size, phba->cfg_total_seg_cnt); - len += snprintf(buf + len, PAGE_SIZE, "Cfg: %d SCSI: %d NVME: %d\n", + len += scnprintf(buf + len, PAGE_SIZE, "Cfg: %d SCSI: %d NVME: %d\n", phba->cfg_sg_seg_cnt, phba->cfg_scsi_seg_cnt, phba->cfg_nvme_seg_cnt); return len; @@ -6586,7 +6590,7 @@ lpfc_show_rport_##field (struct device *dev, \ { \ struct fc_rport *rport = transport_class_to_rport(dev); \ struct lpfc_rport_data *rdata = rport->hostdata; \ - return snprintf(buf, sz, format_string, \ + return scnprintf(buf, sz, format_string, \ (rdata->target) ? cast rdata->target->field : 0); \ } diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 552da8bf43e4..221f8fd87d24 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1430,7 +1430,7 @@ lpfc_vport_symbolic_port_name(struct lpfc_vport *vport, char *symbol, * Name object. NPIV is not in play so this integer * value is sufficient and unique per FC-ID. */ - n = snprintf(symbol, size, "%d", vport->phba->brd_no); + n = scnprintf(symbol, size, "%d", vport->phba->brd_no); return n; } @@ -1444,26 +1444,26 @@ lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, lpfc_decode_firmware_rev(vport->phba, fwrev, 0); - n = snprintf(symbol, size, "Emulex %s", vport->phba->ModelName); + n = scnprintf(symbol, size, "Emulex %s", vport->phba->ModelName); if (size < n) return n; - n += snprintf(symbol + n, size - n, " FV%s", fwrev); + n += scnprintf(symbol + n, size - n, " FV%s", fwrev); if (size < n) return n; - n += snprintf(symbol + n, size - n, " DV%s.", + n += scnprintf(symbol + n, size - n, " DV%s.", lpfc_release_version); if (size < n) return n; - n += snprintf(symbol + n, size - n, " HN:%s.", + n += scnprintf(symbol + n, size - n, " HN:%s.", init_utsname()->nodename); if (size < n) return n; /* Note :- OS name is "Linux" */ - n += snprintf(symbol + n, size - n, " OS:%s\n", + n += scnprintf(symbol + n, size - n, " OS:%s\n", init_utsname()->sysname); return n; } diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index a58f0b3f03a9..361521dd5bd8 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -170,7 +170,7 @@ lpfc_debugfs_disc_trc_data(struct lpfc_vport *vport, char *buf, int size) snprintf(buffer, LPFC_DEBUG_TRC_ENTRY_SIZE, "%010d:%010d ms:%s\n", dtp->seq_cnt, ms, dtp->fmt); - len += snprintf(buf+len, size-len, buffer, + len += scnprintf(buf+len, size-len, buffer, dtp->data1, dtp->data2, dtp->data3); } for (i = 0; i < index; i++) { @@ -181,7 +181,7 @@ lpfc_debugfs_disc_trc_data(struct lpfc_vport *vport, char *buf, int size) snprintf(buffer, LPFC_DEBUG_TRC_ENTRY_SIZE, "%010d:%010d ms:%s\n", dtp->seq_cnt, ms, dtp->fmt); - len += snprintf(buf+len, size-len, buffer, + len += scnprintf(buf+len, size-len, buffer, dtp->data1, dtp->data2, dtp->data3); } @@ -236,7 +236,7 @@ lpfc_debugfs_slow_ring_trc_data(struct lpfc_hba *phba, char *buf, int size) snprintf(buffer, LPFC_DEBUG_TRC_ENTRY_SIZE, "%010d:%010d ms:%s\n", dtp->seq_cnt, ms, dtp->fmt); - len += snprintf(buf+len, size-len, buffer, + len += scnprintf(buf+len, size-len, buffer, dtp->data1, dtp->data2, dtp->data3); } for (i = 0; i < index; i++) { @@ -247,7 +247,7 @@ lpfc_debugfs_slow_ring_trc_data(struct lpfc_hba *phba, char *buf, int size) snprintf(buffer, LPFC_DEBUG_TRC_ENTRY_SIZE, "%010d:%010d ms:%s\n", dtp->seq_cnt, ms, dtp->fmt); - len += snprintf(buf+len, size-len, buffer, + len += scnprintf(buf+len, size-len, buffer, dtp->data1, dtp->data2, dtp->data3); } @@ -307,7 +307,7 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size) i = lpfc_debugfs_last_hbq; - len += snprintf(buf+len, size-len, "HBQ %d Info\n", i); + len += scnprintf(buf+len, size-len, "HBQ %d Info\n", i); hbqs = &phba->hbqs[i]; posted = 0; @@ -315,21 +315,21 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size) posted++; hip = lpfc_hbq_defs[i]; - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "idx:%d prof:%d rn:%d bufcnt:%d icnt:%d acnt:%d posted %d\n", hip->hbq_index, hip->profile, hip->rn, hip->buffer_count, hip->init_count, hip->add_count, posted); raw_index = phba->hbq_get[i]; getidx = le32_to_cpu(raw_index); - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "entries:%d bufcnt:%d Put:%d nPut:%d localGet:%d hbaGet:%d\n", hbqs->entry_count, hbqs->buffer_count, hbqs->hbqPutIdx, hbqs->next_hbqPutIdx, hbqs->local_hbqGetIdx, getidx); hbqe = (struct lpfc_hbq_entry *) phba->hbqs[i].hbq_virt; for (j=0; jentry_count; j++) { - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "%03d: %08x %04x %05x ", j, le32_to_cpu(hbqe->bde.addrLow), le32_to_cpu(hbqe->bde.tus.w), @@ -341,14 +341,16 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size) low = hbqs->hbqPutIdx - posted; if (low >= 0) { if ((j >= hbqs->hbqPutIdx) || (j < low)) { - len += snprintf(buf+len, size-len, "Unused\n"); + len += scnprintf(buf + len, size - len, + "Unused\n"); goto skipit; } } else { if ((j >= hbqs->hbqPutIdx) && (j < (hbqs->entry_count+low))) { - len += snprintf(buf+len, size-len, "Unused\n"); + len += scnprintf(buf + len, size - len, + "Unused\n"); goto skipit; } } @@ -358,7 +360,7 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size) hbq_buf = container_of(d_buf, struct hbq_dmabuf, dbuf); phys = ((uint64_t)hbq_buf->dbuf.phys & 0xffffffff); if (phys == le32_to_cpu(hbqe->bde.addrLow)) { - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "Buf%d: %p %06x\n", i, hbq_buf->dbuf.virt, hbq_buf->tag); found = 1; @@ -367,7 +369,7 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size) i++; } if (!found) { - len += snprintf(buf+len, size-len, "No DMAinfo?\n"); + len += scnprintf(buf+len, size-len, "No DMAinfo?\n"); } skipit: hbqe++; @@ -413,7 +415,7 @@ lpfc_debugfs_dumpHBASlim_data(struct lpfc_hba *phba, char *buf, int size) off = 0; spin_lock_irq(&phba->hbalock); - len += snprintf(buf+len, size-len, "HBA SLIM\n"); + len += scnprintf(buf+len, size-len, "HBA SLIM\n"); lpfc_memcpy_from_slim(buffer, phba->MBslimaddr + lpfc_debugfs_last_hba_slim_off, 1024); @@ -427,7 +429,7 @@ lpfc_debugfs_dumpHBASlim_data(struct lpfc_hba *phba, char *buf, int size) i = 1024; while (i > 0) { - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "%08x: %08x %08x %08x %08x %08x %08x %08x %08x\n", off, *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4), *(ptr+5), *(ptr+6), *(ptr+7)); @@ -471,11 +473,11 @@ lpfc_debugfs_dumpHostSlim_data(struct lpfc_hba *phba, char *buf, int size) off = 0; spin_lock_irq(&phba->hbalock); - len += snprintf(buf+len, size-len, "SLIM Mailbox\n"); + len += scnprintf(buf+len, size-len, "SLIM Mailbox\n"); ptr = (uint32_t *)phba->slim2p.virt; i = sizeof(MAILBOX_t); while (i > 0) { - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "%08x: %08x %08x %08x %08x %08x %08x %08x %08x\n", off, *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4), *(ptr+5), *(ptr+6), *(ptr+7)); @@ -484,11 +486,11 @@ lpfc_debugfs_dumpHostSlim_data(struct lpfc_hba *phba, char *buf, int size) off += (8 * sizeof(uint32_t)); } - len += snprintf(buf+len, size-len, "SLIM PCB\n"); + len += scnprintf(buf+len, size-len, "SLIM PCB\n"); ptr = (uint32_t *)phba->pcb; i = sizeof(PCB_t); while (i > 0) { - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "%08x: %08x %08x %08x %08x %08x %08x %08x %08x\n", off, *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4), *(ptr+5), *(ptr+6), *(ptr+7)); @@ -501,7 +503,7 @@ lpfc_debugfs_dumpHostSlim_data(struct lpfc_hba *phba, char *buf, int size) for (i = 0; i < 4; i++) { pgpp = &phba->port_gp[i]; pring = &psli->sli3_ring[i]; - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "Ring %d: CMD GetInx:%d " "(Max:%d Next:%d " "Local:%d flg:x%x) " @@ -518,7 +520,7 @@ lpfc_debugfs_dumpHostSlim_data(struct lpfc_hba *phba, char *buf, int size) word1 = readl(phba->CAregaddr); word2 = readl(phba->HSregaddr); word3 = readl(phba->HCregaddr); - len += snprintf(buf+len, size-len, "HA:%08x CA:%08x HS:%08x " + len += scnprintf(buf+len, size-len, "HA:%08x CA:%08x HS:%08x " "HC:%08x\n", word0, word1, word2, word3); } spin_unlock_irq(&phba->hbalock); @@ -556,12 +558,12 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); outio = 0; - len += snprintf(buf+len, size-len, "\nFCP Nodelist Entries ...\n"); + len += scnprintf(buf+len, size-len, "\nFCP Nodelist Entries ...\n"); spin_lock_irq(shost->host_lock); list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { iocnt = 0; if (!cnt) { - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "Missing Nodelist Entries\n"); break; } @@ -599,63 +601,63 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) default: statep = "UNKNOWN"; } - len += snprintf(buf+len, size-len, "%s DID:x%06x ", + len += scnprintf(buf+len, size-len, "%s DID:x%06x ", statep, ndlp->nlp_DID); - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "WWPN x%llx ", wwn_to_u64(ndlp->nlp_portname.u.wwn)); - len += snprintf(buf+len, size-len, + len += scnprintf(buf+len, size-len, "WWNN x%llx ", wwn_to_u64(ndlp->nlp_nodename.u.wwn)); if (ndlp->nlp_flag & NLP_RPI_REGISTERED) - len += snprintf(buf+len, size-len, "RPI:%03d ", + len += scnprintf(buf+len, size-len, "RPI:%03d ", ndlp->nlp_rpi); else - len += snprintf(buf+len, size-len, "RPI:none "); - len += snprintf(buf+len, size-len, "flag:x%08x ", + len += scnprintf(buf+len, size-len, "RPI:none "); + len += scnprintf(buf+len, size-len, "flag:x%08x ", ndlp->nlp_flag); if (!ndlp->nlp_type) - len += snprintf(buf+len, size-len, "UNKNOWN_TYPE "); + len += scnprintf(buf+len, size-len, "UNKNOWN_TYPE "); if (ndlp->nlp_type & NLP_FC_NODE) - len += snprintf(buf+len, size-len, "FC_NODE "); + len += scnprintf(buf+len, size-len, "FC_NODE "); if (ndlp->nlp_type & NLP_FABRIC) { - len += snprintf(buf+len, size-len, "FABRIC "); + len += scnprintf(buf+len, size-len, "FABRIC "); iocnt = 0; } if (ndlp->nlp_type & NLP_FCP_TARGET) - len += snprintf(buf+len, size-len, "FCP_TGT sid:%d ", + len += scnprintf(buf+len, size-len, "FCP_TGT sid:%d ", ndlp->nlp_sid); if (ndlp->nlp_type & NLP_FCP_INITIATOR) - len += snprintf(buf+len, size-len, "FCP_INITIATOR "); + len += scnprintf(buf+len, size-len, "FCP_INITIATOR "); if (ndlp->nlp_type & NLP_NVME_TARGET) - len += snprintf(buf + len, + len += scnprintf(buf + len, size - len, "NVME_TGT sid:%d ", NLP_NO_SID); if (ndlp->nlp_type & NLP_NVME_INITIATOR) - len += snprintf(buf + len, + len += scnprintf(buf + len, size - len, "NVME_INITIATOR "); - len += snprintf(buf+len, size-len, "usgmap:%x ", + len += scnprintf(buf+len, size-len, "usgmap:%x ", ndlp->nlp_usg_map); - len += snprintf(buf+len, size-len, "refcnt:%x", + len += scnprintf(buf+len, size-len, "refcnt:%x", kref_read(&ndlp->kref)); if (iocnt) { i = atomic_read(&ndlp->cmd_pending); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, " OutIO:x%x Qdepth x%x", i, ndlp->cmd_qdepth); outio += i; } - len += snprintf(buf + len, size - len, "defer:%x ", + len += scnprintf(buf + len, size - len, "defer:%x ", ndlp->nlp_defer_did); - len += snprintf(buf+len, size-len, "\n"); + len += scnprintf(buf+len, size-len, "\n"); } spin_unlock_irq(shost->host_lock); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "\nOutstanding IO x%x\n", outio); if (phba->nvmet_support && phba->targetport && (vport == phba->pport)) { - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "\nNVME Targetport Entry ...\n"); /* Port state is only one of two values for now. */ @@ -663,18 +665,18 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) statep = "REGISTERED"; else statep = "INIT"; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "TGT WWNN x%llx WWPN x%llx State %s\n", wwn_to_u64(vport->fc_nodename.u.wwn), wwn_to_u64(vport->fc_portname.u.wwn), statep); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, " Targetport DID x%06x\n", phba->targetport->port_id); goto out_exit; } - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "\nNVME Lport/Rport Entries ...\n"); localport = vport->localport; @@ -689,11 +691,11 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) else statep = "UNKNOWN "; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Lport DID x%06x PortState %s\n", localport->port_id, statep); - len += snprintf(buf + len, size - len, "\tRport List:\n"); + len += scnprintf(buf + len, size - len, "\tRport List:\n"); list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { /* local short-hand pointer. */ spin_lock(&phba->hbalock); @@ -720,32 +722,32 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) } /* Tab in to show lport ownership. */ - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "\t%s Port ID:x%06x ", statep, nrport->port_id); - len += snprintf(buf + len, size - len, "WWPN x%llx ", + len += scnprintf(buf + len, size - len, "WWPN x%llx ", nrport->port_name); - len += snprintf(buf + len, size - len, "WWNN x%llx ", + len += scnprintf(buf + len, size - len, "WWNN x%llx ", nrport->node_name); /* An NVME rport can have multiple roles. */ if (nrport->port_role & FC_PORT_ROLE_NVME_INITIATOR) - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "INITIATOR "); if (nrport->port_role & FC_PORT_ROLE_NVME_TARGET) - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "TARGET "); if (nrport->port_role & FC_PORT_ROLE_NVME_DISCOVERY) - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "DISCSRVC "); if (nrport->port_role & ~(FC_PORT_ROLE_NVME_INITIATOR | FC_PORT_ROLE_NVME_TARGET | FC_PORT_ROLE_NVME_DISCOVERY)) - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "UNKNOWN ROLE x%x", nrport->port_role); /* Terminate the string. */ - len += snprintf(buf + len, size - len, "\n"); + len += scnprintf(buf + len, size - len, "\n"); } spin_unlock_irq(shost->host_lock); @@ -784,35 +786,35 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) if (!phba->targetport) return len; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "\nNVME Targetport Statistics\n"); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "LS: Rcv %08x Drop %08x Abort %08x\n", atomic_read(&tgtp->rcv_ls_req_in), atomic_read(&tgtp->rcv_ls_req_drop), atomic_read(&tgtp->xmt_ls_abort)); if (atomic_read(&tgtp->rcv_ls_req_in) != atomic_read(&tgtp->rcv_ls_req_out)) { - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Rcv LS: in %08x != out %08x\n", atomic_read(&tgtp->rcv_ls_req_in), atomic_read(&tgtp->rcv_ls_req_out)); } - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "LS: Xmt %08x Drop %08x Cmpl %08x\n", atomic_read(&tgtp->xmt_ls_rsp), atomic_read(&tgtp->xmt_ls_drop), atomic_read(&tgtp->xmt_ls_rsp_cmpl)); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "LS: RSP Abort %08x xb %08x Err %08x\n", atomic_read(&tgtp->xmt_ls_rsp_aborted), atomic_read(&tgtp->xmt_ls_rsp_xb_set), atomic_read(&tgtp->xmt_ls_rsp_error)); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "FCP: Rcv %08x Defer %08x Release %08x " "Drop %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), @@ -822,13 +824,13 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) if (atomic_read(&tgtp->rcv_fcp_cmd_in) != atomic_read(&tgtp->rcv_fcp_cmd_out)) { - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Rcv FCP: in %08x != out %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), atomic_read(&tgtp->rcv_fcp_cmd_out)); } - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "FCP Rsp: read %08x readrsp %08x " "write %08x rsp %08x\n", atomic_read(&tgtp->xmt_fcp_read), @@ -836,31 +838,31 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) atomic_read(&tgtp->xmt_fcp_write), atomic_read(&tgtp->xmt_fcp_rsp)); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "FCP Rsp Cmpl: %08x err %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_rsp_cmpl), atomic_read(&tgtp->xmt_fcp_rsp_error), atomic_read(&tgtp->xmt_fcp_rsp_drop)); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "FCP Rsp Abort: %08x xb %08x xricqe %08x\n", atomic_read(&tgtp->xmt_fcp_rsp_aborted), atomic_read(&tgtp->xmt_fcp_rsp_xb_set), atomic_read(&tgtp->xmt_fcp_xri_abort_cqe)); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "ABORT: Xmt %08x Cmpl %08x\n", atomic_read(&tgtp->xmt_fcp_abort), atomic_read(&tgtp->xmt_fcp_abort_cmpl)); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "ABORT: Sol %08x Usol %08x Err %08x Cmpl %08x", atomic_read(&tgtp->xmt_abort_sol), atomic_read(&tgtp->xmt_abort_unsol), atomic_read(&tgtp->xmt_abort_rsp), atomic_read(&tgtp->xmt_abort_rsp_error)); - len += snprintf(buf + len, size - len, "\n"); + len += scnprintf(buf + len, size - len, "\n"); cnt = 0; spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); @@ -871,7 +873,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) } spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); if (cnt) { - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "ABORT: %d ctx entries\n", cnt); spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); list_for_each_entry_safe(ctxp, next_ctxp, @@ -879,7 +881,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) list) { if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) break; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Entry: oxid %x state %x " "flag %x\n", ctxp->oxid, ctxp->state, @@ -893,7 +895,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) tot += atomic_read(&tgtp->xmt_fcp_release); tot = atomic_read(&tgtp->rcv_fcp_cmd_in) - tot; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "IO_CTX: %08x WAIT: cur %08x tot %08x\n" "CTX Outstanding %08llx\n", phba->sli4_hba.nvmet_xri_cnt, @@ -911,10 +913,10 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) if (!lport) return len; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "\nNVME Lport Statistics\n"); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "LS: Xmt %016x Cmpl %016x\n", atomic_read(&lport->fc4NvmeLsRequests), atomic_read(&lport->fc4NvmeLsCmpls)); @@ -938,20 +940,20 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) if (i >= 32) continue; - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "FCP (%d): Rd %016llx Wr %016llx " "IO %016llx ", i, data1, data2, data3); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "Cmpl %016llx OutIO %016llx\n", tot, ((data1 + data2 + data3) - tot)); } - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "Total FCP Cmpl %016llx Issue %016llx " "OutIO %016llx\n", totin, totout, totout - totin); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "LS Xmt Err: Abrt %08x Err %08x " "Cmpl Err: xb %08x Err %08x\n", atomic_read(&lport->xmt_ls_abort), @@ -959,7 +961,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) atomic_read(&lport->cmpl_ls_xb), atomic_read(&lport->cmpl_ls_err)); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "FCP Xmt Err: noxri %06x nondlp %06x " "qdepth %06x wqerr %06x err %06x Abrt %06x\n", atomic_read(&lport->xmt_fcp_noxri), @@ -969,7 +971,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) atomic_read(&lport->xmt_fcp_err), atomic_read(&lport->xmt_fcp_abort)); - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "FCP Cmpl Err: xb %08x Err %08x\n", atomic_read(&lport->cmpl_fcp_xb), atomic_read(&lport->cmpl_fcp_err)); @@ -1001,58 +1003,58 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) if (phba->nvmet_support == 0) { /* NVME Initiator */ - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "ktime %s: Total Samples: %lld\n", (phba->ktime_on ? "Enabled" : "Disabled"), phba->ktime_data_samples); if (phba->ktime_data_samples == 0) return len; - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "Segment 1: Last NVME Cmd cmpl " "done -to- Start of next NVME cnd (in driver)\n"); - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg1_total, phba->ktime_data_samples), phba->ktime_seg1_min, phba->ktime_seg1_max); - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "Segment 2: Driver start of NVME cmd " "-to- Firmware WQ doorbell\n"); - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg2_total, phba->ktime_data_samples), phba->ktime_seg2_min, phba->ktime_seg2_max); - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "Segment 3: Firmware WQ doorbell -to- " "MSI-X ISR cmpl\n"); - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg3_total, phba->ktime_data_samples), phba->ktime_seg3_min, phba->ktime_seg3_max); - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "Segment 4: MSI-X ISR cmpl -to- " "NVME cmpl done\n"); - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg4_total, phba->ktime_data_samples), phba->ktime_seg4_min, phba->ktime_seg4_max); - len += snprintf( + len += scnprintf( buf + len, PAGE_SIZE - len, "Total IO avg time: %08lld\n", div_u64(phba->ktime_seg1_total + @@ -1064,7 +1066,7 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) } /* NVME Target */ - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "ktime %s: Total Samples: %lld %lld\n", (phba->ktime_on ? "Enabled" : "Disabled"), phba->ktime_data_samples, @@ -1072,46 +1074,46 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) if (phba->ktime_data_samples == 0) return len; - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 1: MSI-X ISR Rcv cmd -to- " "cmd pass to NVME Layer\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg1_total, phba->ktime_data_samples), phba->ktime_seg1_min, phba->ktime_seg1_max); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 2: cmd pass to NVME Layer- " "-to- Driver rcv cmd OP (action)\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg2_total, phba->ktime_data_samples), phba->ktime_seg2_min, phba->ktime_seg2_max); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 3: Driver rcv cmd OP -to- " "Firmware WQ doorbell: cmd\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg3_total, phba->ktime_data_samples), phba->ktime_seg3_min, phba->ktime_seg3_max); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 4: Firmware WQ doorbell: cmd " "-to- MSI-X ISR for cmd cmpl\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg4_total, phba->ktime_data_samples), phba->ktime_seg4_min, phba->ktime_seg4_max); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 5: MSI-X ISR for cmd cmpl " "-to- NVME layer passed cmd done\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg5_total, phba->ktime_data_samples), @@ -1119,10 +1121,10 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) phba->ktime_seg5_max); if (phba->ktime_status_samples == 0) { - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Total: cmd received by MSI-X ISR " "-to- cmd completed on wire\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld " "max %08lld\n", div_u64(phba->ktime_seg10_total, @@ -1132,46 +1134,46 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) return len; } - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 6: NVME layer passed cmd done " "-to- Driver rcv rsp status OP\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg6_total, phba->ktime_status_samples), phba->ktime_seg6_min, phba->ktime_seg6_max); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 7: Driver rcv rsp status OP " "-to- Firmware WQ doorbell: status\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg7_total, phba->ktime_status_samples), phba->ktime_seg7_min, phba->ktime_seg7_max); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 8: Firmware WQ doorbell: status" " -to- MSI-X ISR for status cmpl\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg8_total, phba->ktime_status_samples), phba->ktime_seg8_min, phba->ktime_seg8_max); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Segment 9: MSI-X ISR for status cmpl " "-to- NVME layer passed status done\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg9_total, phba->ktime_status_samples), phba->ktime_seg9_min, phba->ktime_seg9_max); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "Total: cmd received by MSI-X ISR -to- " "cmd completed on wire\n"); - len += snprintf(buf + len, PAGE_SIZE-len, + len += scnprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", div_u64(phba->ktime_seg10_total, phba->ktime_status_samples), @@ -1206,7 +1208,7 @@ lpfc_debugfs_nvmeio_trc_data(struct lpfc_hba *phba, char *buf, int size) (phba->nvmeio_trc_size - 1); skip = phba->nvmeio_trc_output_idx; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "%s IO Trace %s: next_idx %d skip %d size %d\n", (phba->nvmet_support ? "NVME" : "NVMET"), (state ? "Enabled" : "Disabled"), @@ -1228,18 +1230,18 @@ lpfc_debugfs_nvmeio_trc_data(struct lpfc_hba *phba, char *buf, int size) if (!dtp->fmt) continue; - len += snprintf(buf + len, size - len, dtp->fmt, + len += scnprintf(buf + len, size - len, dtp->fmt, dtp->data1, dtp->data2, dtp->data3); if (phba->nvmeio_trc_output_idx >= phba->nvmeio_trc_size) { phba->nvmeio_trc_output_idx = 0; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Trace Complete\n"); goto out; } if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) { - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Trace Continue (%d of %d)\n", phba->nvmeio_trc_output_idx, phba->nvmeio_trc_size); @@ -1257,18 +1259,18 @@ lpfc_debugfs_nvmeio_trc_data(struct lpfc_hba *phba, char *buf, int size) if (!dtp->fmt) continue; - len += snprintf(buf + len, size - len, dtp->fmt, + len += scnprintf(buf + len, size - len, dtp->fmt, dtp->data1, dtp->data2, dtp->data3); if (phba->nvmeio_trc_output_idx >= phba->nvmeio_trc_size) { phba->nvmeio_trc_output_idx = 0; - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Trace Complete\n"); goto out; } if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) { - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Trace Continue (%d of %d)\n", phba->nvmeio_trc_output_idx, phba->nvmeio_trc_size); @@ -1276,7 +1278,7 @@ lpfc_debugfs_nvmeio_trc_data(struct lpfc_hba *phba, char *buf, int size) } } - len += snprintf(buf + len, size - len, + len += scnprintf(buf + len, size - len, "Trace Done\n"); out: return len; @@ -1308,39 +1310,39 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) if (phba->nvmet_support == 0) { /* NVME Initiator */ - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "CPUcheck %s\n", (phba->cpucheck_on & LPFC_CHECK_NVME_IO ? "Enabled" : "Disabled")); for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { if (i >= LPFC_CHECK_CPU_CNT) break; - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%02d: xmit x%08x cmpl x%08x\n", i, phba->cpucheck_xmt_io[i], phba->cpucheck_cmpl_io[i]); tot_xmt += phba->cpucheck_xmt_io[i]; tot_cmpl += phba->cpucheck_cmpl_io[i]; } - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "tot:xmit x%08x cmpl x%08x\n", tot_xmt, tot_cmpl); return len; } /* NVME Target */ - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "CPUcheck %s ", (phba->cpucheck_on & LPFC_CHECK_NVMET_IO ? "IO Enabled - " : "IO Disabled - ")); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV ? "Rcv Enabled\n" : "Rcv Disabled\n")); for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { if (i >= LPFC_CHECK_CPU_CNT) break; - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%02d: xmit x%08x ccmpl x%08x " "cmpl x%08x rcv x%08x\n", i, phba->cpucheck_xmt_io[i], @@ -1352,7 +1354,7 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) tot_cmpl += phba->cpucheck_cmpl_io[i]; tot_ccmpl += phba->cpucheck_ccmpl_io[i]; } - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "tot:xmit x%08x ccmpl x%08x cmpl x%08x rcv x%08x\n", tot_xmt, tot_ccmpl, tot_cmpl, tot_rcv); return len; @@ -1797,28 +1799,29 @@ lpfc_debugfs_dif_err_read(struct file *file, char __user *buf, int cnt = 0; if (dent == phba->debug_writeGuard) - cnt = snprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wgrd_cnt); + cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wgrd_cnt); else if (dent == phba->debug_writeApp) - cnt = snprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wapp_cnt); + cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wapp_cnt); else if (dent == phba->debug_writeRef) - cnt = snprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wref_cnt); + cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_wref_cnt); else if (dent == phba->debug_readGuard) - cnt = snprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rgrd_cnt); + cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rgrd_cnt); else if (dent == phba->debug_readApp) - cnt = snprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rapp_cnt); + cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rapp_cnt); else if (dent == phba->debug_readRef) - cnt = snprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rref_cnt); + cnt = scnprintf(cbuf, 32, "%u\n", phba->lpfc_injerr_rref_cnt); else if (dent == phba->debug_InjErrNPortID) - cnt = snprintf(cbuf, 32, "0x%06x\n", phba->lpfc_injerr_nportid); + cnt = scnprintf(cbuf, 32, "0x%06x\n", + phba->lpfc_injerr_nportid); else if (dent == phba->debug_InjErrWWPN) { memcpy(&tmp, &phba->lpfc_injerr_wwpn, sizeof(struct lpfc_name)); tmp = cpu_to_be64(tmp); - cnt = snprintf(cbuf, 32, "0x%016llx\n", tmp); + cnt = scnprintf(cbuf, 32, "0x%016llx\n", tmp); } else if (dent == phba->debug_InjErrLBA) { if (phba->lpfc_injerr_lba == (sector_t)(-1)) - cnt = snprintf(cbuf, 32, "off\n"); + cnt = scnprintf(cbuf, 32, "off\n"); else - cnt = snprintf(cbuf, 32, "0x%llx\n", + cnt = scnprintf(cbuf, 32, "0x%llx\n", (uint64_t) phba->lpfc_injerr_lba); } else lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -2624,17 +2627,17 @@ lpfc_idiag_pcicfg_read(struct file *file, char __user *buf, size_t nbytes, switch (count) { case SIZE_U8: /* byte (8 bits) */ pci_read_config_byte(pdev, where, &u8val); - len += snprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, "%03x: %02x\n", where, u8val); break; case SIZE_U16: /* word (16 bits) */ pci_read_config_word(pdev, where, &u16val); - len += snprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, "%03x: %04x\n", where, u16val); break; case SIZE_U32: /* double word (32 bits) */ pci_read_config_dword(pdev, where, &u32val); - len += snprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, "%03x: %08x\n", where, u32val); break; case LPFC_PCI_CFG_BROWSE: /* browse all */ @@ -2654,25 +2657,25 @@ pcicfg_browse: offset = offset_label; /* Read PCI config space */ - len += snprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, "%03x: ", offset_label); while (index > 0) { pci_read_config_dword(pdev, offset, &u32val); - len += snprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, "%08x ", u32val); offset += sizeof(uint32_t); if (offset >= LPFC_PCI_CFG_SIZE) { - len += snprintf(pbuffer+len, + len += scnprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, "\n"); break; } index -= sizeof(uint32_t); if (!index) - len += snprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, "\n"); else if (!(index % (8 * sizeof(uint32_t)))) { offset_label += (8 * sizeof(uint32_t)); - len += snprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_CFG_SIZE-len, "\n%03x: ", offset_label); } } @@ -2943,7 +2946,7 @@ lpfc_idiag_baracc_read(struct file *file, char __user *buf, size_t nbytes, if (acc_range == SINGLE_WORD) { offset_run = offset; u32val = readl(mem_mapped_bar + offset_run); - len += snprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, "%05x: %08x\n", offset_run, u32val); } else goto baracc_browse; @@ -2957,35 +2960,35 @@ baracc_browse: offset_run = offset_label; /* Read PCI bar memory mapped space */ - len += snprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, "%05x: ", offset_label); index = LPFC_PCI_BAR_RD_SIZE; while (index > 0) { u32val = readl(mem_mapped_bar + offset_run); - len += snprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, "%08x ", u32val); offset_run += sizeof(uint32_t); if (acc_range == LPFC_PCI_BAR_BROWSE) { if (offset_run >= bar_size) { - len += snprintf(pbuffer+len, + len += scnprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, "\n"); break; } } else { if (offset_run >= offset + (acc_range * sizeof(uint32_t))) { - len += snprintf(pbuffer+len, + len += scnprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, "\n"); break; } } index -= sizeof(uint32_t); if (!index) - len += snprintf(pbuffer+len, + len += scnprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, "\n"); else if (!(index % (8 * sizeof(uint32_t)))) { offset_label += (8 * sizeof(uint32_t)); - len += snprintf(pbuffer+len, + len += scnprintf(pbuffer+len, LPFC_PCI_BAR_RD_BUF_SIZE-len, "\n%05x: ", offset_label); } @@ -3158,19 +3161,19 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype, if (!qp) return len; - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\t\t%s WQ info: ", wqtype); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "AssocCQID[%04d]: WQ-STAT[oflow:x%x posted:x%llx]\n", qp->assoc_qid, qp->q_cnt_1, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, qp->host_index, qp->hba_index, qp->entry_repost); - len += snprintf(pbuffer + len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); return len; } @@ -3208,21 +3211,21 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype, if (!qp) return len; - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\t%s CQ info: ", cqtype); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "AssocEQID[%02d]: CQ STAT[max:x%x relw:x%x " "xabt:x%x wq:x%llx]\n", qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, qp->host_index, qp->hba_index, qp->entry_repost); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); return len; } @@ -3234,19 +3237,19 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, if (!qp || !datqp) return len; - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\t\t%s RQ info: ", rqtype); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "AssocCQID[%02d]: RQ-STAT[nopost:x%x nobuf:x%x " "posted:x%x rcv:x%llx]\n", qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n", qp->queue_id, qp->entry_count, qp->entry_size, qp->host_index, qp->hba_index, qp->entry_repost); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n", datqp->queue_id, datqp->entry_count, @@ -3331,17 +3334,17 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype, if (!qp) return len; - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n%s EQ info: EQ-STAT[max:x%x noE:x%x " "cqe_proc:x%x eqe_proc:x%llx eqd %d]\n", eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, (unsigned long long)qp->q_cnt_4, qp->q_mode); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, qp->host_index, qp->hba_index, qp->entry_repost); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); return len; } @@ -3399,7 +3402,7 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, if (phba->cfg_fof == 0) phba->lpfc_idiag_last_eq = 0; - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "EQ %d out of %d HBA EQs\n", x, phba->io_channel_irqs); @@ -3512,7 +3515,7 @@ fof: return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len); too_big: - len += snprintf(pbuffer + len, + len += scnprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "Truncated ...\n"); out: spin_unlock_irq(&phba->hbalock); @@ -3568,22 +3571,22 @@ lpfc_idiag_queacc_read_qe(char *pbuffer, int len, struct lpfc_queue *pque, return 0; esize = pque->entry_size; - len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len, "QE-INDEX[%04d]:\n", index); offset = 0; pentry = pque->qe[index].address; while (esize > 0) { - len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len, "%08x ", *pentry); pentry++; offset += sizeof(uint32_t); esize -= sizeof(uint32_t); if (esize > 0 && !(offset % (4 * sizeof(uint32_t)))) - len += snprintf(pbuffer+len, + len += scnprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len, "\n"); } - len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len, "\n"); + len += scnprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len, "\n"); return len; } @@ -3989,27 +3992,27 @@ lpfc_idiag_drbacc_read_reg(struct lpfc_hba *phba, char *pbuffer, switch (drbregid) { case LPFC_DRB_EQ: - len += snprintf(pbuffer + len, LPFC_DRB_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer + len, LPFC_DRB_ACC_BUF_SIZE-len, "EQ-DRB-REG: 0x%08x\n", readl(phba->sli4_hba.EQDBregaddr)); break; case LPFC_DRB_CQ: - len += snprintf(pbuffer + len, LPFC_DRB_ACC_BUF_SIZE - len, + len += scnprintf(pbuffer + len, LPFC_DRB_ACC_BUF_SIZE - len, "CQ-DRB-REG: 0x%08x\n", readl(phba->sli4_hba.CQDBregaddr)); break; case LPFC_DRB_MQ: - len += snprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len, "MQ-DRB-REG: 0x%08x\n", readl(phba->sli4_hba.MQDBregaddr)); break; case LPFC_DRB_WQ: - len += snprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len, "WQ-DRB-REG: 0x%08x\n", readl(phba->sli4_hba.WQDBregaddr)); break; case LPFC_DRB_RQ: - len += snprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_DRB_ACC_BUF_SIZE-len, "RQ-DRB-REG: 0x%08x\n", readl(phba->sli4_hba.RQDBregaddr)); break; @@ -4199,37 +4202,37 @@ lpfc_idiag_ctlacc_read_reg(struct lpfc_hba *phba, char *pbuffer, switch (ctlregid) { case LPFC_CTL_PORT_SEM: - len += snprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, "Port SemReg: 0x%08x\n", readl(phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_SEM_OFFSET)); break; case LPFC_CTL_PORT_STA: - len += snprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, "Port StaReg: 0x%08x\n", readl(phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_STA_OFFSET)); break; case LPFC_CTL_PORT_CTL: - len += snprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, "Port CtlReg: 0x%08x\n", readl(phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_CTL_OFFSET)); break; case LPFC_CTL_PORT_ER1: - len += snprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, "Port Er1Reg: 0x%08x\n", readl(phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_ER1_OFFSET)); break; case LPFC_CTL_PORT_ER2: - len += snprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, "Port Er2Reg: 0x%08x\n", readl(phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_ER2_OFFSET)); break; case LPFC_CTL_PDEV_CTL: - len += snprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_CTL_ACC_BUF_SIZE-len, "PDev CtlReg: 0x%08x\n", readl(phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET)); @@ -4422,13 +4425,13 @@ lpfc_idiag_mbxacc_get_setup(struct lpfc_hba *phba, char *pbuffer) mbx_dump_cnt = idiag.cmd.data[IDIAG_MBXACC_DPCNT_INDX]; mbx_word_cnt = idiag.cmd.data[IDIAG_MBXACC_WDCNT_INDX]; - len += snprintf(pbuffer+len, LPFC_MBX_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_MBX_ACC_BUF_SIZE-len, "mbx_dump_map: 0x%08x\n", mbx_dump_map); - len += snprintf(pbuffer+len, LPFC_MBX_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_MBX_ACC_BUF_SIZE-len, "mbx_dump_cnt: %04d\n", mbx_dump_cnt); - len += snprintf(pbuffer+len, LPFC_MBX_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_MBX_ACC_BUF_SIZE-len, "mbx_word_cnt: %04d\n", mbx_word_cnt); - len += snprintf(pbuffer+len, LPFC_MBX_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_MBX_ACC_BUF_SIZE-len, "mbx_mbox_cmd: 0x%02x\n", mbx_mbox_cmd); return len; @@ -4577,35 +4580,35 @@ lpfc_idiag_extacc_avail_get(struct lpfc_hba *phba, char *pbuffer, int len) { uint16_t ext_cnt, ext_size; - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\nAvailable Extents Information:\n"); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tPort Available VPI extents: "); lpfc_sli4_get_avail_extnt_rsrc(phba, LPFC_RSC_TYPE_FCOE_VPI, &ext_cnt, &ext_size); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "Count %3d, Size %3d\n", ext_cnt, ext_size); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tPort Available VFI extents: "); lpfc_sli4_get_avail_extnt_rsrc(phba, LPFC_RSC_TYPE_FCOE_VFI, &ext_cnt, &ext_size); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "Count %3d, Size %3d\n", ext_cnt, ext_size); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tPort Available RPI extents: "); lpfc_sli4_get_avail_extnt_rsrc(phba, LPFC_RSC_TYPE_FCOE_RPI, &ext_cnt, &ext_size); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "Count %3d, Size %3d\n", ext_cnt, ext_size); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tPort Available XRI extents: "); lpfc_sli4_get_avail_extnt_rsrc(phba, LPFC_RSC_TYPE_FCOE_XRI, &ext_cnt, &ext_size); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "Count %3d, Size %3d\n", ext_cnt, ext_size); return len; @@ -4629,55 +4632,55 @@ lpfc_idiag_extacc_alloc_get(struct lpfc_hba *phba, char *pbuffer, int len) uint16_t ext_cnt, ext_size; int rc; - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\nAllocated Extents Information:\n"); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tHost Allocated VPI extents: "); rc = lpfc_sli4_get_allocated_extnts(phba, LPFC_RSC_TYPE_FCOE_VPI, &ext_cnt, &ext_size); if (!rc) - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "Port %d Extent %3d, Size %3d\n", phba->brd_no, ext_cnt, ext_size); else - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "N/A\n"); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tHost Allocated VFI extents: "); rc = lpfc_sli4_get_allocated_extnts(phba, LPFC_RSC_TYPE_FCOE_VFI, &ext_cnt, &ext_size); if (!rc) - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "Port %d Extent %3d, Size %3d\n", phba->brd_no, ext_cnt, ext_size); else - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "N/A\n"); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tHost Allocated RPI extents: "); rc = lpfc_sli4_get_allocated_extnts(phba, LPFC_RSC_TYPE_FCOE_RPI, &ext_cnt, &ext_size); if (!rc) - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "Port %d Extent %3d, Size %3d\n", phba->brd_no, ext_cnt, ext_size); else - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "N/A\n"); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tHost Allocated XRI extents: "); rc = lpfc_sli4_get_allocated_extnts(phba, LPFC_RSC_TYPE_FCOE_XRI, &ext_cnt, &ext_size); if (!rc) - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "Port %d Extent %3d, Size %3d\n", phba->brd_no, ext_cnt, ext_size); else - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "N/A\n"); return len; @@ -4701,49 +4704,49 @@ lpfc_idiag_extacc_drivr_get(struct lpfc_hba *phba, char *pbuffer, int len) struct lpfc_rsrc_blks *rsrc_blks; int index; - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\nDriver Extents Information:\n"); - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tVPI extents:\n"); index = 0; list_for_each_entry(rsrc_blks, &phba->lpfc_vpi_blk_list, list) { - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\t\tBlock %3d: Start %4d, Count %4d\n", index, rsrc_blks->rsrc_start, rsrc_blks->rsrc_size); index++; } - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tVFI extents:\n"); index = 0; list_for_each_entry(rsrc_blks, &phba->sli4_hba.lpfc_vfi_blk_list, list) { - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\t\tBlock %3d: Start %4d, Count %4d\n", index, rsrc_blks->rsrc_start, rsrc_blks->rsrc_size); index++; } - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tRPI extents:\n"); index = 0; list_for_each_entry(rsrc_blks, &phba->sli4_hba.lpfc_rpi_blk_list, list) { - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\t\tBlock %3d: Start %4d, Count %4d\n", index, rsrc_blks->rsrc_start, rsrc_blks->rsrc_size); index++; } - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\tXRI extents:\n"); index = 0; list_for_each_entry(rsrc_blks, &phba->sli4_hba.lpfc_xri_blk_list, list) { - len += snprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, + len += scnprintf(pbuffer+len, LPFC_EXT_ACC_BUF_SIZE-len, "\t\tBlock %3d: Start %4d, Count %4d\n", index, rsrc_blks->rsrc_start, rsrc_blks->rsrc_size); @@ -5137,11 +5140,11 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if (i != 0) pr_err("%s\n", line_buf); len = 0; - len += snprintf(line_buf+len, + len += scnprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, "%03d: ", i); } - len += snprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, + len += scnprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, "%08x ", (uint32_t)*pword); pword++; } @@ -5204,11 +5207,11 @@ lpfc_idiag_mbxacc_dump_issue_mbox(struct lpfc_hba *phba, MAILBOX_t *pmbox) pr_err("%s\n", line_buf); len = 0; memset(line_buf, 0, LPFC_MBX_ACC_LBUF_SZ); - len += snprintf(line_buf+len, + len += scnprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, "%03d: ", i); } - len += snprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, + len += scnprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, "%08x ", ((uint32_t)*pword) & 0xffffffff); pword++; @@ -5227,18 +5230,18 @@ lpfc_idiag_mbxacc_dump_issue_mbox(struct lpfc_hba *phba, MAILBOX_t *pmbox) pr_err("%s\n", line_buf); len = 0; memset(line_buf, 0, LPFC_MBX_ACC_LBUF_SZ); - len += snprintf(line_buf+len, + len += scnprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, "%03d: ", i); } for (j = 0; j < 4; j++) { - len += snprintf(line_buf+len, + len += scnprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, "%02x", ((uint8_t)*pbyte) & 0xff); pbyte++; } - len += snprintf(line_buf+len, + len += scnprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, " "); } if ((i - 1) % 8) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 30efc7bf91bd..824de3e410ca 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -342,7 +342,7 @@ lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx) pword = q->qe[idx].address; len = 0; - len += snprintf(line_buf+len, LPFC_LBUF_SZ-len, "QE[%04d]: ", idx); + len += scnprintf(line_buf+len, LPFC_LBUF_SZ-len, "QE[%04d]: ", idx); if (qe_word_cnt > 8) printk(KERN_ERR "%s\n", line_buf); @@ -353,11 +353,11 @@ lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx) if (qe_word_cnt > 8) { len = 0; memset(line_buf, 0, LPFC_LBUF_SZ); - len += snprintf(line_buf+len, LPFC_LBUF_SZ-len, + len += scnprintf(line_buf+len, LPFC_LBUF_SZ-len, "%03d: ", i); } } - len += snprintf(line_buf+len, LPFC_LBUF_SZ-len, "%08x ", + len += scnprintf(line_buf+len, LPFC_LBUF_SZ-len, "%08x ", ((uint32_t)*pword) & 0xffffffff); pword++; } -- cgit v1.2.3 From aee2053554eaa5b887b1a655753f7b8d254fae19 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Tue, 2 Apr 2019 14:24:25 -0700 Subject: scsi: qla2xxx: Fix incorrect region-size setting in optrom SYSFS routines commit 5cbdae10bf11f96e30b4d14de7b08c8b490e903c upstream. Commit e6f77540c067 ("scsi: qla2xxx: Fix an integer overflow in sysfs code") incorrectly set 'optrom_region_size' to 'start+size', which can overflow option-rom boundaries when 'start' is non-zero. Continue setting optrom_region_size to the proper adjusted value of 'size'. Fixes: e6f77540c067 ("scsi: qla2xxx: Fix an integer overflow in sysfs code") Cc: stable@vger.kernel.org Signed-off-by: Andrew Vasquez Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index ac504a1ff0ff..1a396f843de1 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -364,7 +364,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, } ha->optrom_region_start = start; - ha->optrom_region_size = start + size; + ha->optrom_region_size = size; ha->optrom_state = QLA_SREADING; ha->optrom_buffer = vmalloc(ha->optrom_region_size); @@ -437,7 +437,7 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, } ha->optrom_region_start = start; - ha->optrom_region_size = start + size; + ha->optrom_region_size = size; ha->optrom_state = QLA_SWRITING; ha->optrom_buffer = vmalloc(ha->optrom_region_size); -- cgit v1.2.3 From 69d6687a5c6638336f99a3ae9cad30bd64aa8710 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 23 Apr 2019 14:52:35 -0700 Subject: scsi: qla2xxx: Fix device staying in blocked state commit 2137490f2147a8d0799b72b9a1023efb012d40c7 upstream. This patch fixes issue reported by some of the customers, who discovered that after cable pull scenario the devices disappear and path seems to remain in blocked state. Once the device reappears, driver does not seem to update path to online. This issue appears because of the defer flag creating race condition where the same session reappears. This patch fixes this issue by indicating SCSI-ML of device lost when qlt_free_session_done() is called from qlt_unreg_sess(). Fixes: 41dc529a4602a ("qla2xxx: Improve RSCN handling in driver") Signed-off-by: Quinn Tran Cc: stable@vger.kernel.org #4.19 Signed-off-by: Himanshu Madhani Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 510337eac106..d4ac18573d81 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -977,6 +977,8 @@ void qlt_free_session_done(struct work_struct *work) sess->send_els_logo); if (!IS_SW_RESV_ADDR(sess->d_id)) { + qla2x00_mark_device_lost(vha, sess, 0, 0); + if (sess->send_els_logo) { qlt_port_logo_t logo; @@ -1157,8 +1159,6 @@ void qlt_unreg_sess(struct fc_port *sess) if (sess->se_sess) vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess); - qla2x00_mark_device_lost(vha, sess, 0, 0); - sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; sess->disc_state = DSC_DELETE_PEND; sess->last_rscn_gen = sess->rscn_gen; -- cgit v1.2.3 From 1c1727f4b9482b252e2113bd4d9095c086c0d2a0 Mon Sep 17 00:00:00 2001 From: Young Xiao Date: Fri, 12 Apr 2019 15:24:30 +0800 Subject: Bluetooth: hidp: fix buffer overflow commit a1616a5ac99ede5d605047a9012481ce7ff18b16 upstream. Struct ca is copied from userspace. It is not checked whether the "name" field is NULL terminated, which allows local users to obtain potentially sensitive information from kernel stack memory, via a HIDPCONNADD command. This vulnerability is similar to CVE-2011-1079. Signed-off-by: Young Xiao Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hidp/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 9f85a1943be9..2151913892ce 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -75,6 +75,7 @@ static int do_hidp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user sockfd_put(csock); return err; } + ca.name[sizeof(ca.name)-1] = 0; err = hidp_connection_add(&ca, csock, isock); if (!err && copy_to_user(argp, &ca, sizeof(ca))) -- cgit v1.2.3 From 2c93762f4b38d894158bdf73a783685d39d6a981 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 24 Apr 2019 22:19:17 +0200 Subject: Bluetooth: Align minimum encryption key size for LE and BR/EDR connections commit d5bb334a8e171b262e48f378bd2096c0ea458265 upstream. The minimum encryption key size for LE connections is 56 bits and to align LE with BR/EDR, enforce 56 bits of minimum encryption key size for BR/EDR connections as well. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 3 +++ net/bluetooth/hci_conn.c | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e5ea633ea368..fa61e70788c5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -190,6 +190,9 @@ struct adv_info { #define HCI_MAX_SHORT_NAME_LENGTH 10 +/* Min encryption key size to match with SMP */ +#define HCI_MIN_ENC_KEY_SIZE 7 + /* Default LE RPA expiry time, 15 minutes */ #define HCI_DEFAULT_RPA_TIMEOUT (15 * 60) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index bd4978ce8c45..3cf0764d5793 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1276,6 +1276,14 @@ int hci_conn_check_link_mode(struct hci_conn *conn) !test_bit(HCI_CONN_ENCRYPT, &conn->flags)) return 0; + /* The minimum encryption key size needs to be enforced by the + * host stack before establishing any L2CAP connections. The + * specification in theory allows a minimum of 1, but to align + * BR/EDR and LE transports, a minimum of 7 is chosen. + */ + if (conn->enc_key_size < HCI_MIN_ENC_KEY_SIZE) + return 0; + return 1; } -- cgit v1.2.3 From 41d5f23ef17b8bc4ffc107c0c850a095311b88dd Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 14 Mar 2019 15:43:37 +0200 Subject: Bluetooth: Fix not initializing L2CAP tx_credits commit ba8f5289f706aed94cc95b15cc5b89e22062f61f upstream. l2cap_le_flowctl_init was reseting the tx_credits which works only for outgoing connection since that set the tx_credits on the response, for incoming connections that was not the case which leaves the channel without any credits causing it to be suspended. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org # 4.20+ Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ccdc5c67d22a..a3b2e3b5f04b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -510,12 +510,12 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) } EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults); -static void l2cap_le_flowctl_init(struct l2cap_chan *chan) +static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) { chan->sdu = NULL; chan->sdu_last_frag = NULL; chan->sdu_len = 0; - chan->tx_credits = 0; + chan->tx_credits = tx_credits; /* Derive MPS from connection MTU to stop HCI fragmentation */ chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE); /* Give enough credits for a full packet */ @@ -1281,7 +1281,7 @@ static void l2cap_le_connect(struct l2cap_chan *chan) if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags)) return; - l2cap_le_flowctl_init(chan); + l2cap_le_flowctl_init(chan, 0); req.psm = chan->psm; req.scid = cpu_to_le16(chan->scid); @@ -5531,11 +5531,10 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, chan->dcid = scid; chan->omtu = mtu; chan->remote_mps = mps; - chan->tx_credits = __le16_to_cpu(req->credits); __l2cap_chan_add(conn, chan); - l2cap_le_flowctl_init(chan); + l2cap_le_flowctl_init(chan, __le16_to_cpu(req->credits)); dcid = chan->scid; credits = chan->rx_credits; -- cgit v1.2.3 From 349bb9138b3a013f5abb5f4ddf61e4da5b266385 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 1 Apr 2019 11:43:12 +0800 Subject: Bluetooth: hci_bcm: Fix empty regulator supplies for Intel Macs commit 62611abc8f37d00e3b0cff0eb2d72fa92b05fd27 upstream. The code path for Macs goes through bcm_apple_get_resources(), which skips over the code that sets up the regulator supplies. As a result, the call to regulator_bulk_enable() / regulator_bulk_disable() results in a NULL pointer dereference. This was reported on the kernel.org Bugzilla, bug 202963. Unbreak Broadcom Bluetooth support on Intel Macs by checking if the supplies were set up before enabling or disabling them. The same does not need to be done for the clocks, as the common clock framework API checks for NULL pointers. Fixes: 75d11676dccb ("Bluetooth: hci_bcm: Add support for regulator supplies") Cc: # 5.0.x Signed-off-by: Chen-Yu Tsai Tested-by: Imre Kaloz Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_bcm.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index ddbe518c3e5b..b5d31d583d60 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -228,9 +228,15 @@ static int bcm_gpio_set_power(struct bcm_device *dev, bool powered) int err; if (powered && !dev->res_enabled) { - err = regulator_bulk_enable(BCM_NUM_SUPPLIES, dev->supplies); - if (err) - return err; + /* Intel Macs use bcm_apple_get_resources() and don't + * have regulator supplies configured. + */ + if (dev->supplies[0].supply) { + err = regulator_bulk_enable(BCM_NUM_SUPPLIES, + dev->supplies); + if (err) + return err; + } /* LPO clock needs to be 32.768 kHz */ err = clk_set_rate(dev->lpo_clk, 32768); @@ -259,7 +265,13 @@ static int bcm_gpio_set_power(struct bcm_device *dev, bool powered) if (!powered && dev->res_enabled) { clk_disable_unprepare(dev->txco_clk); clk_disable_unprepare(dev->lpo_clk); - regulator_bulk_disable(BCM_NUM_SUPPLIES, dev->supplies); + + /* Intel Macs use bcm_apple_get_resources() and don't + * have regulator supplies configured. + */ + if (dev->supplies[0].supply) + regulator_bulk_disable(BCM_NUM_SUPPLIES, + dev->supplies); } /* wait for device to power on and come out of reset */ -- cgit v1.2.3 From 987722984163181a347569f8c667449d87ac61f5 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 30 Apr 2019 12:21:45 +0200 Subject: UAS: fix alignment of scatter/gather segments commit 3ae62a42090f1ed48e2313ed256a1182a85fb575 upstream. This is the UAS version of 747668dbc061b3e62bc1982767a3a1f9815fcf0e usb-storage: Set virt_boundary_mask to avoid SG overflows We are not as likely to be vulnerable as storage, as it is unlikelier that UAS is run over a controller without native support for SG, but the issue exists. The issue has been existing since the inception of the driver. Fixes: 115bb1ffa54c ("USB: Add UAS driver") Signed-off-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 36742e8e7edc..d2ed3049e7de 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -796,24 +796,33 @@ static int uas_slave_alloc(struct scsi_device *sdev) { struct uas_dev_info *devinfo = (struct uas_dev_info *)sdev->host->hostdata; + int maxp; sdev->hostdata = devinfo; /* - * USB has unusual DMA-alignment requirements: Although the - * starting address of each scatter-gather element doesn't matter, - * the length of each element except the last must be divisible - * by the Bulk maxpacket value. There's currently no way to - * express this by block-layer constraints, so we'll cop out - * and simply require addresses to be aligned at 512-byte - * boundaries. This is okay since most block I/O involves - * hardware sectors that are multiples of 512 bytes in length, - * and since host controllers up through USB 2.0 have maxpacket - * values no larger than 512. + * We have two requirements here. We must satisfy the requirements + * of the physical HC and the demands of the protocol, as we + * definitely want no additional memory allocation in this path + * ruling out using bounce buffers. * - * But it doesn't suffice for Wireless USB, where Bulk maxpacket - * values can be as large as 2048. To make that work properly - * will require changes to the block layer. + * For a transmission on USB to continue we must never send + * a package that is smaller than maxpacket. Hence the length of each + * scatterlist element except the last must be divisible by the + * Bulk maxpacket value. + * If the HC does not ensure that through SG, + * the upper layer must do that. We must assume nothing + * about the capabilities off the HC, so we use the most + * pessimistic requirement. + */ + + maxp = usb_maxpacket(devinfo->udev, devinfo->data_in_pipe, 0); + blk_queue_virt_boundary(sdev->request_queue, maxp - 1); + + /* + * The protocol has no requirements on alignment in the strict sense. + * Controllers may or may not have alignment restrictions. + * As this is not exported, we use an extremely conservative guess. */ blk_queue_update_dma_alignment(sdev->request_queue, (512 - 1)); -- cgit v1.2.3 From 4b1f2ad28fe1a950dd42de155be3019878a0224f Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 29 Apr 2019 12:25:17 -0600 Subject: ASoC: Intel: avoid Oops if DMA setup fails commit 0efa3334d65b7f421ba12382dfa58f6ff5bf83c4 upstream. Currently in sst_dsp_new() if we get an error return from sst_dma_new() we just print an error message and then still complete the function successfully. This means that we are trying to run without sst->dma properly set up, which will result in NULL pointer dereference when sst->dma is later used. This was happening for me in sst_dsp_dma_get_channel(): struct sst_dma *dma = dsp->dma; ... dma->ch = dma_request_channel(mask, dma_chan_filter, dsp); This resulted in: BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: sst_dsp_dma_get_channel+0x4f/0x125 [snd_soc_sst_firmware] Fix this by adding proper error handling for the case where we fail to set up DMA. This change only affects Haswell and Broadwell systems. Baytrail systems explicilty opt-out of DMA via sst->pdata->resindex_dma_base being set to -1. Signed-off-by: Ross Zwisler Cc: stable@vger.kernel.org Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/common/sst-firmware.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/common/sst-firmware.c b/sound/soc/intel/common/sst-firmware.c index 1e067504b604..f830e59f93ea 100644 --- a/sound/soc/intel/common/sst-firmware.c +++ b/sound/soc/intel/common/sst-firmware.c @@ -1251,11 +1251,15 @@ struct sst_dsp *sst_dsp_new(struct device *dev, goto irq_err; err = sst_dma_new(sst); - if (err) - dev_warn(dev, "sst_dma_new failed %d\n", err); + if (err) { + dev_err(dev, "sst_dma_new failed %d\n", err); + goto dma_err; + } return sst; +dma_err: + free_irq(sst->irq, sst); irq_err: if (sst->ops->free) sst->ops->free(sst); -- cgit v1.2.3 From be4b9a303a25614f92ef1574a1af730ec358fc04 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Apr 2019 13:40:20 +0300 Subject: i3c: Fix a shift wrap bug in i3c_bus_set_addr_slot_status() commit 476c7e1d34f2a03b1aa5a924c50703053fe5f77c upstream. The problem here is that addr can be I3C_BROADCAST_ADDR (126). That means we're shifting by (126 * 2) % 64 which is 60. The I3C_ADDR_SLOT_STATUS_MASK is an enum which is an unsigned int in GCC so shifts greater than 31 are undefined. Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Cc: Signed-off-by: Dan Carpenter Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 1412abcff010..5f4bd52121fe 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -385,8 +385,9 @@ static void i3c_bus_set_addr_slot_status(struct i3c_bus *bus, u16 addr, return; ptr = bus->addrslots + (bitpos / BITS_PER_LONG); - *ptr &= ~(I3C_ADDR_SLOT_STATUS_MASK << (bitpos % BITS_PER_LONG)); - *ptr |= status << (bitpos % BITS_PER_LONG); + *ptr &= ~((unsigned long)I3C_ADDR_SLOT_STATUS_MASK << + (bitpos % BITS_PER_LONG)); + *ptr |= (unsigned long)status << (bitpos % BITS_PER_LONG); } static bool i3c_bus_dev_addr_is_avail(struct i3c_bus *bus, u8 addr) -- cgit v1.2.3 From 3b928b59fae0e10d856f4a58e67839384239e507 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2019 11:58:08 +0000 Subject: locking/futex: Allow low-level atomic operations to return -EAGAIN commit 6b4f4bc9cb22875f97023984a625386f0c7cc1c0 upstream. Some futex() operations, including FUTEX_WAKE_OP, require the kernel to perform an atomic read-modify-write of the futex word via the userspace mapping. These operations are implemented by each architecture in arch_futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic(), which are called in atomic context with the relevant hash bucket locks held. Although these routines may return -EFAULT in response to a page fault generated when accessing userspace, they are expected to succeed (i.e. return 0) in all other cases. This poses a problem for architectures that do not provide bounded forward progress guarantees or fairness of contended atomic operations and can lead to starvation in some cases. In these problematic scenarios, we must return back to the core futex code so that we can drop the hash bucket locks and reschedule if necessary, much like we do in the case of a page fault. Allow architectures to return -EAGAIN from their implementations of arch_futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic(), which will cause the core futex code to reschedule if necessary and return back to the architecture code later on. Cc: Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 188 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 117 insertions(+), 71 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 52668d44e07b..4eafa8ec76a4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1314,13 +1314,15 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) { + int err; u32 uninitialized_var(curval); if (unlikely(should_fail_futex(true))) return -EFAULT; - if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) - return -EFAULT; + err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (unlikely(err)) + return err; /* If user space value changed, let the caller retry */ return curval != uval ? -EAGAIN : 0; @@ -1506,10 +1508,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ if (unlikely(should_fail_futex(true))) ret = -EFAULT; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { - ret = -EFAULT; - - } else if (curval != uval) { + ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (!ret && (curval != uval)) { /* * If a unconditional UNLOCK_PI operation (user space did not * try the TID->0 transition) raced with a waiter setting the @@ -1704,32 +1704,32 @@ retry_private: double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { - double_unlock_hb(hb1, hb2); -#ifndef CONFIG_MMU - /* - * we don't get EFAULT from MMU faults if we don't have an MMU, - * but we might get them from range checking - */ - ret = op_ret; - goto out_put_keys; -#endif - - if (unlikely(op_ret != -EFAULT)) { + if (!IS_ENABLED(CONFIG_MMU) || + unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { + /* + * we don't get EFAULT from MMU faults if we don't have + * an MMU, but we might get them from range checking + */ ret = op_ret; goto out_put_keys; } - ret = fault_in_user_writeable(uaddr2); - if (ret) - goto out_put_keys; + if (op_ret == -EFAULT) { + ret = fault_in_user_writeable(uaddr2); + if (ret) + goto out_put_keys; + } - if (!(flags & FLAGS_SHARED)) + if (!(flags & FLAGS_SHARED)) { + cond_resched(); goto retry_private; + } put_futex_key(&key2); put_futex_key(&key1); + cond_resched(); goto retry; } @@ -2354,7 +2354,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, u32 uval, uninitialized_var(curval), newval; struct task_struct *oldowner, *newowner; u32 newtid; - int ret; + int ret, err = 0; lockdep_assert_held(q->lock_ptr); @@ -2425,14 +2425,17 @@ retry: if (!pi_state->owner) newtid |= FUTEX_OWNER_DIED; - if (get_futex_value_locked(&uval, uaddr)) - goto handle_fault; + err = get_futex_value_locked(&uval, uaddr); + if (err) + goto handle_err; for (;;) { newval = (uval & FUTEX_OWNER_DIED) | newtid; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) - goto handle_fault; + err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (err) + goto handle_err; + if (curval == uval) break; uval = curval; @@ -2460,23 +2463,37 @@ retry: return 0; /* - * To handle the page fault we need to drop the locks here. That gives - * the other task (either the highest priority waiter itself or the - * task which stole the rtmutex) the chance to try the fixup of the - * pi_state. So once we are back from handling the fault we need to - * check the pi_state after reacquiring the locks and before trying to - * do another fixup. When the fixup has been done already we simply - * return. + * In order to reschedule or handle a page fault, we need to drop the + * locks here. In the case of a fault, this gives the other task + * (either the highest priority waiter itself or the task which stole + * the rtmutex) the chance to try the fixup of the pi_state. So once we + * are back from handling the fault we need to check the pi_state after + * reacquiring the locks and before trying to do another fixup. When + * the fixup has been done already we simply return. * * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely * drop hb->lock since the caller owns the hb -> futex_q relation. * Dropping the pi_mutex->wait_lock requires the state revalidate. */ -handle_fault: +handle_err: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(q->lock_ptr); - ret = fault_in_user_writeable(uaddr); + switch (err) { + case -EFAULT: + ret = fault_in_user_writeable(uaddr); + break; + + case -EAGAIN: + cond_resched(); + ret = 0; + break; + + default: + WARN_ON_ONCE(1); + ret = err; + break; + } spin_lock(q->lock_ptr); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); @@ -3045,10 +3062,8 @@ retry: * A unconditional UNLOCK_PI op raced against a waiter * setting the FUTEX_WAITERS bit. Try again. */ - if (ret == -EAGAIN) { - put_futex_key(&key); - goto retry; - } + if (ret == -EAGAIN) + goto pi_retry; /* * wake_futex_pi has detected invalid state. Tell user * space. @@ -3063,9 +3078,19 @@ retry: * preserve the WAITERS bit not the OWNER_DIED one. We are the * owner. */ - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) { + if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) { spin_unlock(&hb->lock); - goto pi_faulted; + switch (ret) { + case -EFAULT: + goto pi_faulted; + + case -EAGAIN: + goto pi_retry; + + default: + WARN_ON_ONCE(1); + goto out_putkey; + } } /* @@ -3079,6 +3104,11 @@ out_putkey: put_futex_key(&key); return ret; +pi_retry: + put_futex_key(&key); + cond_resched(); + goto retry; + pi_faulted: put_futex_key(&key); @@ -3439,6 +3469,7 @@ err_unlock: static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; + int err; /* Futex address must be 32bit aligned */ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) @@ -3448,42 +3479,57 @@ retry: if (get_user(uval, uaddr)) return -1; - if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) { - /* - * Ok, this dying thread is truly holding a futex - * of interest. Set the OWNER_DIED bit atomically - * via cmpxchg, and if the value had FUTEX_WAITERS - * set, wake up a waiter (if any). (We have to do a - * futex_wake() even if OWNER_DIED is already set - - * to handle the rare but possible case of recursive - * thread-death.) The rest of the cleanup is done in - * userspace. - */ - mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; - /* - * We are not holding a lock here, but we want to have - * the pagefault_disable/enable() protection because - * we want to handle the fault gracefully. If the - * access fails we try to fault in the futex with R/W - * verification via get_user_pages. get_user() above - * does not guarantee R/W access. If that fails we - * give up and leave the futex locked. - */ - if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) { + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) + return 0; + + /* + * Ok, this dying thread is truly holding a futex + * of interest. Set the OWNER_DIED bit atomically + * via cmpxchg, and if the value had FUTEX_WAITERS + * set, wake up a waiter (if any). (We have to do a + * futex_wake() even if OWNER_DIED is already set - + * to handle the rare but possible case of recursive + * thread-death.) The rest of the cleanup is done in + * userspace. + */ + mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; + + /* + * We are not holding a lock here, but we want to have + * the pagefault_disable/enable() protection because + * we want to handle the fault gracefully. If the + * access fails we try to fault in the futex with R/W + * verification via get_user_pages. get_user() above + * does not guarantee R/W access. If that fails we + * give up and leave the futex locked. + */ + if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) { + switch (err) { + case -EFAULT: if (fault_in_user_writeable(uaddr)) return -1; goto retry; - } - if (nval != uval) + + case -EAGAIN: + cond_resched(); goto retry; - /* - * Wake robust non-PI futexes here. The wakeup of - * PI futexes happens in exit_pi_state(): - */ - if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + default: + WARN_ON_ONCE(1); + return err; + } } + + if (nval != uval) + goto retry; + + /* + * Wake robust non-PI futexes here. The wakeup of + * PI futexes happens in exit_pi_state(): + */ + if (!pi && (uval & FUTEX_WAITERS)) + futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + return 0; } -- cgit v1.2.3 From 41d7bb19aa31f1079155c4e07bcb366cf424e8d8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 14:23:17 +0100 Subject: arm64: futex: Bound number of LDXR/STXR loops in FUTEX_WAKE_OP commit 03110a5cb2161690ae5ac04994d47ed0cd6cef75 upstream. Our futex implementation makes use of LDXR/STXR loops to perform atomic updates to user memory from atomic context. This can lead to latency problems if we end up spinning around the LL/SC sequence at the expense of doing something useful. Rework our futex atomic operations so that we return -EAGAIN if we fail to update the futex word after 128 attempts. The core futex code will reschedule if necessary and we'll try again later. Cc: Fixes: 6170a97460db ("arm64: Atomic operations") Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/futex.h | 55 ++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index c7e1a7837706..6fb2214333a2 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -23,26 +23,34 @@ #include +#define FUTEX_MAX_LOOPS 128 /* What's the largest number you can think of? */ + #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ do { \ + unsigned int loops = FUTEX_MAX_LOOPS; \ + \ uaccess_enable(); \ asm volatile( \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w0, %w3, %2\n" \ -" cbnz %w0, 1b\n" \ -" dmb ish\n" \ +" cbz %w0, 3f\n" \ +" sub %w4, %w4, %w0\n" \ +" cbnz %w4, 1b\n" \ +" mov %w0, %w7\n" \ "3:\n" \ +" dmb ish\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ -"4: mov %w0, %w5\n" \ +"4: mov %w0, %w6\n" \ " b 3b\n" \ " .popsection\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ - : "r" (oparg), "Ir" (-EFAULT) \ + : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \ + "+r" (loops) \ + : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ : "memory"); \ uaccess_disable(); \ } while (0) @@ -57,23 +65,23 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w3, %w4", + __futex_atomic_op("mov %w3, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w3, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w3, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w3, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w5", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w3, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; default: @@ -93,6 +101,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, u32 oldval, u32 newval) { int ret = 0; + unsigned int loops = FUTEX_MAX_LOOPS; u32 val, tmp; u32 __user *uaddr; @@ -104,20 +113,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, asm volatile("// futex_atomic_cmpxchg_inatomic\n" " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" -" sub %w3, %w1, %w4\n" -" cbnz %w3, 3f\n" -"2: stlxr %w3, %w5, %2\n" -" cbnz %w3, 1b\n" -" dmb ish\n" +" sub %w3, %w1, %w5\n" +" cbnz %w3, 4f\n" +"2: stlxr %w3, %w6, %2\n" +" cbz %w3, 3f\n" +" sub %w4, %w4, %w3\n" +" cbnz %w4, 1b\n" +" mov %w0, %w8\n" "3:\n" +" dmb ish\n" +"4:\n" " .pushsection .fixup,\"ax\"\n" -"4: mov %w0, %w6\n" -" b 3b\n" +"5: mov %w0, %w7\n" +" b 4b\n" " .popsection\n" - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) - : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) - : "r" (oldval), "r" (newval), "Ir" (-EFAULT) + _ASM_EXTABLE(1b, 5b) + _ASM_EXTABLE(2b, 5b) + : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) + : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) : "memory"); uaccess_disable(); -- cgit v1.2.3 From 7b13756d2c328e35f0640d16b68541e6f72339b8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 May 2019 18:36:14 +0200 Subject: Linux 5.0.15 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5ce29665eeed..11c7f7844507 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 14 +SUBLEVEL = 15 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3 From 26c0aff1ad63a3cd3c692d60871377eedab8463a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Feb 2019 12:36:50 +0100 Subject: x86/msr-index: Cleanup bit defines commit d8eabc37310a92df40d07c5a8afc53cebf996716 upstream Greg pointed out that speculation related bit defines are using (1 << N) format instead of BIT(N). Aside of that (1 << N) is wrong as it should use 1UL at least. Clean it up. [ Josh Poimboeuf: Fix tools build ] Reported-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 34 +++++++++++++------------ tools/power/x86/turbostat/Makefile | 2 +- tools/power/x86/x86_energy_perf_policy/Makefile | 2 +- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ca5bc0eacb95..4f1e8b28daa0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H +#include + /* * CPU model specific register (MSR) numbers. * @@ -40,14 +42,14 @@ /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -69,20 +71,20 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 1598b4fa0b11..045f5f7d68ab 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line") endif turbostat : turbostat.c -override CFLAGS += -Wall +override CFLAGS += -Wall -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index ae7a0e09b722..1fdeef864e7c 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line") endif x86_energy_perf_policy : x86_energy_perf_policy.c -override CFLAGS += -Wall +override CFLAGS += -Wall -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' %: %.c -- cgit v1.2.3 From 019159aec4b75655c107c753e755ad447e87ea8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Feb 2019 10:10:23 +0100 Subject: x86/speculation: Consolidate CPU whitelists commit 36ad35131adacc29b328b9c8b6277a8bf0d6fd5d upstream The CPU vulnerability whitelists have some overlap and there are more whitelists coming along. Use the driver_data field in the x86_cpu_id struct to denote the whitelisted vulnerabilities and combine all whitelists into one. Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 110 +++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659..26ec15034f86 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -948,61 +948,72 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initconst struct x86_cpu_id cpu_no_speculation[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY }, - { X86_VENDOR_CENTAUR, 5 }, - { X86_VENDOR_INTEL, 5 }, - { X86_VENDOR_NSC, 5 }, - { X86_VENDOR_ANY, 4 }, +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) + +#define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } + +#define VULNWL_INTEL(model, whitelist) \ + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) + +#define VULNWL_AMD(family, whitelist) \ + VULNWL(AMD, family, X86_MODEL_ANY, whitelist) + +#define VULNWL_HYGON(family, whitelist) \ + VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) + +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), + + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_L1TF), + + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), {} }; -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - { X86_VENDOR_AMD }, - { X86_VENDOR_HYGON }, - {} -}; - -/* Only list CPUs which speculate but are non susceptible to SSB */ -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - { X86_VENDOR_AMD, 0x12, }, - { X86_VENDOR_AMD, 0x11, }, - { X86_VENDOR_AMD, 0x10, }, - { X86_VENDOR_AMD, 0xf, }, - {} -}; +static bool __init cpu_matches(unsigned long which) +{ + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { - /* in addition to cpu_no_speculation */ - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - {} -}; + return m && !!(m->driver_data & which); +} static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_speculation)) + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); @@ -1011,15 +1022,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (x86_match_cpu(cpu_no_meltdown)) + if (cpu_matches(NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -1028,7 +1038,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (x86_match_cpu(cpu_no_l1tf)) + if (cpu_matches(NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); -- cgit v1.2.3 From ea3d1b32b86eb75de1fcdb10dd978a80598c9bf3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 18 Jan 2019 16:50:16 -0800 Subject: x86/speculation/mds: Add basic bug infrastructure for MDS commit ed5194c2732c8084af9fd159c146ea92bf137128 upstream Microarchitectural Data Sampling (MDS), is a class of side channel attacks on internal buffers in Intel CPUs. The variants are: - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a dependent load (store-to-load forwarding) as an optimization. The forward can also happen to a faulting or assisting load operation for a different memory address, which can be exploited under certain conditions. Store buffers are partitioned between Hyper-Threads so cross thread forwarding is not possible. But if a thread enters or exits a sleep state the store buffer is repartitioned which can expose data from one thread to the other. MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage L1 miss situations and to hold data which is returned or sent in response to a memory or I/O operation. Fill buffers can forward data to a load operation and also write data to the cache. When the fill buffer is deallocated it can retain the stale data of the preceding operations which can then be forwarded to a faulting or assisting load operation, which can be exploited under certain conditions. Fill buffers are shared between Hyper-Threads so cross thread leakage is possible. MLDPS leaks Load Port Data. Load ports are used to perform load operations from memory or I/O. The received data is then forwarded to the register file or a subsequent operation. In some implementations the Load Port can contain stale data from a previous operation which can be forwarded to faulting or assisting loads under certain conditions, which again can be exploited eventually. Load ports are shared between Hyper-Threads so cross thread leakage is possible. All variants have the same mitigation for single CPU thread case (SMT off), so the kernel can treat them as one MDS issue. Add the basic infrastructure to detect if the current CPU is affected by MDS. [ tglx: Rewrote changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/msr-index.h | 5 +++++ arch/x86/kernel/cpu/common.c | 25 ++++++++++++++++--------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 981ff9479648..71375c827f4f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ @@ -382,5 +383,6 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4f1e8b28daa0..20f7da552e90 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -79,6 +79,11 @@ * attack, so no Speculative Store Bypass * control required. */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 26ec15034f86..e34817bca504 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -952,6 +952,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_MELTDOWN BIT(1) #define NO_SSB BIT(2) #define NO_L1TF BIT(3) +#define NO_MDS BIT(4) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -971,6 +972,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + /* Intel Family 6 */ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), @@ -987,18 +989,20 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(CORE_YONAH, NO_SSB), VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_L1TF), - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), + + /* AMD Family 0xf - 0x12 */ + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), {} }; @@ -1029,6 +1033,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) + setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(NO_MELTDOWN)) return; -- cgit v1.2.3 From 78157c977674a9087e7672c422156c16e7578ccc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Mar 2019 20:21:08 +0100 Subject: x86/speculation/mds: Add BUG_MSBDS_ONLY commit e261f209c3666e842fd645a1e31f001c3a26def9 upstream This bug bit is set on CPUs which are only affected by Microarchitectural Store Buffer Data Sampling (MSBDS) and not by any other MDS variant. This is important because the Store Buffers are partitioned between Hyper-Threads so cross thread forwarding is not possible. But if a thread enters or exits a sleep state the store buffer is repartitioned which can expose data from one thread to the other. This transition can be mitigated. That means that for CPUs which are only affected by MSBDS SMT can be enabled, if the CPU is not affected by other SMT sensitive vulnerabilities, e.g. L1TF. The XEON PHI variants fall into that category. Also the Silvermont/Airmont ATOMs, but for them it's not really relevant as they do not support SMT, but mark them for completeness sake. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 71375c827f4f..75f27ee2c263 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -384,5 +384,6 @@ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e34817bca504..132a63dc5a76 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -953,6 +953,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SSB BIT(2) #define NO_L1TF BIT(3) #define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -979,16 +980,16 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), VULNWL_INTEL(CORE_YONAH, NO_SSB), - VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), @@ -1033,8 +1034,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(MSBDS_ONLY)) + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); + } if (cpu_matches(NO_MELTDOWN)) return; -- cgit v1.2.3 From 87f96d5e7525f480d6ad63e866b820c29cec1aaa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 18 Jan 2019 16:50:23 -0800 Subject: x86/kvm: Expose X86_FEATURE_MD_CLEAR to guests commit 6c4dbbd14730c43f4ed808a9c42ca41625925c22 upstream X86_FEATURE_MD_CLEAR is a new CPUID bit which is set when microcode provides the mechanism to invoke a flush of various exploitable CPU buffers by invoking the VERW instruction. Hand it through to guests so they can adjust their mitigations. This also requires corresponding qemu changes, which are available separately. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c07958b59f50..39501e7afdb4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -410,7 +410,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); -- cgit v1.2.3 From a5a8ef7cd7c989519c92bb147bd11f78094d5137 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:13:06 +0100 Subject: x86/speculation/mds: Add mds_clear_cpu_buffers() commit 6a9e529272517755904b7afa639f6db59ddb793e upstream The Microarchitectural Data Sampling (MDS) vulernabilities are mitigated by clearing the affected CPU buffers. The mechanism for clearing the buffers uses the unused and obsolete VERW instruction in combination with a microcode update which triggers a CPU buffer clear when VERW is executed. Provide a inline function with the assembly magic. The argument of the VERW instruction must be a memory operand as documented: "MD_CLEAR enumerates that the memory-operand variant of VERW (for example, VERW m16) has been extended to also overwrite buffers affected by MDS. This buffer overwriting functionality is not guaranteed for the register operand variant of VERW." Documentation also recommends to use a writable data segment selector: "The buffer overwriting occurs regardless of the result of the VERW permission check, as well as when the selector is null or causes a descriptor load segment violation. However, for lowest latency we recommend using a selector that indicates a valid writable data segment." Add x86 specific documentation about MDS and the internal workings of the mitigation. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/index.rst | 1 + Documentation/x86/conf.py | 10 ++++ Documentation/x86/index.rst | 8 +++ Documentation/x86/mds.rst | 99 ++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/nospec-branch.h | 25 +++++++++ 5 files changed, 143 insertions(+) create mode 100644 Documentation/x86/conf.py create mode 100644 Documentation/x86/index.rst create mode 100644 Documentation/x86/mds.rst diff --git a/Documentation/index.rst b/Documentation/index.rst index c858c2e66e36..63864826dcd6 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -101,6 +101,7 @@ implementation. :maxdepth: 2 sh/index + x86/index Filesystem Documentation ------------------------ diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py new file mode 100644 index 000000000000..33c5c3142e20 --- /dev/null +++ b/Documentation/x86/conf.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8; mode: python -*- + +project = "X86 architecture specific documentation" + +tags.add("subproject") + +latex_documents = [ + ('index', 'x86.tex', project, + 'The kernel development community', 'manual'), +] diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst new file mode 100644 index 000000000000..ef389dcf1b1d --- /dev/null +++ b/Documentation/x86/index.rst @@ -0,0 +1,8 @@ +========================== +x86 architecture specifics +========================== + +.. toctree:: + :maxdepth: 1 + + mds diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst new file mode 100644 index 000000000000..1096738d50f2 --- /dev/null +++ b/Documentation/x86/mds.rst @@ -0,0 +1,99 @@ +Microarchitectural Data Sampling (MDS) mitigation +================================================= + +.. _mds: + +Overview +-------- + +Microarchitectural Data Sampling (MDS) is a family of side channel attacks +on internal buffers in Intel CPUs. The variants are: + + - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) + - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) + - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) + +MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a +dependent load (store-to-load forwarding) as an optimization. The forward +can also happen to a faulting or assisting load operation for a different +memory address, which can be exploited under certain conditions. Store +buffers are partitioned between Hyper-Threads so cross thread forwarding is +not possible. But if a thread enters or exits a sleep state the store +buffer is repartitioned which can expose data from one thread to the other. + +MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage +L1 miss situations and to hold data which is returned or sent in response +to a memory or I/O operation. Fill buffers can forward data to a load +operation and also write data to the cache. When the fill buffer is +deallocated it can retain the stale data of the preceding operations which +can then be forwarded to a faulting or assisting load operation, which can +be exploited under certain conditions. Fill buffers are shared between +Hyper-Threads so cross thread leakage is possible. + +MLPDS leaks Load Port Data. Load ports are used to perform load operations +from memory or I/O. The received data is then forwarded to the register +file or a subsequent operation. In some implementations the Load Port can +contain stale data from a previous operation which can be forwarded to +faulting or assisting loads under certain conditions, which again can be +exploited eventually. Load ports are shared between Hyper-Threads so cross +thread leakage is possible. + + +Exposure assumptions +-------------------- + +It is assumed that attack code resides in user space or in a guest with one +exception. The rationale behind this assumption is that the code construct +needed for exploiting MDS requires: + + - to control the load to trigger a fault or assist + + - to have a disclosure gadget which exposes the speculatively accessed + data for consumption through a side channel. + + - to control the pointer through which the disclosure gadget exposes the + data + +The existence of such a construct in the kernel cannot be excluded with +100% certainty, but the complexity involved makes it extremly unlikely. + +There is one exception, which is untrusted BPF. The functionality of +untrusted BPF is limited, but it needs to be thoroughly investigated +whether it can be used to create such a construct. + + +Mitigation strategy +------------------- + +All variants have the same mitigation strategy at least for the single CPU +thread case (SMT off): Force the CPU to clear the affected buffers. + +This is achieved by using the otherwise unused and obsolete VERW +instruction in combination with a microcode update. The microcode clears +the affected CPU buffers when the VERW instruction is executed. + +For virtualization there are two ways to achieve CPU buffer +clearing. Either the modified VERW instruction or via the L1D Flush +command. The latter is issued when L1TF mitigation is enabled so the extra +VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to +be issued. + +If the VERW instruction with the supplied segment selector argument is +executed on a CPU without the microcode update there is no side effect +other than a small number of pointlessly wasted CPU cycles. + +This does not protect against cross Hyper-Thread attacks except for MSBDS +which is only exploitable cross Hyper-thread when one of the Hyper-Threads +enters a C-state. + +The kernel provides a function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state +(idle) transitions. + +According to current knowledge additional mitigations inside the kernel +itself are not required because the necessary gadgets to expose the leaked +data cannot be controlled in a way which allows exploitation from malicious +user space or VM guests. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dad12b767ba0..67cb9b2082b1 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -318,6 +318,31 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +#include + +/** + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static inline void mds_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + #endif /* __ASSEMBLY__ */ /* -- cgit v1.2.3 From 481871997e37078a1d45646bcd2de3b5d33f0e70 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:42:51 +0100 Subject: x86/speculation/mds: Clear CPU buffers on exit to user commit 04dcbdb8057827b043b3c71aa397c4c63e67d086 upstream Add a static key which controls the invocation of the CPU buffer clear mechanism on exit to user space and add the call into prepare_exit_to_usermode() and do_nmi() right before actually returning. Add documentation which kernel to user space transition this covers and explain why some corner cases are not mitigated. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 52 ++++++++++++++++++++++++++++++++++++ arch/x86/entry/common.c | 3 +++ arch/x86/include/asm/nospec-branch.h | 13 +++++++++ arch/x86/kernel/cpu/bugs.c | 3 +++ arch/x86/kernel/nmi.c | 4 +++ arch/x86/kernel/traps.c | 8 ++++++ 6 files changed, 83 insertions(+) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 1096738d50f2..54d935bf283b 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -97,3 +97,55 @@ According to current knowledge additional mitigations inside the kernel itself are not required because the necessary gadgets to expose the leaked data cannot be controlled in a way which allows exploitation from malicious user space or VM guests. + +Mitigation points +----------------- + +1. Return to user space +^^^^^^^^^^^^^^^^^^^^^^^ + + When transitioning from kernel to user space the CPU buffers are flushed + on affected CPUs when the mitigation is not disabled on the kernel + command line. The migitation is enabled through the static key + mds_user_clear. + + The mitigation is invoked in prepare_exit_to_usermode() which covers + most of the kernel to user space transitions. There are a few exceptions + which are not invoking prepare_exit_to_usermode() on return to user + space. These exceptions use the paranoid exit code. + + - Non Maskable Interrupt (NMI): + + Access to sensible data like keys, credentials in the NMI context is + mostly theoretical: The CPU can do prefetching or execute a + misspeculated code path and thereby fetching data which might end up + leaking through a buffer. + + But for mounting other attacks the kernel stack address of the task is + already valuable information. So in full mitigation mode, the NMI is + mitigated on the return from do_nmi() to provide almost complete + coverage. + + - Double fault (#DF): + + A double fault is usually fatal, but the ESPFIX workaround, which can + be triggered from user space through modify_ldt(2) is a recoverable + double fault. #DF uses the paranoid exit path, so explicit mitigation + in the double fault handler is required. + + - Machine Check Exception (#MC): + + Another corner case is a #MC which hits between the CPU buffer clear + invocation and the actual return to user. As this still is in kernel + space it takes the paranoid exit path which does not clear the CPU + buffers. So the #MC handler repopulates the buffers to some + extent. Machine checks are not reliably controllable and the window is + extremly small so mitigation would just tick a checkbox that this + theoretical corner case is covered. To keep the amount of special + cases small, ignore #MC. + + - Debug Exception (#DB): + + This takes the paranoid exit path only when the INT1 breakpoint is in + kernel space. #DB on a user space address takes the regular exit path, + so no extra mitigation required. diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7bc105f47d21..19f650d729f5 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -31,6 +31,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) #endif user_enter_irqoff(); + + mds_user_clear_cpu_buffers(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 67cb9b2082b1..65b747286d96 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -318,6 +318,8 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +DECLARE_STATIC_KEY_FALSE(mds_user_clear); + #include /** @@ -343,6 +345,17 @@ static inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } +/** + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_user_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_user_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 482383c2b184..c2cfd45f709e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -63,6 +63,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +/* Control MDS CPU buffer clear before returning to user space */ +DEFINE_STATIC_KEY_FALSE(mds_user_clear); + void __init check_bugs(void) { identify_boot_cpu(); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 18bc9b51ac9b..086cf1d1d71d 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -34,6 +34,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -533,6 +534,9 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; + + if (user_mode(regs)) + mds_user_clear_cpu_buffers(); } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9b7c4ca8f0a7..85fe1870f873 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -366,6 +367,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&gpregs->orig_ax; + /* + * This situation can be triggered by userspace via + * modify_ldt(2) and the return does not take the regular + * user space exit, so a CPU buffer clear is required when + * MDS mitigation is enabled. + */ + mds_user_clear_cpu_buffers(); return; } #endif -- cgit v1.2.3 From 100087c080307ea163119dbc09604e091d6caa16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Feb 2019 12:48:14 +0100 Subject: x86/kvm/vmx: Add MDS protection when L1D Flush is not active commit 650b68a0622f933444a6d66936abb3103029413b upstream CPUs which are affected by L1TF and MDS mitigate MDS with the L1D Flush on VMENTER when updated microcode is installed. If a CPU is not affected by L1TF or if the L1D Flush is not in use, then MDS mitigation needs to be invoked explicitly. For these cases, follow the host mitigation state and invoke the MDS mitigation before VMENTER. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 1 + arch/x86/kvm/vmx/vmx.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c2cfd45f709e..bef397b4c2f8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -65,6 +65,7 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); /* Control MDS CPU buffer clear before returning to user space */ DEFINE_STATIC_KEY_FALSE(mds_user_clear); +EXPORT_SYMBOL_GPL(mds_user_clear); void __init check_bugs(void) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index da6fdd5434a1..dadb6a6a9b2a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6356,8 +6356,11 @@ static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? (unsigned long)¤t_evmcs->host_rsp : 0; + /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); + else if (static_branch_unlikely(&mds_user_clear)) + mds_clear_cpu_buffers(); asm( /* Store host registers */ -- cgit v1.2.3 From d9117863c52184a665c538238fee4b43f48cc6c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:04:01 +0100 Subject: x86/speculation/mds: Conditionally clear CPU buffers on idle entry commit 07f07f55a29cb705e221eda7894dd67ab81ef343 upstream Add a static key which controls the invocation of the CPU buffer clear mechanism on idle entry. This is independent of other MDS mitigations because the idle entry invocation to mitigate the potential leakage due to store buffer repartitioning is only necessary on SMT systems. Add the actual invocations to the different halt/mwait variants which covers all usage sites. mwaitx is not patched as it's not available on Intel CPUs. The buffer clear is only invoked before entering the C-State to prevent that stale data from the idling CPU is spilled to the Hyper-Thread sibling after the Store buffer got repartitioned and all entries are available to the non idle sibling. When coming out of idle the store buffer is partitioned again so each sibling has half of it available. Now CPU which returned from idle could be speculatively exposed to contents of the sibling, but the buffers are flushed either on exit to user space or on VMENTER. When later on conditional buffer clearing is implemented on top of this, then there is no action required either because before returning to user space the context switch will set the condition flag which causes a flush on the return to user path. Note, that the buffer clearing on idle is only sensible on CPUs which are solely affected by MSBDS and not any other variant of MDS because the other MDS variants cannot be mitigated when SMT is enabled, so the buffer clearing on idle would be a window dressing exercise. This intentionally does not handle the case in the acpi/processor_idle driver which uses the legacy IO port interface for C-State transitions for two reasons: - The acpi/processor_idle driver was replaced by the intel_idle driver almost a decade ago. Anything Nehalem upwards supports it and defaults to that new driver. - The legacy IO port interface is likely to be used on older and therefore unaffected CPUs or on systems which do not receive microcode updates anymore, so there is no point in adding that. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 42 ++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/irqflags.h | 4 ++++ arch/x86/include/asm/mwait.h | 7 ++++++ arch/x86/include/asm/nospec-branch.h | 12 +++++++++++ arch/x86/kernel/cpu/bugs.c | 3 +++ 5 files changed, 68 insertions(+) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 54d935bf283b..87ce8ac9f36e 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -149,3 +149,45 @@ Mitigation points This takes the paranoid exit path only when the INT1 breakpoint is in kernel space. #DB on a user space address takes the regular exit path, so no extra mitigation required. + + +2. C-State transition +^^^^^^^^^^^^^^^^^^^^^ + + When a CPU goes idle and enters a C-State the CPU buffers need to be + cleared on affected CPUs when SMT is active. This addresses the + repartitioning of the store buffer when one of the Hyper-Threads enters + a C-State. + + When SMT is inactive, i.e. either the CPU does not support it or all + sibling threads are offline CPU buffer clearing is not required. + + The idle clearing is enabled on CPUs which are only affected by MSBDS + and not by any other MDS variant. The other MDS variants cannot be + protected against cross Hyper-Thread attacks because the Fill Buffer and + the Load Ports are shared. So on CPUs affected by other variants, the + idle clearing would be a window dressing exercise and is therefore not + activated. + + The invocation is controlled by the static key mds_idle_clear which is + switched depending on the chosen mitigation mode and the SMT state of + the system. + + The buffer clear is only invoked before entering the C-State to prevent + that stale data from the idling CPU from spilling to the Hyper-Thread + sibling after the store buffer got repartitioned and all entries are + available to the non idle sibling. + + When coming out of idle the store buffer is partitioned again so each + sibling has half of it available. The back from idle CPU could be then + speculatively exposed to contents of the sibling. The buffers are + flushed either on exit to user space or on VMENTER so malicious code + in user space or the guest cannot speculatively access them. + + The mitigation is hooked into all variants of halt()/mwait(), but does + not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver + has been superseded by the intel_idle driver around 2010 and is + preferred on all affected CPUs which are expected to gain the MD_CLEAR + functionality in microcode. Aside of that the IO-Port mechanism is a + legacy interface which is only used on older systems which are either + not affected or do not receive microcode updates anymore. diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 058e40fed167..8a0e56e1dcc9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ +#include + /* Provide __cpuidle; we can't safely include */ #define __cpuidle __attribute__((__section__(".cpuidle.text"))) @@ -54,11 +56,13 @@ static inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 39a2fb29378a..eb0f80ce8524 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -6,6 +6,7 @@ #include #include +#include #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { + /* No MDS buffer clear as this is AMD/HYGON only */ + /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" :: "a" (eax), "b" (ebx), "c" (ecx)); @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + trace_hardirqs_on(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 65b747286d96..4e970390110f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -319,6 +319,7 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_user_clear); +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); #include @@ -356,6 +357,17 @@ static inline void mds_user_clear_cpu_buffers(void) mds_clear_cpu_buffers(); } +/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_idle_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bef397b4c2f8..10d309f99f11 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -66,6 +66,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); /* Control MDS CPU buffer clear before returning to user space */ DEFINE_STATIC_KEY_FALSE(mds_user_clear); EXPORT_SYMBOL_GPL(mds_user_clear); +/* Control MDS CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(mds_idle_clear); +EXPORT_SYMBOL_GPL(mds_idle_clear); void __init check_bugs(void) { -- cgit v1.2.3 From 0174e8984c2db871e72ceb4d4e7a838fbc7431c1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 22:04:08 +0100 Subject: x86/speculation/mds: Add mitigation control for MDS commit bc1241700acd82ec69fde98c5763ce51086269f8 upstream Now that the mitigations are in place, add a command line parameter to control the mitigation, a mitigation selector function and a SMT update mechanism. This is the minimal straight forward initial implementation which just provides an always on/off mode. The command line parameter is: mds=[full|off] This is consistent with the existing mitigations for other speculative hardware vulnerabilities. The idle invocation is dynamically updated according to the SMT state of the system similar to the dynamic update of the STIBP mitigation. The idle mitigation is limited to CPUs which are only affected by MSBDS and not any other variant, because the other variants cannot be mitigated on SMT enabled systems. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 22 ++++++++ arch/x86/include/asm/processor.h | 5 ++ arch/x86/kernel/cpu/bugs.c | 70 +++++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 858b6c0b9a15..dddb024eb523 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2356,6 +2356,28 @@ Format: , Specifies range of consoles to be captured by the MDA. + mds= [X86,INTEL] + Control mitigation for the Micro-architectural Data + Sampling (MDS) vulnerability. + + Certain CPUs are vulnerable to an exploit against CPU + internal buffers which can forward information to a + disclosure gadget under certain conditions. + + In vulnerable processors, the speculatively + forwarded data can be used in a cache side channel + attack, to access data to which the attacker does + not have direct access. + + This parameter controls the MDS mitigation. The + options are: + + full - Enable MDS mitigation on vulnerable CPUs + off - Unconditionally disable MDS mitigation + + Not specifying this option is equivalent to + mds=full. + mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory Amount of memory to be used when the kernel is not able to see the whole system memory or for test. diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 33051436c864..1f0295783325 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -992,4 +992,9 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_FULL, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 10d309f99f11..90f102c85a29 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,6 +37,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); +static void __init mds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -108,6 +109,8 @@ void __init check_bugs(void) l1tf_select_mitigation(); + mds_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -213,6 +216,50 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "MDS: " fmt + +/* Default mitigation for L1TF-affected CPUs */ +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; + +static const char * const mds_strings[] = { + [MDS_MITIGATION_OFF] = "Vulnerable", + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers" +}; + +static void __init mds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_OFF; + return; + } + + if (mds_mitigation == MDS_MITIGATION_FULL) { + if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) + static_branch_enable(&mds_user_clear); + else + mds_mitigation = MDS_MITIGATION_OFF; + } + pr_info("%s\n", mds_strings[mds_mitigation]); +} + +static int __init mds_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + mds_mitigation = MDS_MITIGATION_OFF; + else if (!strcmp(str, "full")) + mds_mitigation = MDS_MITIGATION_FULL; + + return 0; +} +early_param("mds", mds_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -617,6 +664,26 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +/* Update the static key controlling the MDS CPU buffer clear in idle */ +static void update_mds_branch_idle(void) +{ + /* + * Enable the idle clearing if SMT is active on CPUs which are + * affected only by MSBDS and not any other MDS variant. + * + * The other variants cannot be mitigated when SMT is enabled, so + * clearing the buffers on idle just to prevent the Store Buffer + * repartitioning leak would be a window dressing exercise. + */ + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) + return; + + if (sched_smt_active()) + static_branch_enable(&mds_idle_clear); + else + static_branch_disable(&mds_idle_clear); +} + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -638,6 +705,9 @@ void arch_smt_update(void) break; } + if (mds_mitigation == MDS_MITIGATION_FULL) + update_mds_branch_idle(); + mutex_unlock(&spec_ctrl_mutex); } -- cgit v1.2.3 From 3944139ce82897fca4b3c374cd5e7a3208af340b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 22:51:43 +0100 Subject: x86/speculation/mds: Add sysfs reporting for MDS commit 8a4b06d391b0a42a373808979b5028f5c84d9c6a upstream Add the sysfs reporting file for MDS. It exposes the vulnerability and mitigation state similar to the existing files for the other speculative hardware vulnerabilities. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/kernel/cpu/bugs.c | 25 ++++++++++++++++++++++ drivers/base/cpu.c | 8 +++++++ include/linux/cpu.h | 2 ++ 4 files changed, 36 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 9605dbd4b5b5..2db5c3407fd6 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -484,6 +484,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/l1tf + /sys/devices/system/cpu/vulnerabilities/mds Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 90f102c85a29..60eab526f98d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1172,6 +1172,22 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static ssize_t mds_show_state(char *buf) +{ + if (!hypervisor_is_type(X86_HYPER_NATIVE)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); + } + + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "mitigated" : "disabled"); + } + + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1238,6 +1254,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) return l1tf_show_state(buf); break; + + case X86_BUG_MDS: + return mds_show_state(buf); + default: break; } @@ -1269,4 +1289,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b { return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); } + +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); +} #endif diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index eb9443d5bae1..2fd6ca1021c2 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -546,11 +546,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_mds(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); +static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -558,6 +565,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spectre_v2.attr, &dev_attr_spec_store_bypass.attr, &dev_attr_l1tf.attr, + &dev_attr_mds.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5041357d0297..3c87ad888ed3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -57,6 +57,8 @@ extern ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_mds(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From 31fd0223449edb68ebe9751801bcd7969dad9ca5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Feb 2019 09:40:40 +0100 Subject: x86/speculation/mds: Add mitigation mode VMWERV commit 22dd8365088b6403630b82423cf906491859b65e upstream In virtualized environments it can happen that the host has the microcode update which utilizes the VERW instruction to clear CPU buffers, but the hypervisor is not yet updated to expose the X86_FEATURE_MD_CLEAR CPUID bit to guests. Introduce an internal mitigation mode VMWERV which enables the invocation of the CPU buffer clearing even if X86_FEATURE_MD_CLEAR is not set. If the system has no updated microcode this results in a pointless execution of the VERW instruction wasting a few CPU cycles. If the microcode is updated, but not exposed to a guest then the CPU buffers will be cleared. That said: Virtual Machines Will Eventually Receive Vaccine Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 27 +++++++++++++++++++++++++++ arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 87ce8ac9f36e..3d6f943f1afb 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -93,11 +93,38 @@ The kernel provides a function to invoke the buffer clearing: The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. +As a special quirk to address virtualization scenarios where the host has +the microcode updated, but the hypervisor does not (yet) expose the +MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the +hope that it might actually clear the buffers. The state is reflected +accordingly. + According to current knowledge additional mitigations inside the kernel itself are not required because the necessary gadgets to expose the leaked data cannot be controlled in a way which allows exploitation from malicious user space or VM guests. +Kernel internal mitigation modes +-------------------------------- + + ======= ============================================================ + off Mitigation is disabled. Either the CPU is not affected or + mds=off is supplied on the kernel command line + + full Mitigation is eanbled. CPU is affected and MD_CLEAR is + advertised in CPUID. + + vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not + advertised in CPUID. That is mainly for virtualization + scenarios where the host has the updated microcode but the + hypervisor does not expose MD_CLEAR in CPUID. It's a best + effort approach without guarantee. + ======= ============================================================ + +If the CPU is affected and mds=off is not supplied on the kernel command +line then the kernel selects the appropriate mitigation mode depending on +the availability of the MD_CLEAR CPUID bit. + Mitigation points ----------------- diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1f0295783325..aca1ef8cc79f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -995,6 +995,7 @@ extern enum l1tf_mitigations l1tf_mitigation; enum mds_mitigations { MDS_MITIGATION_OFF, MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, }; #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 60eab526f98d..12ab114b294e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -224,7 +224,8 @@ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL static const char * const mds_strings[] = { [MDS_MITIGATION_OFF] = "Vulnerable", - [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers" + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", }; static void __init mds_select_mitigation(void) @@ -235,10 +236,9 @@ static void __init mds_select_mitigation(void) } if (mds_mitigation == MDS_MITIGATION_FULL) { - if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) - static_branch_enable(&mds_user_clear); - else - mds_mitigation = MDS_MITIGATION_OFF; + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + static_branch_enable(&mds_user_clear); } pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -705,8 +705,14 @@ void arch_smt_update(void) break; } - if (mds_mitigation == MDS_MITIGATION_FULL) + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_VMWERV: update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } mutex_unlock(&spec_ctrl_mutex); } -- cgit v1.2.3 From ab8e3e63f23e40411ab0db75633de06ec8a00980 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Feb 2019 11:10:49 +0100 Subject: Documentation: Move L1TF to separate directory commit 65fd4cb65b2dad97feb8330b6690445910b56d6a upstream Move L!TF to a separate directory so the MDS stuff can be added at the side. Otherwise the all hardware vulnerabilites have their own top level entry. Should have done that right away. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 2 +- Documentation/admin-guide/hw-vuln/index.rst | 12 + Documentation/admin-guide/hw-vuln/l1tf.rst | 614 +++++++++++++++++++++ Documentation/admin-guide/index.rst | 6 +- Documentation/admin-guide/kernel-parameters.txt | 2 +- Documentation/admin-guide/l1tf.rst | 614 --------------------- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/vmx/vmx.c | 4 +- 8 files changed, 633 insertions(+), 623 deletions(-) create mode 100644 Documentation/admin-guide/hw-vuln/index.rst create mode 100644 Documentation/admin-guide/hw-vuln/l1tf.rst delete mode 100644 Documentation/admin-guide/l1tf.rst diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 2db5c3407fd6..744c6d764b0c 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -498,7 +498,7 @@ Description: Information about CPU vulnerabilities "Mitigation: $M" CPU is affected and mitigation $M is in effect Details about the l1tf file can be found in - Documentation/admin-guide/l1tf.rst + Documentation/admin-guide/hw-vuln/l1tf.rst What: /sys/devices/system/cpu/smt /sys/devices/system/cpu/smt/active diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst new file mode 100644 index 000000000000..8ce2009f1981 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -0,0 +1,12 @@ +======================== +Hardware vulnerabilities +======================== + +This section describes CPU vulnerabilities and provides an overview of the +possible mitigations along with guidance for selecting mitigations if they +are configurable at compile, boot or run time. + +.. toctree:: + :maxdepth: 1 + + l1tf diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst new file mode 100644 index 000000000000..9af977384168 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/l1tf.rst @@ -0,0 +1,614 @@ +L1TF - L1 Terminal Fault +======================== + +L1 Terminal Fault is a hardware vulnerability which allows unprivileged +speculative access to data which is available in the Level 1 Data Cache +when the page table entry controlling the virtual address, which is used +for the access, has the Present bit cleared or other reserved bits set. + +Affected processors +------------------- + +This vulnerability affects a wide range of Intel processors. The +vulnerability is not present on: + + - Processors from AMD, Centaur and other non Intel vendors + + - Older processor models, where the CPU family is < 6 + + - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, + Penwell, Pineview, Silvermont, Airmont, Merrifield) + + - The Intel XEON PHI family + + - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the + IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected + by the Meltdown vulnerability either. These CPUs should become + available by end of 2018. + +Whether a processor is affected or not can be read out from the L1TF +vulnerability file in sysfs. See :ref:`l1tf_sys_info`. + +Related CVEs +------------ + +The following CVE entries are related to the L1TF vulnerability: + + ============= ================= ============================== + CVE-2018-3615 L1 Terminal Fault SGX related aspects + CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects + CVE-2018-3646 L1 Terminal Fault Virtualization related aspects + ============= ================= ============================== + +Problem +------- + +If an instruction accesses a virtual address for which the relevant page +table entry (PTE) has the Present bit cleared or other reserved bits set, +then speculative execution ignores the invalid PTE and loads the referenced +data if it is present in the Level 1 Data Cache, as if the page referenced +by the address bits in the PTE was still present and accessible. + +While this is a purely speculative mechanism and the instruction will raise +a page fault when it is retired eventually, the pure act of loading the +data and making it available to other speculative instructions opens up the +opportunity for side channel attacks to unprivileged malicious code, +similar to the Meltdown attack. + +While Meltdown breaks the user space to kernel space protection, L1TF +allows to attack any physical memory address in the system and the attack +works across all protection domains. It allows an attack of SGX and also +works from inside virtual machines because the speculation bypasses the +extended page table (EPT) protection mechanism. + + +Attack scenarios +---------------- + +1. Malicious user space +^^^^^^^^^^^^^^^^^^^^^^^ + + Operating Systems store arbitrary information in the address bits of a + PTE which is marked non present. This allows a malicious user space + application to attack the physical memory to which these PTEs resolve. + In some cases user-space can maliciously influence the information + encoded in the address bits of the PTE, thus making attacks more + deterministic and more practical. + + The Linux kernel contains a mitigation for this attack vector, PTE + inversion, which is permanently enabled and has no performance + impact. The kernel ensures that the address bits of PTEs, which are not + marked present, never point to cacheable physical memory space. + + A system with an up to date kernel is protected against attacks from + malicious user space applications. + +2. Malicious guest in a virtual machine +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The fact that L1TF breaks all domain protections allows malicious guest + OSes, which can control the PTEs directly, and malicious guest user + space applications, which run on an unprotected guest kernel lacking the + PTE inversion mitigation for L1TF, to attack physical host memory. + + A special aspect of L1TF in the context of virtualization is symmetric + multi threading (SMT). The Intel implementation of SMT is called + HyperThreading. The fact that Hyperthreads on the affected processors + share the L1 Data Cache (L1D) is important for this. As the flaw allows + only to attack data which is present in L1D, a malicious guest running + on one Hyperthread can attack the data which is brought into the L1D by + the context which runs on the sibling Hyperthread of the same physical + core. This context can be host OS, host user space or a different guest. + + If the processor does not support Extended Page Tables, the attack is + only possible, when the hypervisor does not sanitize the content of the + effective (shadow) page tables. + + While solutions exist to mitigate these attack vectors fully, these + mitigations are not enabled by default in the Linux kernel because they + can affect performance significantly. The kernel provides several + mechanisms which can be utilized to address the problem depending on the + deployment scenario. The mitigations, their protection scope and impact + are described in the next sections. + + The default mitigations and the rationale for choosing them are explained + at the end of this document. See :ref:`default_mitigations`. + +.. _l1tf_sys_info: + +L1TF system information +----------------------- + +The Linux kernel provides a sysfs interface to enumerate the current L1TF +status of the system: whether the system is vulnerable, and which +mitigations are active. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/l1tf + +The possible values in this file are: + + =========================== =============================== + 'Not affected' The processor is not vulnerable + 'Mitigation: PTE Inversion' The host protection is active + =========================== =============================== + +If KVM/VMX is enabled and the processor is vulnerable then the following +information is appended to the 'Mitigation: PTE Inversion' part: + + - SMT status: + + ===================== ================ + 'VMX: SMT vulnerable' SMT is enabled + 'VMX: SMT disabled' SMT is disabled + ===================== ================ + + - L1D Flush mode: + + ================================ ==================================== + 'L1D vulnerable' L1D flushing is disabled + + 'L1D conditional cache flushes' L1D flush is conditionally enabled + + 'L1D cache flushes' L1D flush is unconditionally enabled + ================================ ==================================== + +The resulting grade of protection is discussed in the following sections. + + +Host mitigation mechanism +------------------------- + +The kernel is unconditionally protected against L1TF attacks from malicious +user space running on the host. + + +Guest mitigation mechanisms +--------------------------- + +.. _l1d_flush: + +1. L1D flush on VMENTER +^^^^^^^^^^^^^^^^^^^^^^^ + + To make sure that a guest cannot attack data which is present in the L1D + the hypervisor flushes the L1D before entering the guest. + + Flushing the L1D evicts not only the data which should not be accessed + by a potentially malicious guest, it also flushes the guest + data. Flushing the L1D has a performance impact as the processor has to + bring the flushed guest data back into the L1D. Depending on the + frequency of VMEXIT/VMENTER and the type of computations in the guest + performance degradation in the range of 1% to 50% has been observed. For + scenarios where guest VMEXIT/VMENTER are rare the performance impact is + minimal. Virtio and mechanisms like posted interrupts are designed to + confine the VMEXITs to a bare minimum, but specific configurations and + application scenarios might still suffer from a high VMEXIT rate. + + The kernel provides two L1D flush modes: + - conditional ('cond') + - unconditional ('always') + + The conditional mode avoids L1D flushing after VMEXITs which execute + only audited code paths before the corresponding VMENTER. These code + paths have been verified that they cannot expose secrets or other + interesting data to an attacker, but they can leak information about the + address space layout of the hypervisor. + + Unconditional mode flushes L1D on all VMENTER invocations and provides + maximum protection. It has a higher overhead than the conditional + mode. The overhead cannot be quantified correctly as it depends on the + workload scenario and the resulting number of VMEXITs. + + The general recommendation is to enable L1D flush on VMENTER. The kernel + defaults to conditional mode on affected processors. + + **Note**, that L1D flush does not prevent the SMT problem because the + sibling thread will also bring back its data into the L1D which makes it + attackable again. + + L1D flush can be controlled by the administrator via the kernel command + line and sysfs control files. See :ref:`mitigation_control_command_line` + and :ref:`mitigation_control_kvm`. + +.. _guest_confinement: + +2. Guest VCPU confinement to dedicated physical cores +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + To address the SMT problem, it is possible to make a guest or a group of + guests affine to one or more physical cores. The proper mechanism for + that is to utilize exclusive cpusets to ensure that no other guest or + host tasks can run on these cores. + + If only a single guest or related guests run on sibling SMT threads on + the same physical core then they can only attack their own memory and + restricted parts of the host memory. + + Host memory is attackable, when one of the sibling SMT threads runs in + host OS (hypervisor) context and the other in guest context. The amount + of valuable information from the host OS context depends on the context + which the host OS executes, i.e. interrupts, soft interrupts and kernel + threads. The amount of valuable data from these contexts cannot be + declared as non-interesting for an attacker without deep inspection of + the code. + + **Note**, that assigning guests to a fixed set of physical cores affects + the ability of the scheduler to do load balancing and might have + negative effects on CPU utilization depending on the hosting + scenario. Disabling SMT might be a viable alternative for particular + scenarios. + + For further information about confining guests to a single or to a group + of cores consult the cpusets documentation: + + https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt + +.. _interrupt_isolation: + +3. Interrupt affinity +^^^^^^^^^^^^^^^^^^^^^ + + Interrupts can be made affine to logical CPUs. This is not universally + true because there are types of interrupts which are truly per CPU + interrupts, e.g. the local timer interrupt. Aside of that multi queue + devices affine their interrupts to single CPUs or groups of CPUs per + queue without allowing the administrator to control the affinities. + + Moving the interrupts, which can be affinity controlled, away from CPUs + which run untrusted guests, reduces the attack vector space. + + Whether the interrupts with are affine to CPUs, which run untrusted + guests, provide interesting data for an attacker depends on the system + configuration and the scenarios which run on the system. While for some + of the interrupts it can be assumed that they won't expose interesting + information beyond exposing hints about the host OS memory layout, there + is no way to make general assumptions. + + Interrupt affinity can be controlled by the administrator via the + /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is + available at: + + https://www.kernel.org/doc/Documentation/IRQ-affinity.txt + +.. _smt_control: + +4. SMT control +^^^^^^^^^^^^^^ + + To prevent the SMT issues of L1TF it might be necessary to disable SMT + completely. Disabling SMT can have a significant performance impact, but + the impact depends on the hosting scenario and the type of workloads. + The impact of disabling SMT needs also to be weighted against the impact + of other mitigation solutions like confining guests to dedicated cores. + + The kernel provides a sysfs interface to retrieve the status of SMT and + to control it. It also provides a kernel command line interface to + control SMT. + + The kernel command line interface consists of the following options: + + =========== ========================================================== + nosmt Affects the bring up of the secondary CPUs during boot. The + kernel tries to bring all present CPUs online during the + boot process. "nosmt" makes sure that from each physical + core only one - the so called primary (hyper) thread is + activated. Due to a design flaw of Intel processors related + to Machine Check Exceptions the non primary siblings have + to be brought up at least partially and are then shut down + again. "nosmt" can be undone via the sysfs interface. + + nosmt=force Has the same effect as "nosmt" but it does not allow to + undo the SMT disable via the sysfs interface. + =========== ========================================================== + + The sysfs interface provides two files: + + - /sys/devices/system/cpu/smt/control + - /sys/devices/system/cpu/smt/active + + /sys/devices/system/cpu/smt/control: + + This file allows to read out the SMT control state and provides the + ability to disable or (re)enable SMT. The possible states are: + + ============== =================================================== + on SMT is supported by the CPU and enabled. All + logical CPUs can be onlined and offlined without + restrictions. + + off SMT is supported by the CPU and disabled. Only + the so called primary SMT threads can be onlined + and offlined without restrictions. An attempt to + online a non-primary sibling is rejected + + forceoff Same as 'off' but the state cannot be controlled. + Attempts to write to the control file are rejected. + + notsupported The processor does not support SMT. It's therefore + not affected by the SMT implications of L1TF. + Attempts to write to the control file are rejected. + ============== =================================================== + + The possible states which can be written into this file to control SMT + state are: + + - on + - off + - forceoff + + /sys/devices/system/cpu/smt/active: + + This file reports whether SMT is enabled and active, i.e. if on any + physical core two or more sibling threads are online. + + SMT control is also possible at boot time via the l1tf kernel command + line parameter in combination with L1D flush control. See + :ref:`mitigation_control_command_line`. + +5. Disabling EPT +^^^^^^^^^^^^^^^^ + + Disabling EPT for virtual machines provides full mitigation for L1TF even + with SMT enabled, because the effective page tables for guests are + managed and sanitized by the hypervisor. Though disabling EPT has a + significant performance impact especially when the Meltdown mitigation + KPTI is enabled. + + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. + +There is ongoing research and development for new mitigation mechanisms to +address the performance impact of disabling SMT or EPT. + +.. _mitigation_control_command_line: + +Mitigation control on the kernel command line +--------------------------------------------- + +The kernel command line allows to control the L1TF mitigations at boot +time with the option "l1tf=". The valid arguments for this option are: + + ============ ============================================================= + full Provides all available mitigations for the L1TF + vulnerability. Disables SMT and enables all mitigations in + the hypervisors, i.e. unconditional L1D flushing + + SMT control and L1D flush control via the sysfs interface + is still possible after boot. Hypervisors will issue a + warning when the first VM is started in a potentially + insecure configuration, i.e. SMT enabled or L1D flush + disabled. + + full,force Same as 'full', but disables SMT and L1D flush runtime + control. Implies the 'nosmt=force' command line option. + (i.e. sysfs control of SMT is disabled.) + + flush Leaves SMT enabled and enables the default hypervisor + mitigation, i.e. conditional L1D flushing + + SMT control and L1D flush control via the sysfs interface + is still possible after boot. Hypervisors will issue a + warning when the first VM is started in a potentially + insecure configuration, i.e. SMT enabled or L1D flush + disabled. + + flush,nosmt Disables SMT and enables the default hypervisor mitigation, + i.e. conditional L1D flushing. + + SMT control and L1D flush control via the sysfs interface + is still possible after boot. Hypervisors will issue a + warning when the first VM is started in a potentially + insecure configuration, i.e. SMT enabled or L1D flush + disabled. + + flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is + started in a potentially insecure configuration. + + off Disables hypervisor mitigations and doesn't emit any + warnings. + It also drops the swap size and available RAM limit restrictions + on both hypervisor and bare metal. + + ============ ============================================================= + +The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. + + +.. _mitigation_control_kvm: + +Mitigation control for KVM - module parameter +------------------------------------------------------------- + +The KVM hypervisor mitigation mechanism, flushing the L1D cache when +entering a guest, can be controlled with a module parameter. + +The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the +following arguments: + + ============ ============================================================== + always L1D cache flush on every VMENTER. + + cond Flush L1D on VMENTER only when the code between VMEXIT and + VMENTER can leak host memory which is considered + interesting for an attacker. This still can leak host memory + which allows e.g. to determine the hosts address space layout. + + never Disables the mitigation + ============ ============================================================== + +The parameter can be provided on the kernel command line, as a module +parameter when loading the modules and at runtime modified via the sysfs +file: + +/sys/module/kvm_intel/parameters/vmentry_l1d_flush + +The default is 'cond'. If 'l1tf=full,force' is given on the kernel command +line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush +module parameter is ignored and writes to the sysfs file are rejected. + + +Mitigation selection guide +-------------------------- + +1. No virtualization in use +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The system is protected by the kernel unconditionally and no further + action is required. + +2. Virtualization with trusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + If the guest comes from a trusted source and the guest OS kernel is + guaranteed to have the L1TF mitigations in place the system is fully + protected against L1TF and no further action is required. + + To avoid the overhead of the default L1D flushing on VMENTER the + administrator can disable the flushing via the kernel command line and + sysfs control files. See :ref:`mitigation_control_command_line` and + :ref:`mitigation_control_kvm`. + + +3. Virtualization with untrusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +3.1. SMT not supported or disabled +"""""""""""""""""""""""""""""""""" + + If SMT is not supported by the processor or disabled in the BIOS or by + the kernel, it's only required to enforce L1D flushing on VMENTER. + + Conditional L1D flushing is the default behaviour and can be tuned. See + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. + +3.2. EPT not supported or disabled +"""""""""""""""""""""""""""""""""" + + If EPT is not supported by the processor or disabled in the hypervisor, + the system is fully protected. SMT can stay enabled and L1D flushing on + VMENTER is not required. + + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. + +3.3. SMT and EPT supported and active +""""""""""""""""""""""""""""""""""""" + + If SMT and EPT are supported and active then various degrees of + mitigations can be employed: + + - L1D flushing on VMENTER: + + L1D flushing on VMENTER is the minimal protection requirement, but it + is only potent in combination with other mitigation methods. + + Conditional L1D flushing is the default behaviour and can be tuned. See + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. + + - Guest confinement: + + Confinement of guests to a single or a group of physical cores which + are not running any other processes, can reduce the attack surface + significantly, but interrupts, soft interrupts and kernel threads can + still expose valuable data to a potential attacker. See + :ref:`guest_confinement`. + + - Interrupt isolation: + + Isolating the guest CPUs from interrupts can reduce the attack surface + further, but still allows a malicious guest to explore a limited amount + of host physical memory. This can at least be used to gain knowledge + about the host address space layout. The interrupts which have a fixed + affinity to the CPUs which run the untrusted guests can depending on + the scenario still trigger soft interrupts and schedule kernel threads + which might expose valuable information. See + :ref:`interrupt_isolation`. + +The above three mitigation methods combined can provide protection to a +certain degree, but the risk of the remaining attack surface has to be +carefully analyzed. For full protection the following methods are +available: + + - Disabling SMT: + + Disabling SMT and enforcing the L1D flushing provides the maximum + amount of protection. This mitigation is not depending on any of the + above mitigation methods. + + SMT control and L1D flushing can be tuned by the command line + parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run + time with the matching sysfs control files. See :ref:`smt_control`, + :ref:`mitigation_control_command_line` and + :ref:`mitigation_control_kvm`. + + - Disabling EPT: + + Disabling EPT provides the maximum amount of protection as well. It is + not depending on any of the above mitigation methods. SMT can stay + enabled and L1D flushing is not required, but the performance impact is + significant. + + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' + parameter. + +3.4. Nested virtual machines +"""""""""""""""""""""""""""" + +When nested virtualization is in use, three operating systems are involved: +the bare metal hypervisor, the nested hypervisor and the nested virtual +machine. VMENTER operations from the nested hypervisor into the nested +guest will always be processed by the bare metal hypervisor. If KVM is the +bare metal hypervisor it will: + + - Flush the L1D cache on every switch from the nested hypervisor to the + nested virtual machine, so that the nested hypervisor's secrets are not + exposed to the nested virtual machine; + + - Flush the L1D cache on every switch from the nested virtual machine to + the nested hypervisor; this is a complex operation, and flushing the L1D + cache avoids that the bare metal hypervisor's secrets are exposed to the + nested virtual machine; + + - Instruct the nested hypervisor to not perform any L1D cache flush. This + is an optimization to avoid double L1D flushing. + + +.. _default_mitigations: + +Default mitigations +------------------- + + The kernel default mitigations for vulnerable processors are: + + - PTE inversion to protect against malicious user space. This is done + unconditionally and cannot be controlled. The swap storage is limited + to ~16TB. + + - L1D conditional flushing on VMENTER when EPT is enabled for + a guest. + + The kernel does not by default enforce the disabling of SMT, which leaves + SMT systems vulnerable when running untrusted guests with EPT enabled. + + The rationale for this choice is: + + - Force disabling SMT can break existing setups, especially with + unattended updates. + + - If regular users run untrusted guests on their machine, then L1TF is + just an add on to other malware which might be embedded in an untrusted + guest, e.g. spam-bots or attacks on the local network. + + There is no technical way to prevent a user from running untrusted code + on their machines blindly. + + - It's technically extremely unlikely and from today's knowledge even + impossible that L1TF can be exploited via the most popular attack + mechanisms like JavaScript because these mechanisms have no way to + control PTEs. If this would be possible and not other mitigation would + be possible, then the default might be different. + + - The administrators of cloud and hosting setups have to carefully + analyze the risk for their scenarios and make the appropriate + mitigation choices, which might even vary across their deployed + machines and also result in other changes of their overall setup. + There is no way for the kernel to provide a sensible default for this + kind of scenarios. diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 0a491676685e..42247516962a 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -17,14 +17,12 @@ etc. kernel-parameters devices -This section describes CPU vulnerabilities and provides an overview of the -possible mitigations along with guidance for selecting mitigations if they -are configurable at compile, boot or run time. +This section describes CPU vulnerabilities and their mitigations. .. toctree:: :maxdepth: 1 - l1tf + hw-vuln/index Here is a set of documents aimed at users who are trying to track down problems and bugs in particular. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dddb024eb523..9afcb240a673 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2114,7 +2114,7 @@ Default is 'flush'. - For details see: Documentation/admin-guide/l1tf.rst + For details see: Documentation/admin-guide/hw-vuln/l1tf.rst l2cr= [PPC] diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst deleted file mode 100644 index 9af977384168..000000000000 --- a/Documentation/admin-guide/l1tf.rst +++ /dev/null @@ -1,614 +0,0 @@ -L1TF - L1 Terminal Fault -======================== - -L1 Terminal Fault is a hardware vulnerability which allows unprivileged -speculative access to data which is available in the Level 1 Data Cache -when the page table entry controlling the virtual address, which is used -for the access, has the Present bit cleared or other reserved bits set. - -Affected processors -------------------- - -This vulnerability affects a wide range of Intel processors. The -vulnerability is not present on: - - - Processors from AMD, Centaur and other non Intel vendors - - - Older processor models, where the CPU family is < 6 - - - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, - Penwell, Pineview, Silvermont, Airmont, Merrifield) - - - The Intel XEON PHI family - - - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the - IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected - by the Meltdown vulnerability either. These CPUs should become - available by end of 2018. - -Whether a processor is affected or not can be read out from the L1TF -vulnerability file in sysfs. See :ref:`l1tf_sys_info`. - -Related CVEs ------------- - -The following CVE entries are related to the L1TF vulnerability: - - ============= ================= ============================== - CVE-2018-3615 L1 Terminal Fault SGX related aspects - CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects - CVE-2018-3646 L1 Terminal Fault Virtualization related aspects - ============= ================= ============================== - -Problem -------- - -If an instruction accesses a virtual address for which the relevant page -table entry (PTE) has the Present bit cleared or other reserved bits set, -then speculative execution ignores the invalid PTE and loads the referenced -data if it is present in the Level 1 Data Cache, as if the page referenced -by the address bits in the PTE was still present and accessible. - -While this is a purely speculative mechanism and the instruction will raise -a page fault when it is retired eventually, the pure act of loading the -data and making it available to other speculative instructions opens up the -opportunity for side channel attacks to unprivileged malicious code, -similar to the Meltdown attack. - -While Meltdown breaks the user space to kernel space protection, L1TF -allows to attack any physical memory address in the system and the attack -works across all protection domains. It allows an attack of SGX and also -works from inside virtual machines because the speculation bypasses the -extended page table (EPT) protection mechanism. - - -Attack scenarios ----------------- - -1. Malicious user space -^^^^^^^^^^^^^^^^^^^^^^^ - - Operating Systems store arbitrary information in the address bits of a - PTE which is marked non present. This allows a malicious user space - application to attack the physical memory to which these PTEs resolve. - In some cases user-space can maliciously influence the information - encoded in the address bits of the PTE, thus making attacks more - deterministic and more practical. - - The Linux kernel contains a mitigation for this attack vector, PTE - inversion, which is permanently enabled and has no performance - impact. The kernel ensures that the address bits of PTEs, which are not - marked present, never point to cacheable physical memory space. - - A system with an up to date kernel is protected against attacks from - malicious user space applications. - -2. Malicious guest in a virtual machine -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - The fact that L1TF breaks all domain protections allows malicious guest - OSes, which can control the PTEs directly, and malicious guest user - space applications, which run on an unprotected guest kernel lacking the - PTE inversion mitigation for L1TF, to attack physical host memory. - - A special aspect of L1TF in the context of virtualization is symmetric - multi threading (SMT). The Intel implementation of SMT is called - HyperThreading. The fact that Hyperthreads on the affected processors - share the L1 Data Cache (L1D) is important for this. As the flaw allows - only to attack data which is present in L1D, a malicious guest running - on one Hyperthread can attack the data which is brought into the L1D by - the context which runs on the sibling Hyperthread of the same physical - core. This context can be host OS, host user space or a different guest. - - If the processor does not support Extended Page Tables, the attack is - only possible, when the hypervisor does not sanitize the content of the - effective (shadow) page tables. - - While solutions exist to mitigate these attack vectors fully, these - mitigations are not enabled by default in the Linux kernel because they - can affect performance significantly. The kernel provides several - mechanisms which can be utilized to address the problem depending on the - deployment scenario. The mitigations, their protection scope and impact - are described in the next sections. - - The default mitigations and the rationale for choosing them are explained - at the end of this document. See :ref:`default_mitigations`. - -.. _l1tf_sys_info: - -L1TF system information ------------------------ - -The Linux kernel provides a sysfs interface to enumerate the current L1TF -status of the system: whether the system is vulnerable, and which -mitigations are active. The relevant sysfs file is: - -/sys/devices/system/cpu/vulnerabilities/l1tf - -The possible values in this file are: - - =========================== =============================== - 'Not affected' The processor is not vulnerable - 'Mitigation: PTE Inversion' The host protection is active - =========================== =============================== - -If KVM/VMX is enabled and the processor is vulnerable then the following -information is appended to the 'Mitigation: PTE Inversion' part: - - - SMT status: - - ===================== ================ - 'VMX: SMT vulnerable' SMT is enabled - 'VMX: SMT disabled' SMT is disabled - ===================== ================ - - - L1D Flush mode: - - ================================ ==================================== - 'L1D vulnerable' L1D flushing is disabled - - 'L1D conditional cache flushes' L1D flush is conditionally enabled - - 'L1D cache flushes' L1D flush is unconditionally enabled - ================================ ==================================== - -The resulting grade of protection is discussed in the following sections. - - -Host mitigation mechanism -------------------------- - -The kernel is unconditionally protected against L1TF attacks from malicious -user space running on the host. - - -Guest mitigation mechanisms ---------------------------- - -.. _l1d_flush: - -1. L1D flush on VMENTER -^^^^^^^^^^^^^^^^^^^^^^^ - - To make sure that a guest cannot attack data which is present in the L1D - the hypervisor flushes the L1D before entering the guest. - - Flushing the L1D evicts not only the data which should not be accessed - by a potentially malicious guest, it also flushes the guest - data. Flushing the L1D has a performance impact as the processor has to - bring the flushed guest data back into the L1D. Depending on the - frequency of VMEXIT/VMENTER and the type of computations in the guest - performance degradation in the range of 1% to 50% has been observed. For - scenarios where guest VMEXIT/VMENTER are rare the performance impact is - minimal. Virtio and mechanisms like posted interrupts are designed to - confine the VMEXITs to a bare minimum, but specific configurations and - application scenarios might still suffer from a high VMEXIT rate. - - The kernel provides two L1D flush modes: - - conditional ('cond') - - unconditional ('always') - - The conditional mode avoids L1D flushing after VMEXITs which execute - only audited code paths before the corresponding VMENTER. These code - paths have been verified that they cannot expose secrets or other - interesting data to an attacker, but they can leak information about the - address space layout of the hypervisor. - - Unconditional mode flushes L1D on all VMENTER invocations and provides - maximum protection. It has a higher overhead than the conditional - mode. The overhead cannot be quantified correctly as it depends on the - workload scenario and the resulting number of VMEXITs. - - The general recommendation is to enable L1D flush on VMENTER. The kernel - defaults to conditional mode on affected processors. - - **Note**, that L1D flush does not prevent the SMT problem because the - sibling thread will also bring back its data into the L1D which makes it - attackable again. - - L1D flush can be controlled by the administrator via the kernel command - line and sysfs control files. See :ref:`mitigation_control_command_line` - and :ref:`mitigation_control_kvm`. - -.. _guest_confinement: - -2. Guest VCPU confinement to dedicated physical cores -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - To address the SMT problem, it is possible to make a guest or a group of - guests affine to one or more physical cores. The proper mechanism for - that is to utilize exclusive cpusets to ensure that no other guest or - host tasks can run on these cores. - - If only a single guest or related guests run on sibling SMT threads on - the same physical core then they can only attack their own memory and - restricted parts of the host memory. - - Host memory is attackable, when one of the sibling SMT threads runs in - host OS (hypervisor) context and the other in guest context. The amount - of valuable information from the host OS context depends on the context - which the host OS executes, i.e. interrupts, soft interrupts and kernel - threads. The amount of valuable data from these contexts cannot be - declared as non-interesting for an attacker without deep inspection of - the code. - - **Note**, that assigning guests to a fixed set of physical cores affects - the ability of the scheduler to do load balancing and might have - negative effects on CPU utilization depending on the hosting - scenario. Disabling SMT might be a viable alternative for particular - scenarios. - - For further information about confining guests to a single or to a group - of cores consult the cpusets documentation: - - https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt - -.. _interrupt_isolation: - -3. Interrupt affinity -^^^^^^^^^^^^^^^^^^^^^ - - Interrupts can be made affine to logical CPUs. This is not universally - true because there are types of interrupts which are truly per CPU - interrupts, e.g. the local timer interrupt. Aside of that multi queue - devices affine their interrupts to single CPUs or groups of CPUs per - queue without allowing the administrator to control the affinities. - - Moving the interrupts, which can be affinity controlled, away from CPUs - which run untrusted guests, reduces the attack vector space. - - Whether the interrupts with are affine to CPUs, which run untrusted - guests, provide interesting data for an attacker depends on the system - configuration and the scenarios which run on the system. While for some - of the interrupts it can be assumed that they won't expose interesting - information beyond exposing hints about the host OS memory layout, there - is no way to make general assumptions. - - Interrupt affinity can be controlled by the administrator via the - /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is - available at: - - https://www.kernel.org/doc/Documentation/IRQ-affinity.txt - -.. _smt_control: - -4. SMT control -^^^^^^^^^^^^^^ - - To prevent the SMT issues of L1TF it might be necessary to disable SMT - completely. Disabling SMT can have a significant performance impact, but - the impact depends on the hosting scenario and the type of workloads. - The impact of disabling SMT needs also to be weighted against the impact - of other mitigation solutions like confining guests to dedicated cores. - - The kernel provides a sysfs interface to retrieve the status of SMT and - to control it. It also provides a kernel command line interface to - control SMT. - - The kernel command line interface consists of the following options: - - =========== ========================================================== - nosmt Affects the bring up of the secondary CPUs during boot. The - kernel tries to bring all present CPUs online during the - boot process. "nosmt" makes sure that from each physical - core only one - the so called primary (hyper) thread is - activated. Due to a design flaw of Intel processors related - to Machine Check Exceptions the non primary siblings have - to be brought up at least partially and are then shut down - again. "nosmt" can be undone via the sysfs interface. - - nosmt=force Has the same effect as "nosmt" but it does not allow to - undo the SMT disable via the sysfs interface. - =========== ========================================================== - - The sysfs interface provides two files: - - - /sys/devices/system/cpu/smt/control - - /sys/devices/system/cpu/smt/active - - /sys/devices/system/cpu/smt/control: - - This file allows to read out the SMT control state and provides the - ability to disable or (re)enable SMT. The possible states are: - - ============== =================================================== - on SMT is supported by the CPU and enabled. All - logical CPUs can be onlined and offlined without - restrictions. - - off SMT is supported by the CPU and disabled. Only - the so called primary SMT threads can be onlined - and offlined without restrictions. An attempt to - online a non-primary sibling is rejected - - forceoff Same as 'off' but the state cannot be controlled. - Attempts to write to the control file are rejected. - - notsupported The processor does not support SMT. It's therefore - not affected by the SMT implications of L1TF. - Attempts to write to the control file are rejected. - ============== =================================================== - - The possible states which can be written into this file to control SMT - state are: - - - on - - off - - forceoff - - /sys/devices/system/cpu/smt/active: - - This file reports whether SMT is enabled and active, i.e. if on any - physical core two or more sibling threads are online. - - SMT control is also possible at boot time via the l1tf kernel command - line parameter in combination with L1D flush control. See - :ref:`mitigation_control_command_line`. - -5. Disabling EPT -^^^^^^^^^^^^^^^^ - - Disabling EPT for virtual machines provides full mitigation for L1TF even - with SMT enabled, because the effective page tables for guests are - managed and sanitized by the hypervisor. Though disabling EPT has a - significant performance impact especially when the Meltdown mitigation - KPTI is enabled. - - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. - -There is ongoing research and development for new mitigation mechanisms to -address the performance impact of disabling SMT or EPT. - -.. _mitigation_control_command_line: - -Mitigation control on the kernel command line ---------------------------------------------- - -The kernel command line allows to control the L1TF mitigations at boot -time with the option "l1tf=". The valid arguments for this option are: - - ============ ============================================================= - full Provides all available mitigations for the L1TF - vulnerability. Disables SMT and enables all mitigations in - the hypervisors, i.e. unconditional L1D flushing - - SMT control and L1D flush control via the sysfs interface - is still possible after boot. Hypervisors will issue a - warning when the first VM is started in a potentially - insecure configuration, i.e. SMT enabled or L1D flush - disabled. - - full,force Same as 'full', but disables SMT and L1D flush runtime - control. Implies the 'nosmt=force' command line option. - (i.e. sysfs control of SMT is disabled.) - - flush Leaves SMT enabled and enables the default hypervisor - mitigation, i.e. conditional L1D flushing - - SMT control and L1D flush control via the sysfs interface - is still possible after boot. Hypervisors will issue a - warning when the first VM is started in a potentially - insecure configuration, i.e. SMT enabled or L1D flush - disabled. - - flush,nosmt Disables SMT and enables the default hypervisor mitigation, - i.e. conditional L1D flushing. - - SMT control and L1D flush control via the sysfs interface - is still possible after boot. Hypervisors will issue a - warning when the first VM is started in a potentially - insecure configuration, i.e. SMT enabled or L1D flush - disabled. - - flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is - started in a potentially insecure configuration. - - off Disables hypervisor mitigations and doesn't emit any - warnings. - It also drops the swap size and available RAM limit restrictions - on both hypervisor and bare metal. - - ============ ============================================================= - -The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. - - -.. _mitigation_control_kvm: - -Mitigation control for KVM - module parameter -------------------------------------------------------------- - -The KVM hypervisor mitigation mechanism, flushing the L1D cache when -entering a guest, can be controlled with a module parameter. - -The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the -following arguments: - - ============ ============================================================== - always L1D cache flush on every VMENTER. - - cond Flush L1D on VMENTER only when the code between VMEXIT and - VMENTER can leak host memory which is considered - interesting for an attacker. This still can leak host memory - which allows e.g. to determine the hosts address space layout. - - never Disables the mitigation - ============ ============================================================== - -The parameter can be provided on the kernel command line, as a module -parameter when loading the modules and at runtime modified via the sysfs -file: - -/sys/module/kvm_intel/parameters/vmentry_l1d_flush - -The default is 'cond'. If 'l1tf=full,force' is given on the kernel command -line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush -module parameter is ignored and writes to the sysfs file are rejected. - - -Mitigation selection guide --------------------------- - -1. No virtualization in use -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - The system is protected by the kernel unconditionally and no further - action is required. - -2. Virtualization with trusted guests -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - If the guest comes from a trusted source and the guest OS kernel is - guaranteed to have the L1TF mitigations in place the system is fully - protected against L1TF and no further action is required. - - To avoid the overhead of the default L1D flushing on VMENTER the - administrator can disable the flushing via the kernel command line and - sysfs control files. See :ref:`mitigation_control_command_line` and - :ref:`mitigation_control_kvm`. - - -3. Virtualization with untrusted guests -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -3.1. SMT not supported or disabled -"""""""""""""""""""""""""""""""""" - - If SMT is not supported by the processor or disabled in the BIOS or by - the kernel, it's only required to enforce L1D flushing on VMENTER. - - Conditional L1D flushing is the default behaviour and can be tuned. See - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. - -3.2. EPT not supported or disabled -"""""""""""""""""""""""""""""""""" - - If EPT is not supported by the processor or disabled in the hypervisor, - the system is fully protected. SMT can stay enabled and L1D flushing on - VMENTER is not required. - - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. - -3.3. SMT and EPT supported and active -""""""""""""""""""""""""""""""""""""" - - If SMT and EPT are supported and active then various degrees of - mitigations can be employed: - - - L1D flushing on VMENTER: - - L1D flushing on VMENTER is the minimal protection requirement, but it - is only potent in combination with other mitigation methods. - - Conditional L1D flushing is the default behaviour and can be tuned. See - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. - - - Guest confinement: - - Confinement of guests to a single or a group of physical cores which - are not running any other processes, can reduce the attack surface - significantly, but interrupts, soft interrupts and kernel threads can - still expose valuable data to a potential attacker. See - :ref:`guest_confinement`. - - - Interrupt isolation: - - Isolating the guest CPUs from interrupts can reduce the attack surface - further, but still allows a malicious guest to explore a limited amount - of host physical memory. This can at least be used to gain knowledge - about the host address space layout. The interrupts which have a fixed - affinity to the CPUs which run the untrusted guests can depending on - the scenario still trigger soft interrupts and schedule kernel threads - which might expose valuable information. See - :ref:`interrupt_isolation`. - -The above three mitigation methods combined can provide protection to a -certain degree, but the risk of the remaining attack surface has to be -carefully analyzed. For full protection the following methods are -available: - - - Disabling SMT: - - Disabling SMT and enforcing the L1D flushing provides the maximum - amount of protection. This mitigation is not depending on any of the - above mitigation methods. - - SMT control and L1D flushing can be tuned by the command line - parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run - time with the matching sysfs control files. See :ref:`smt_control`, - :ref:`mitigation_control_command_line` and - :ref:`mitigation_control_kvm`. - - - Disabling EPT: - - Disabling EPT provides the maximum amount of protection as well. It is - not depending on any of the above mitigation methods. SMT can stay - enabled and L1D flushing is not required, but the performance impact is - significant. - - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' - parameter. - -3.4. Nested virtual machines -"""""""""""""""""""""""""""" - -When nested virtualization is in use, three operating systems are involved: -the bare metal hypervisor, the nested hypervisor and the nested virtual -machine. VMENTER operations from the nested hypervisor into the nested -guest will always be processed by the bare metal hypervisor. If KVM is the -bare metal hypervisor it will: - - - Flush the L1D cache on every switch from the nested hypervisor to the - nested virtual machine, so that the nested hypervisor's secrets are not - exposed to the nested virtual machine; - - - Flush the L1D cache on every switch from the nested virtual machine to - the nested hypervisor; this is a complex operation, and flushing the L1D - cache avoids that the bare metal hypervisor's secrets are exposed to the - nested virtual machine; - - - Instruct the nested hypervisor to not perform any L1D cache flush. This - is an optimization to avoid double L1D flushing. - - -.. _default_mitigations: - -Default mitigations -------------------- - - The kernel default mitigations for vulnerable processors are: - - - PTE inversion to protect against malicious user space. This is done - unconditionally and cannot be controlled. The swap storage is limited - to ~16TB. - - - L1D conditional flushing on VMENTER when EPT is enabled for - a guest. - - The kernel does not by default enforce the disabling of SMT, which leaves - SMT systems vulnerable when running untrusted guests with EPT enabled. - - The rationale for this choice is: - - - Force disabling SMT can break existing setups, especially with - unattended updates. - - - If regular users run untrusted guests on their machine, then L1TF is - just an add on to other malware which might be embedded in an untrusted - guest, e.g. spam-bots or attacks on the local network. - - There is no technical way to prevent a user from running untrusted code - on their machines blindly. - - - It's technically extremely unlikely and from today's knowledge even - impossible that L1TF can be exploited via the most popular attack - mechanisms like JavaScript because these mechanisms have no way to - control PTEs. If this would be possible and not other mitigation would - be possible, then the default might be different. - - - The administrators of cloud and hosting setups have to carefully - analyze the risk for their scenarios and make the appropriate - mitigation choices, which might even vary across their deployed - machines and also result in other changes of their overall setup. - There is no way for the kernel to provide a sensible default for this - kind of scenarios. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 12ab114b294e..bcc110a85830 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1107,7 +1107,7 @@ static void __init l1tf_select_mitigation(void) pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); pr_info("However, doing so will make a part of your RAM unusable.\n"); - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n"); return; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index dadb6a6a9b2a..df6e325b288b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6800,8 +6800,8 @@ free_partial_vcpu: return ERR_PTR(err); } -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" static int vmx_vm_init(struct kvm *kvm) { -- cgit v1.2.3 From 08831a92501f68f9b6e5f43ae032d0b9b2299045 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Feb 2019 00:02:31 +0100 Subject: Documentation: Add MDS vulnerability documentation commit 5999bbe7a6ea3c62029532ec84dc06003a1fa258 upstream Add the initial MDS vulnerability documentation. Signed-off-by: Thomas Gleixner Reviewed-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 3 +- Documentation/admin-guide/hw-vuln/index.rst | 1 + Documentation/admin-guide/hw-vuln/l1tf.rst | 1 + Documentation/admin-guide/hw-vuln/mds.rst | 307 +++++++++++++++++++++ Documentation/admin-guide/kernel-parameters.txt | 2 + 5 files changed, 312 insertions(+), 2 deletions(-) create mode 100644 Documentation/admin-guide/hw-vuln/mds.rst diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 744c6d764b0c..141a7bb58b80 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -497,8 +497,7 @@ Description: Information about CPU vulnerabilities "Vulnerable" CPU is affected and no mitigation in effect "Mitigation: $M" CPU is affected and mitigation $M is in effect - Details about the l1tf file can be found in - Documentation/admin-guide/hw-vuln/l1tf.rst + See also: Documentation/admin-guide/hw-vuln/index.rst What: /sys/devices/system/cpu/smt /sys/devices/system/cpu/smt/active diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 8ce2009f1981..ffc064c1ec68 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -10,3 +10,4 @@ are configurable at compile, boot or run time. :maxdepth: 1 l1tf + mds diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst index 9af977384168..31653a9f0e1b 100644 --- a/Documentation/admin-guide/hw-vuln/l1tf.rst +++ b/Documentation/admin-guide/hw-vuln/l1tf.rst @@ -445,6 +445,7 @@ The default is 'cond'. If 'l1tf=full,force' is given on the kernel command line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush module parameter is ignored and writes to the sysfs file are rejected. +.. _mitigation_selection: Mitigation selection guide -------------------------- diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst new file mode 100644 index 000000000000..1de29d28903d --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/mds.rst @@ -0,0 +1,307 @@ +MDS - Microarchitectural Data Sampling +====================================== + +Microarchitectural Data Sampling is a hardware vulnerability which allows +unprivileged speculative access to data which is available in various CPU +internal buffers. + +Affected processors +------------------- + +This vulnerability affects a wide range of Intel processors. The +vulnerability is not present on: + + - Processors from AMD, Centaur and other non Intel vendors + + - Older processor models, where the CPU family is < 6 + + - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus) + + - Intel processors which have the ARCH_CAP_MDS_NO bit set in the + IA32_ARCH_CAPABILITIES MSR. + +Whether a processor is affected or not can be read out from the MDS +vulnerability file in sysfs. See :ref:`mds_sys_info`. + +Not all processors are affected by all variants of MDS, but the mitigation +is identical for all of them so the kernel treats them as a single +vulnerability. + +Related CVEs +------------ + +The following CVE entries are related to the MDS vulnerability: + + ============== ===== ============================================== + CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling + CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling + CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling + ============== ===== ============================================== + +Problem +------- + +When performing store, load, L1 refill operations, processors write data +into temporary microarchitectural structures (buffers). The data in the +buffer can be forwarded to load operations as an optimization. + +Under certain conditions, usually a fault/assist caused by a load +operation, data unrelated to the load memory address can be speculatively +forwarded from the buffers. Because the load operation causes a fault or +assist and its result will be discarded, the forwarded data will not cause +incorrect program execution or state changes. But a malicious operation +may be able to forward this speculative data to a disclosure gadget which +allows in turn to infer the value via a cache side channel attack. + +Because the buffers are potentially shared between Hyper-Threads cross +Hyper-Thread attacks are possible. + +Deeper technical information is available in the MDS specific x86 +architecture section: :ref:`Documentation/x86/mds.rst `. + + +Attack scenarios +---------------- + +Attacks against the MDS vulnerabilities can be mounted from malicious non +priviledged user space applications running on hosts or guest. Malicious +guest OSes can obviously mount attacks as well. + +Contrary to other speculation based vulnerabilities the MDS vulnerability +does not allow the attacker to control the memory target address. As a +consequence the attacks are purely sampling based, but as demonstrated with +the TLBleed attack samples can be postprocessed successfully. + +Web-Browsers +^^^^^^^^^^^^ + + It's unclear whether attacks through Web-Browsers are possible at + all. The exploitation through Java-Script is considered very unlikely, + but other widely used web technologies like Webassembly could possibly be + abused. + + +.. _mds_sys_info: + +MDS system information +----------------------- + +The Linux kernel provides a sysfs interface to enumerate the current MDS +status of the system: whether the system is vulnerable, and which +mitigations are active. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/mds + +The possible values in this file are: + + ========================================= ================================= + 'Not affected' The processor is not vulnerable + + 'Vulnerable' The processor is vulnerable, + but no mitigation enabled + + 'Vulnerable: Clear CPU buffers attempted' The processor is vulnerable but + microcode is not updated. + The mitigation is enabled on a + best effort basis. + See :ref:`vmwerv` + + 'Mitigation: CPU buffer clear' The processor is vulnerable and the + CPU buffer clearing mitigation is + enabled. + ========================================= ================================= + +If the processor is vulnerable then the following information is appended +to the above information: + + ======================== ============================================ + 'SMT vulnerable' SMT is enabled + 'SMT mitigated' SMT is enabled and mitigated + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== ============================================ + +.. _vmwerv: + +Best effort mitigation mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + If the processor is vulnerable, but the availability of the microcode based + mitigation mechanism is not advertised via CPUID the kernel selects a best + effort mitigation mode. This mode invokes the mitigation instructions + without a guarantee that they clear the CPU buffers. + + This is done to address virtualization scenarios where the host has the + microcode update applied, but the hypervisor is not yet updated to expose + the CPUID to the guest. If the host has updated microcode the protection + takes effect otherwise a few cpu cycles are wasted pointlessly. + + The state in the mds sysfs file reflects this situation accordingly. + + +Mitigation mechanism +------------------------- + +The kernel detects the affected CPUs and the presence of the microcode +which is required. + +If a CPU is affected and the microcode is available, then the kernel +enables the mitigation by default. The mitigation can be controlled at boot +time via a kernel command line option. See +:ref:`mds_mitigation_control_command_line`. + +.. _cpu_buffer_clear: + +CPU buffer clearing +^^^^^^^^^^^^^^^^^^^ + + The mitigation for MDS clears the affected CPU buffers on return to user + space and when entering a guest. + + If SMT is enabled it also clears the buffers on idle entry when the CPU + is only affected by MSBDS and not any other MDS variant, because the + other variants cannot be protected against cross Hyper-Thread attacks. + + For CPUs which are only affected by MSBDS the user space, guest and idle + transition mitigations are sufficient and SMT is not affected. + +.. _virt_mechanism: + +Virtualization mitigation +^^^^^^^^^^^^^^^^^^^^^^^^^ + + The protection for host to guest transition depends on the L1TF + vulnerability of the CPU: + + - CPU is affected by L1TF: + + If the L1D flush mitigation is enabled and up to date microcode is + available, the L1D flush mitigation is automatically protecting the + guest transition. + + If the L1D flush mitigation is disabled then the MDS mitigation is + invoked explicit when the host MDS mitigation is enabled. + + For details on L1TF and virtualization see: + :ref:`Documentation/admin-guide/hw-vuln//l1tf.rst `. + + - CPU is not affected by L1TF: + + CPU buffers are flushed before entering the guest when the host MDS + mitigation is enabled. + + The resulting MDS protection matrix for the host to guest transition: + + ============ ===== ============= ============ ================= + L1TF MDS VMX-L1FLUSH Host MDS MDS-State + + Don't care No Don't care N/A Not affected + + Yes Yes Disabled Off Vulnerable + + Yes Yes Disabled Full Mitigated + + Yes Yes Enabled Don't care Mitigated + + No Yes N/A Off Vulnerable + + No Yes N/A Full Mitigated + ============ ===== ============= ============ ================= + + This only covers the host to guest transition, i.e. prevents leakage from + host to guest, but does not protect the guest internally. Guests need to + have their own protections. + +.. _xeon_phi: + +XEON PHI specific considerations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The XEON PHI processor family is affected by MSBDS which can be exploited + cross Hyper-Threads when entering idle states. Some XEON PHI variants allow + to use MWAIT in user space (Ring 3) which opens an potential attack vector + for malicious user space. The exposure can be disabled on the kernel + command line with the 'ring3mwait=disable' command line option. + + XEON PHI is not affected by the other MDS variants and MSBDS is mitigated + before the CPU enters a idle state. As XEON PHI is not affected by L1TF + either disabling SMT is not required for full protection. + +.. _mds_smt_control: + +SMT control +^^^^^^^^^^^ + + All MDS variants except MSBDS can be attacked cross Hyper-Threads. That + means on CPUs which are affected by MFBDS or MLPDS it is necessary to + disable SMT for full protection. These are most of the affected CPUs; the + exception is XEON PHI, see :ref:`xeon_phi`. + + Disabling SMT can have a significant performance impact, but the impact + depends on the type of workloads. + + See the relevant chapter in the L1TF mitigation documentation for details: + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst `. + + +.. _mds_mitigation_control_command_line: + +Mitigation control on the kernel command line +--------------------------------------------- + +The kernel command line allows to control the MDS mitigations at boot +time with the option "mds=". The valid arguments for this option are: + + ============ ============================================================= + full If the CPU is vulnerable, enable all available mitigations + for the MDS vulnerability, CPU buffer clearing on exit to + userspace and when entering a VM. Idle transitions are + protected as well if SMT is enabled. + + It does not automatically disable SMT. + + off Disables MDS mitigations completely. + + ============ ============================================================= + +Not specifying this option is equivalent to "mds=full". + + +Mitigation selection guide +-------------------------- + +1. Trusted userspace +^^^^^^^^^^^^^^^^^^^^ + + If all userspace applications are from a trusted source and do not + execute untrusted code which is supplied externally, then the mitigation + can be disabled. + + +2. Virtualization with trusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The same considerations as above versus trusted user space apply. + +3. Virtualization with untrusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The protection depends on the state of the L1TF mitigations. + See :ref:`virt_mechanism`. + + If the MDS mitigation is enabled and SMT is disabled, guest to host and + guest to guest attacks are prevented. + +.. _mds_default_mitigations: + +Default mitigations +------------------- + + The kernel default mitigations for vulnerable processors are: + + - Enable CPU buffer clearing + + The kernel does not by default enforce the disabling of SMT, which leaves + SMT systems vulnerable when running untrusted code. The same rationale as + for L1TF applies. + See :ref:`Documentation/admin-guide/hw-vuln//l1tf.rst `. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9afcb240a673..7325319c2c23 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2378,6 +2378,8 @@ Not specifying this option is equivalent to mds=full. + For details see: Documentation/admin-guide/hw-vuln/mds.rst + mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory Amount of memory to be used when the kernel is not able to see the whole system memory or for test. -- cgit v1.2.3 From fb49e1bb20614a081b912e4d01ccbe096602d46a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 09:59:33 -0500 Subject: x86/speculation/mds: Add mds=full,nosmt cmdline option commit d71eb0ce109a124b0fa714832823b9452f2762cf upstream Add the mds=full,nosmt cmdline option. This is like mds=full, but with SMT disabled if the CPU is vulnerable. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/mds.rst | 3 +++ Documentation/admin-guide/kernel-parameters.txt | 6 ++++-- arch/x86/kernel/cpu/bugs.c | 10 ++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst index 1de29d28903d..244ab47d1fb3 100644 --- a/Documentation/admin-guide/hw-vuln/mds.rst +++ b/Documentation/admin-guide/hw-vuln/mds.rst @@ -260,6 +260,9 @@ time with the option "mds=". The valid arguments for this option are: It does not automatically disable SMT. + full,nosmt The same as mds=full, with SMT disabled on vulnerable + CPUs. This is the complete mitigation. + off Disables MDS mitigations completely. ============ ============================================================= diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7325319c2c23..8f04985d3122 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2372,8 +2372,10 @@ This parameter controls the MDS mitigation. The options are: - full - Enable MDS mitigation on vulnerable CPUs - off - Unconditionally disable MDS mitigation + full - Enable MDS mitigation on vulnerable CPUs + full,nosmt - Enable MDS mitigation and disable + SMT on vulnerable CPUs + off - Unconditionally disable MDS mitigation Not specifying this option is equivalent to mds=full. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bcc110a85830..4939addbd758 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -221,6 +221,7 @@ static void x86_amd_ssb_disable(void) /* Default mitigation for L1TF-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; +static bool mds_nosmt __ro_after_init = false; static const char * const mds_strings[] = { [MDS_MITIGATION_OFF] = "Vulnerable", @@ -238,8 +239,13 @@ static void __init mds_select_mitigation(void) if (mds_mitigation == MDS_MITIGATION_FULL) { if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; + static_branch_enable(&mds_user_clear); + + if (mds_nosmt && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + cpu_smt_disable(false); } + pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -255,6 +261,10 @@ static int __init mds_cmdline(char *str) mds_mitigation = MDS_MITIGATION_OFF; else if (!strcmp(str, "full")) mds_mitigation = MDS_MITIGATION_FULL; + else if (!strcmp(str, "full,nosmt")) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_nosmt = true; + } return 0; } -- cgit v1.2.3 From c98b736e763dbae0c9b1b2cf04e35fec33e39af3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 10:00:14 -0500 Subject: x86/speculation: Move arch_smt_update() call to after mitigation decisions commit 7c3658b20194a5b3209a143f63bc9c643c6a3ae2 upstream arch_smt_update() now has a dependency on both Spectre v2 and MDS mitigations. Move its initial call to after all the mitigation decisions have been made. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4939addbd758..0c2ff23906dc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -111,6 +111,8 @@ void __init check_bugs(void) mds_select_mitigation(); + arch_smt_update(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -638,9 +640,6 @@ specv2_set_mode: /* Set up IBPB and STIBP depending on the general spectre V2 command */ spectre_v2_user_select_mitigation(cmd); - - /* Enable STIBP if appropriate */ - arch_smt_update(); } static void update_stibp_msr(void * __unused) -- cgit v1.2.3 From 9cd62662dd7eb774e4c01c913f79b462b52952a7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 10:00:51 -0500 Subject: x86/speculation/mds: Add SMT warning message commit 39226ef02bfb43248b7db12a4fdccb39d95318e3 upstream MDS is vulnerable with SMT. Make that clear with a one-time printk whenever SMT first gets enabled. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0c2ff23906dc..2ee603a4863b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -673,6 +673,9 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +#undef pr_fmt +#define pr_fmt(fmt) fmt + /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { @@ -693,6 +696,8 @@ static void update_mds_branch_idle(void) static_branch_disable(&mds_idle_clear); } +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -717,6 +722,8 @@ void arch_smt_update(void) switch (mds_mitigation) { case MDS_MITIGATION_FULL: case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); update_mds_branch_idle(); break; case MDS_MITIGATION_OFF: @@ -1149,6 +1156,7 @@ static int __init l1tf_cmdline(char *str) early_param("l1tf", l1tf_cmdline); #undef pr_fmt +#define pr_fmt(fmt) fmt #ifdef CONFIG_SYSFS -- cgit v1.2.3 From 3f7fe4ad1f62c6179c375b7504b846993bd9704f Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 12 Apr 2019 17:50:57 -0400 Subject: x86/speculation/mds: Fix comment commit cae5ec342645746d617dd420d206e1588d47768a upstream s/L1TF/MDS/ Signed-off-by: Boris Ostrovsky Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Josh Poimboeuf Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2ee603a4863b..ada7cdd33f69 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -221,7 +221,7 @@ static void x86_amd_ssb_disable(void) #undef pr_fmt #define pr_fmt(fmt) "MDS: " fmt -/* Default mitigation for L1TF-affected CPUs */ +/* Default mitigation for MDS-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; static bool mds_nosmt __ro_after_init = false; -- cgit v1.2.3 From c2b25b6b3582fb82dd5171f6c83232dfd442774f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 12 Apr 2019 17:50:58 -0400 Subject: x86/speculation/mds: Print SMT vulnerable on MSBDS with mitigations off commit e2c3c94788b08891dcf3dbe608f9880523ecd71b upstream This code is only for CPUs which are affected by MSBDS, but are *not* affected by the other two MDS issues. For such CPUs, enabling the mds_idle_clear mitigation is enough to mitigate SMT. However if user boots with 'mds=off' and still has SMT enabled, we should not report that SMT is mitigated: $cat /sys//devices/system/cpu/vulnerabilities/mds Vulnerable; SMT mitigated But rather: Vulnerable; SMT vulnerable Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20190412215118.294906495@localhost.localdomain Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ada7cdd33f69..04c140ac36af 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1204,7 +1204,8 @@ static ssize_t mds_show_state(char *buf) if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - sched_smt_active() ? "mitigated" : "disabled"); + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); } return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], -- cgit v1.2.3 From 6d7407ef9272ad3b7d12ca9634075bf5b4aef49d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:28 -0500 Subject: cpu/speculation: Add 'mitigations=' cmdline option commit 98af8452945c55652de68536afdde3b520fec429 upstream Keeping track of the number of mitigations for all the CPU speculation bugs has become overwhelming for many users. It's getting more and more complicated to decide which mitigations are needed for a given architecture. Complicating matters is the fact that each arch tends to have its own custom way to mitigate the same vulnerability. Most users fall into a few basic categories: a) they want all mitigations off; b) they want all reasonable mitigations on, with SMT enabled even if it's vulnerable; or c) they want all reasonable mitigations on, with SMT disabled if vulnerable. Define a set of curated, arch-independent options, each of which is an aggregation of existing options: - mitigations=off: Disable all mitigations. - mitigations=auto: [default] Enable all the default mitigations, but leave SMT enabled, even if it's vulnerable. - mitigations=auto,nosmt: Enable all the default mitigations, disabling SMT if needed by a mitigation. Currently, these options are placeholders which don't actually do anything. They will be fleshed out in upcoming patches. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/b07a8ef9b7c5055c3a4637c87d07c296d5016fe0.1555085500.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 24 ++++++++++++++++++++++++ include/linux/cpu.h | 24 ++++++++++++++++++++++++ kernel/cpu.c | 15 +++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8f04985d3122..df6d9a7c1724 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2539,6 +2539,30 @@ in the "bleeding edge" mini2440 support kernel at http://repo.or.cz/w/linux-2.6/mini2440.git + mitigations= + Control optional mitigations for CPU vulnerabilities. + This is a set of curated, arch-independent options, each + of which is an aggregation of existing arch-specific + options. + + off + Disable all optional CPU mitigations. This + improves system performance, but it may also + expose users to several CPU vulnerabilities. + + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT + enabled, even if it's vulnerable. This is for + users who don't want to be surprised by SMT + getting disabled across kernel upgrades, or who + have other ways of avoiding SMT-based attacks. + This is the default behavior. + + auto,nosmt + Mitigate all CPU vulnerabilities, disabling SMT + if needed. This is for users who always want to + be fully mitigated, even if it means losing SMT. + mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this parameter allows control of the logging verbosity for diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 3c87ad888ed3..57ae83c4d5f4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -189,4 +189,28 @@ static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } #endif +/* + * These are used for a global "mitigations=" cmdline option for toggling + * optional CPU mitigations. + */ +enum cpu_mitigations { + CPU_MITIGATIONS_OFF, + CPU_MITIGATIONS_AUTO, + CPU_MITIGATIONS_AUTO_NOSMT, +}; + +extern enum cpu_mitigations cpu_mitigations; + +/* mitigations=off */ +static inline bool cpu_mitigations_off(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_OFF; +} + +/* mitigations=auto,nosmt */ +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +} + #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 6754f3ecfd94..43e741e88691 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2304,3 +2304,18 @@ void __init boot_cpu_hotplug_init(void) #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } + +enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; + +static int __init mitigations_parse_cmdline(char *arg) +{ + if (!strcmp(arg, "off")) + cpu_mitigations = CPU_MITIGATIONS_OFF; + else if (!strcmp(arg, "auto")) + cpu_mitigations = CPU_MITIGATIONS_AUTO; + else if (!strcmp(arg, "auto,nosmt")) + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + + return 0; +} +early_param("mitigations", mitigations_parse_cmdline); -- cgit v1.2.3 From bd600de16b82a581423dab138bdece4fc8e26ac0 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:29 -0500 Subject: x86/speculation: Support 'mitigations=' cmdline option commit d68be4c4d31295ff6ae34a8ddfaa4c1a8ff42812 upstream Configure x86 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2, Speculative Store Bypass, and L1TF. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/6616d0ae169308516cfdf5216bedd169f8a8291b.1555085500.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 16 +++++++++++----- arch/x86/kernel/cpu/bugs.c | 11 +++++++++-- arch/x86/mm/pti.c | 4 +++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index df6d9a7c1724..59a1181e52b8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2540,15 +2540,20 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - Control optional mitigations for CPU vulnerabilities. - This is a set of curated, arch-independent options, each - of which is an aggregation of existing arch-specific - options. + [X86] Control optional mitigations for CPU + vulnerabilities. This is a set of curated, + arch-independent options, each of which is an + aggregation of existing arch-specific options. off Disable all optional CPU mitigations. This improves system performance, but it may also expose users to several CPU vulnerabilities. + Equivalent to: nopti [X86] + nospectre_v2 [X86] + spectre_v2_user=off [X86] + spec_store_bypass_disable=off [X86] + l1tf=off [X86] auto (default) Mitigate all CPU vulnerabilities, but leave SMT @@ -2556,12 +2561,13 @@ users who don't want to be surprised by SMT getting disabled across kernel upgrades, or who have other ways of avoiding SMT-based attacks. - This is the default behavior. + Equivalent to: (default behavior) auto,nosmt Mitigate all CPU vulnerabilities, disabling SMT if needed. This is for users who always want to be fully mitigated, even if it means losing SMT. + Equivalent to: l1tf=flush,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 04c140ac36af..7c79672234e4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -506,7 +506,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) char arg[20]; int ret, i; - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") || + cpu_mitigations_off()) return SPECTRE_V2_CMD_NONE; ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); @@ -771,7 +772,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) char arg[20]; int ret, i; - if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") || + cpu_mitigations_off()) { return SPEC_STORE_BYPASS_CMD_NONE; } else { ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", @@ -1095,6 +1097,11 @@ static void __init l1tf_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return; + if (cpu_mitigations_off()) + l1tf_mitigation = L1TF_MITIGATION_OFF; + else if (cpu_mitigations_auto_nosmt()) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; + override_cache_bits(&boot_cpu_data); switch (l1tf_mitigation) { diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 4fee5c3003ed..5890f09bfc19 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void) } } - if (cmdline_find_option_bool(boot_command_line, "nopti")) { + if (cmdline_find_option_bool(boot_command_line, "nopti") || + cpu_mitigations_off()) { pti_mode = PTI_FORCE_OFF; pti_print_if_insecure("disabled on command line."); return; -- cgit v1.2.3 From f905727fa11cef85acf277917c7d7237b519a01f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:30 -0500 Subject: powerpc/speculation: Support 'mitigations=' cmdline option commit 782e69efb3dfed6e8360bc612e8c7827a901a8f9 upstream Configure powerpc CPU runtime speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v1, Spectre v2, and Speculative Store Bypass. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/245a606e1a42a558a310220312d9b6adb9159df6.1555085500.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++---- arch/powerpc/kernel/security.c | 6 +++--- arch/powerpc/kernel/setup_64.c | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 59a1181e52b8..ed9ec2ea362d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2540,7 +2540,7 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - [X86] Control optional mitigations for CPU + [X86,PPC] Control optional mitigations for CPU vulnerabilities. This is a set of curated, arch-independent options, each of which is an aggregation of existing arch-specific options. @@ -2549,10 +2549,11 @@ Disable all optional CPU mitigations. This improves system performance, but it may also expose users to several CPU vulnerabilities. - Equivalent to: nopti [X86] - nospectre_v2 [X86] + Equivalent to: nopti [X86,PPC] + nospectre_v1 [PPC] + nospectre_v2 [X86,PPC] spectre_v2_user=off [X86] - spec_store_bypass_disable=off [X86] + spec_store_bypass_disable=off [X86,PPC] l1tf=off [X86] auto (default) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index b33bafb8fcea..70568ccbd9fd 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -57,7 +57,7 @@ void setup_barrier_nospec(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); - if (!no_nospec) + if (!no_nospec && !cpu_mitigations_off()) enable_barrier_nospec(enable); } @@ -116,7 +116,7 @@ static int __init handle_nospectre_v2(char *p) early_param("nospectre_v2", handle_nospectre_v2); void setup_spectre_v2(void) { - if (no_spectrev2) + if (no_spectrev2 || cpu_mitigations_off()) do_btb_flush_fixups(); else btb_flush_enabled = true; @@ -300,7 +300,7 @@ void setup_stf_barrier(void) stf_enabled_flush_types = type; - if (!no_stf_barrier) + if (!no_stf_barrier && !cpu_mitigations_off()) stf_barrier_enable(enable); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 236c1151a3a7..c7ec27ba8926 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -958,7 +958,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush) + if (!no_rfi_flush && !cpu_mitigations_off()) rfi_flush_enable(enable); } -- cgit v1.2.3 From 394e3d8de828de254218b2c183a5858d037facf5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:31 -0500 Subject: s390/speculation: Support 'mitigations=' cmdline option commit 0336e04a6520bdaefdb0769d2a70084fa52e81ed upstream Configure s390 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Spectre v1 and Spectre v2. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/e4a161805458a5ec88812aac0307ae3908a030fc.1555085500.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 5 +++-- arch/s390/kernel/nospec-branch.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ed9ec2ea362d..9aa3543a8723 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2540,7 +2540,7 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - [X86,PPC] Control optional mitigations for CPU + [X86,PPC,S390] Control optional mitigations for CPU vulnerabilities. This is a set of curated, arch-independent options, each of which is an aggregation of existing arch-specific options. @@ -2551,7 +2551,8 @@ expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] nospectre_v1 [PPC] - nospectre_v2 [X86,PPC] + nobp=0 [S390] + nospectre_v2 [X86,PPC,S390] spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] l1tf=off [X86] diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index bdddaae96559..649135cbedd5 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include static int __init nobp_setup_early(char *str) @@ -58,7 +59,7 @@ early_param("nospectre_v2", nospectre_v2_setup_early); void __init nospec_auto_detect(void) { - if (test_facility(156)) { + if (test_facility(156) || cpu_mitigations_off()) { /* * The machine supports etokens. * Disable expolines and disable nobp. -- cgit v1.2.3 From 4cf168a94fd508c32c8e63aea2e8e7c0e0990f8a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Apr 2019 16:39:02 -0500 Subject: x86/speculation/mds: Add 'mitigations=' support for MDS commit 5c14068f87d04adc73ba3f41c2a303d3c3d1fa12 upstream Add MDS to the new 'mitigations=' cmdline option. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ arch/x86/kernel/cpu/bugs.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9aa3543a8723..18cad2b0392a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2556,6 +2556,7 @@ spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] l1tf=off [X86] + mds=off [X86] auto (default) Mitigate all CPU vulnerabilities, but leave SMT @@ -2570,6 +2571,7 @@ if needed. This is for users who always want to be fully mitigated, even if it means losing SMT. Equivalent to: l1tf=flush,nosmt [X86] + mds=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7c79672234e4..1b2ce0c6c4da 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -233,7 +233,7 @@ static const char * const mds_strings[] = { static void __init mds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MDS)) { + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { mds_mitigation = MDS_MITIGATION_OFF; return; } @@ -244,7 +244,8 @@ static void __init mds_select_mitigation(void) static_branch_enable(&mds_user_clear); - if (mds_nosmt && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) cpu_smt_disable(false); } -- cgit v1.2.3 From 0acbbdf3e41da9987c57310ed5cad9ace740c757 Mon Sep 17 00:00:00 2001 From: speck for Pawan Gupta Date: Mon, 6 May 2019 12:23:50 -0700 Subject: x86/mds: Add MDSUM variant to the MDS documentation commit e672f8bf71c66253197e503f75c771dd28ada4a0 upstream Updated the documentation for a new CVE-2019-11091 Microarchitectural Data Sampling Uncacheable Memory (MDSUM) which is a variant of Microarchitectural Data Sampling (MDS). MDS is a family of side channel attacks on internal buffers in Intel CPUs. MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from memory that takes a fault or assist can leave data in a microarchitectural structure that may later be observed using one of the same methods used by MSBDS, MFBDS or MLPDS. There are no new code changes expected for MDSUM. The existing mitigation for MDS applies to MDSUM as well. Signed-off-by: Pawan Gupta Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Jon Masters Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/mds.rst | 5 +++-- Documentation/x86/mds.rst | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst index 244ab47d1fb3..e0dccf414eca 100644 --- a/Documentation/admin-guide/hw-vuln/mds.rst +++ b/Documentation/admin-guide/hw-vuln/mds.rst @@ -32,11 +32,12 @@ Related CVEs The following CVE entries are related to the MDS vulnerability: - ============== ===== ============================================== + ============== ===== =================================================== CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling - ============== ===== ============================================== + CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory + ============== ===== =================================================== Problem ------- diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 3d6f943f1afb..979945be257a 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -12,6 +12,7 @@ on internal buffers in Intel CPUs. The variants are: - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) + - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091) MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a dependent load (store-to-load forwarding) as an optimization. The forward @@ -38,6 +39,10 @@ faulting or assisting loads under certain conditions, which again can be exploited eventually. Load ports are shared between Hyper-Threads so cross thread leakage is possible. +MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from +memory that takes a fault or assist can leave data in a microarchitectural +structure that may later be observed using one of the same methods used by +MSBDS, MFBDS or MLPDS. Exposure assumptions -------------------- -- cgit v1.2.3 From 14e3ad8a1452fcbfa0aa95547e3728f5d1caf98f Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 6 May 2019 23:52:58 +0000 Subject: Documentation: Correct the possible MDS sysfs values commit ea01668f9f43021b28b3f4d5ffad50106a1e1301 upstream Adjust the last two rows in the table that display possible values when MDS mitigation is enabled. They both were slightly innacurate. In addition, convert the table of possible values and their descriptions to a list-table. The simple table format uses the top border of equals signs to determine cell width which resulted in the first column being far too wide in comparison to the second column that contained the majority of the text. Signed-off-by: Tyler Hicks Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/mds.rst | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst index e0dccf414eca..e3a796c0d3a2 100644 --- a/Documentation/admin-guide/hw-vuln/mds.rst +++ b/Documentation/admin-guide/hw-vuln/mds.rst @@ -95,22 +95,19 @@ mitigations are active. The relevant sysfs file is: The possible values in this file are: - ========================================= ================================= - 'Not affected' The processor is not vulnerable - - 'Vulnerable' The processor is vulnerable, - but no mitigation enabled - - 'Vulnerable: Clear CPU buffers attempted' The processor is vulnerable but - microcode is not updated. - The mitigation is enabled on a - best effort basis. - See :ref:`vmwerv` - - 'Mitigation: CPU buffer clear' The processor is vulnerable and the - CPU buffer clearing mitigation is - enabled. - ========================================= ================================= + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable but microcode is not updated. + + The mitigation is enabled on a best effort basis. See :ref:`vmwerv` + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. If the processor is vulnerable then the following information is appended to the above information: -- cgit v1.2.3 From f8bb2589227b16a73806c68cf34e3b9aab287d86 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 7 May 2019 15:05:22 -0500 Subject: x86/speculation/mds: Fix documentation typo commit 95310e348a321b45fb746c176961d4da72344282 upstream Fix a minor typo in the MDS documentation: "eanbled" -> "enabled". Reported-by: Jeff Bastian Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 979945be257a..534e9baa4e1d 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -116,7 +116,7 @@ Kernel internal mitigation modes off Mitigation is disabled. Either the CPU is not affected or mds=off is supplied on the kernel command line - full Mitigation is eanbled. CPU is affected and MD_CLEAR is + full Mitigation is enabled. CPU is affected and MD_CLEAR is advertised in CPUID. vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not -- cgit v1.2.3 From 89e11ec0280be9132b81e4cba5ea6c10a8012038 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2019 19:17:14 +0200 Subject: Linux 5.0.16 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 11c7f7844507..95670d520786 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 15 +SUBLEVEL = 16 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3 From 54c140c5b614400d7909a64719774c9ca9858125 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 18 Jan 2019 10:34:16 -0700 Subject: bfq: update internal depth state when queue depth changes [ Upstream commit 77f1e0a52d26242b6c2dba019f6ebebfb9ff701e ] A previous commit moved the shallow depth and BFQ depth map calculations to be done at init time, moving it outside of the hotter IO path. This potentially causes hangs if the users changes the depth of the scheduler map, by writing to the 'nr_requests' sysfs file for that device. Add a blk-mq-sched hook that allows blk-mq to inform the scheduler if the depth changes, so that the scheduler can update its internal state. Tested-by: Kai Krakow Reported-by: Paolo Valente Fixes: f0635b8a416e ("bfq: calculate shallow depths at init time") Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 8 +++++++- block/blk-mq.c | 2 ++ include/linux/elevator.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 72510c470001..356620414cf9 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5353,7 +5353,7 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, return min_shallow; } -static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) +static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; @@ -5361,6 +5361,11 @@ static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags); sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow); +} + +static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) +{ + bfq_depth_updated(hctx); return 0; } @@ -5783,6 +5788,7 @@ static struct elevator_type iosched_bfq_mq = { .requests_merged = bfq_requests_merged, .request_merged = bfq_request_merged, .has_work = bfq_has_work, + .depth_updated = bfq_depth_updated, .init_hctx = bfq_init_hctx, .init_sched = bfq_init_queue, .exit_sched = bfq_exit_queue, diff --git a/block/blk-mq.c b/block/blk-mq.c index 6930c82ab75f..5b920a82bfe6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3131,6 +3131,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) } if (ret) break; + if (q->elevator && q->elevator->type->ops.depth_updated) + q->elevator->type->ops.depth_updated(hctx); } if (!ret) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2e9e2763bf47..6e8bc53740f0 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -31,6 +31,7 @@ struct elevator_mq_ops { void (*exit_sched)(struct elevator_queue *); int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); + void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); -- cgit v1.2.3 From 8fb172181a291767cf5803b655bc61d81dd48eb3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 24 Apr 2019 13:09:34 -0500 Subject: platform/x86: sony-laptop: Fix unintentional fall-through commit 1cbd7a64959d33e7a2a1fa2bf36a62b350a9fcbd upstream. It seems that the default case should return AE_CTRL_TERMINATE, instead of falling through to case ACPI_RESOURCE_TYPE_END_TAG and returning AE_OK; otherwise the line of code at the end of the function is unreachable and makes no sense: return AE_CTRL_TERMINATE; This fix is based on the following thread of discussion: https://lore.kernel.org/patchwork/patch/959782/ Fixes: 33a04454527e ("sony-laptop: Add SNY6001 device handling (sonypi reimplementation)") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/sony-laptop.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index b205b037fd61..b50f8f73fb47 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -4424,14 +4424,16 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context) } return AE_OK; } + + case ACPI_RESOURCE_TYPE_END_TAG: + return AE_OK; + default: dprintk("Resource %d isn't an IRQ nor an IO port\n", resource->type); + return AE_CTRL_TERMINATE; - case ACPI_RESOURCE_TYPE_END_TAG: - return AE_OK; } - return AE_CTRL_TERMINATE; } static int sony_pic_possible_resources(struct acpi_device *device) -- cgit v1.2.3 From 22c8b3235eb2adf0839b0582f67fcc6ec5691225 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 7 Mar 2019 17:37:16 +0800 Subject: platform/x86: thinkpad_acpi: Disable Bluetooth for some machines commit f7db839fccf087664e5587966220821289b6a9cb upstream. Some AMD based ThinkPads have a firmware bug that calling "GBDC" will cause Bluetooth on Intel wireless cards blocked. Probe these models by DMI match and disable Bluetooth subdriver if specified Intel wireless card exist. Cc: stable # 4.14+ Signed-off-by: Jiaxun Yang Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/thinkpad_acpi.c | 72 +++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 726341f2b638..89ce14b35adc 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -79,7 +79,7 @@ #include #include #include -#include +#include #include #include #include @@ -4501,6 +4501,74 @@ static void bluetooth_exit(void) bluetooth_shutdown(); } +static const struct dmi_system_id bt_fwbug_list[] __initconst = { + { + .ident = "ThinkPad E485", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BOARD_NAME, "20KU"), + }, + }, + { + .ident = "ThinkPad E585", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BOARD_NAME, "20KV"), + }, + }, + { + .ident = "ThinkPad A285 - 20MW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BOARD_NAME, "20MW"), + }, + }, + { + .ident = "ThinkPad A285 - 20MX", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BOARD_NAME, "20MX"), + }, + }, + { + .ident = "ThinkPad A485 - 20MU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BOARD_NAME, "20MU"), + }, + }, + { + .ident = "ThinkPad A485 - 20MV", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BOARD_NAME, "20MV"), + }, + }, + {} +}; + +static const struct pci_device_id fwbug_cards_ids[] __initconst = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24FD) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2526) }, + {} +}; + + +static int __init have_bt_fwbug(void) +{ + /* + * Some AMD based ThinkPads have a firmware bug that calling + * "GBDC" will cause bluetooth on Intel wireless cards blocked + */ + if (dmi_check_system(bt_fwbug_list) && pci_dev_present(fwbug_cards_ids)) { + vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_RFKILL, + FW_BUG "disable bluetooth subdriver for Intel cards\n"); + return 1; + } else + return 0; +} + static int __init bluetooth_init(struct ibm_init_struct *iibm) { int res; @@ -4513,7 +4581,7 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm) /* bluetooth not supported on 570, 600e/x, 770e, 770x, A21e, A2xm/p, G4x, R30, R31, R40e, R50e, T20-22, X20-21 */ - tp_features.bluetooth = hkey_handle && + tp_features.bluetooth = !have_bt_fwbug() && hkey_handle && acpi_evalf(hkey_handle, &status, "GBDC", "qd"); vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_RFKILL, -- cgit v1.2.3 From b9d31180294a898414221a6e52697852641f5cc9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 27 Mar 2019 09:25:34 -0500 Subject: platform/x86: dell-laptop: fix rfkill functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6cc13c28da5beee0f706db6450e190709700b34a upstream. When converting the driver two arguments were transposed leading to rfkill not working. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=201427 Reported-by: Pepijn de Vos Fixes: 549b49 ("platform/x86: dell-smbios: Introduce dispatcher for SMM calls") Signed-off-by: Mario Limonciello Acked-by: Pali Rohár Cc: # 4.14.x Signed-off-by: Darren Hart (VMware) Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/dell-laptop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 95e6ca116e00..a561f653cf13 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -531,7 +531,7 @@ static void dell_rfkill_query(struct rfkill *rfkill, void *data) return; } - dell_fill_request(&buffer, 0, 0x2, 0, 0); + dell_fill_request(&buffer, 0x2, 0, 0, 0); ret = dell_send_request(&buffer, CLASS_INFO, SELECT_RFKILL); hwswitch = buffer.output[1]; @@ -562,7 +562,7 @@ static int dell_debugfs_show(struct seq_file *s, void *data) return ret; status = buffer.output[1]; - dell_fill_request(&buffer, 0, 0x2, 0, 0); + dell_fill_request(&buffer, 0x2, 0, 0, 0); hwswitch_ret = dell_send_request(&buffer, CLASS_INFO, SELECT_RFKILL); if (hwswitch_ret) return hwswitch_ret; @@ -647,7 +647,7 @@ static void dell_update_rfkill(struct work_struct *ignored) if (ret != 0) return; - dell_fill_request(&buffer, 0, 0x2, 0, 0); + dell_fill_request(&buffer, 0x2, 0, 0, 0); ret = dell_send_request(&buffer, CLASS_INFO, SELECT_RFKILL); if (ret == 0 && (status & BIT(0))) -- cgit v1.2.3 From a91e668131adb5ba7a07e279ee6237a4616e71f4 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 3 Apr 2019 14:48:33 +0200 Subject: hwmon: (pwm-fan) Disable PWM if fetching cooling data fails commit 53f1647da3e8fb3e89066798f0fdc045064d353d upstream. In case pwm_fan_of_get_cooling_data() fails we should disable the PWM just like in the other error cases. Fixes: 2e5219c77183 ("hwmon: (pwm-fan) Read PWM FAN configuration from device tree") Cc: # 4.14+ Reported-by: Guenter Rock Signed-off-by: Stefan Wahren Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/pwm-fan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 2c944825026f..af34f7ee1b04 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -254,7 +254,7 @@ static int pwm_fan_probe(struct platform_device *pdev) ret = pwm_fan_of_get_cooling_data(&pdev->dev, ctx); if (ret) - return ret; + goto err_pwm_disable; ctx->pwm_fan_state = ctx->pwm_fan_max_state; if (IS_ENABLED(CONFIG_THERMAL)) { -- cgit v1.2.3 From 5c3c0ffa9d74fdbaab2f366f48388a50f95ece84 Mon Sep 17 00:00:00 2001 From: Lei YU Date: Mon, 15 Apr 2019 18:37:20 +0800 Subject: hwmon: (occ) Fix extended status bits commit b88c5049219a7f322bb1fd65fc30d17472a23563 upstream. The occ's extended status is checked and shown as sysfs attributes. But the code was incorrectly checking the "status" bits. Fix it by checking the "ext_status" bits. Cc: stable@vger.kernel.org Fixes: df04ced684d4 ("hwmon (occ): Add sysfs attributes for additional OCC data") Signed-off-by: Lei YU Reviewed-by: Eddie James Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/occ/sysfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c index 743b26ec8e54..f04f502b2e69 100644 --- a/drivers/hwmon/occ/sysfs.c +++ b/drivers/hwmon/occ/sysfs.c @@ -51,16 +51,16 @@ static ssize_t occ_sysfs_show(struct device *dev, val = !!(header->status & OCC_STAT_ACTIVE); break; case 2: - val = !!(header->status & OCC_EXT_STAT_DVFS_OT); + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_OT); break; case 3: - val = !!(header->status & OCC_EXT_STAT_DVFS_POWER); + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_POWER); break; case 4: - val = !!(header->status & OCC_EXT_STAT_MEM_THROTTLE); + val = !!(header->ext_status & OCC_EXT_STAT_MEM_THROTTLE); break; case 5: - val = !!(header->status & OCC_EXT_STAT_QUICK_DROP); + val = !!(header->ext_status & OCC_EXT_STAT_QUICK_DROP); break; case 6: val = header->occ_state; -- cgit v1.2.3 From 0748cf2d9d1cb2e49a594e18c275391974458b84 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 11 Apr 2019 16:56:31 -0700 Subject: selftests/seccomp: Handle namespace failures gracefully commit 9dd3fcb0ab73cb1e00b8562ef027a38521aaff87 upstream. When running without USERNS or PIDNS the seccomp test would hang since it was waiting forever for the child to trigger the user notification since it seems the glibc() abort handler makes a call to getpid(), which would trap again. This changes the getpid filter to getppid, and makes sure ASSERTs execute to stop from spawning the listener. Reported-by: Shuah Khan Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Cc: stable@vger.kernel.org # > 5.0 Signed-off-by: Kees Cook Reviewed-by: Tycho Andersen Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 43 ++++++++++++++------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 1c2509104924..408431744d06 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3089,9 +3089,9 @@ TEST(user_notification_basic) /* Check that we get -ENOSYS with no listener attached */ if (pid == 0) { - if (user_trap_syscall(__NR_getpid, 0) < 0) + if (user_trap_syscall(__NR_getppid, 0) < 0) exit(1); - ret = syscall(__NR_getpid); + ret = syscall(__NR_getppid); exit(ret >= 0 || errno != ENOSYS); } @@ -3106,12 +3106,12 @@ TEST(user_notification_basic) EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); /* Check that the basic notification machinery works */ - listener = user_trap_syscall(__NR_getpid, + listener = user_trap_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); /* Installing a second listener in the chain should EBUSY */ - EXPECT_EQ(user_trap_syscall(__NR_getpid, + EXPECT_EQ(user_trap_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER), -1); EXPECT_EQ(errno, EBUSY); @@ -3120,7 +3120,7 @@ TEST(user_notification_basic) ASSERT_GE(pid, 0); if (pid == 0) { - ret = syscall(__NR_getpid); + ret = syscall(__NR_getppid); exit(ret != USER_NOTIF_MAGIC); } @@ -3138,7 +3138,7 @@ TEST(user_notification_basic) EXPECT_GT(poll(&pollfd, 1, -1), 0); EXPECT_EQ(pollfd.revents, POLLOUT); - EXPECT_EQ(req.data.nr, __NR_getpid); + EXPECT_EQ(req.data.nr, __NR_getppid); resp.id = req.id; resp.error = 0; @@ -3165,7 +3165,7 @@ TEST(user_notification_kill_in_middle) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; - listener = user_trap_syscall(__NR_getpid, + listener = user_trap_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); @@ -3177,7 +3177,7 @@ TEST(user_notification_kill_in_middle) ASSERT_GE(pid, 0); if (pid == 0) { - ret = syscall(__NR_getpid); + ret = syscall(__NR_getppid); exit(ret != USER_NOTIF_MAGIC); } @@ -3277,7 +3277,7 @@ TEST(user_notification_closed_listener) long ret; int status, listener; - listener = user_trap_syscall(__NR_getpid, + listener = user_trap_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); @@ -3288,7 +3288,7 @@ TEST(user_notification_closed_listener) ASSERT_GE(pid, 0); if (pid == 0) { close(listener); - ret = syscall(__NR_getpid); + ret = syscall(__NR_getppid); exit(ret != -1 && errno != ENOSYS); } @@ -3311,14 +3311,15 @@ TEST(user_notification_child_pid_ns) ASSERT_EQ(unshare(CLONE_NEWPID), 0); - listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_trap_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); ASSERT_GE(pid, 0); if (pid == 0) - exit(syscall(__NR_getpid) != USER_NOTIF_MAGIC); + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); EXPECT_EQ(req.pid, pid); @@ -3346,7 +3347,8 @@ TEST(user_notification_sibling_pid_ns) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; - listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_trap_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3359,7 +3361,7 @@ TEST(user_notification_sibling_pid_ns) ASSERT_GE(pid2, 0); if (pid2 == 0) - exit(syscall(__NR_getpid) != USER_NOTIF_MAGIC); + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); EXPECT_EQ(waitpid(pid2, &status, 0), pid2); EXPECT_EQ(true, WIFEXITED(status)); @@ -3368,11 +3370,11 @@ TEST(user_notification_sibling_pid_ns) } /* Create the sibling ns, and sibling in it. */ - EXPECT_EQ(unshare(CLONE_NEWPID), 0); - EXPECT_EQ(errno, 0); + ASSERT_EQ(unshare(CLONE_NEWPID), 0); + ASSERT_EQ(errno, 0); pid2 = fork(); - EXPECT_GE(pid2, 0); + ASSERT_GE(pid2, 0); if (pid2 == 0) { ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); @@ -3380,7 +3382,7 @@ TEST(user_notification_sibling_pid_ns) * The pid should be 0, i.e. the task is in some namespace that * we can't "see". */ - ASSERT_EQ(req.pid, 0); + EXPECT_EQ(req.pid, 0); resp.id = req.id; resp.error = 0; @@ -3408,14 +3410,15 @@ TEST(user_notification_fault_recv) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; - listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_trap_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); ASSERT_GE(pid, 0); if (pid == 0) - exit(syscall(__NR_getpid) != USER_NOTIF_MAGIC); + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); /* Do a bad recv() */ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); -- cgit v1.2.3 From a16532b59cca93292ae817dfa726fa3052b187ff Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Tue, 16 Apr 2019 14:17:11 +0200 Subject: kernfs: fix barrier usage in __kernfs_new_node() commit 998267900cee901c5d1dfa029a6304d00acbc29f upstream. smp_mb__before_atomic() can not be applied to atomic_set(). Remove the barrier and rely on RELEASE synchronization. Fixes: ba16b2846a8c6 ("kernfs: add an API to get kernfs node from inode number") Cc: stable@vger.kernel.org Signed-off-by: Andrea Parri Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 4ca0b5c18192..853a69e493f5 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -650,11 +650,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, kn->id.generation = gen; /* - * set ino first. This barrier is paired with atomic_inc_not_zero in + * set ino first. This RELEASE is paired with atomic_inc_not_zero in * kernfs_find_and_get_node_by_ino */ - smp_mb__before_atomic(); - atomic_set(&kn->count, 1); + atomic_set_release(&kn->count, 1); atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); -- cgit v1.2.3 From b5f2cb384e118657472350a246b77ba11e936560 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 4 Apr 2019 14:39:09 +0200 Subject: virt: vbox: Sanity-check parameter types for hgcm-calls coming from userspace commit cf4f2ad6b87dda2dbe0573b1ebeb0273f8d4aac6 upstream. Userspace can make host function calls, called hgcm-calls through the /dev/vboxguest device. In this case we should not accept all hgcm-function-parameter-types, some are only valid for in kernel calls. This commit adds proper hgcm-function-parameter-type validation to the ioctl for doing a hgcm-call from userspace. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_core.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index 1475ed5ffcde..0afef60d0638 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -1263,6 +1263,20 @@ static int vbg_ioctl_hgcm_disconnect(struct vbg_dev *gdev, return ret; } +static bool vbg_param_valid(enum vmmdev_hgcm_function_parameter_type type) +{ + switch (type) { + case VMMDEV_HGCM_PARM_TYPE_32BIT: + case VMMDEV_HGCM_PARM_TYPE_64BIT: + case VMMDEV_HGCM_PARM_TYPE_LINADDR: + case VMMDEV_HGCM_PARM_TYPE_LINADDR_IN: + case VMMDEV_HGCM_PARM_TYPE_LINADDR_OUT: + return true; + default: + return false; + } +} + static int vbg_ioctl_hgcm_call(struct vbg_dev *gdev, struct vbg_session *session, bool f32bit, struct vbg_ioctl_hgcm_call *call) @@ -1298,6 +1312,23 @@ static int vbg_ioctl_hgcm_call(struct vbg_dev *gdev, } call->hdr.size_out = actual_size; + /* Validate parameter types */ + if (f32bit) { + struct vmmdev_hgcm_function_parameter32 *parm = + VBG_IOCTL_HGCM_CALL_PARMS32(call); + + for (i = 0; i < call->parm_count; i++) + if (!vbg_param_valid(parm[i].type)) + return -EINVAL; + } else { + struct vmmdev_hgcm_function_parameter *parm = + VBG_IOCTL_HGCM_CALL_PARMS(call); + + for (i = 0; i < call->parm_count; i++) + if (!vbg_param_valid(parm[i].type)) + return -EINVAL; + } + /* * Validate the client id. */ -- cgit v1.2.3 From a41382ca99e87805c2717efbeeeb3eb88f45b82b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 25 Apr 2019 18:05:36 +0200 Subject: USB: serial: fix unthrottle races commit 3f5edd58d040bfa4b74fb89bc02f0bc6b9cd06ab upstream. Fix two long-standing bugs which could potentially lead to memory corruption or leave the port throttled until it is reopened (on weakly ordered systems), respectively, when read-URB completion races with unthrottle(). First, the URB must not be marked as free before processing is complete to prevent it from being submitted by unthrottle() on another CPU. CPU 1 CPU 2 ================ ================ complete() unthrottle() process_urb(); smp_mb__before_atomic(); set_bit(i, free); if (test_and_clear_bit(i, free)) submit_urb(); Second, the URB must be marked as free before checking the throttled flag to prevent unthrottle() on another CPU from failing to observe that the URB needs to be submitted if complete() sees that the throttled flag is set. CPU 1 CPU 2 ================ ================ complete() unthrottle() set_bit(i, free); throttled = 0; smp_mb__after_atomic(); smp_mb(); if (throttled) if (test_and_clear_bit(i, free)) return; submit_urb(); Note that test_and_clear_bit() only implies barriers when the test is successful. To handle the case where the URB is still in use an explicit barrier needs to be added to unthrottle() for the second race condition. Fixes: d83b405383c9 ("USB: serial: add support for multiple read urbs") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/generic.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 2274d9625f63..0fff4968ea1b 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -376,6 +376,7 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb) struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned long flags; + bool stopped = false; int status = urb->status; int i; @@ -383,33 +384,51 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb) if (urb == port->read_urbs[i]) break; } - set_bit(i, &port->read_urbs_free); dev_dbg(&port->dev, "%s - urb %d, len %d\n", __func__, i, urb->actual_length); switch (status) { case 0: + usb_serial_debug_data(&port->dev, __func__, urb->actual_length, + data); + port->serial->type->process_read_urb(urb); break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", __func__, status); - return; + stopped = true; + break; case -EPIPE: dev_err(&port->dev, "%s - urb stopped: %d\n", __func__, status); - return; + stopped = true; + break; default: dev_dbg(&port->dev, "%s - nonzero urb status: %d\n", __func__, status); - goto resubmit; + break; } - usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); - port->serial->type->process_read_urb(urb); + /* + * Make sure URB processing is done before marking as free to avoid + * racing with unthrottle() on another CPU. Matches the barriers + * implied by the test_and_clear_bit() in + * usb_serial_generic_submit_read_urb(). + */ + smp_mb__before_atomic(); + set_bit(i, &port->read_urbs_free); + /* + * Make sure URB is marked as free before checking the throttled flag + * to avoid racing with unthrottle() on another CPU. Matches the + * smp_mb() in unthrottle(). + */ + smp_mb__after_atomic(); + + if (stopped) + return; -resubmit: /* Throttle the device if requested by tty */ spin_lock_irqsave(&port->lock, flags); port->throttled = port->throttle_req; @@ -484,6 +503,12 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty) port->throttled = port->throttle_req = 0; spin_unlock_irq(&port->lock); + /* + * Matches the smp_mb__after_atomic() in + * usb_serial_generic_read_bulk_callback(). + */ + smp_mb(); + if (was_throttled) usb_serial_generic_submit_read_urbs(port, GFP_KERNEL); } -- cgit v1.2.3 From fc92e97e24dfca591e53de7f2649005a7aaedc61 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:24 -0400 Subject: iio: adc: xilinx: fix potential use-after-free on remove [ Upstream commit 62039b6aef63380ba7a37c113bbaeee8a55c5342 ] When cancel_delayed_work() returns, the delayed work may still be running. This means that the core could potentially free the private structure (struct xadc) while the delayed work is still using it. This is a potential use-after-free. Fix by calling cancel_delayed_work_sync(), which waits for any residual work to finish before returning. Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 3f6be5ac049a..1960694e8007 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1320,7 +1320,7 @@ static int xadc_remove(struct platform_device *pdev) } free_irq(xadc->irq, indio_dev); clk_disable_unprepare(xadc->clk); - cancel_delayed_work(&xadc->zynq_unmask_work); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); kfree(xadc->data); kfree(indio_dev->channels); -- cgit v1.2.3 From 1fc0aeddc0b7490e2fbfb688f66e3c8dc6b5c2d1 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:25 -0400 Subject: iio: adc: xilinx: fix potential use-after-free on probe [ Upstream commit 862e4644fd2d7df8998edc65e0963ea2f567bde9 ] If probe errors out after request_irq(), its error path does not explicitly cancel the delayed work, which may have been scheduled by the interrupt handler. This means the delayed work may still be running when the core frees the private structure (struct xadc). This is a potential use-after-free. Fix by inserting cancel_delayed_work_sync() in the probe error path. Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/xilinx-xadc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 1960694e8007..15e1a103f37d 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1290,6 +1290,7 @@ static int xadc_probe(struct platform_device *pdev) err_free_irq: free_irq(xadc->irq, indio_dev); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); err_clk_disable_unprepare: clk_disable_unprepare(xadc->clk); err_free_samplerate_trigger: -- cgit v1.2.3 From a9189a36d66ea1f60e279641a3a53d51b0b5ca82 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:26 -0400 Subject: iio: adc: xilinx: prevent touching unclocked h/w on remove [ Upstream commit 2e4b88f73966adead360e47621df0183586fac32 ] In remove, the clock is disabled before canceling the delayed work. This means that the delayed work may be touching unclocked hardware. Fix by disabling the clock after the delayed work is fully canceled. This is consistent with the probe error path order. Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 15e1a103f37d..1ae86e7359f7 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1320,8 +1320,8 @@ static int xadc_remove(struct platform_device *pdev) iio_triggered_buffer_cleanup(indio_dev); } free_irq(xadc->irq, indio_dev); - clk_disable_unprepare(xadc->clk); cancel_delayed_work_sync(&xadc->zynq_unmask_work); + clk_disable_unprepare(xadc->clk); kfree(xadc->data); kfree(indio_dev->channels); -- cgit v1.2.3 From 469cc616e03c22ed3371300dcf82df39902f8642 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Mar 2019 12:28:03 -0700 Subject: acpi/nfit: Always dump _DSM output payload [ Upstream commit 351f339faa308c1c1461314a18c832239a841ca0 ] The dynamic-debug statements for command payload output only get emitted when the command is not ND_CMD_CALL. Move the output payload dumping ahead of the early return path for ND_CMD_CALL. Fixes: 31eca76ba2fc9 ("...whitelisted dimm command marshaling mechanism") Reported-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/acpi/nfit/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 4be4dc3e8aa6..38ec79bb3edd 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -563,6 +563,12 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, goto out; } + dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, + cmd_name, out_obj->buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, + out_obj->buffer.pointer, + min_t(u32, 128, out_obj->buffer.length), true); + if (call_pkg) { call_pkg->nd_fw_size = out_obj->buffer.length; memcpy(call_pkg->nd_payload + call_pkg->nd_size_in, @@ -581,12 +587,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, return 0; } - dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, - cmd_name, out_obj->buffer.length); - print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, - out_obj->buffer.pointer, - min_t(u32, 128, out_obj->buffer.length), true); - for (i = 0, offset = 0; i < desc->out_num; i++) { u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf, (u32 *) out_obj->buffer.pointer, -- cgit v1.2.3 From e42bcea02b46d9a248d33014ec901f363a77a392 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 03:20:34 -0500 Subject: libnvdimm/namespace: Fix a potential NULL pointer dereference [ Upstream commit 55c1fc0af29a6c1b92f217b7eb7581a882e0c07c ] In case kmemdup fails, the fix goes to blk_err to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/nvdimm/namespace_devs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 33a3b23b3db7..e761b29f7160 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2249,9 +2249,12 @@ static struct device *create_namespace_blk(struct nd_region *nd_region, if (!nsblk->uuid) goto blk_err; memcpy(name, nd_label->name, NSLABEL_NAME_LEN); - if (name[0]) + if (name[0]) { nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, GFP_KERNEL); + if (!nsblk->alt_name) + goto blk_err; + } res = nsblk_add_resource(nd_region, ndd, nsblk, __le64_to_cpu(nd_label->dpa)); if (!res) -- cgit v1.2.3 From c64e4d22342cac48dd296dd10ef01a50215dcf92 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 13:59:08 -0800 Subject: HID: input: add mapping for Expose/Overview key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 96dd86871e1fffbc39e4fa61c9c75ec54ee9af0f ] According to HUTRR77 usage 0x29f from the consumer page is reserved for the Desktop application to present all running user’s application windows. Linux defines KEY_SCALE to request Compiz Scale (Expose) mode, so let's add the mapping. Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/hid/hid-input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index ff92a7b2fc89..468da6f6765d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1042,6 +1042,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break; case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break; + case 0x29f: map_key_clear(KEY_SCALE); break; + default: map_key_clear(KEY_UNKNOWN); } break; -- cgit v1.2.3 From 0e56b93040c2329e8f91fbd0d453fa4c652b8201 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 14:05:52 -0800 Subject: HID: input: add mapping for keyboard Brightness Up/Down/Toggle keys [ Upstream commit 7975a1d6a7afeb3eb61c971a153d24dd8fa032f3 ] According to HUTRR73 usages 0x79, 0x7a and 0x7c from the consumer page correspond to Brightness Up/Down/Toggle keys, so let's add the mappings. Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/hid/hid-input.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 468da6f6765d..290efac7e6bf 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -908,6 +908,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break; + case 0x079: map_key_clear(KEY_KBDILLUMUP); break; + case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break; + case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break; + case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break; -- cgit v1.2.3 From f744a5e31a4756b3e33c3797e0b0fc5425d4d0b8 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 14:35:45 -0800 Subject: HID: input: add mapping for "Toggle Display" key [ Upstream commit c01908a14bf735b871170092807c618bb9dae654 ] According to HUT 1.12 usage 0xb5 from the generic desktop page is reserved for switching between external and internal display, so let's add the mapping. Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/hid/hid-input.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 290efac7e6bf..4f119300ce3f 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -677,6 +677,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel break; } + if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */ + switch (usage->hid & 0xf) { + case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break; + default: goto ignore; + } + break; + } + /* * Some lazy vendors declare 255 usages for System Control, * leading to the creation of ABS_X|Y axis and too many others. -- cgit v1.2.3 From 181518b8eb4eb44aae043d584336d715b572902e Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 25 Mar 2019 16:55:27 -0500 Subject: libnvdimm/btt: Fix a kmemdup failure check [ Upstream commit 486fa92df4707b5df58d6508728bdb9321a59766 ] In case kmemdup fails, the fix releases resources and returns to avoid the NULL pointer dereference. Signed-off-by: Aditya Pakki Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/nvdimm/btt_devs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 795ad4ff35ca..e341498876ca 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -190,14 +190,15 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL; nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL); - if (nd_btt->id < 0) { - kfree(nd_btt); - return NULL; - } + if (nd_btt->id < 0) + goto out_nd_btt; nd_btt->lbasize = lbasize; - if (uuid) + if (uuid) { uuid = kmemdup(uuid, 16, GFP_KERNEL); + if (!uuid) + goto out_put_id; + } nd_btt->uuid = uuid; dev = &nd_btt->dev; dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id); @@ -212,6 +213,13 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL; } return dev; + +out_put_id: + ida_simple_remove(&nd_region->btt_ida, nd_btt->id); + +out_nd_btt: + kfree(nd_btt); + return NULL; } struct device *nd_btt_create(struct nd_region *nd_region) -- cgit v1.2.3 From 9d02fc4b7789af1875e4c0382897ac121389c580 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Fri, 22 Mar 2019 16:01:17 +0100 Subject: s390/dasd: Fix capacity calculation for large volumes [ Upstream commit 2cc9637ce825f3a9f51f8f78af7474e9e85bfa5f ] The DASD driver incorrectly limits the maximum number of blocks of ECKD DASD volumes to 32 bit numbers. Volumes with a capacity greater than 2^32-1 blocks are incorrectly recognized as smaller volumes. This results in the following volume capacity limits depending on the formatted block size: BLKSIZE MAX_GB MAX_CYL 512 2047 5843492 1024 4095 8676701 2048 8191 13634816 4096 16383 23860929 The same problem occurs when a volume with more than 17895697 cylinders is accessed in raw-track-access mode. Fix this problem by adding an explicit type cast when calculating the maximum number of blocks. Signed-off-by: Peter Oberparleiter Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- drivers/s390/block/dasd_eckd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6e294b4d3635..f89f9d02e788 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2004,14 +2004,14 @@ static int dasd_eckd_end_analysis(struct dasd_block *block) blk_per_trk = recs_per_track(&private->rdc_data, 0, block->bp_block); raw: - block->blocks = (private->real_cyl * + block->blocks = ((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk); dev_info(&device->cdev->dev, - "DASD with %d KB/block, %d KB total size, %d KB/track, " + "DASD with %u KB/block, %lu KB total size, %u KB/track, " "%s\n", (block->bp_block >> 10), - ((private->real_cyl * + (((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk * (block->bp_block >> 9)) >> 1), ((blk_per_trk * block->bp_block) >> 10), -- cgit v1.2.3 From 0c077b14bdc0b33c81c0c614f1be3352b56c0468 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Mar 2019 18:54:27 +0100 Subject: mac80211: fix unaligned access in mesh table hash function [ Upstream commit 40586e3fc400c00c11151804dcdc93f8c831c808 ] The pointer to the last four bytes of the address is not guaranteed to be aligned, so we need to use __get_unaligned_cpu32 here Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 88a6d5e18ccc..ac1f5db52994 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(*(u32 *)(addr+2), seed); + return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); } static const struct rhashtable_params mesh_rht_params = { -- cgit v1.2.3 From 7c7345f1cad884ba4ad030e4cee6ee9aada6940b Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 15 Mar 2019 17:38:57 +0200 Subject: mac80211: Increase MAX_MSG_LEN [ Upstream commit 78be2d21cc1cd3069c6138dcfecec62583130171 ] Looks that 100 chars isn't enough for messages, as we keep getting warnings popping from different places due to message shortening. Instead of trying to shorten the prints, just increase the buffer size. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/trace_msg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 366b9e6f043e..40141df09f25 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright (C) 2019 Intel Corporation + */ + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -11,7 +16,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg -#define MAX_MSG_LEN 100 +#define MAX_MSG_LEN 120 DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf), -- cgit v1.2.3 From c93951de3ecb2937270a6058a0d0ce7672b2bfea Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 15 Mar 2019 17:39:00 +0200 Subject: cfg80211: Handle WMM rules in regulatory domain intersection [ Upstream commit 08a75a887ee46828b54600f4bb7068d872a5edd5 ] The support added for regulatory WMM rules did not handle the case of regulatory domain intersections. Fix it. Signed-off-by: Ilan Peer Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database") Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index dd58b9909ac9..649c89946dec 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1298,6 +1298,16 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1, return dfs_region1; } +static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1, + const struct ieee80211_wmm_ac *wmm_ac2, + struct ieee80211_wmm_ac *intersect) +{ + intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min); + intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max); + intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot); + intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn); +} + /* * Helper for regdom_intersect(), this does the real * mathematical intersection fun @@ -1312,6 +1322,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule1, *power_rule2; struct ieee80211_power_rule *power_rule; + const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2; + struct ieee80211_wmm_rule *wmm_rule; u32 freq_diff, max_bandwidth1, max_bandwidth2; freq_range1 = &rule1->freq_range; @@ -1322,6 +1334,10 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, power_rule2 = &rule2->power_rule; power_rule = &intersected_rule->power_rule; + wmm_rule1 = &rule1->wmm_rule; + wmm_rule2 = &rule2->wmm_rule; + wmm_rule = &intersected_rule->wmm_rule; + freq_range->start_freq_khz = max(freq_range1->start_freq_khz, freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, @@ -1365,6 +1381,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms, rule2->dfs_cac_ms); + if (rule1->has_wmm && rule2->has_wmm) { + u8 ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + reg_wmm_rules_intersect(&wmm_rule1->client[ac], + &wmm_rule2->client[ac], + &wmm_rule->client[ac]); + reg_wmm_rules_intersect(&wmm_rule1->ap[ac], + &wmm_rule2->ap[ac], + &wmm_rule->ap[ac]); + } + + intersected_rule->has_wmm = true; + } else if (rule1->has_wmm) { + *wmm_rule = *wmm_rule1; + intersected_rule->has_wmm = true; + } else if (rule2->has_wmm) { + *wmm_rule = *wmm_rule2; + intersected_rule->has_wmm = true; + } else { + intersected_rule->has_wmm = false; + } + if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; -- cgit v1.2.3 From 0e36c8ed2d529156e22f0d2fd796d71475e42832 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 16 Mar 2019 18:06:31 +0100 Subject: mac80211: fix memory accounting with A-MSDU aggregation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eb9b64e3a9f8483e6e54f4e03b2ae14ae5db2690 ] skb->truesize can change due to memory reallocation or when adding extra fragments. Adjust fq->memory_usage accordingly Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 928f13a208b0..714d80e48a10 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3214,6 +3214,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + int orig_truesize; __be16 len; void *data; bool ret = false; @@ -3254,6 +3255,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!head || skb_is_gso(head)) goto out; + orig_truesize = head->truesize; orig_len = head->len; if (skb->len + head->len > max_amsdu_len) @@ -3311,6 +3313,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, *frag_tail = skb; out_recalc: + fq->memory_usage += head->truesize - orig_truesize; if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len; -- cgit v1.2.3 From 8f2e6b8c9b0b4e3fc32829bfe682f4786942e41c Mon Sep 17 00:00:00 2001 From: Sunil Dutt Date: Mon, 25 Feb 2019 15:37:20 +0530 Subject: nl80211: Add NL80211_FLAG_CLEAR_SKB flag for other NL commands [ Upstream commit d6db02a88a4aaa1cd7105137c67ddec7f3bdbc05 ] This commit adds NL80211_FLAG_CLEAR_SKB flag to other NL commands that carry key data to ensure they do not stick around on heap after the SKB is freed. Also introduced this flag for NL80211_CMD_VENDOR as there are sub commands which configure the keys. Signed-off-by: Sunil Dutt Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d91a408db113..156ce708b533 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13596,7 +13596,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEAUTHENTICATE, @@ -13647,7 +13648,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, @@ -13655,7 +13657,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DISCONNECT, @@ -13684,7 +13687,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMKSA, @@ -14036,7 +14040,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_SET_QOS_MAP, @@ -14091,7 +14096,8 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_set_pmk, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK, -- cgit v1.2.3 From 7547c20fdd1c33802397be5e2452293749b558ee Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 27 Mar 2019 11:10:44 -0700 Subject: libnvdimm/security: provide fix for secure-erase to use zero-key [ Upstream commit 037c8489ade669e0f09ad40d5b91e5e1159a14b1 ] Add a zero key in order to standardize hardware that want a key of 0's to be passed. Some platforms defaults to a zero-key with security enabled rather than allow the OS to enable the security. The zero key would allow us to manage those platform as well. This also adds a fix to secure erase so it can use the zero key to do crypto erase. Some other security commands already use zero keys. This introduces a standard zero-key to allow unification of semantics cross nvdimm security commands. Signed-off-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/nvdimm/security.c | 17 ++++++++++++----- tools/testing/nvdimm/test/nfit.c | 11 +++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index f8bb746a549f..6bea6852bf27 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -22,6 +22,8 @@ static bool key_revalidate = true; module_param(key_revalidate, bool, 0444); MODULE_PARM_DESC(key_revalidate, "Require key validation at init."); +static const char zero_key[NVDIMM_PASSPHRASE_LEN]; + static void *key_data(struct key *key) { struct encrypted_key_payload *epayload = dereference_key_locked(key); @@ -286,8 +288,9 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - struct key *key; + struct key *key = NULL; int rc; + const void *data; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -319,11 +322,15 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, return -EOPNOTSUPP; } - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; + if (keyid != 0) { + key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); + if (!key) + return -ENOKEY; + data = key_data(key); + } else + data = zero_key; - rc = nvdimm->sec.ops->erase(nvdimm, key_data(key), pass_type); + rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type); dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key), pass_type == NVDIMM_MASTER ? "(master)" : "(user)", rc == 0 ? "success" : "fail"); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index b579f962451d..cad719876ef4 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -225,6 +225,8 @@ static struct workqueue_struct *nfit_wq; static struct gen_pool *nfit_pool; +static const char zero_key[NVDIMM_PASSPHRASE_LEN]; + static struct nfit_test *to_nfit_test(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -1059,8 +1061,7 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, struct device *dev = &t->pdev.dev; struct nfit_test_sec *sec = &dimm_sec_info[dimm]; - if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) || - (sec->state & ND_INTEL_SEC_STATE_FROZEN)) { + if (sec->state & ND_INTEL_SEC_STATE_FROZEN) { nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; dev_dbg(dev, "secure erase: wrong security state\n"); } else if (memcmp(nd_cmd->passphrase, sec->passphrase, @@ -1068,6 +1069,12 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; dev_dbg(dev, "secure erase: wrong passphrase\n"); } else { + if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) + && (memcmp(nd_cmd->passphrase, zero_key, + ND_INTEL_PASSPHRASE_SIZE) != 0)) { + dev_dbg(dev, "invalid zero key\n"); + return 0; + } memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); memset(sec->master_passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); sec->state = 0; -- cgit v1.2.3 From 40af621fefdbea783e27ace03e12dfd035178797 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 4 Apr 2019 10:58:01 +0800 Subject: libnvdimm/pmem: fix a possible OOB access when read and write pmem [ Upstream commit 9dc6488e84b0f64df17672271664752488cd6a25 ] If offset is not zero and length is bigger than PAGE_SIZE, this will cause to out of boundary access to a page memory Fixes: 98cc093cba1e ("block, THP: make block_device_operations.rw_page support THP") Co-developed-by: Liang ZhiCheng Signed-off-by: Liang ZhiCheng Signed-off-by: Li RongQing Reviewed-by: Ira Weiny Reviewed-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- drivers/nvdimm/pmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index bc2f700feef8..0279eb1da3ef 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -113,13 +113,13 @@ static void write_pmem(void *pmem_addr, struct page *page, while (len) { mem = kmap_atomic(page); - chunk = min_t(unsigned int, len, PAGE_SIZE); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); memcpy_flushcache(pmem_addr, mem + off, chunk); kunmap_atomic(mem); len -= chunk; off = 0; page++; - pmem_addr += PAGE_SIZE; + pmem_addr += chunk; } } @@ -132,7 +132,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, while (len) { mem = kmap_atomic(page); - chunk = min_t(unsigned int, len, PAGE_SIZE); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); kunmap_atomic(mem); if (rem) @@ -140,7 +140,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, len -= chunk; off = 0; page++; - pmem_addr += PAGE_SIZE; + pmem_addr += chunk; } return BLK_STS_OK; } -- cgit v1.2.3 From adefea883c1f71e1613a3b49233c0f60da188c29 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 11 Mar 2019 12:47:14 -0700 Subject: tools/testing/nvdimm: Retain security state after overwrite [ Upstream commit 2170a0d53bee1a6c1a4ebd042f99d85aafc6c0ea ] Overwrite retains the security state after completion of operation. Fix nfit_test to reflect this so that the kernel can test the behavior it is more likely to see in practice. Fixes: 926f74802cb1 ("tools/testing/nvdimm: Add overwrite support for nfit_test") Signed-off-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- tools/testing/nvdimm/test/nfit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cad719876ef4..85ffdcfa596b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -146,6 +146,7 @@ static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; struct nfit_test_sec { u8 state; u8 ext_state; + u8 old_state; u8 passphrase[32]; u8 master_passphrase[32]; u64 overwrite_end_time; @@ -1100,7 +1101,7 @@ static int nd_intel_test_cmd_overwrite(struct nfit_test *t, return 0; } - memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + sec->old_state = sec->state; sec->state = ND_INTEL_SEC_STATE_OVERWRITE; dev_dbg(dev, "overwrite progressing.\n"); sec->overwrite_end_time = get_jiffies_64() + 5 * HZ; @@ -1122,7 +1123,8 @@ static int nd_intel_test_cmd_query_overwrite(struct nfit_test *t, if (time_is_before_jiffies64(sec->overwrite_end_time)) { sec->overwrite_end_time = 0; - sec->state = 0; + sec->state = sec->old_state; + sec->old_state = 0; sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; dev_dbg(dev, "overwrite is complete\n"); } else -- cgit v1.2.3 From 1e1bdaca091e1d47d31c39cae13e22050721658a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 3 Apr 2019 09:13:34 +0200 Subject: s390/3270: fix lockdep false positive on view->lock [ Upstream commit 5712f3301a12c0c3de9cc423484496b0464f2faf ] The spinlock in the raw3270_view structure is used by con3270, tty3270 and fs3270 in different ways. For con3270 the lock can be acquired in irq context, for tty3270 and fs3270 the highest context is bh. Lockdep sees the view->lock as a single class and if the 3270 driver is used for the console the following message is generated: WARNING: inconsistent lock state 5.1.0-rc3-05157-g5c168033979d #12 Not tainted -------------------------------- inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. swapper/0/1 [HC0[0]:SC1[1]:HE1:SE0] takes: (____ptrval____) (&(&view->lock)->rlock){?.-.}, at: tty3270_update+0x7c/0x330 Introduce a lockdep subclass for the view lock to distinguish bh from irq locks. Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- drivers/s390/char/con3270.c | 2 +- drivers/s390/char/fs3270.c | 3 ++- drivers/s390/char/raw3270.c | 3 ++- drivers/s390/char/raw3270.h | 4 +++- drivers/s390/char/tty3270.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index fd2146bcc0ad..e17364e13d2f 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -629,7 +629,7 @@ con3270_init(void) (void (*)(unsigned long)) con3270_read_tasklet, (unsigned long) condev->read); - raw3270_add_view(&condev->view, &con3270_fn, 1); + raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ); INIT_LIST_HEAD(&condev->freemem); for (i = 0; i < CON3270_STRING_PAGES; i++) { diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 8f3a2eeb28dc..8b48ba9c598e 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -463,7 +463,8 @@ fs3270_open(struct inode *inode, struct file *filp) init_waitqueue_head(&fp->wait); fp->fs_pid = get_pid(task_pid(current)); - rc = raw3270_add_view(&fp->view, &fs3270_fn, minor); + rc = raw3270_add_view(&fp->view, &fs3270_fn, minor, + RAW3270_VIEW_LOCK_BH); if (rc) { fs3270_free_view(&fp->view); goto out; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index f8cd2935fbfd..63a41b168761 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -920,7 +920,7 @@ raw3270_deactivate_view(struct raw3270_view *view) * Add view to device with minor "minor". */ int -raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) +raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor, int subclass) { unsigned long flags; struct raw3270 *rp; @@ -942,6 +942,7 @@ raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) view->cols = rp->cols; view->ascebc = rp->ascebc; spin_lock_init(&view->lock); + lockdep_set_subclass(&view->lock, subclass); list_add(&view->list, &rp->view_list); rc = 0; spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags); diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 114ca7cbf889..3afaa35f7351 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -150,6 +150,8 @@ struct raw3270_fn { struct raw3270_view { struct list_head list; spinlock_t lock; +#define RAW3270_VIEW_LOCK_IRQ 0 +#define RAW3270_VIEW_LOCK_BH 1 atomic_t ref_count; struct raw3270 *dev; struct raw3270_fn *fn; @@ -158,7 +160,7 @@ struct raw3270_view { unsigned char *ascebc; /* ascii -> ebcdic table */ }; -int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int); +int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int); int raw3270_activate_view(struct raw3270_view *); void raw3270_del_view(struct raw3270_view *); void raw3270_deactivate_view(struct raw3270_view *); diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 2b0c36c2c568..98d7fc152e32 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -980,7 +980,8 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty) return PTR_ERR(tp); rc = raw3270_add_view(&tp->view, &tty3270_fn, - tty->index + RAW3270_FIRSTMINOR); + tty->index + RAW3270_FIRSTMINOR, + RAW3270_VIEW_LOCK_BH); if (rc) { tty3270_free_view(tp); return rc; -- cgit v1.2.3 From 6661203b4210f274f1997f6f6671c03a15ebf093 Mon Sep 17 00:00:00 2001 From: Lin Yi Date: Wed, 10 Apr 2019 10:23:34 +0800 Subject: drm/ttm: fix dma_fence refcount imbalance on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 543c364d8eeeb42c0edfaac9764f4e9f3d777ec1 ] the ttm_bo_add_move_fence takes a reference to the struct dma_fence, but failed to release it on the error path, leading to a memory leak. add dma_fence_put before return when error occur. Signed-off-by: Lin Yi Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/ttm/ttm_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 996cadd83f24..d8e1b3f12904 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -881,8 +881,10 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, reservation_object_add_shared_fence(bo->resv, fence); ret = reservation_object_reserve_shared(bo->resv, 1); - if (unlikely(ret)) + if (unlikely(ret)) { + dma_fence_put(fence); return ret; + } dma_fence_put(bo->moving); bo->moving = fence; -- cgit v1.2.3 From 44d7638b39349222d97ac3ba197fef9650e75444 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Tue, 26 Mar 2019 13:14:11 -0400 Subject: drm/amd/display: extending AUX SW Timeout [ Upstream commit f4bbebf8e7eb4d294b040ab2d2ba71e70e69b930 ] [Why] AUX takes longer to reply when using active DP-DVI dongle on some asics resulting in up to 2000+ us edid read (timeout). [How] 1. Adjust AUX poll to match spec 2. Extend the SW timeout. This does not affect normal operation since we exit the loop as soon as AUX acks. Signed-off-by: Martin Leung Reviewed-by: Jun Lei Acked-by: Joshua Aberback Acked-by: Leo Li Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 9 ++++++--- drivers/gpu/drm/amd/display/dc/dce/dce_aux.h | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index aaeb7faac0c4..e0fff5744b5f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -189,6 +189,12 @@ static void submit_channel_request( 1, 0); } + + REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); + + REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, + 10, aux110->timeout_period/10); + /* set the delay and the number of bytes to write */ /* The length include @@ -241,9 +247,6 @@ static void submit_channel_request( } } - REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); - REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, - 10, aux110->timeout_period/10); REG_UPDATE(AUX_SW_CONTROL, AUX_SW_GO, 1); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index f7caab85dc80..2c6f50b4245a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -69,11 +69,11 @@ enum { /* This is the timeout as defined in DP 1.2a, * at most within ~240usec. That means, * increasing this timeout will not affect normal operation, * and we'll timeout after - * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 1600usec. + * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 2400usec. * This timeout is especially important for - * resume from S3 and CTS. + * converters, resume from S3, and CTS. */ - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 4 + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 6 }; struct aux_engine_dce110 { struct aux_engine base; -- cgit v1.2.3 From 641a9b94456d262e039402c0583d19f3ab731b8e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Mar 2019 14:24:48 +0100 Subject: clocksource/drivers/npcm: select TIMER_OF [ Upstream commit 99834eead2a04e93a120abb112542b87c42ff5e1 ] When this is disabled, we get a link failure: drivers/clocksource/timer-npcm7xx.o: In function `npcm7xx_timer_init': timer-npcm7xx.c:(.init.text+0xf): undefined reference to `timer_of_init' Fixes: 1c00289ecd12 ("clocksource/drivers/npcm: Add NPCM7xx timer driver") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 8dfd3bc448d0..9df90daa9c03 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -144,6 +144,7 @@ config VT8500_TIMER config NPCM7XX_TIMER bool "NPCM7xx timer driver" if COMPILE_TEST depends on HAS_IOMEM + select TIMER_OF select CLKSRC_MMIO help Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture, -- cgit v1.2.3 From b1e68de7659a8be67b56b264b7d833704146c1c4 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 12 Mar 2019 11:32:56 +0100 Subject: clocksource/drivers/oxnas: Fix OX820 compatible [ Upstream commit fbc87aa0f7c429999dc31f1bac3b2615008cac32 ] The OX820 compatible is wrong is the driver, fix it. Fixes: 2ea3401e2a84 ("clocksource/drivers/oxnas: Add OX820 compatible") Reported-by: Daniel Golle Signed-off-by: Neil Armstrong Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/timer-oxnas-rps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c index eed6feff8b5f..30c6f4ce672b 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -296,4 +296,4 @@ err_alloc: TIMER_OF_DECLARE(ox810se_rps, "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); TIMER_OF_DECLARE(ox820_rps, - "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init); + "oxsemi,ox820-rps-timer", oxnas_rps_timer_init); -- cgit v1.2.3 From bf4b7bc690e32e900a475fc195c242686e13ce28 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 9 Apr 2019 14:23:10 -0700 Subject: selftests: fib_tests: Fix 'Command line is not complete' errors [ Upstream commit a5f622984a623df9a84cf43f6b098d8dd76fbe05 ] A couple of tests are verifying a route has been removed. The helper expects the prefix as the first part of the expected output. When checking that a route has been deleted the prefix is empty leading to an invalid ip command: $ ip ro ls match Command line is not complete. Try option "help" Fix by moving the comparison of expected output and output to a new function that is used by both check_route and check_route6. Use the new helper for the 2 checks on route removal. Also, remove the reset of 'set -x' in route_setup which overrides the user managed setting. Fixes: d69faad76584c ("selftests: fib_tests: Add prefix route tests with metric") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/fib_tests.sh | 94 ++++++++++++++------------------ 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 1080ff55a788..0d2a5f4f1e63 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -605,6 +605,39 @@ run_cmd() return $rc } +check_expected() +{ + local out="$1" + local expected="$2" + local rc=0 + + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route6() @@ -652,31 +685,7 @@ check_route6() pfx=$1 out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } route_cleanup() @@ -725,7 +734,7 @@ route_setup() ip -netns ns2 addr add 172.16.103.2/24 dev veth4 ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 - set +ex + set +e } # assumption is that basic add of a single path route works @@ -960,7 +969,8 @@ ipv6_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route6 "" + out=$($IP -6 ro ls match 2001:db8:104::/64) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" @@ -1091,38 +1101,13 @@ check_route() local pfx local expected="$1" local out - local rc=0 set -- $expected pfx=$1 [ "${pfx}" = "unreachable" ] && pfx=$2 out=$($IP ro ls match ${pfx}) - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } # assumption is that basic add of a single path route works @@ -1387,7 +1372,8 @@ ipv4_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route "" + out=$($IP ro ls match 172.16.104.0/24) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" -- cgit v1.2.3 From 2d56b3f53ac8d9310a0ac20af663e1a44050f868 Mon Sep 17 00:00:00 2001 From: wentalou Date: Fri, 12 Apr 2019 15:01:14 +0800 Subject: drm/amdgpu: shadow in shadow_list without tbo.mem.start cause page fault in sriov TDR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b575f10dbd6f84c2c8744ff1f486bfae1e4f6f38 ] shadow was added into shadow_list by amdgpu_bo_create_shadow. meanwhile, shadow->tbo.mem was not fully configured. tbo.mem would be fully configured by amdgpu_vm_sdma_map_table until calling amdgpu_vm_clear_bo. If sriov TDR occurred between amdgpu_bo_create_shadow and amdgpu_vm_sdma_map_table, amdgpu_device_recover_vram would deal with shadow without tbo.mem.start. Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d55dd570a702..27baac26d8e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3150,6 +3150,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) /* No need to recover an evicted BO */ if (shadow->tbo.mem.mem_type != TTM_PL_TT || + shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) continue; -- cgit v1.2.3 From 4230787c06558620288077c6d89dba88b2283ad3 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:52:36 +0900 Subject: mISDN: Check address length before reading address family [ Upstream commit 238ffdc49ef98b15819cfd5e3fb23194e3ea3d39 ] KMSAN will complain if valid address length passed to bind() is shorter than sizeof("struct sockaddr_mISDN"->family) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/mISDN/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 15d3ca37669a..04da3a17cd95 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -710,10 +710,10 @@ base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct sock *sk = sock->sk; int err = 0; - if (!maddr || maddr->family != AF_ISDN) + if (addr_len < sizeof(struct sockaddr_mISDN)) return -EINVAL; - if (addr_len < sizeof(struct sockaddr_mISDN)) + if (!maddr || maddr->family != AF_ISDN) return -EINVAL; lock_sock(sk); -- cgit v1.2.3 From 40990109952a3e7704f11e43abea639c0ebd5bad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 14:45:12 +0100 Subject: vxge: fix return of a free'd memblock on a failed dma mapping [ Upstream commit 0a2c34f18c94b596562bf3d019fceab998b8b584 ] Currently if a pci dma mapping failure is detected a free'd memblock address is returned rather than a NULL (that indicates an error). Fix this by ensuring NULL is returned on this error case. Addresses-Coverity: ("Use after free") Fixes: 528f727279ae ("vxge: code cleanup and reorganization") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/neterion/vxge/vxge-config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 7cde387e5ec6..51cd57ab3d95 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -2366,6 +2366,7 @@ static void *__vxge_hw_blockpool_malloc(struct __vxge_hw_device *devh, u32 size, dma_object->addr))) { vxge_os_dma_free(devh->pdev, memblock, &dma_object->acc_handle); + memblock = NULL; goto exit; } -- cgit v1.2.3 From f4faab6c903429c14b1a3590993483626662cb86 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 15:13:27 +0100 Subject: qede: fix write to free'd pointer error and double free of ptp [ Upstream commit 1dc2b3d65523780ed1972d446c76e62e13f3e8f5 ] The err2 error return path calls qede_ptp_disable that cleans up on an error and frees ptp. After this, the free'd ptp is dereferenced when ptp->clock is set to NULL and the code falls-through to error path err1 that frees ptp again. Fix this by calling qede_ptp_disable and exiting via an error return path that does not set ptp->clock or kfree ptp. Addresses-Coverity: ("Write to pointer after free") Fixes: 035744975aec ("qede: Add support for PTP resource locking.") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 5f3f42a25361..bddb2b5982dc 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -490,18 +490,17 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc) ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev); if (IS_ERR(ptp->clock)) { - rc = -EINVAL; DP_ERR(edev, "PTP clock registration failed\n"); + qede_ptp_disable(edev); + rc = -EINVAL; goto err2; } return 0; -err2: - qede_ptp_disable(edev); - ptp->clock = NULL; err1: kfree(ptp); +err2: edev->ptp = NULL; return rc; -- cgit v1.2.3 From 35d71b00267edda6eb3f8c989a600c803fb60e7b Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Sat, 13 Apr 2019 08:37:37 +0100 Subject: afs: Unlock pages for __pagevec_release() [ Upstream commit 21bd68f196ca91fc0f3d9bd1b32f6e530e8c1c88 ] __pagevec_release() complains loudly if any page in the vector is still locked. The pages need to be locked for generic_error_remove_page(), but that function doesn't actually unlock them. Unlock the pages afterwards. Signed-off-by: Marc Dionne Signed-off-by: David Howells Tested-by: Jonathan Billings Signed-off-by: Sasha Levin --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index 72efcfcf9f95..0122d7445fba 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -264,6 +264,7 @@ static void afs_kill_pages(struct address_space *mapping, first = page->index + 1; lock_page(page); generic_error_remove_page(mapping, page); + unlock_page(page); } __pagevec_release(&pv); -- cgit v1.2.3 From 775e0e6132504bf36cbe1848626b191a234af840 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 13 Apr 2019 08:37:37 +0100 Subject: afs: Fix in-progess ops to ignore server-level callback invalidation [ Upstream commit eeba1e9cf31d064284dd1fa7bd6cfe01395bd03d ] The in-kernel afs filesystem client counts the number of server-level callback invalidation events (CB.InitCallBackState* RPC operations) that it receives from the server. This is stored in cb_s_break in various structures, including afs_server and afs_vnode. If an inode is examined by afs_validate(), say, the afs_server copy is compared, along with other break counters, to those in afs_vnode, and if one or more of the counters do not match, it is considered that the server's callback promise is broken. At points where this happens, AFS_VNODE_CB_PROMISED is cleared to indicate that the status must be refetched from the server. afs_validate() issues an FS.FetchStatus operation to get updated metadata - and based on the updated data_version may invalidate the pagecache too. However, the break counters are also used to determine whether to note a new callback in the vnode (which would set the AFS_VNODE_CB_PROMISED flag) and whether to cache the permit data included in the YFSFetchStatus record by the server. The problem comes when the server sends us a CB.InitCallBackState op. The first such instance doesn't cause cb_s_break to be incremented, but rather causes AFS_SERVER_FL_NEW to be cleared - but thereafter, say some hours after last use and all the volumes have been automatically unmounted and the server has forgotten about the client[*], this *will* likely cause an increment. [*] There are other circumstances too, such as the server restarting or needing to make space in its callback table. Note that the server won't send us a CB.InitCallBackState op until we talk to it again. So what happens is: (1) A mount for a new volume is attempted, a inode is created for the root vnode and vnode->cb_s_break and AFS_VNODE_CB_PROMISED aren't set immediately, as we don't have a nominated server to talk to yet - and we may iterate through a few to find one. (2) Before the operation happens, afs_fetch_status(), say, notes in the cursor (fc.cb_break) the break counter sum from the vnode, volume and server counters, but the server->cb_s_break is currently 0. (3) We send FS.FetchStatus to the server. The server sends us back CB.InitCallBackState. We increment server->cb_s_break. (4) Our FS.FetchStatus completes. The reply includes a callback record. (5) xdr_decode_AFSCallBack()/xdr_decode_YFSCallBack() check to see whether the callback promise was broken by checking the break counter sum from step (2) against the current sum. This fails because of step (3), so we don't set the callback record and, importantly, don't set AFS_VNODE_CB_PROMISED on the vnode. This does not preclude the syscall from progressing, and we don't loop here rechecking the status, but rather assume it's good enough for one round only and will need to be rechecked next time. (6) afs_validate() it triggered on the vnode, probably called from d_revalidate() checking the parent directory. (7) afs_validate() notes that AFS_VNODE_CB_PROMISED isn't set, so doesn't update vnode->cb_s_break and assumes the vnode to be invalid. (8) afs_validate() needs to calls afs_fetch_status(). Go back to step (2) and repeat, every time the vnode is validated. This primarily affects volume root dir vnodes. Everything subsequent to those inherit an already incremented cb_s_break upon mounting. The issue is that we assume that the callback record and the cached permit information in a reply from the server can't be trusted after getting a server break - but this is wrong since the server makes sure things are done in the right order, holding up our ops if necessary[*]. [*] There is an extremely unlikely scenario where a reply from before the CB.InitCallBackState could get its delivery deferred till after - at which point we think we have a promise when we don't. This, however, requires unlucky mass packet loss to one call. AFS_SERVER_FL_NEW tries to paper over the cracks for the initial mount from a server we've never contacted before, but this should be unnecessary. It's also further insulated from the problem on an initial mount by querying the server first with FS.GetCapabilities, which triggers the CB.InitCallBackState. Fix this by (1) Remove AFS_SERVER_FL_NEW. (2) In afs_calc_vnode_cb_break(), don't include cb_s_break in the calculation. (3) In afs_cb_is_broken(), don't include cb_s_break in the check. Signed-off-by: David Howells Signed-off-by: Sasha Levin --- fs/afs/callback.c | 3 +-- fs/afs/internal.h | 4 +--- fs/afs/server.c | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 1c7955f5cdaf..128f2dbe256a 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -203,8 +203,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) */ void afs_init_callback_state(struct afs_server *server) { - if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags)) - server->cb_s_break++; + server->cb_s_break++; } /* diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8871b9e8645f..465526f495b0 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -475,7 +475,6 @@ struct afs_server { time64_t put_time; /* Time at which last put */ time64_t update_at; /* Time at which to next update the record */ unsigned long flags; -#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */ #define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */ #define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */ #define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ @@ -828,7 +827,7 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) { - return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break; + return vnode->cb_break + vnode->cb_v_break; } static inline bool afs_cb_is_broken(unsigned int cb_break, @@ -836,7 +835,6 @@ static inline bool afs_cb_is_broken(unsigned int cb_break, const struct afs_cb_interest *cbi) { return !cbi || cb_break != (vnode->cb_break + - cbi->server->cb_s_break + vnode->volume->cb_v_break); } diff --git a/fs/afs/server.c b/fs/afs/server.c index 642afa2e9783..65b33b6da48b 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -226,7 +226,6 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, RCU_INIT_POINTER(server->addresses, alist); server->addr_version = alist->version; server->uuid = *uuid; - server->flags = (1UL << AFS_SERVER_FL_NEW); server->update_at = ktime_get_real_seconds() + afs_server_update_delay; rwlock_init(&server->fs_lock); INIT_HLIST_HEAD(&server->cb_volumes); -- cgit v1.2.3 From 2de1573a5ed9640c7ac95e5f3d0c4dc7c3e3e31d Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:05 +0300 Subject: qed: Delete redundant doorbell recovery types [ Upstream commit 9ac6bb1414ac0d45fe9cefbd1f5b06f0e1a3c98a ] DB_REC_DRY_RUN (running doorbell recovery without sending doorbells) is never used. DB_REC_ONCE (send a single doorbell from the doorbell recovery) is not needed anymore because by running the periodic handler we make sure we check the overflow status later instead. This patch is needed because in the next patches, the only doorbell recovery type being used is DB_REC_REAL_DEAL, and the fixes are much cleaner without this enum. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed.h | 3 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 69 ++++++++++++------------------- drivers/net/ethernet/qlogic/qed/qed_int.c | 6 +-- drivers/net/ethernet/qlogic/qed/qed_int.h | 4 +- 4 files changed, 31 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 2d8a77cc156b..d5fece7eb169 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -918,8 +918,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); /* doorbell recovery mechanism */ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn); -void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, - enum qed_db_rec_exec db_exec); +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn); bool qed_edpm_enabled(struct qed_hwfn *p_hwfn); /* Other Linux specific common definitions */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 2ecaaaa4469a..ff0bbf8d073d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -300,26 +300,19 @@ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn) /* Ring the doorbell of a single doorbell recovery entry */ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, - struct qed_db_recovery_entry *db_entry, - enum qed_db_rec_exec db_exec) -{ - if (db_exec != DB_REC_ONCE) { - /* Print according to width */ - if (db_entry->db_width == DB_REC_WIDTH_32B) { - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "%s doorbell address %p data %x\n", - db_exec == DB_REC_DRY_RUN ? - "would have rung" : "ringing", - db_entry->db_addr, - *(u32 *)db_entry->db_data); - } else { - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "%s doorbell address %p data %llx\n", - db_exec == DB_REC_DRY_RUN ? - "would have rung" : "ringing", - db_entry->db_addr, - *(u64 *)(db_entry->db_data)); - } + struct qed_db_recovery_entry *db_entry) +{ + /* Print according to width */ + if (db_entry->db_width == DB_REC_WIDTH_32B) { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "ringing doorbell address %p data %x\n", + db_entry->db_addr, + *(u32 *)db_entry->db_data); + } else { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "ringing doorbell address %p data %llx\n", + db_entry->db_addr, + *(u64 *)(db_entry->db_data)); } /* Sanity */ @@ -334,14 +327,12 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, wmb(); /* Ring the doorbell */ - if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) { - if (db_entry->db_width == DB_REC_WIDTH_32B) - DIRECT_REG_WR(db_entry->db_addr, - *(u32 *)(db_entry->db_data)); - else - DIRECT_REG_WR64(db_entry->db_addr, - *(u64 *)(db_entry->db_data)); - } + if (db_entry->db_width == DB_REC_WIDTH_32B) + DIRECT_REG_WR(db_entry->db_addr, + *(u32 *)(db_entry->db_data)); + else + DIRECT_REG_WR64(db_entry->db_addr, + *(u64 *)(db_entry->db_data)); /* Flush the write combined buffer. Next doorbell may come from a * different entity to the same address... @@ -350,29 +341,21 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, } /* Traverse the doorbell recovery entry list and ring all the doorbells */ -void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, - enum qed_db_rec_exec db_exec) +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn) { struct qed_db_recovery_entry *db_entry = NULL; - if (db_exec != DB_REC_ONCE) { - DP_NOTICE(p_hwfn, - "Executing doorbell recovery. Counter was %d\n", - p_hwfn->db_recovery_info.db_recovery_counter); + DP_NOTICE(p_hwfn, "Executing doorbell recovery. Counter was %d\n", + p_hwfn->db_recovery_info.db_recovery_counter); - /* Track amount of times recovery was executed */ - p_hwfn->db_recovery_info.db_recovery_counter++; - } + /* Track amount of times recovery was executed */ + p_hwfn->db_recovery_info.db_recovery_counter++; /* Protect the list */ spin_lock_bh(&p_hwfn->db_recovery_info.lock); list_for_each_entry(db_entry, - &p_hwfn->db_recovery_info.list, list_entry) { - qed_db_recovery_ring(p_hwfn, db_entry, db_exec); - if (db_exec == DB_REC_ONCE) - break; - } - + &p_hwfn->db_recovery_info.list, list_entry) + qed_db_recovery_ring(p_hwfn, db_entry); spin_unlock_bh(&p_hwfn->db_recovery_info.lock); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 92340919d852..b994f81eb51c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -409,10 +409,8 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow); - if (!overflow) { - qed_db_recovery_execute(p_hwfn, DB_REC_ONCE); + if (!overflow) return 0; - } if (qed_edpm_enabled(p_hwfn)) { rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); @@ -427,7 +425,7 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); /* Repeat all last doorbells (doorbell drop recovery) */ - qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL); + qed_db_recovery_execute(p_hwfn); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index d81a62ebd524..df26bf333893 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -192,8 +192,8 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev); /** * @brief - Doorbell Recovery handler. - * Run DB_REAL_DEAL doorbell recovery in case of PF overflow - * (and flush DORQ if needed), otherwise run DB_REC_ONCE. + * Run doorbell recovery in case of PF overflow (and flush DORQ if + * needed). * * @param p_hwfn * @param p_ptt -- cgit v1.2.3 From deb2cc51dd5221abfe2f320625d3a388a6bf2a9f Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:06 +0300 Subject: qed: Fix the doorbell address sanity check [ Upstream commit b61b04ad81d5f975349d66abbecabf96ba211140 ] Fix the condition which verifies that doorbell address is inside the doorbell bar by checking that the end of the address is within range as well. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index ff0bbf8d073d..228891e459bc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -102,11 +102,15 @@ static void qed_db_recovery_dp_entry(struct qed_hwfn *p_hwfn, /* Doorbell address sanity (address within doorbell bar range) */ static bool qed_db_rec_sanity(struct qed_dev *cdev, - void __iomem *db_addr, void *db_data) + void __iomem *db_addr, + enum qed_db_rec_width db_width, + void *db_data) { + u32 width = (db_width == DB_REC_WIDTH_32B) ? 32 : 64; + /* Make sure doorbell address is within the doorbell bar */ if (db_addr < cdev->doorbells || - (u8 __iomem *)db_addr > + (u8 __iomem *)db_addr + width > (u8 __iomem *)cdev->doorbells + cdev->db_size) { WARN(true, "Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n", @@ -159,7 +163,7 @@ int qed_db_recovery_add(struct qed_dev *cdev, } /* Sanitize doorbell address */ - if (!qed_db_rec_sanity(cdev, db_addr, db_data)) + if (!qed_db_rec_sanity(cdev, db_addr, db_width, db_data)) return -EINVAL; /* Obtain hwfn from doorbell address */ @@ -205,10 +209,6 @@ int qed_db_recovery_del(struct qed_dev *cdev, return 0; } - /* Sanitize doorbell address */ - if (!qed_db_rec_sanity(cdev, db_addr, db_data)) - return -EINVAL; - /* Obtain hwfn from doorbell address */ p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr); @@ -317,7 +317,7 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, /* Sanity */ if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr, - db_entry->db_data)) + db_entry->db_width, db_entry->db_data)) return; /* Flush the write combined buffer. Since there are multiple doorbelling -- cgit v1.2.3 From 47ef1bab8d1dc1e0e9598cd0390587e84eb4be69 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:07 +0300 Subject: qed: Fix missing DORQ attentions [ Upstream commit d4476b8a6151b2dd86c09b5acec64f66430db55d ] When the DORQ (doorbell block) is overflowed, all PFs get attentions at the same time. If one PF finished handling the attention before another PF even started, the second PF might miss the DORQ's attention bit and not handle the attention at all. If the DORQ attention is missed and the issue is not resolved, another attention will not be sent, therefore each attention is treated as a potential DORQ attention. As a result, the attention callback is called more frequently so the debug print was moved to reduce its quantity. The number of periodic doorbell recovery handler schedules was reduced because it was the previous way to mitigating the missed attention issue. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed.h | 1 + drivers/net/ethernet/qlogic/qed/qed_int.c | 20 ++++++++++++++++++-- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index d5fece7eb169..07ae600d0f35 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -436,6 +436,7 @@ struct qed_db_recovery_info { /* Lock to protect the doorbell recovery mechanism list */ spinlock_t lock; + bool dorq_attn; u32 db_recovery_counter; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index b994f81eb51c..00688f4c0464 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -436,17 +436,19 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; int rc; - int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); - DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts); + p_hwfn->db_recovery_info.dorq_attn = true; /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If * This PF also requires overflow recovery we will be interrupted again. * The masked almost full indication may also be set. Ignoring. */ + int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) return 0; + DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts); + /* check if db_drop or overflow happened */ if (int_sts & (DORQ_REG_INT_STS_DB_DROP | DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) { @@ -503,6 +505,17 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) return -EINVAL; } +static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn) +{ + if (p_hwfn->db_recovery_info.dorq_attn) + goto out; + + /* Call DORQ callback if the attention was missed */ + qed_dorq_attn_cb(p_hwfn); +out: + p_hwfn->db_recovery_info.dorq_attn = false; +} + /* Instead of major changes to the data-structure, we have a some 'special' * identifiers for sources that changed meaning between adapters. */ @@ -1076,6 +1089,9 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, } } + /* Handle missed DORQ attention */ + qed_dorq_attn_handler(p_hwfn); + /* Clear IGU indication for the deasserted bits */ DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_IGU_CMD + diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6adf5bda9811..26bfcbeebc4c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -966,7 +966,7 @@ static void qed_update_pf_params(struct qed_dev *cdev, } } -#define QED_PERIODIC_DB_REC_COUNT 100 +#define QED_PERIODIC_DB_REC_COUNT 10 #define QED_PERIODIC_DB_REC_INTERVAL_MS 100 #define QED_PERIODIC_DB_REC_INTERVAL \ msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS) -- cgit v1.2.3 From e3a41f93e3376dcf83c7192aa7aa55f8ca786668 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:08 +0300 Subject: qed: Fix the DORQ's attentions handling [ Upstream commit 0d72c2ac89185f179da1e8a91c40c82f3fa38f0b ] Separate the overflow handling from the hardware interrupt status analysis. The interrupt status is a single register and is common for all PFs. The first PF reading the register is not necessarily the one who overflowed. All PFs must check their overflow status on every attention. In this change we clear the sticky indication in the attention handler to allow doorbells to be processed again as soon as possible, but running the doorbell recovery is scheduled for the periodic handler to reduce the time spent in the attention handler. Checking the need for DORQ flush was changed to "db_bar_no_edpm" because qed_edpm_enabled()'s result could change dynamically and might have prevented a needed flush. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed.h | 3 ++ drivers/net/ethernet/qlogic/qed/qed_int.c | 61 +++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 07ae600d0f35..f458c9776a89 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -431,6 +431,8 @@ struct qed_qm_info { u8 num_pf_rls; }; +#define QED_OVERFLOW_BIT 1 + struct qed_db_recovery_info { struct list_head list; @@ -438,6 +440,7 @@ struct qed_db_recovery_info { spinlock_t lock; bool dorq_attn; u32 db_recovery_counter; + unsigned long overflow; }; struct storm_stats { diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 00688f4c0464..a7e95f239317 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -376,6 +376,9 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn, u32 count = QED_DB_REC_COUNT; u32 usage = 1; + /* Flush any pending (e)dpms as they may never arrive */ + qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1); + /* wait for usage to zero or count to run out. This is necessary since * EDPM doorbell transactions can take multiple 64b cycles, and as such * can "split" over the pci. Possibly, the doorbell drop can happen with @@ -404,23 +407,24 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn, int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 overflow; + u32 attn_ovfl, cur_ovfl; int rc; - overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); - DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow); - if (!overflow) + attn_ovfl = test_and_clear_bit(QED_OVERFLOW_BIT, + &p_hwfn->db_recovery_info.overflow); + cur_ovfl = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); + if (!cur_ovfl && !attn_ovfl) return 0; - if (qed_edpm_enabled(p_hwfn)) { + DP_NOTICE(p_hwfn, "PF Overflow sticky: attn %u current %u\n", + attn_ovfl, cur_ovfl); + + if (cur_ovfl && !p_hwfn->db_bar_no_edpm) { rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); if (rc) return rc; } - /* Flush any pending (e)dpm as they may never arrive */ - qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1); - /* Release overflow sticky indication (stop silently dropping everything) */ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); @@ -430,13 +434,35 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return 0; } -static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) +static void qed_dorq_attn_overflow(struct qed_hwfn *p_hwfn) { - u32 int_sts, first_drop_reason, details, address, all_drops_reason; struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; + u32 overflow; int rc; - p_hwfn->db_recovery_info.dorq_attn = true; + overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); + if (!overflow) + goto out; + + /* Run PF doorbell recovery in next periodic handler */ + set_bit(QED_OVERFLOW_BIT, &p_hwfn->db_recovery_info.overflow); + + if (!p_hwfn->db_bar_no_edpm) { + rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); + if (rc) + goto out; + } + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); +out: + /* Schedule the handler even if overflow was not detected */ + qed_periodic_db_rec_start(p_hwfn); +} + +static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) +{ + u32 int_sts, first_drop_reason, details, address, all_drops_reason; + struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If @@ -475,11 +501,6 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4, first_drop_reason, all_drops_reason); - rc = qed_db_rec_handler(p_hwfn, p_ptt); - qed_periodic_db_rec_start(p_hwfn); - if (rc) - return rc; - /* Clear the doorbell drop details and prepare for next drop */ qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0); @@ -505,6 +526,14 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) return -EINVAL; } +static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) +{ + p_hwfn->db_recovery_info.dorq_attn = true; + qed_dorq_attn_overflow(p_hwfn); + + return qed_dorq_attn_int_sts(p_hwfn); +} + static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn) { if (p_hwfn->db_recovery_info.dorq_attn) -- cgit v1.2.3 From 7a96a56eea045e1178ce1c5894147f448cacd948 Mon Sep 17 00:00:00 2001 From: David Francis Date: Fri, 29 Mar 2019 13:23:15 -0400 Subject: drm/amd/display: If one stream full updates, full update all planes [ Upstream commit c238bfe0be9ef7420f7669a69e27c8c8f4d8a568 ] [Why] On some compositors, with two monitors attached, VT terminal switch can cause a graphical issue by the following means: There are two streams, one for each monitor. Each stream has one plane current state: M1:S1->P1 M2:S2->P2 The user calls for a terminal switch and a commit is made to change both planes to linear swizzle mode. In atomic check, a new dc_state is constructed with new planes on each stream new state: M1:S1->P3 M2:S2->P4 In commit tail, each stream is committed, one at a time. The first stream (S1) updates properly, triggerring a full update and replacing the state current state: M1:S1->P3 M2:S2->P4 The update for S2 comes in, but dc detects that there is no difference between the stream and plane in the new and current states, and so triggers a fast update. The fast update does not program swizzle, so the second monitor is corrupted [How] Add a flag to dc_plane_state that forces full updates When a stream undergoes a full update, set this flag on all changed planes, then clear it on the current stream Subsequent streams will get full updates as a result Signed-off-by: David Francis Signed-off-by: Nicholas Kazlauskas Reviewed-by: Roman Li Acked-by: Bhawanpreet Lakha Acked-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 1f92e7e8e3d3..5af2ea1f201d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1308,6 +1308,11 @@ static enum surface_update_type det_surface_update(const struct dc *dc, return UPDATE_TYPE_FULL; } + if (u->surface->force_full_update) { + update_flags->bits.full_update = 1; + return UPDATE_TYPE_FULL; + } + type = get_plane_info_update_type(u); elevate_update_type(&overall_type, type); @@ -1637,6 +1642,14 @@ void dc_commit_updates_for_stream(struct dc *dc, } dc_resource_state_copy_construct(state, context); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) + new_pipe->plane_state->force_full_update = true; + } } @@ -1680,6 +1693,12 @@ void dc_commit_updates_for_stream(struct dc *dc, dc->current_state = context; dc_release_state(old); + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->plane_state && pipe_ctx->stream == stream) + pipe_ctx->plane_state->force_full_update = false; + } } /*let's use current_state to update watermark etc*/ if (update_type >= UPDATE_TYPE_FULL) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4b5bbb13ce7f..7d5656d7e460 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -496,6 +496,9 @@ struct dc_plane_state { struct dc_plane_status status; struct dc_context *ctx; + /* HACK: Workaround for forcing full reprogramming under some conditions */ + bool force_full_update; + /* private to dc_surface.c */ enum dc_irq_source irq_source; struct kref refcount; -- cgit v1.2.3 From 5a8306e3b433ba0c9a06d59d407b9e7d0652fc03 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 12 Apr 2019 11:04:50 +0200 Subject: s390/pkey: add one more argument space for debug feature entry [ Upstream commit 6b1f16ba730d4c0cda1247568c3a1bf4fa3a2f2f ] The debug feature entries have been used with up to 5 arguents (including the pointer to the format string) but there was only space reserved for 4 arguemnts. So now the registration does reserve space for 5 times a long value. This fixes a sometime appearing weired value as the last value of an debug feature entry like this: ... pkey_sec2protkey zcrypt_send_cprb (cardnr=10 domain=12) failed with errno -2143346254 Signed-off-by: Harald Freudenberger Reported-by: Christian Rund Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- drivers/s390/crypto/pkey_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 2f92bbed4bf6..097e890e0d6d 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -51,7 +51,8 @@ static debug_info_t *debug_info; static void __init pkey_debug_init(void) { - debug_info = debug_register("pkey", 1, 1, 4 * sizeof(long)); + /* 5 arguments per dbf entry (including the format string ptr) */ + debug_info = debug_register("pkey", 1, 1, 5 * sizeof(long)); debug_register_view(debug_info, &debug_sprintf_view); debug_set_level(debug_info, 3); } -- cgit v1.2.3 From e011d319f7364eab0518b4896eb5a34a820c297a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 15 Apr 2019 09:49:56 -0700 Subject: x86/build/lto: Fix truncated .bss with -fdata-sections [ Upstream commit 6a03469a1edc94da52b65478f1e00837add869a3 ] With CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y, we compile the kernel with -fdata-sections, which also splits the .bss section. The new section, with a new .bss.* name, which pattern gets missed by the main x86 linker script which only expects the '.bss' name. This results in the discarding of the second part and a too small, truncated .bss section and an unhappy, non-working kernel. Use the common BSS_MAIN macro in the linker script to properly capture and merge all the generated BSS sections. Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Borislav Petkov Cc: Kees Cook Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190415164956.124067-1-samitolvanen@google.com [ Extended the changelog. ] Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ee3b5c7d662e..c45214c44e61 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -362,7 +362,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) - *(.bss) + *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); __bss_stop = .; -- cgit v1.2.3 From 47155c33761e735e66361e514d0a525ff216901f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Apr 2019 10:46:07 +0200 Subject: x86/mm: Prevent bogus warnings with "noexec=off" [ Upstream commit 510bb96fe5b3480b4b22d815786377e54cb701e7 ] Xose Vazquez Perez reported boot warnings when NX is disabled on the kernel command line. __early_set_fixmap() triggers this warning: attempted to set unsupported pgprot: 8000000000000163 bits: 8000000000000000 supported: 7fffffffffffffff WARNING: CPU: 0 PID: 0 at arch/x86/include/asm/pgtable.h:537 __early_set_fixmap+0xa2/0xff because it uses __default_kernel_pte_mask to mask out unsupported bits. Use __supported_pte_mask instead. Disabling NX on the command line also triggers the NX warning in the page table mapping check: WARNING: CPU: 1 PID: 1 at arch/x86/mm/dump_pagetables.c:262 note_page+0x2ae/0x650 .... Make the warning depend on NX set in __supported_pte_mask. Reported-by: Xose Vazquez Perez Tested-by: Xose Vazquez Perez Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1904151037530.1729@nanos.tec.linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/mm/dump_pagetables.c | 3 ++- arch/x86/mm/ioremap.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e3cdc85ce5b6..84304626b1cb 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st) #endif /* Account the WX pages */ st->wx_pages += npages; - WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n", + WARN_ONCE(__supported_pte_mask & _PAGE_NX, + "x86/mm: Found insecure W+X mapping at address %pS\n", (void *)st->start_address); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 5378d10f1d31..3b76fe954978 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx, pte = early_ioremap_pte(addr); /* Sanitize 'prot' against any unsupported bits: */ - pgprot_val(flags) &= __default_kernel_pte_mask; + pgprot_val(flags) &= __supported_pte_mask; if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); -- cgit v1.2.3 From ad0b4845927e1e57994af11a52412ae22ba4b8a4 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 12 Apr 2019 16:01:53 +0800 Subject: x86/reboot, efi: Use EFI reboot for Acer TravelMate X514-51T [ Upstream commit 0082517fa4bce073e7cf542633439f26538a14cc ] Upon reboot, the Acer TravelMate X514-51T laptop appears to complete the shutdown process, but then it hangs in BIOS POST with a black screen. The problem is intermittent - at some points it has appeared related to Secure Boot settings or different kernel builds, but ultimately we have not been able to identify the exact conditions that trigger the issue to come and go. Besides, the EFI mode cannot be disabled in the BIOS of this model. However, after extensive testing, we observe that using the EFI reboot method reliably avoids the issue in all cases. So add a boot time quirk to use EFI reboot on such systems. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=203119 Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: linux@endlessm.com Link: http://lkml.kernel.org/r/20190412080152.3718-1-jian-hong@endlessm.com [ Fix !CONFIG_EFI build failure, clarify the code and the changelog a bit. ] Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/kernel/reboot.c | 21 +++++++++++++++++++++ include/linux/efi.h | 7 ++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 725624b6c0c0..8fd3cedd9acc 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; } +/* + * Some machines don't handle the default ACPI reboot method and + * require the EFI reboot method: + */ +static int __init set_efi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) { + reboot_type = BOOT_EFI; + pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident); + } + return 0; +} + void __noreturn machine_real_restart(unsigned int type) { local_irq_disable(); @@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), }, }, + { /* Handle reboot issue on Acer TravelMate X514-51T */ + .callback = set_efi_reboot, + .ident = "Acer TravelMate X514-51T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"), + }, + }, /* Apple */ { /* Handle problems with rebooting on Apple MacBook5 */ diff --git a/include/linux/efi.h b/include/linux/efi.h index a86485ac7c87..de05a4302529 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1598,7 +1598,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size); -bool efi_runtime_disabled(void); +#ifdef CONFIG_EFI +extern bool efi_runtime_disabled(void); +#else +static inline bool efi_runtime_disabled(void) { return true; } +#endif + extern void efi_call_virt_check_flags(unsigned long flags, const char *call); enum efi_secureboot_mode { -- cgit v1.2.3 From ac0cd21ff7f3fedd07c74314d3b638f041e68021 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Apr 2019 15:57:19 +0200 Subject: KVM: nVMX: always use early vmcs check when EPT is disabled [ Upstream commit 2b27924bb1d48e3775f432b70bdad5e6dd4e7798 ] The remaining failures of vmx.flat when EPT is disabled are caused by incorrectly reflecting VMfails to the L1 hypervisor. What happens is that nested_vmx_restore_host_state corrupts the guest CR3, reloading it with the host's shadow CR3 instead, because it blindly loads GUEST_CR3 from the vmcs01. For simplicity let's just always use hardware VMCS checks when EPT is disabled. This way, nested_vmx_restore_host_state is not reached at all (or at least shouldn't be reached). Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx/nested.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd398..d213ec5c3766 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -146,6 +146,7 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 +#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8f8c42b04875..2a16bd887729 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3790,8 +3790,18 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); nested_ept_uninit_mmu_context(vcpu); - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + + /* + * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3 + * points to shadow pages! Fortunately we only get here after a WARN_ON + * if EPT is disabled, so a VMabort is perfectly fine. + */ + if (enable_ept) { + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + } else { + nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED); + } /* * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs @@ -5739,6 +5749,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) { int i; + /* + * Without EPT it is not possible to restore L1's CR3 and PDPTR on + * VMfail, because they are not available in vmcs01. Just always + * use hardware checks. + */ + if (!enable_ept) + nested_early_check = 1; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) { -- cgit v1.2.3 From 7cee966029037a183d98cb88251ceb92a233fe63 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Apr 2019 11:16:47 +0200 Subject: KVM: fix spectrev1 gadgets [ Upstream commit 1d487e9bf8ba66a7174c56a0029c54b1eca8f99c ] These were found with smatch, and then generalized when applicable. Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/lapic.c | 4 +++- include/linux/kvm_host.h | 10 ++++++---- virt/kvm/irqchip.c | 5 +++-- virt/kvm/kvm_main.c | 6 ++++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3339697de6e5..235687f3388f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -137,6 +137,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, if (offset <= max_apic_id) { u8 cluster_size = min(max_apic_id - offset + 1, 16U); + offset = array_index_nospec(offset, map->max_apic_id + 1); *cluster = &map->phys_map[offset]; *mask = dest_id & (0xffff >> (16 - cluster_size)); } else { @@ -899,7 +900,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, if (irq->dest_id > map->max_apic_id) { *bitmap = 0; } else { - *dst = &map->phys_map[irq->dest_id]; + u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); + *dst = &map->phys_map[dest_id]; *bitmap = 1; } return true; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cf761ff58224..e41503b2c5a1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -492,10 +493,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { - /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case - * the caller has read kvm->online_vcpus before (as is the case - * for kvm_for_each_vcpu, for example). - */ + int num_vcpus = atomic_read(&kvm->online_vcpus); + i = array_index_nospec(i, num_vcpus); + + /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ smp_rmb(); return kvm->vcpus[i]; } @@ -579,6 +580,7 @@ void kvm_put_kvm(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { + as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b1286c4e0712..0bd0683640bd 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm, { struct kvm_kernel_irq_routing_entry *ei; int r; + u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES); /* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and non-irqchip routing. */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + hlist_for_each_entry(ei, &rt->map[gsi], link) if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return -EINVAL; - e->gsi = ue->gsi; + e->gsi = gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b4f2d892a1d3..ff68b07e94e9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2974,12 +2974,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_device_ops *ops = NULL; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int type; int ret; if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) return -ENODEV; - ops = kvm_device_ops_table[cd->type]; + type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); + ops = kvm_device_ops_table[type]; if (ops == NULL) return -ENODEV; @@ -2994,7 +2996,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, dev->kvm = kvm; mutex_lock(&kvm->lock); - ret = ops->create(dev, cd->type); + ret = ops->create(dev, type); if (ret < 0) { mutex_unlock(&kvm->lock); kfree(dev); -- cgit v1.2.3 From b053700b6ce94ca0503594564b39c40e12cc187a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 27 Mar 2019 15:12:20 +0100 Subject: KVM: x86: avoid misreporting level-triggered irqs as edge-triggered in tracing [ Upstream commit 7a223e06b1a411cef6c4cd7a9b9a33c8d225b10e ] In __apic_accept_irq() interface trig_mode is int and actually on some code paths it is set above u8: kvm_apic_set_irq() extracts it from 'struct kvm_lapic_irq' where trig_mode is u16. This is done on purpose as e.g. kvm_set_msi_irq() sets it to (1 << 15) & e->msi.data kvm_apic_local_deliver sets it to reg & (1 << 15). Fix the immediate issue by making 'tm' into u16. We may also want to adjust __apic_accept_irq() interface and use proper sizes for vector, level, trig_mode but this is not urgent. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6432d08c7de7..4d47a2631d1f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi, ); TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), + TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) - __field( __u8, tm ) + __field( __u16, tm ) __field( __u8, vec ) ), -- cgit v1.2.3 From 9b2395e2bfec918b2f3c1226957976a4a514fe10 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Tue, 9 Apr 2019 11:15:29 +0200 Subject: tools lib traceevent: Fix missing equality check for strcmp [ Upstream commit f32c2877bcb068a718bb70094cd59ccc29d4d082 ] There was a missing comparison with 0 when checking if type is "s64" or "u64". Therefore, the body of the if-statement was entered if "type" was "u64" or not "s64", which made the first strcmp() redundant since if type is "u64", it's not "s64". If type is "s64", the body of the if-statement is not entered but since the remainder of the function consists of if-statements which will not be entered if type is "s64", we will just return "val", which is correct, albeit at the cost of a few more calls to strcmp(), i.e., it will behave just as if the if-statement was entered. If type is neither "s64" or "u64", the body of the if-statement will be entered incorrectly and "val" returned. This means that any type that is checked after "s64" and "u64" is handled the same way as "s64" and "u64", i.e., the limiting of "val" to fit in for example "s8" is never reached. This was introduced in the kernel tree when the sources were copied from trace-cmd in commit f7d82350e597 ("tools/events: Add files to create libtraceevent.a"), and in the trace-cmd repo in 1cdbae6035cei ("Implement typecasting in parser") when the function was introduced, i.e., it has always behaved the wrong way. Detected by cppcheck. Signed-off-by: Rikard Falkeborn Reviewed-by: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov Fixes: f7d82350e597 ("tools/events: Add files to create libtraceevent.a") Link: http://lkml.kernel.org/r/20190409091529.2686-1-rikard.falkeborn@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 87494c7c619d..981c6ce2da2c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2233,7 +2233,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val & 0xffffffff; if (strcmp(type, "u64") == 0 || - strcmp(type, "s64")) + strcmp(type, "s64") == 0) return val; if (strcmp(type, "s8") == 0) -- cgit v1.2.3 From d2ae8127d693545cdc7cb36516f00699bd66aa6b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Apr 2019 14:53:33 +0200 Subject: perf top: Always sample time to satisfy needs of use of ordered queuing [ Upstream commit 1e6db2ee86e6a4399fc0ae5689e55e0fd1c43caf ] Bastian reported broken 'perf top -p PID' command, it won't display any data. The problem is that for -p option we monitor single thread, so we don't enable time in samples, because it's not needed. However since commit 16c66bc167cc we use ordered queues to stash data plus later commits added logic for dropping samples in case there's big load and we don't keep up. All this needs timestamp for sample. Enabling it unconditionally for perf top. Reported-by: Bastian Beischer Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bastian beischer Fixes: 16c66bc167cc ("perf top: Add processing thread") Link: http://lkml.kernel.org/r/20190415125333.27160-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-top.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 616408251e25..63750a711123 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1393,6 +1393,7 @@ int cmd_top(int argc, const char **argv) * */ .overwrite = 0, .sample_time = true, + .sample_time_set = true, }, .max_stack = sysctl__max_stack(), .annotation_opts = annotation__default_options, -- cgit v1.2.3 From d7200d0648e5c2cc575f1516712fedaa6468cc2e Mon Sep 17 00:00:00 2001 From: Tony Camuso Date: Tue, 9 Apr 2019 15:20:03 -0400 Subject: ipmi: ipmi_si_hardcode.c: init si_type array to fix a crash [ Upstream commit a885bcfd152f97b25005298ab2d6b741aed9b49c ] The intended behavior of function ipmi_hardcode_init_one() is to default to kcs interface when no type argument is presented when initializing ipmi with hard coded addresses. However, the array of char pointers allocated on the stack by function ipmi_hardcode_init() was not inited to zeroes, so it contained stack debris. Consequently, passing the cruft stored in this array to function ipmi_hardcode_init_one() caused a crash when it was unable to detect that the char * being passed was nonsense and tried to access the address specified by the bogus pointer. The fix is simply to initialize the si_type array to zeroes, so if there were no type argument given to at the command line, function ipmi_hardcode_init_one() could properly default to the kcs interface. Signed-off-by: Tony Camuso Message-Id: <1554837603-40299-1-git-send-email-tcamuso@redhat.com> Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_si_hardcode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/ipmi/ipmi_si_hardcode.c b/drivers/char/ipmi/ipmi_si_hardcode.c index 1e5783961b0d..ab7180c46d8d 100644 --- a/drivers/char/ipmi/ipmi_si_hardcode.c +++ b/drivers/char/ipmi/ipmi_si_hardcode.c @@ -201,6 +201,8 @@ void __init ipmi_hardcode_init(void) char *str; char *si_type[SI_MAX_PARMS]; + memset(si_type, 0, sizeof(si_type)); + /* Parse out the si_type string into its components. */ str = si_type_str; if (*str != '\0') { -- cgit v1.2.3 From d39036685e2253143901355a6d942150337d1a89 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 16 Apr 2019 17:51:58 +0300 Subject: ocelot: Don't sleep in atomic context (irqs_disabled()) [ Upstream commit a8fd48b50deaa20808bbf0f6685f6f1acba6a64c ] Preemption disabled at: [] dev_set_rx_mode+0x1c/0x38 Call trace: [] dump_backtrace+0x0/0x3d0 [] show_stack+0x14/0x20 [] dump_stack+0xac/0xe4 [] ___might_sleep+0x164/0x238 [] __might_sleep+0x50/0x88 [] kmem_cache_alloc+0x17c/0x1d0 [] ocelot_set_rx_mode+0x108/0x188 [mscc_ocelot_common] [] __dev_set_rx_mode+0x58/0xa0 [] dev_set_rx_mode+0x24/0x38 Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 215a45374d7b..0ef95abde6bb 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -613,7 +613,7 @@ static int ocelot_mact_mc_add(struct ocelot_port *port, struct netdev_hw_addr *hw_addr) { struct ocelot *ocelot = port->ocelot; - struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL); + struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_ATOMIC); if (!ha) return -ENOMEM; -- cgit v1.2.3 From e9697ba264c288144aced5f9cc05d1d9f51d026d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Apr 2019 18:01:24 +0200 Subject: perf tools: Fix map reference counting [ Upstream commit b9abbdfa88024d52c8084d8f46ea4f161606c692 ] By calling maps__insert() we assume to get 2 references on the map, which we relese within maps__remove call. However if there's already same map name, we currently don't bump the reference and can crash, like: Program received signal SIGABRT, Aborted. 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6 (gdb) bt #0 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6 #1 0x00007ffff75d0895 in abort () from /lib64/libc.so.6 #2 0x00007ffff75d0769 in __assert_fail_base.cold () from /lib64/libc.so.6 #3 0x00007ffff75de596 in __assert_fail () from /lib64/libc.so.6 #4 0x00000000004fc006 in refcount_sub_and_test (i=1, r=0x1224e88) at tools/include/linux/refcount.h:131 #5 refcount_dec_and_test (r=0x1224e88) at tools/include/linux/refcount.h:148 #6 map__put (map=0x1224df0) at util/map.c:299 #7 0x00000000004fdb95 in __maps__remove (map=0x1224df0, maps=0xb17d80) at util/map.c:953 #8 maps__remove (maps=0xb17d80, map=0x1224df0) at util/map.c:959 #9 0x00000000004f7d8a in map_groups__remove (map=, mg=) at util/map_groups.h:65 #10 machine__process_ksymbol_unregister (sample=, event=0x7ffff7279670, machine=) at util/machine.c:728 #11 machine__process_ksymbol (machine=, event=0x7ffff7279670, sample=) at util/machine.c:741 #12 0x00000000004fffbb in perf_session__deliver_event (session=0xb11390, event=0x7ffff7279670, tool=0x7fffffffc7b0, file_offset=13936) at util/session.c:1362 #13 0x00000000005039bb in do_flush (show_progress=false, oe=0xb17e80) at util/ordered-events.c:243 #14 __ordered_events__flush (oe=0xb17e80, how=OE_FLUSH__ROUND, timestamp=) at util/ordered-events.c:322 #15 0x00000000005005e4 in perf_session__process_user_event (session=session@entry=0xb11390, event=event@entry=0x7ffff72a4af8, ... Add the map to the list and getting the reference event if we find the map with same name. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section") Link: http://lkml.kernel.org/r/20190416160127.30203-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/map.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 2b37f56f0549..e33f20d16c8d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -904,10 +904,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map) rc = strcmp(m->dso->short_name, map->dso->short_name); if (rc < 0) p = &(*p)->rb_left; - else if (rc > 0) - p = &(*p)->rb_right; else - return; + p = &(*p)->rb_right; } rb_link_node(&map->rb_node_name, parent, p); rb_insert_color(&map->rb_node_name, &maps->names); -- cgit v1.2.3 From 4176e671a44e6bc7be6ac4c77abe6113437a79f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Apr 2019 18:13:58 +0200 Subject: scsi: aic7xxx: fix EISA support [ Upstream commit 144ec97493af34efdb77c5aba146e9c7de8d0a06 ] Instead of relying on the now removed NULL argument to pci_alloc_consistent, switch to the generic DMA API, and store the struct device so that we can pass it. Fixes: 4167b2ad5182 ("PCI: Remove NULL device handling from PCI DMA API") Reported-by: Matthew Whitehead Signed-off-by: Christoph Hellwig Tested-by: Matthew Whitehead Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aic7xxx/aic7770_osm.c | 1 + drivers/scsi/aic7xxx/aic7xxx.h | 1 + drivers/scsi/aic7xxx/aic7xxx_osm.c | 10 ++++------ drivers/scsi/aic7xxx/aic7xxx_osm_pci.c | 1 + 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic7770_osm.c b/drivers/scsi/aic7xxx/aic7770_osm.c index 3d401d02c019..bdd177e3d762 100644 --- a/drivers/scsi/aic7xxx/aic7770_osm.c +++ b/drivers/scsi/aic7xxx/aic7770_osm.c @@ -91,6 +91,7 @@ aic7770_probe(struct device *dev) ahc = ahc_alloc(&aic7xxx_driver_template, name); if (ahc == NULL) return (ENOMEM); + ahc->dev = dev; error = aic7770_config(ahc, aic7770_ident_table + edev->id.driver_data, eisaBase); if (error != 0) { diff --git a/drivers/scsi/aic7xxx/aic7xxx.h b/drivers/scsi/aic7xxx/aic7xxx.h index 5614921b4041..88b90f9806c9 100644 --- a/drivers/scsi/aic7xxx/aic7xxx.h +++ b/drivers/scsi/aic7xxx/aic7xxx.h @@ -943,6 +943,7 @@ struct ahc_softc { * Platform specific device information. */ ahc_dev_softc_t dev_softc; + struct device *dev; /* * Bus specific device information. diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index 3c9c17450bb3..d5c4a0d23706 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -860,8 +860,8 @@ int ahc_dmamem_alloc(struct ahc_softc *ahc, bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { - *vaddr = pci_alloc_consistent(ahc->dev_softc, - dmat->maxsize, mapp); + /* XXX: check if we really need the GFP_ATOMIC and unwind this mess! */ + *vaddr = dma_alloc_coherent(ahc->dev, dmat->maxsize, mapp, GFP_ATOMIC); if (*vaddr == NULL) return ENOMEM; return 0; @@ -871,8 +871,7 @@ void ahc_dmamem_free(struct ahc_softc *ahc, bus_dma_tag_t dmat, void* vaddr, bus_dmamap_t map) { - pci_free_consistent(ahc->dev_softc, dmat->maxsize, - vaddr, map); + dma_free_coherent(ahc->dev, dmat->maxsize, vaddr, map); } int @@ -1123,8 +1122,7 @@ ahc_linux_register_host(struct ahc_softc *ahc, struct scsi_host_template *templa host->transportt = ahc_linux_transport_template; - retval = scsi_add_host(host, - (ahc->dev_softc ? &ahc->dev_softc->dev : NULL)); + retval = scsi_add_host(host, ahc->dev); if (retval) { printk(KERN_WARNING "aic7xxx: scsi_add_host failed\n"); scsi_host_put(host); diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c index 0fc14dac7070..717d8d1082ce 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c @@ -250,6 +250,7 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } ahc->dev_softc = pci; + ahc->dev = &pci->dev; error = ahc_pci_config(ahc, entry); if (error != 0) { ahc_free(ahc); -- cgit v1.2.3 From 247e9fe2f4ad55d591264358368aa307bf81481b Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 18 Apr 2019 17:49:55 -0700 Subject: slab: store tagged freelist for off-slab slabmgmt [ Upstream commit 1a62b18d51e5c5ecc0345c85bb9fef870ab721ed ] Commit 51dedad06b5f ("kasan, slab: make freelist stored without tags") calls kasan_reset_tag() for off-slab slab management object leading to freelist being stored non-tagged. However, cache_grow_begin() calls alloc_slabmgmt() which calls kmem_cache_alloc_node() assigns a tag for the address and stores it in the shadow address. As the result, it causes endless errors below during boot due to drain_freelist() -> slab_destroy() -> kasan_slab_free() which compares already untagged freelist against the stored tag in the shadow address. Since off-slab slab management object freelist is such a special case, just store it tagged. Non-off-slab management object freelist is still stored untagged which has not been assigned a tag and should not cause any other troubles with this inconsistency. BUG: KASAN: double-free or invalid-free in slab_destroy+0x84/0x88 Pointer tag: [ff], memory tag: [99] CPU: 0 PID: 1376 Comm: kworker/0:4 Tainted: G W 5.1.0-rc3+ #8 Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS L50_5.13_1.0.6 07/10/2018 Workqueue: cgroup_destroy css_killed_work_fn Call trace: print_address_description+0x74/0x2a4 kasan_report_invalid_free+0x80/0xc0 __kasan_slab_free+0x204/0x208 kasan_slab_free+0xc/0x18 kmem_cache_free+0xe4/0x254 slab_destroy+0x84/0x88 drain_freelist+0xd0/0x104 __kmem_cache_shrink+0x1ac/0x224 __kmemcg_cache_deactivate+0x1c/0x28 memcg_deactivate_kmem_caches+0xa0/0xe8 memcg_offline_kmem+0x8c/0x3d4 mem_cgroup_css_offline+0x24c/0x290 css_killed_work_fn+0x154/0x618 process_one_work+0x9cc/0x183c worker_thread+0x9b0/0xe38 kthread+0x374/0x390 ret_from_fork+0x10/0x18 Allocated by task 1625: __kasan_kmalloc+0x168/0x240 kasan_slab_alloc+0x18/0x20 kmem_cache_alloc_node+0x1f8/0x3a0 cache_grow_begin+0x4fc/0xa24 cache_alloc_refill+0x2f8/0x3e8 kmem_cache_alloc+0x1bc/0x3bc sock_alloc_inode+0x58/0x334 alloc_inode+0xb8/0x164 new_inode_pseudo+0x20/0xec sock_alloc+0x74/0x284 __sock_create+0xb0/0x58c sock_create+0x98/0xb8 __sys_socket+0x60/0x138 __arm64_sys_socket+0xa4/0x110 el0_svc_handler+0x2c0/0x47c el0_svc+0x8/0xc Freed by task 1625: __kasan_slab_free+0x114/0x208 kasan_slab_free+0xc/0x18 kfree+0x1a8/0x1e0 single_release+0x7c/0x9c close_pdeo+0x13c/0x43c proc_reg_release+0xec/0x108 __fput+0x2f8/0x784 ____fput+0x1c/0x28 task_work_run+0xc0/0x1b0 do_notify_resume+0xb44/0x1278 work_pending+0x8/0x10 The buggy address belongs to the object at ffff809681b89e00 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 0 bytes inside of 128-byte region [ffff809681b89e00, ffff809681b89e80) The buggy address belongs to the page: page:ffff7fe025a06e00 count:1 mapcount:0 mapping:01ff80082000fb00 index:0xffff809681b8fe04 flags: 0x17ffffffc000200(slab) raw: 017ffffffc000200 ffff7fe025a06d08 ffff7fe022ef7b88 01ff80082000fb00 raw: ffff809681b8fe04 ffff809681b80000 00000001000000e0 0000000000000000 page dumped because: kasan: bad access detected page allocated via order 0, migratetype Unmovable, gfp_mask 0x2420c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_COMP|__GFP_THISNODE) prep_new_page+0x4e0/0x5e0 get_page_from_freelist+0x4ce8/0x50d4 __alloc_pages_nodemask+0x738/0x38b8 cache_grow_begin+0xd8/0xa24 ____cache_alloc_node+0x14c/0x268 __kmalloc+0x1c8/0x3fc ftrace_free_mem+0x408/0x1284 ftrace_free_init_mem+0x20/0x28 kernel_init+0x24/0x548 ret_from_fork+0x10/0x18 Memory state around the buggy address: ffff809681b89c00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe ffff809681b89d00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe >ffff809681b89e00: 99 99 99 99 99 99 99 99 fe fe fe fe fe fe fe fe ^ ffff809681b89f00: 43 43 43 43 43 fe fe fe fe fe fe fe fe fe fe fe ffff809681b8a000: 6d fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe Link: http://lkml.kernel.org/r/20190403022858.97584-1-cai@lca.pw Fixes: 51dedad06b5f ("kasan, slab: make freelist stored without tags") Signed-off-by: Qian Cai Reviewed-by: Andrey Konovalov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/slab.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 188c4b65255d..f4bbc53008f3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2371,7 +2371,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, /* Slab management obj is off-slab. */ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - freelist = kasan_reset_tag(freelist); if (!freelist) return NULL; } else { -- cgit v1.2.3 From 47d1b202e3254536af7bf53cddbdc57fad8e776e Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 18 Apr 2019 17:50:30 -0700 Subject: mm/hotplug: treat CMA pages as unmovable [ Upstream commit 1a9f219157b22d0ffb340a9c5f431afd02cd2cf3 ] has_unmovable_pages() is used by allocating CMA and gigantic pages as well as the memory hotplug. The later doesn't know how to offline CMA pool properly now, but if an unused (free) CMA page is encountered, then has_unmovable_pages() happily considers it as a free memory and propagates this up the call chain. Memory offlining code then frees the page without a proper CMA tear down which leads to an accounting issues. Moreover if the same memory range is onlined again then the memory never gets back to the CMA pool. State after memory offline: # grep cma /proc/vmstat nr_free_cma 205824 # cat /sys/kernel/debug/cma/cma-kvm_cma/count 209920 Also, kmemleak still think those memory address are reserved below but have already been used by the buddy allocator after onlining. This patch fixes the situation by treating CMA pageblocks as unmovable except when has_unmovable_pages() is called as part of CMA allocation. Offlined Pages 4096 kmemleak: Cannot insert 0xc000201f7d040008 into the object search tree (overlaps existing) Call Trace: dump_stack+0xb0/0xf4 (unreliable) create_object+0x344/0x380 __kmalloc_node+0x3ec/0x860 kvmalloc_node+0x58/0x110 seq_read+0x41c/0x620 __vfs_read+0x3c/0x70 vfs_read+0xbc/0x1a0 ksys_read+0x7c/0x140 system_call+0x5c/0x70 kmemleak: Kernel memory leak detector disabled kmemleak: Object 0xc000201cc8000000 (size 13757317120): kmemleak: comm "swapper/0", pid 0, jiffies 4294937297 kmemleak: min_count = -1 kmemleak: count = 0 kmemleak: flags = 0x5 kmemleak: checksum = 0 kmemleak: backtrace: cma_declare_contiguous+0x2a4/0x3b0 kvm_cma_reserve+0x11c/0x134 setup_arch+0x300/0x3f8 start_kernel+0x9c/0x6e8 start_here_common+0x1c/0x4b0 kmemleak: Automatic memory scanning thread ended [cai@lca.pw: use is_migrate_cma_page() and update commit log] Link: http://lkml.kernel.org/r/20190416170510.20048-1-cai@lca.pw Link: http://lkml.kernel.org/r/20190413002623.8967-1-cai@lca.pw Signed-off-by: Qian Cai Acked-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page_alloc.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 318ef6ccdb3b..eedb57f9b40b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7945,7 +7945,10 @@ void *__init alloc_large_system_hash(const char *tablename, bool has_unmovable_pages(struct zone *zone, struct page *page, int count, int migratetype, int flags) { - unsigned long pfn, iter, found; + unsigned long found; + unsigned long iter = 0; + unsigned long pfn = page_to_pfn(page); + const char *reason = "unmovable page"; /* * TODO we could make this much more efficient by not checking every @@ -7955,17 +7958,20 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * can still lead to having bootmem allocations in zone_movable. */ - /* - * CMA allocations (alloc_contig_range) really need to mark isolate - * CMA pageblocks even when they are not movable in fact so consider - * them movable here. - */ - if (is_migrate_cma(migratetype) && - is_migrate_cma(get_pageblock_migratetype(page))) - return false; + if (is_migrate_cma_page(page)) { + /* + * CMA allocations (alloc_contig_range) really need to mark + * isolate CMA pageblocks even when they are not movable in fact + * so consider them movable here. + */ + if (is_migrate_cma(migratetype)) + return false; + + reason = "CMA page"; + goto unmovable; + } - pfn = page_to_pfn(page); - for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { + for (found = 0; iter < pageblock_nr_pages; iter++) { unsigned long check = pfn + iter; if (!pfn_valid_within(check)) @@ -8045,7 +8051,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, unmovable: WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); if (flags & REPORT_FAILURE) - dump_page(pfn_to_page(pfn+iter), "unmovable page"); + dump_page(pfn_to_page(pfn + iter), reason); return true; } -- cgit v1.2.3 From 57c2301fc9c93484cecb928f467ca6128fa0b1a3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 18 Apr 2019 17:50:34 -0700 Subject: mm: fix inactive list balancing between NUMA nodes and cgroups [ Upstream commit 3b991208b897f52507168374033771a984b947b1 ] During !CONFIG_CGROUP reclaim, we expand the inactive list size if it's thrashing on the node that is about to be reclaimed. But when cgroups are enabled, we suddenly ignore the node scope and use the cgroup scope only. The result is that pressure bleeds between NUMA nodes depending on whether cgroups are merely compiled into Linux. This behavioral difference is unexpected and undesirable. When the refault adaptivity of the inactive list was first introduced, there were no statistics at the lruvec level - the intersection of node and memcg - so it was better than nothing. But now that we have that infrastructure, use lruvec_page_state() to make the list balancing decision always NUMA aware. [hannes@cmpxchg.org: fix bisection hole] Link: http://lkml.kernel.org/r/20190417155241.GB23013@cmpxchg.org Link: http://lkml.kernel.org/r/20190412144438.2645-1-hannes@cmpxchg.org Fixes: 2a2e48854d70 ("mm: vmscan: fix IO/refault regression in cache workingset transition") Signed-off-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/vmscan.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index e979705bbf32..022afabac3f6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2199,7 +2199,6 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct mem_cgroup *memcg, struct scan_control *sc, bool actual_reclaim) { enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; @@ -2220,16 +2219,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); - if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - /* * When refaults are being observed, it means a new workingset * is being established. Disable active list protection to get * rid of the stale workingset quickly. */ + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); if (file && actual_reclaim && lruvec->refaults != refaults) { inactive_ratio = 0; } else { @@ -2250,12 +2245,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct mem_cgroup *memcg, - struct scan_control *sc) + struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), - memcg, sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2355,7 +2348,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anonymous pages on the LRU in eligible zones. * Otherwise, the small LRU gets thrashed. */ - if (!inactive_list_is_low(lruvec, false, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, false, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_ANON; @@ -2373,7 +2366,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, true, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2526,7 +2519,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, memcg, sc); + lruvec, sc); } } @@ -2593,7 +2586,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2993,12 +2986,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) unsigned long refaults; struct lruvec *lruvec; - if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - lruvec = mem_cgroup_lruvec(pgdat, memcg); + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); lruvec->refaults = refaults; } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); } @@ -3363,7 +3352,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); -- cgit v1.2.3 From 5bc0352515ef810f7513adfcae84ca4fd00c67aa Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Apr 2019 17:50:44 -0700 Subject: init: initialize jump labels before command line option parsing [ Upstream commit 6041186a32585fc7a1d0f6cfe2f138b05fdc3c82 ] When a module option, or core kernel argument, toggles a static-key it requires jump labels to be initialized early. While x86, PowerPC, and ARM64 arrange for jump_label_init() to be called before parse_args(), ARM does not. Kernel command line: rdinit=/sbin/init page_alloc.shuffle=1 panic=-1 console=ttyAMA0,115200 page_alloc.shuffle=1 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at ./include/linux/jump_label.h:303 page_alloc_shuffle+0x12c/0x1ac static_key_enable(): static key 'page_alloc_shuffle_key+0x0/0x4' used before call to jump_label_init() Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc4-next-20190410-00003-g3367c36ce744 #1 Hardware name: ARM Integrator/CP (Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x18) [] (show_stack) from [] (dump_stack+0x18/0x24) [] (dump_stack) from [] (__warn+0xe0/0x108) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (page_alloc_shuffle+0x12c/0x1ac) [] (page_alloc_shuffle) from [] (shuffle_store+0x28/0x48) [] (shuffle_store) from [] (parse_args+0x1f4/0x350) [] (parse_args) from [] (start_kernel+0x1c0/0x488) Move the fallback call to jump_label_init() to occur before parse_args(). The redundant calls to jump_label_init() in other archs are left intact in case they have static key toggling use cases that are even earlier than option parsing. Link: http://lkml.kernel.org/r/155544804466.1032396.13418949511615676665.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reported-by: Guenter Roeck Reviewed-by: Kees Cook Cc: Mathieu Desnoyers Cc: Thomas Gleixner Cc: Mike Rapoport Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- init/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index c86a1c8f19f4..7ae824545265 100644 --- a/init/main.c +++ b/init/main.c @@ -574,6 +574,8 @@ asmlinkage __visible void __init start_kernel(void) page_alloc_init(); pr_notice("Kernel command line: %s\n", boot_command_line); + /* parameters may set static keys */ + jump_label_init(); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, @@ -583,8 +585,6 @@ asmlinkage __visible void __init start_kernel(void) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, NULL, set_init_arg); - jump_label_init(); - /* * These use large bootmem allocations and must precede * kmem_cache_init() -- cgit v1.2.3 From 16b01614d36984ff86a1f5eb8c3f2a61fcd50114 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Wed, 20 Feb 2019 07:52:31 +0000 Subject: drm: bridge: dw-hdmi: Fix overflow workaround for Rockchip SoCs [ Upstream commit d15d9fd02575ecfada92d42f655940c4f10af842 ] The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have also been identified as needing this workaround with a single iteration. Fixes: be41fc55f1aa ("drm: bridge: dw-hdmi: Handle overflow workaround based on device version") Signed-off-by: Jonas Karlman Tested-by: Heiko Stueber Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/AM3PR03MB0966818FAAAE6192FF4ED11AAC7D0@AM3PR03MB0966.eurprd03.prod.outlook.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 64c3cf027518..14223c0ee784 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1655,6 +1655,8 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) * iteration for others. * The Amlogic Meson GX SoCs (v2.01a) have been identified as needing * the workaround with a single iteration. + * The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have + * been identified as needing the workaround with a single iteration. */ switch (hdmi->version) { @@ -1663,7 +1665,9 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) break; case 0x131a: case 0x132a: + case 0x200a: case 0x201a: + case 0x211a: case 0x212a: count = 1; break; -- cgit v1.2.3 From 44fbb3db2bdea2734c31c52b1390c8daf8be6f05 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Mar 2019 23:11:53 +0100 Subject: selftests: netfilter: check icmp pkttoobig errors are set as related [ Upstream commit becf2319f320cae43e20cf179cc51a355a0deb5f ] When an icmp error such as pkttoobig is received, conntrack checks if the "inner" header (header of packet that did not fit link mtu) is matches an existing connection, and, if so, sets that packet as being related to the conntrack entry it found. It was recently reported that this "related" setting also works if the inner header is from another, different connection (i.e., artificial/forged icmp error). Add a test, followup patch will add additional "inner dst matches outer dst in reverse direction" check before setting related state. Link: https://www.synacktiv.com/posts/systems/icmp-reachable.html Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/netfilter/Makefile | 2 +- .../selftests/netfilter/conntrack_icmp_related.sh | 283 +++++++++++++++++++++ 2 files changed, 284 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/conntrack_icmp_related.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index c9ff2b47bd1c..a37cb1192c6a 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh new file mode 100755 index 000000000000..b48e1833bc89 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# +# check that ICMP df-needed/pkttoobig icmp are set are set as related +# state +# +# Setup is: +# +# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2 +# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280). +# ping nsclient2 from nsclient1, checking that conntrack did set RELATED +# 'fragmentation needed' icmp packet. +# +# In addition, nsrouter1 will perform IP masquerading, i.e. also +# check the icmp errors are propagated to the correct host as per +# nat of "established" icmp-echo "connection". + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + for i in 1 2;do ip netns del nsclient$i;done + for i in 1 2;do ip netns del nsrouter$i;done +} + +ipv4() { + echo -n 192.168.$1.2 +} + +ipv6 () { + echo -n dead:$1::2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +check_unknown() +{ + expect="packets 0 bytes 0" + for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + check_counter $n "unknown" "$expect" + if [ $? -ne 0 ] ;then + return 1 + fi + done + + return 0 +} + +for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + ip netns add $n + ip -net $n link set lo up +done + +DEV=veth0 +ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1 +DEV=veth0 +ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2 + +DEV=veth0 +ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2 + +DEV=veth0 +for i in 1 2; do + ip -net nsclient$i link set $DEV up + ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV + ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV +done + +ip -net nsrouter1 link set eth1 up +ip -net nsrouter1 link set veth0 up + +ip -net nsrouter2 link set eth1 up +ip -net nsrouter2 link set eth2 up + +ip -net nsclient1 route add default via 192.168.1.1 +ip -net nsclient1 -6 route add default via dead:1::1 + +ip -net nsclient2 route add default via 192.168.2.1 +ip -net nsclient2 route add default via dead:2::1 + +i=3 +ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1 +ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0 +ip -net nsrouter1 addr add dead:1::1/64 dev eth1 +ip -net nsrouter1 addr add dead:3::1/64 dev veth0 +ip -net nsrouter1 route add default via 192.168.3.10 +ip -net nsrouter1 -6 route add default via dead:3::10 + +ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1 +ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2 +ip -net nsrouter2 addr add dead:2::1/64 dev eth1 +ip -net nsrouter2 addr add dead:3::10/64 dev eth2 +ip -net nsrouter2 route add default via 192.168.3.1 +ip -net nsrouter2 route add default via dead:3::1 + +sleep 2 +for i in 4 6; do + ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1 +done + +for netns in nsrouter1 nsrouter2; do +ip netns exec $netns nft -f - </dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ip routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi +ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi + +check_unknown +if [ $? -ne 0 ]; then + ret=1 +fi + +expect="packets 0 bytes 0" +for netns in nsrouter1 nsrouter2 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +expect="packets 2 bytes 2076" +check_counter nsclient2 "new" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +# nsrouter2 should have generated the icmp error, so +# related counter should be 0 (its in forward). +expect="packets 0 bytes 0" +check_counter "nsrouter2" "related" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +# but nsrouter1 should have seen it, same for nsclient1. +expect="packets 1 bytes 576" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +expect="packets 2 bytes 1856" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +if [ $ret -eq 0 ];then + echo "PASS: icmp mtu error had RELATED state" +else + echo "ERROR: icmp error RELATED state test has failed" +fi + +cleanup +exit $ret -- cgit v1.2.3 From a3a5ad78488b6c8ef1cb8d3def72df64a05fbf98 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 31 Mar 2019 13:24:52 +0300 Subject: ipvs: do not schedule icmp errors from tunnels [ Upstream commit 0261ea1bd1eb0da5c0792a9119b8655cf33c80a3 ] We can receive ICMP errors from client or from tunneling real server. While the former can be scheduled to real server, the latter should not be scheduled, they are decapsulated only when existing connection is found. Fixes: 6044eeffafbe ("ipvs: attempt to schedule icmp packets") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 235205c93e14..df112b27246a 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1647,7 +1647,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (!cp) { int v; - if (!sysctl_schedule_icmp(ipvs)) + if (ipip || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT; if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) -- cgit v1.2.3 From b0a90cae081d7ee14eaa46524fb70f4e23ae8905 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 1 Apr 2019 13:08:54 +0200 Subject: netfilter: ctnetlink: don't use conntrack/expect object addresses as id [ Upstream commit 3c79107631db1f7fd32cf3f7368e4672004a3010 ] else, we leak the addresses to userspace via ctnetlink events and dumps. Compute an ID on demand based on the immutable parts of nf_conn struct. Another advantage compared to using an address is that there is no immediate re-use of the same ID in case the conntrack entry is freed and reallocated again immediately. Fixes: 3583240249ef ("[NETFILTER]: nf_conntrack_expect: kill unique ID") Fixes: 7f85f914721f ("[NETFILTER]: nf_conntrack: kill unique ID") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 35 +++++++++++++++++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 249d0a5b12b8..63fd47e924b9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -318,6 +318,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); +u32 nf_ct_get_id(const struct nf_conn *ct); + static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9dd4c2048a2b..1982faf21ebb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -424,6 +425,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); +/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1213beb5a714..36619ad8ab8c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -485,7 +486,9 @@ nla_put_failure: static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; @@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2694,6 +2698,25 @@ nla_put_failure: static const union nf_inet_addr any_addr; +static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2741,7 +2764,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -3046,7 +3069,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } -- cgit v1.2.3 From c2987d193f8b1c0f3d59362684798e37049079a3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Apr 2019 08:26:52 +0300 Subject: netfilter: nf_tables: prevent shift wrap in nft_chain_parse_hook() [ Upstream commit 33d1c018179d0a30c39cc5f1682b77867282694b ] I believe that "hook->num" can be up to UINT_MAX. Shifting more than 31 bits would is undefined in C but in practice it would lead to shift wrapping. That would lead to an array overflow in nf_tables_addchain(): ops->hook = hook.type->hooks[ops->hooknum]; Fixes: fe19c04ca137 ("netfilter: nf_tables: remove nhooks field from struct nft_af_info") Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e2aac80f9b7b..25c2b98b9a96 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1502,7 +1502,7 @@ static int nft_chain_parse_hook(struct net *net, if (IS_ERR(type)) return PTR_ERR(type); } - if (!(type->hook_mask & (1 << hook->num))) + if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; if (type->type == NFT_CHAIN_T_NAT && -- cgit v1.2.3 From ecef50c35a07b396fa8977c2312b1f83702fb01e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Apr 2019 14:45:20 +0200 Subject: netfilter: nat: fix icmp id randomization [ Upstream commit 5bdac418f33f60b07a34e01e722889140ee8fac9 ] Sven Auhagen reported that a 2nd ping request will fail if 'fully-random' mode is used. Reason is that if no proto information is given, min/max are both 0, so we set the icmp id to 0 instead of chosing a random value between 0 and 65535. Update test case as well to catch this, without fix this yields: [..] ERROR: cannot ping ns1 from ns2 with ip masquerade fully-random (attempt 2) ERROR: cannot ping ns1 from ns2 with ipv6 masquerade fully-random (attempt 2) ... becaus 2nd ping clashes with existing 'id 0' icmp conntrack and gets dropped. Fixes: 203f2e78200c27e ("netfilter: nat: remove l4proto->unique_tuple") Reported-by: Sven Auhagen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_nat_core.c | 11 ++++++--- tools/testing/selftests/netfilter/nft_nat.sh | 36 +++++++++++++++++++++------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index d159e9e7835b..ade527565127 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -358,9 +358,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, case IPPROTO_ICMPV6: /* id is same for either direction... */ keyptr = &tuple->src.u.icmp.id; - min = range->min_proto.icmp.id; - range_size = ntohs(range->max_proto.icmp.id) - - ntohs(range->min_proto.icmp.id) + 1; + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + min = 0; + range_size = 65536; + } else { + min = ntohs(range->min_proto.icmp.id); + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + } goto find_free_id; #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) case IPPROTO_GRE: diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 8ec76681605c..3194007cf8d1 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -321,6 +321,7 @@ EOF test_masquerade6() { + local natflags=$1 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -354,13 +355,13 @@ ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags" lret=1 fi @@ -397,19 +398,26 @@ EOF fi done + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)" + lret=1 + fi + ip netns exec ns0 nft flush chain ip6 nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2" return $lret } test_masquerade() { + local natflags=$1 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -417,7 +425,7 @@ test_masquerade() ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: canot ping ns1 from ns2" + echo "ERROR: cannot ping ns1 from ns2 $natflags" lret=1 fi @@ -443,13 +451,13 @@ ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags" lret=1 fi @@ -485,13 +493,19 @@ EOF fi done + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)" + lret=1 + fi + ip netns exec ns0 nft flush chain ip nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2" return $lret } @@ -750,8 +764,12 @@ test_local_dnat test_local_dnat6 reset_counters -test_masquerade -test_masquerade6 +test_masquerade "" +test_masquerade6 "" + +reset_counters +test_masquerade "fully-random" +test_masquerade6 "fully-random" reset_counters test_redirect -- cgit v1.2.3 From b3a64096c5ee90e76bb27c72f2af9d2d28e04f22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 12 Apr 2019 23:08:32 +0200 Subject: MIPS: perf: ath79: Fix perfcount IRQ assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a1e8783db8e0d58891681bc1e6d9ada66eae8e20 ] Currently it's not possible to use perf on ath79 due to genirq flags mismatch happening on static virtual IRQ 13 which is used for performance counters hardware IRQ 5. On TP-Link Archer C7v5: CPU0 2: 0 MIPS 2 ath9k 4: 318 MIPS 4 19000000.eth 7: 55034 MIPS 7 timer 8: 1236 MISC 3 ttyS0 12: 0 INTC 1 ehci_hcd:usb1 13: 0 gpio-ath79 2 keys 14: 0 gpio-ath79 5 keys 15: 31 AR724X PCI 1 ath10k_pci $ perf top genirq: Flags mismatch irq 13. 00014c83 (mips_perf_pmu) vs. 00002003 (keys) On TP-Link Archer C7v4: CPU0 4: 0 MIPS 4 19000000.eth 5: 7135 MIPS 5 1a000000.eth 7: 98379 MIPS 7 timer 8: 30 MISC 3 ttyS0 12: 90028 INTC 0 ath9k 13: 5520 INTC 1 ehci_hcd:usb1 14: 4623 INTC 2 ehci_hcd:usb2 15: 32844 AR724X PCI 1 ath10k_pci 16: 0 gpio-ath79 16 keys 23: 0 gpio-ath79 23 keys $ perf top genirq: Flags mismatch irq 13. 00014c80 (mips_perf_pmu) vs. 00000080 (ehci_hcd:usb1) This problem is happening, because currently statically assigned virtual IRQ 13 for performance counters is not claimed during the initialization of MIPS PMU during the bootup, so the IRQ subsystem doesn't know, that this interrupt isn't available for further use. So this patch fixes the issue by simply booking hardware IRQ 5 for MIPS PMU. Tested-by: Kevin 'ldir' Darbyshire-Bryant Signed-off-by: Petr Štetiar Acked-by: John Crispin Acked-by: Marc Zyngier Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: Thomas Gleixner Cc: Jason Cooper Signed-off-by: Sasha Levin --- arch/mips/ath79/setup.c | 6 ------ drivers/irqchip/irq-ath79-misc.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 9728abcb18fa..c04ae685003f 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -211,12 +211,6 @@ const char *get_system_type(void) return ath79_sys_type; } -int get_c0_perfcount_int(void) -{ - return ATH79_MISC_IRQ(5); -} -EXPORT_SYMBOL_GPL(get_c0_perfcount_int); - unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c index aa7290784636..0390603170b4 100644 --- a/drivers/irqchip/irq-ath79-misc.c +++ b/drivers/irqchip/irq-ath79-misc.c @@ -22,6 +22,15 @@ #define AR71XX_RESET_REG_MISC_INT_ENABLE 4 #define ATH79_MISC_IRQ_COUNT 32 +#define ATH79_MISC_PERF_IRQ 5 + +static int ath79_perfcount_irq; + +int get_c0_perfcount_int(void) +{ + return ath79_perfcount_irq; +} +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); static void ath79_misc_irq_handler(struct irq_desc *desc) { @@ -113,6 +122,8 @@ static void __init ath79_misc_intc_domain_init( { void __iomem *base = domain->host_data; + ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ); + /* Disable and clear all interrupts */ __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS); -- cgit v1.2.3 From 143c8279955e7a8c614d5912d001e2478b1af271 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Wed, 10 Apr 2019 10:59:45 +0300 Subject: IB/mlx5: Fix scatter to CQE in DCT QP creation [ Upstream commit 7249c8ea227a582c14f63e9e8853eb7369122f10 ] When scatter to CQE is enabled on a DCT QP it corrupts the mailbox command since it tried to treat it as as QP create mailbox command instead of a DCT create command. The corrupted mailbox command causes userspace to malfunction as the device doesn't create the QP as expected. A new mlx5 capability is exposed to user-space which ensures that it will not enable the feature on DCT without this fix in the kernel. Fixes: 5d6ff1babe78 ("IB/mlx5: Support scatter to CQE for DC transport type") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/qp.c | 11 +++++++---- include/uapi/rdma/mlx5-abi.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 497181f5ba09..c6bdd0d16c4b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1025,6 +1025,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, qp_packet_based)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; + + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } if (field_avail(typeof(resp), sw_parsing_caps, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7db778d96ef5..afc88e6e172e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1724,13 +1724,16 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); - if (rcqe_sz == 128) { - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + if (init_attr->qp_type == MLX5_IB_QPT_DCT) { + if (rcqe_sz == 128) + MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + return; } - if (init_attr->qp_type != MLX5_IB_QPT_DCT) - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); + MLX5_SET(qpc, qpc, cs_res, + rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : + MLX5_RES_SCAT_DATA32_CQE); } static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 87b3198f4b5d..f4d4010b7e3e 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -238,6 +238,7 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, + MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3, }; enum mlx5_ib_tunnel_offloads { -- cgit v1.2.3 From 1330679d64f83741b1512eeb87d2ea305ea5c7f1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Apr 2019 18:29:13 +0200 Subject: s390: ctcm: fix ctcm_new_device error return code [ Upstream commit 27b141fc234a3670d21bd742c35d7205d03cbb3a ] clang points out that the return code from this function is undefined for one of the error paths: ../drivers/s390/net/ctcm_main.c:1595:7: warning: variable 'result' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1638:9: note: uninitialized use occurs here return result; ^~~~~~ ../drivers/s390/net/ctcm_main.c:1595:3: note: remove the 'if' if its condition is always false if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1539:12: note: initialize the variable 'result' to silence this warning int result; ^ Make it return -ENODEV here, as in the related failure cases. gcc has a known bug in underreporting some of these warnings when it has already eliminated the assignment of the return code based on some earlier optimization step. Reviewed-by: Nathan Chancellor Signed-off-by: Arnd Bergmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/ctcm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 7617d21cb296..f63c5c871d3d 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1595,6 +1595,7 @@ static int ctcm_new_device(struct ccwgroup_device *cgdev) if (priv->channel[direction] == NULL) { if (direction == CTCM_WRITE) channel_free(priv->channel[CTCM_READ]); + result = -ENODEV; goto out_dev; } priv->channel[direction]->netdev = dev; -- cgit v1.2.3 From 3000bdec55c83f85d457d669a9f633f0594c21ea Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:27:26 +0200 Subject: drm/sun4i: Set device driver data at bind time for use in unbind [ Upstream commit 02b92adbe33e6dbd15dc6e32540b22f47c4ff0a2 ] Our sun4i_drv_unbind gets the drm device using dev_get_drvdata. However, that driver data is never set in sun4i_drv_bind. Set it there to avoid getting a NULL pointer at unbind time. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-3-paul.kocialkowski@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 9e4c375ccc96..c6b65a969979 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -85,6 +85,8 @@ static int sun4i_drv_bind(struct device *dev) ret = -ENOMEM; goto free_drm; } + + dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list); -- cgit v1.2.3 From 7ba51c0e84b385eefb145afe584180c227dbce99 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:27:27 +0200 Subject: drm/sun4i: Fix component unbinding and component master deletion [ Upstream commit f5a9ed867c83875546c9aadd4ed8e785e9adcc3c ] For our component-backed driver to be properly removed, we need to delete the component master in sun4i_drv_remove and make sure to call component_unbind_all in the master's unbind so that all components are unbound when the master is. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-4-paul.kocialkowski@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index c6b65a969979..9a5713fa03b2 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -148,6 +148,8 @@ static void sun4i_drv_unbind(struct device *dev) drm_mode_config_cleanup(drm); of_reserved_mem_device_release(dev); drm_dev_put(drm); + + component_unbind_all(dev, NULL); } static const struct component_master_ops sun4i_drv_master_ops = { @@ -395,6 +397,8 @@ static int sun4i_drv_probe(struct platform_device *pdev) static int sun4i_drv_remove(struct platform_device *pdev) { + component_master_del(&pdev->dev, &sun4i_drv_master_ops); + return 0; } -- cgit v1.2.3 From 2d83e90c2375db836438d14c7d9ecf1e845c7400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Wed, 17 Apr 2019 22:09:12 +0200 Subject: of_net: Fix residues after of_get_nvmem_mac_address removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 36ad7022536e0c65f8baeeaa5efde11dec44808a ] I've discovered following discrepancy in the bindings/net/ethernet.txt documentation, where it states following: - nvmem-cells: phandle, reference to an nvmem node for the MAC address; - nvmem-cell-names: string, should be "mac-address" if nvmem is to be.. which is actually misleading and confusing. There are only two ethernet drivers in the tree, cadence/macb and davinci which supports this properties. This nvmem-cell* properties were introduced in commit 9217e566bdee ("of_net: Implement of_get_nvmem_mac_address helper"), but commit afa64a72b862 ("of: net: kill of_get_nvmem_mac_address()") forget to properly clean up this parts. So this patch fixes the documentation by moving the nvmem-cell* properties at the appropriate places. While at it, I've removed unused include as well. Cc: Bartosz Golaszewski Fixes: afa64a72b862 ("of: net: kill of_get_nvmem_mac_address()") Signed-off-by: Petr Štetiar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/net/davinci_emac.txt | 2 ++ Documentation/devicetree/bindings/net/ethernet.txt | 2 -- Documentation/devicetree/bindings/net/macb.txt | 4 ++++ drivers/of/of_net.c | 1 - 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt index 24c5cdaba8d2..ca83dcc84fb8 100644 --- a/Documentation/devicetree/bindings/net/davinci_emac.txt +++ b/Documentation/devicetree/bindings/net/davinci_emac.txt @@ -20,6 +20,8 @@ Required properties: Optional properties: - phy-handle: See ethernet.txt file in the same directory. If absent, davinci_emac driver defaults to 100/FULL. +- nvmem-cells: phandle, reference to an nvmem node for the MAC address +- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used - ti,davinci-rmii-en: 1 byte, 1 means use RMII - ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM? diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index cfc376bc977a..2974e63ba311 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -10,8 +10,6 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt. the boot program; should be used in cases where the MAC address assigned to the device by the boot program is different from the "local-mac-address" property; -- nvmem-cells: phandle, reference to an nvmem node for the MAC address; -- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used; - max-speed: number, specifies maximum speed in Mbit/s supported by the device; - max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than the maximum frame size (there's contradiction in the Devicetree diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index 3e17ac1d5d58..1a914116f4c2 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -26,6 +26,10 @@ Required properties: Optional elements: 'tsu_clk' - clocks: Phandles to input clocks. +Optional properties: +- nvmem-cells: phandle, reference to an nvmem node for the MAC address +- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used + Optional properties for PHY child node: - reset-gpios : Should specify the gpio for phy reset - magic-packet : If present, indicates that the hardware supports waking diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c index 810ab0fbcccb..d820f3edd431 100644 --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c @@ -7,7 +7,6 @@ */ #include #include -#include #include #include #include -- cgit v1.2.3 From 1a20185157b5b4204922cd7be240d83d451ef6a2 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Thu, 18 Apr 2019 19:57:25 +0800 Subject: selftests/net: correct the return value for run_netsocktests [ Upstream commit 30c04d796b693e22405c38e9b78e9a364e4c77e6 ] The run_netsocktests will be marked as passed regardless the actual test result from the ./socket: selftests: net: run_netsocktests ======================================== -------------------- running socket test -------------------- [FAIL] ok 1..6 selftests: net: run_netsocktests [PASS] This is because the test script itself has been successfully executed. Fix this by exit 1 when the test failed. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/run_netsocktests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index b093f39c298c..14e41faf2c57 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -7,7 +7,7 @@ echo "--------------------" ./socket if [ $? -ne 0 ]; then echo "[FAIL]" + exit 1 else echo "[PASS]" fi - -- cgit v1.2.3 From 226ef4f27a451262bd6199fd0022aa22c4065393 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 19 Apr 2019 19:01:13 +0800 Subject: selftests/net: correct the return value for run_afpackettests [ Upstream commit 8c03557c3f25271e62e39154af66ebdd1b59c9ca ] The run_afpackettests will be marked as passed regardless the return value of those sub-tests in the script: -------------------- running psock_tpacket test -------------------- [FAIL] selftests: run_afpackettests [PASS] Fix this by changing the return value for each tests. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/run_afpackettests | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 2dc95fda7ef7..ea5938ec009a 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -6,12 +6,14 @@ if [ $(id -u) != 0 ]; then exit 0 fi +ret=0 echo "--------------------" echo "running psock_fanout test" echo "--------------------" ./in_netns.sh ./psock_fanout if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi @@ -22,6 +24,7 @@ echo "--------------------" ./in_netns.sh ./psock_tpacket if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi @@ -32,6 +35,8 @@ echo "--------------------" ./in_netns.sh ./txring_overwrite if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi +exit $ret -- cgit v1.2.3 From ddb632889faeb7fb462b3e3e814f0a51186a8ad4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Apr 2019 02:17:23 +0200 Subject: netfilter: never get/set skb->tstamp [ Upstream commit 916f6efae62305796e012e7c3a7884a267cbacbf ] setting net.netfilter.nf_conntrack_timestamp=1 breaks xmit with fq scheduler. skb->tstamp might be "refreshed" using ktime_get_real(), but fq expects CLOCK_MONOTONIC. This patch removes all places in netfilter that check/set skb->tstamp: 1. To fix the bogus "start" time seen with conntrack timestamping for outgoing packets, never use skb->tstamp and always use current time. 2. In nfqueue and nflog, only use skb->tstamp for incoming packets, as determined by current hook (prerouting, input, forward). 3. xt_time has to use system clock as well rather than skb->tstamp. We could still use skb->tstamp for prerouting/input/foward, but I see no advantage to make this conditional. Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Cc: Eric Dumazet Reported-by: Michal Soltys Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 7 ++----- net/netfilter/nfnetlink_log.c | 2 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/xt_time.c | 23 ++++++++++++++--------- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1982faf21ebb..d7ac2f82bb6d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -983,12 +983,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* set conntrack timestamp, if enabled. */ tstamp = nf_conn_tstamp_find(ct); - if (tstamp) { - if (skb->tstamp == 0) - __net_timestamp(skb); + if (tstamp) + tstamp->start = ktime_get_real_ns(); - tstamp->start = ktime_to_ns(skb->tstamp); - } /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers * guarantee that no other CPU can find the conntrack before the above diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1f9c5303f02..0b3347570265 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -540,7 +540,7 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; } - if (skb->tstamp) { + if (hooknum <= NF_INET_FORWARD && skb->tstamp) { struct nfulnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(skb->tstamp); ts.sec = cpu_to_be64(kts.tv_sec); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 0dcc3592d053..e057b2961d31 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -582,7 +582,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (nfqnl_put_bridge(entry, skb) < 0) goto nla_put_failure; - if (entskb->tstamp) { + if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) { struct nfqnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index c13bcd0ab491..8dbb4d48f2ed 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -163,19 +163,24 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) s64 stamp; /* - * We cannot use get_seconds() instead of __net_timestamp() here. + * We need real time here, but we can neither use skb->tstamp + * nor __net_timestamp(). + * + * skb->tstamp and skb->skb_mstamp_ns overlap, however, they + * use different clock types (real vs monotonic). + * * Suppose you have two rules: - * 1. match before 13:00 - * 2. match after 13:00 + * 1. match before 13:00 + * 2. match after 13:00 + * * If you match against processing time (get_seconds) it * may happen that the same packet matches both rules if - * it arrived at the right moment before 13:00. + * it arrived at the right moment before 13:00, so it would be + * better to check skb->tstamp and set it via __net_timestamp() + * if needed. This however breaks outgoing packets tx timestamp, + * and causes them to get delayed forever by fq packet scheduler. */ - if (skb->tstamp == 0) - __net_timestamp((struct sk_buff *)skb); - - stamp = ktime_to_ns(skb->tstamp); - stamp = div_s64(stamp, NSEC_PER_SEC); + stamp = get_seconds(); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ -- cgit v1.2.3 From d4dc7d99b111b2a14c3054175998f5d7664455c9 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 17 Apr 2019 09:49:44 -0700 Subject: netfilter: fix nf_l4proto_log_invalid to log invalid packets [ Upstream commit d48668052b2603b6262459625c86108c493588dd ] It doesn't log a packet if sysctl_log_invalid isn't equal to protonum OR sysctl_log_invalid isn't equal to IPPROTO_RAW. This sentence is always true. I believe we need to replace OR to AND. Cc: Florian Westphal Fixes: c4f3db1595827 ("netfilter: conntrack: add and use nf_l4proto_log_invalid") Signed-off-by: Andrei Vagin Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 859f5d07a915..78361e462e80 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -86,7 +86,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb, struct va_format vaf; va_list args; - if (net->ct.sysctl_log_invalid != protonum || + if (net->ct.sysctl_log_invalid != protonum && net->ct.sysctl_log_invalid != IPPROTO_RAW) return; -- cgit v1.2.3 From d0f8faa2f540fcc68745ae3da1318d8f9a5ca5a5 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 1 Apr 2019 20:38:19 +0200 Subject: dmaengine: bcm2835: Avoid GFP_KERNEL in device_prep_slave_sg [ Upstream commit f147384774a7b24dda4783a3dcd61af272757ea8 ] The commit af19b7ce76ba ("mmc: bcm2835: Avoid possible races on data requests") introduces a possible circular locking dependency, which is triggered by swapping to the sdhost interface. So instead of reintroduce the race condition again, we could also avoid this situation by using GFP_NOWAIT for the allocation of the DMA buffer descriptors. Reported-by: Aaro Koskinen Signed-off-by: Stefan Wahren Fixes: af19b7ce76ba ("mmc: bcm2835: Avoid possible races on data requests") Link: http://lists.infradead.org/pipermail/linux-rpi-kernel/2019-March/008615.html Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/bcm2835-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index ae10f5614f95..bf5119203637 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -674,7 +674,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( d = bcm2835_dma_create_cb_chain(chan, direction, false, info, extra, frames, src, dst, 0, 0, - GFP_KERNEL); + GFP_NOWAIT); if (!d) return NULL; -- cgit v1.2.3 From 28e4593bb1489f2dbb6406b4130337246e63d1a0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Apr 2019 23:59:25 -0700 Subject: arm64/module: ftrace: deal with place relative nature of PLTs [ Upstream commit 4e69ecf4da1ee0b2ac735e1f1bb13935acd5a38d ] Another bodge for the ftrace PLT code: plt_entries_equal() now takes the place relative nature of the ADRP/ADD based PLT entries into account, which means that a struct trampoline instance on the stack is no longer equal to the same set of opcodes in the module struct, given that they don't point to the same place in memory anymore. Work around this by using memcmp() in the ftrace PLT handling code. Acked-by: Will Deacon Tested-by: dann frazier Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/ftrace.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 07b298120182..65a51331088e 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -103,10 +103,15 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * to be revisited if support for multiple ftrace entry points * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. + * + * Note that PLTs are place relative, and plt_entries_equal() + * checks whether they point to the same target. Here, we need + * to check if the actual opcodes are in fact identical, + * regardless of the offset in memory so use memcmp() instead. */ trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline); - if (!plt_entries_equal(mod->arch.ftrace_trampoline, - &trampoline)) { + if (memcmp(mod->arch.ftrace_trampoline, &trampoline, + sizeof(trampoline))) { if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; -- cgit v1.2.3 From 7680e881fdaa9513f1c3321ffb4ec9ac8f9208b3 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 12 Apr 2019 17:59:40 +0200 Subject: gpu: ipu-v3: dp: fix CSC handling [ Upstream commit d4fad0a426c6e26f48c9a7cdd21a7fe9c198d645 ] Initialize the flow input colorspaces to unknown and reset to that value when the channel gets disabled. This avoids the state getting mixed up with a previous mode. Also keep the CSC settings for the background flow intact when disabling the foreground flow. Root-caused-by: Jonathan Marek Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/ipu-v3/ipu-dp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c index 9b2b3fa479c4..5e44ff1f2085 100644 --- a/drivers/gpu/ipu-v3/ipu-dp.c +++ b/drivers/gpu/ipu-v3/ipu-dp.c @@ -195,7 +195,8 @@ int ipu_dp_setup_channel(struct ipu_dp *dp, ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs, DP_COM_CONF_CSC_DEF_BOTH); } else { - if (flow->foreground.in_cs == flow->out_cs) + if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN || + flow->foreground.in_cs == flow->out_cs) /* * foreground identical to output, apply color * conversion on background @@ -261,6 +262,8 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync) struct ipu_dp_priv *priv = flow->priv; u32 reg, csc; + dp->in_cs = IPUV3_COLORSPACE_UNKNOWN; + if (!dp->foreground) return; @@ -268,8 +271,9 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync) reg = readl(flow->base + DP_COM_CONF); csc = reg & DP_COM_CONF_CSC_DEF_MASK; - if (csc == DP_COM_CONF_CSC_DEF_FG) - reg &= ~DP_COM_CONF_CSC_DEF_MASK; + reg &= ~DP_COM_CONF_CSC_DEF_MASK; + if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG) + reg |= DP_COM_CONF_CSC_DEF_BG; reg &= ~DP_COM_CONF_FG_EN; writel(reg, flow->base + DP_COM_CONF); @@ -347,6 +351,8 @@ int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) mutex_init(&priv->mutex); for (i = 0; i < IPUV3_NUM_FLOWS; i++) { + priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN; + priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN; priv->flow[i].foreground.foreground = true; priv->flow[i].base = priv->base + ipu_dp_flow_base[i]; priv->flow[i].priv = priv; -- cgit v1.2.3 From 0cb06e339cee49f6a1f865cbba2a19306cb9200c Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 12 Apr 2019 17:59:41 +0200 Subject: drm/imx: don't skip DP channel disable for background plane [ Upstream commit 7bcde275eb1d0ac8793c77c7e666a886eb16633d ] In order to make sure that the plane color space gets reset correctly. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/drm/imx/ipuv3-crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 058b53c0aa7e..1bb3e598cb84 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -70,7 +70,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc, if (disable_partial) ipu_plane_disable(ipu_crtc->plane[1], true); if (disable_full) - ipu_plane_disable(ipu_crtc->plane[0], false); + ipu_plane_disable(ipu_crtc->plane[0], true); } static void ipu_crtc_atomic_disable(struct drm_crtc *crtc, -- cgit v1.2.3 From dc41fe5d6fb56a13732fda728651b19045911dd7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 23 Apr 2019 17:09:38 +0100 Subject: ARM: fix function graph tracer and unwinder dependencies [ Upstream commit 503621628b32782a07b2318e4112bd4372aa3401 ] Naresh Kamboju recently reported that the function-graph tracer crashes on ARM. The function-graph tracer assumes that the kernel is built with frame pointers. We explicitly disabled the function-graph tracer when building Thumb2, since the Thumb2 ABI doesn't have frame pointers. We recently changed the way the unwinder method was selected, which seems to have made it more likely that we can end up with the function- graph tracer enabled but without the kernel built with frame pointers. Fix up the function graph tracer dependencies so the option is not available when we have no possibility of having frame pointers, and adjust the dependencies on the unwinder option to hide the non-frame pointer unwinder options if the function-graph tracer is enabled. Reviewed-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/Kconfig | 2 +- arch/arm/Kconfig.debug | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e5d56d9b712c..3b353af9c48d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -69,7 +69,7 @@ config ARM select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_GENERIC_DMA_COHERENT diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 6d6e0330930b..e388af4594a6 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -47,8 +47,8 @@ config DEBUG_WX choice prompt "Choose kernel unwinder" - default UNWINDER_ARM if AEABI && !FUNCTION_GRAPH_TRACER - default UNWINDER_FRAME_POINTER if !AEABI || FUNCTION_GRAPH_TRACER + default UNWINDER_ARM if AEABI + default UNWINDER_FRAME_POINTER if !AEABI help This determines which method will be used for unwinding kernel stack traces for panics, oopses, bugs, warnings, perf, /proc//stack, @@ -65,7 +65,7 @@ config UNWINDER_FRAME_POINTER config UNWINDER_ARM bool "ARM EABI stack unwinder" - depends on AEABI + depends on AEABI && !FUNCTION_GRAPH_TRACER select ARM_UNWIND help This option enables stack unwinding support in the kernel -- cgit v1.2.3 From 4c6df58231f8dde91bcd9fe712c4314d6eec06e1 Mon Sep 17 00:00:00 2001 From: Tigran Tadevosyan Date: Fri, 5 Apr 2019 14:16:13 +0100 Subject: ARM: 8856/1: NOMMU: Fix CCR register faulty initialization when MPU is disabled [ Upstream commit c3143967807adb1357c36b68a7563fc0c4e1f615 ] When CONFIG_ARM_MPU is not defined, the base address of v7M SCB register is not initialized with correct value. This prevents enabling I/D caches when the L1 cache poilcy is applied in kernel. Fixes: 3c24121039c9da14692eb48f6e39565b28c0f3cf ("ARM: 8756/1: NOMMU: Postpone MPU activation till __after_proc_init") Signed-off-by: Tigran Tadevosyan Signed-off-by: Vladimir Murzin Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/head-nommu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index ec29de250076..cab89479d15e 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -133,9 +133,9 @@ __secondary_data: */ .text __after_proc_init: -#ifdef CONFIG_ARM_MPU M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) +#ifdef CONFIG_ARM_MPU M_CLASS(ldr r3, [r12, 0x50]) AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0 and r3, r3, #(MMFR0_PMSA) @ PMSA field -- cgit v1.2.3 From 943609acd62856e3277f6cdbc6e5945fe07759f0 Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Mon, 22 Apr 2019 21:08:03 +0200 Subject: spi: Micrel eth switch: declare missing of table [ Upstream commit 2f23a2a768bee7ad2ff1e9527c3f7e279e794a46 ] Add missing table for SPI driver relying on SPI device match since compatible is in a DT binding or in a DTS. Before this patch: modinfo drivers/net/phy/spi_ks8995.ko | grep alias alias: spi:ksz8795 alias: spi:ksz8864 alias: spi:ks8995 After this patch: modinfo drivers/net/phy/spi_ks8995.ko | grep alias alias: spi:ksz8795 alias: spi:ksz8864 alias: spi:ks8995 alias: of:N*T*Cmicrel,ksz8795C* alias: of:N*T*Cmicrel,ksz8795 alias: of:N*T*Cmicrel,ksz8864C* alias: of:N*T*Cmicrel,ksz8864 alias: of:N*T*Cmicrel,ks8995C* alias: of:N*T*Cmicrel,ks8995 Reported-by: Javier Martinez Canillas Signed-off-by: Daniel Gomez Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/spi_ks8995.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index f17b3441779b..d8ea4147dfe7 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -162,6 +162,14 @@ static const struct spi_device_id ks8995_id[] = { }; MODULE_DEVICE_TABLE(spi, ks8995_id); +static const struct of_device_id ks8895_spi_of_match[] = { + { .compatible = "micrel,ks8995" }, + { .compatible = "micrel,ksz8864" }, + { .compatible = "micrel,ksz8795" }, + { }, + }; +MODULE_DEVICE_TABLE(of, ks8895_spi_of_match); + static inline u8 get_chip_id(u8 val) { return (val >> ID1_CHIPID_S) & ID1_CHIPID_M; @@ -529,6 +537,7 @@ static int ks8995_remove(struct spi_device *spi) static struct spi_driver ks8995_driver = { .driver = { .name = "spi-ks8995", + .of_match_table = of_match_ptr(ks8895_spi_of_match), }, .probe = ks8995_probe, .remove = ks8995_remove, -- cgit v1.2.3 From d3a9275364d9c4257a76fdc2290cc19586b5acf6 Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Mon, 22 Apr 2019 21:08:04 +0200 Subject: spi: ST ST95HF NFC: declare missing of table [ Upstream commit d04830531d0c4a99c897a44038e5da3d23331d2f ] Add missing table for SPI driver relying on SPI device match since compatible is in a DT binding or in a DTS. Before this patch: modinfo drivers/nfc/st95hf/st95hf.ko | grep alias alias: spi:st95hf After this patch: modinfo drivers/nfc/st95hf/st95hf.ko | grep alias alias: spi:st95hf alias: of:N*T*Cst,st95hfC* alias: of:N*T*Cst,st95hf Reported-by: Javier Martinez Canillas Signed-off-by: Daniel Gomez Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/st95hf/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index 2b26f762fbc3..01acb6e53365 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1074,6 +1074,12 @@ static const struct spi_device_id st95hf_id[] = { }; MODULE_DEVICE_TABLE(spi, st95hf_id); +static const struct of_device_id st95hf_spi_of_match[] = { + { .compatible = "st,st95hf" }, + { }, +}; +MODULE_DEVICE_TABLE(of, st95hf_spi_of_match); + static int st95hf_probe(struct spi_device *nfc_spi_dev) { int ret; @@ -1260,6 +1266,7 @@ static struct spi_driver st95hf_driver = { .driver = { .name = "st95hf", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(st95hf_spi_of_match), }, .id_table = st95hf_id, .probe = st95hf_probe, -- cgit v1.2.3 From 2455f6cbd19b99642f8022696fbe2094e32a813e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 15 Apr 2019 12:00:42 -0400 Subject: ceph: handle the case where a dentry has been renamed on outstanding req [ Upstream commit 4b8222870032715f9d995f3eb7c7acd8379a275d ] It's possible for us to issue a lookup to revalidate a dentry concurrently with a rename. If done in the right order, then we could end up processing dentry info in the reply that no longer reflects the state of the dentry. If req->r_dentry->d_name differs from the one in the trace, then just ignore the trace in the reply. We only need to do this however if the parent's i_rwsem is not held. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/inode.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f7f9e305aaf8..fd3db2e112d6 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1152,6 +1152,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) return 0; } +static int d_name_cmp(struct dentry *dentry, const char *name, size_t len) +{ + int ret; + + /* take d_lock to ensure dentry->d_name stability */ + spin_lock(&dentry->d_lock); + ret = dentry->d_name.len - len; + if (!ret) + ret = memcmp(dentry->d_name.name, name, len); + spin_unlock(&dentry->d_lock); + return ret; +} + /* * Incorporate results into the local cache. This is either just * one inode, or a directory, dentry, and possibly linked-to inode (e.g., @@ -1401,7 +1414,8 @@ retry_lookup: err = splice_dentry(&req->r_dentry, in); if (err < 0) goto done; - } else if (rinfo->head->is_dentry) { + } else if (rinfo->head->is_dentry && + !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) { struct ceph_vino *ptvino = NULL; if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || -- cgit v1.2.3 From f79084e5372b449c09d0d3771b06a3f010b924b6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Apr 2019 10:52:20 +1000 Subject: Revert "drm/virtio: drop prime import/export callbacks" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a0cecc23cfcbf2626497a8c8770856dd56b67917 ] This patch does more harm than good, as it breaks both Xwayland and gnome-shell with X11. Xwayland requires DRI3 & DRI3 requires PRIME. X11 crash for obscure double-free reason which are hard to debug (starting X11 by hand doesn't trigger the crash). I don't see an apparent problem implementing those stub prime functions, they may return an error at run-time, and it seems to be handled fine by GNOME at least. This reverts commit b318e3ff7ca065d6b107e424c85a63d7a6798a69. [airlied: This broke userspace for virtio-gpus, and regressed things from DRI3 to DRI2. This brings back the original problem, but it's better than regressions.] Fixes: b318e3ff7ca065d6b107e424c85a63d7a6798a ("drm/virtio: drop prime import/export callbacks") Signed-off-by: Marc-André Lureau Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/virtio/virtgpu_drv.c | 4 ++++ drivers/gpu/drm/virtio/virtgpu_drv.h | 4 ++++ drivers/gpu/drm/virtio/virtgpu_prime.c | 12 ++++++++++++ 3 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 2d1aaca49105..f7f32a885af7 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -127,10 +127,14 @@ static struct drm_driver driver = { #if defined(CONFIG_DEBUG_FS) .debugfs_init = virtio_gpu_debugfs_init, #endif + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = virtgpu_gem_prime_pin, .gem_prime_unpin = virtgpu_gem_prime_unpin, + .gem_prime_get_sg_table = virtgpu_gem_prime_get_sg_table, + .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table, .gem_prime_vmap = virtgpu_gem_prime_vmap, .gem_prime_vunmap = virtgpu_gem_prime_vunmap, .gem_prime_mmap = virtgpu_gem_prime_mmap, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 0c15000f926e..1deb41d42ea4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -372,6 +372,10 @@ int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait); /* virtgpu_prime.c */ int virtgpu_gem_prime_pin(struct drm_gem_object *obj); void virtgpu_gem_prime_unpin(struct drm_gem_object *obj); +struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object *virtgpu_gem_prime_import_sg_table( + struct drm_device *dev, struct dma_buf_attachment *attach, + struct sg_table *sgt); void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj); void virtgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int virtgpu_gem_prime_mmap(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index c59ec34c80a5..eb51a78e1199 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -39,6 +39,18 @@ void virtgpu_gem_prime_unpin(struct drm_gem_object *obj) WARN_ONCE(1, "not implemented"); } +struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) +{ + return ERR_PTR(-ENODEV); +} + +struct drm_gem_object *virtgpu_gem_prime_import_sg_table( + struct drm_device *dev, struct dma_buf_attachment *attach, + struct sg_table *table) +{ + return ERR_PTR(-ENODEV); +} + void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj) { struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); -- cgit v1.2.3 From 21a3f7c2c76c71d49452437735a852e3fa923c3a Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Wed, 24 Apr 2019 11:04:13 +0200 Subject: drm/sun4i: Unbind components before releasing DRM and memory [ Upstream commit e02bc29b2cfa7806830d6da8b2322cddd67e8dfe ] Our components may still be using the DRM device driver (if only to access our driver's private data), so make sure to unbind them before the final drm_dev_put. Also release our reserved memory after component unbind instead of before to match reverse creation order. Fixes: f5a9ed867c83 ("drm/sun4i: Fix component unbinding and component master deletion") Signed-off-by: Paul Kocialkowski Reviewed-by: Chen-Yu Tsai Link: https://patchwork.freedesktop.org/patch/msgid/20190424090413.6918-1-paul.kocialkowski@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 9a5713fa03b2..f8bf5bbec2df 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -146,10 +146,11 @@ static void sun4i_drv_unbind(struct device *dev) drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); drm_mode_config_cleanup(drm); - of_reserved_mem_device_release(dev); - drm_dev_put(drm); component_unbind_all(dev, NULL); + of_reserved_mem_device_release(dev); + + drm_dev_put(drm); } static const struct component_master_ops sun4i_drv_master_ops = { -- cgit v1.2.3 From 6a2abf951ed3970db129bf060863662121d4784f Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Fri, 5 Apr 2019 10:31:09 -0700 Subject: Input: snvs_pwrkey - make it depend on ARCH_MXC [ Upstream commit f06eba72274788db6a43012a05a99915c0283aef ] The SNVS power key is not only used on i.MX6SX and i.MX7D, it is also used by i.MX6UL and NXP's latest ARMv8 based i.MX8M series SOC. So update the config dependency to use ARCH_MXC, and add the COMPILE_TEST too. Signed-off-by: Jacky Bai Reviewed-by: Dong Aisheng Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index a878351f1643..52d7f55fca32 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -420,7 +420,7 @@ config KEYBOARD_MPR121 config KEYBOARD_SNVS_PWRKEY tristate "IMX SNVS Power Key Driver" - depends on SOC_IMX6SX || SOC_IMX7D + depends on ARCH_MXC || COMPILE_TEST depends on OF help This is the snvs powerkey driver for the Freescale i.MX application -- cgit v1.2.3 From ec69b3c91b3bc83e5bdc70c846358e4fbb30c321 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 19 Apr 2019 07:39:00 +0000 Subject: Input: synaptics-rmi4 - fix possible double free [ Upstream commit bce1a78423961fce676ac65540a31b6ffd179e6d ] The RMI4 function structure has been released in rmi_register_function if error occurs. However, it will be released again in the function rmi_create_function, which may result in a double-free bug. Signed-off-by: Pan Bian Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/rmi4/rmi_driver.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index fc3ab93b7aea..7fb358f96195 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -860,7 +860,7 @@ static int rmi_create_function(struct rmi_device *rmi_dev, error = rmi_register_function(fn); if (error) - goto err_put_fn; + return error; if (pdt->function_number == 0x01) data->f01_container = fn; @@ -870,10 +870,6 @@ static int rmi_create_function(struct rmi_device *rmi_dev, list_add_tail(&fn->node, &data->function_list); return RMI_SCAN_CONTINUE; - -err_put_fn: - put_device(&fn->dev); - return error; } void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake) -- cgit v1.2.3 From fb53ebc9eb9a889b0db66d00932d31e3c7ad49b4 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 20 Apr 2019 12:09:39 +0800 Subject: net: vrf: Fix operation not supported when set vrf mac [ Upstream commit 6819e3f6d83a24777813b0d031ebe0861694db5a ] Vrf device is not able to change mac address now because lack of ndo_set_mac_address. Complete this in case some apps need to do this. Reported-by: Hui Wang Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vrf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index cd15c32b2e43..9ee4d7402ca2 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -875,6 +875,7 @@ static const struct net_device_ops vrf_netdev_ops = { .ndo_init = vrf_dev_init, .ndo_uninit = vrf_dev_uninit, .ndo_start_xmit = vrf_xmit, + .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = vrf_get_stats64, .ndo_add_slave = vrf_add_slave, .ndo_del_slave = vrf_del_slave, @@ -1274,6 +1275,7 @@ static void vrf_setup(struct net_device *dev) /* default to no qdisc; user can add if desired */ dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_NO_RX_HANDLER; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; /* VRF devices do not care about MTU, but if the MTU is set * too low then the ipv4 and ipv6 protocols are disabled -- cgit v1.2.3 From 26f70c4e0aef85b2bf3bfedb5061bab7d9a5d8a2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 24 Apr 2019 15:59:33 +0200 Subject: gpio: Fix gpiochip_add_data_with_key() error path [ Upstream commit 357798909164bf423eac6a78ff7da7e98d2d7f7f ] The err_remove_chip block is too coarse, and may perform cleanup that must not be done. E.g. if of_gpiochip_add() fails, of_gpiochip_remove() is still called, causing: OF: ERROR: Bad of_node_put() on /soc/gpio@e6050000 CPU: 1 PID: 20 Comm: kworker/1:1 Not tainted 5.1.0-rc2-koelsch+ #407 Hardware name: Generic R-Car Gen2 (Flattened Device Tree) Workqueue: events deferred_probe_work_func [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0x9c) [] (dump_stack) from [] (kobject_put+0x94/0xbc) [] (kobject_put) from [] (gpiochip_add_data_with_key+0x8d8/0xa3c) [] (gpiochip_add_data_with_key) from [] (gpio_rcar_probe+0x1d4/0x314) [] (gpio_rcar_probe) from [] (platform_drv_probe+0x48/0x94) and later, if a GPIO consumer tries to use a GPIO from a failed controller: WARNING: CPU: 0 PID: 1 at lib/refcount.c:156 kobject_get+0x38/0x4c refcount_t: increment on 0; use-after-free. Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.1.0-rc2-koelsch+ #407 Hardware name: Generic R-Car Gen2 (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0x9c) [] (dump_stack) from [] (__warn+0xd0/0xec) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (kobject_get+0x38/0x4c) [] (kobject_get) from [] (of_node_get+0x14/0x1c) [] (of_node_get) from [] (of_find_node_by_phandle+0xc0/0xf0) [] (of_find_node_by_phandle) from [] (of_phandle_iterator_next+0x68/0x154) [] (of_phandle_iterator_next) from [] (__of_parse_phandle_with_args+0x40/0xd0) [] (__of_parse_phandle_with_args) from [] (of_parse_phandle_with_args_map+0x100/0x3ac) [] (of_parse_phandle_with_args_map) from [] (of_get_named_gpiod_flags+0x38/0x380) [] (of_get_named_gpiod_flags) from [] (gpiod_get_from_of_node+0x24/0xd8) [] (gpiod_get_from_of_node) from [] (devm_fwnode_get_index_gpiod_from_child+0xa0/0x144) [] (devm_fwnode_get_index_gpiod_from_child) from [] (gpio_keys_probe+0x418/0x7bc) [] (gpio_keys_probe) from [] (platform_drv_probe+0x48/0x94) Fix this by splitting the cleanup block, and adding a missing call to gpiochip_irqchip_remove(). Fixes: 28355f81969962cf ("gpio: defer probe if pinctrl cannot be found") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mukesh Ojha Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index d1adfdf50fb3..34fbf879411f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1379,7 +1379,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) - goto err_remove_chip; + goto err_free_gpiochip_mask; status = of_gpiochip_add(chip); if (status) @@ -1387,7 +1387,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, status = gpiochip_init_valid_mask(chip); if (status) - goto err_remove_chip; + goto err_remove_of_chip; for (i = 0; i < chip->ngpio; i++) { struct gpio_desc *desc = &gdev->descs[i]; @@ -1415,14 +1415,18 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (gpiolib_initialized) { status = gpiochip_setup_dev(gdev); if (status) - goto err_remove_chip; + goto err_remove_acpi_chip; } return 0; -err_remove_chip: +err_remove_acpi_chip: acpi_gpiochip_remove(chip); +err_remove_of_chip: gpiochip_free_hogs(chip); of_gpiochip_remove(chip); +err_remove_chip: + gpiochip_irqchip_remove(chip); +err_free_gpiochip_mask: gpiochip_free_valid_mask(chip); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(chip); -- cgit v1.2.3 From 59c58e43e80fff1b4f3b0845b58dc7dbcab0d902 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Tue, 23 Apr 2019 17:30:26 +0800 Subject: RDMA/hns: Bugfix for mapping user db [ Upstream commit 2557fabd6e29f349bfa0ac13f38ac98aa5eafc74 ] When the maximum send wr delivered by the user is zero, the qp does not have a sq. When allocating the sq db buffer to store the user sq pi pointer and map it to the kernel mode, max_send_wr is used as the trigger condition, while the kernel does not consider the max_send_wr trigger condition when mapmping db. It will cause sq record doorbell map fail and create qp fail. The failed print information as follows: hns3 0000:7d:00.1: Send cmd: tail - 418, opcode - 0x8504, flag - 0x0011, retval - 0x0000 hns3 0000:7d:00.1: Send cmd: 0xe59dc000 0x00000000 0x00000000 0x00000000 0x00000116 0x0000ffff hns3 0000:7d:00.1: sq record doorbell map failed! hns3 0000:7d:00.1: Create RC QP failed Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space") Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 54031c5b53fa..89dd2380fc81 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -517,7 +517,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) { - if (attr->qp_type == IB_QPT_XRC_TGT) + if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr) return 0; return 1; -- cgit v1.2.3 From 61fadd8a5000780622ed647a39ab26cae480b365 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 25 Apr 2019 22:23:37 -0700 Subject: mm/memory_hotplug.c: drop memory device reference after find_memory_block() [ Upstream commit 89c02e69fc5245f8a2f34b58b42d43a737af1a5e ] Right now we are using find_memory_block() to get the node id for the pfn range to online. We are missing to drop a reference to the memory block device. While the device still gets unregistered via device_unregister(), resulting in no user visible problem, the device is never released via device_release(), resulting in a memory leak. Fix that by properly using a put_device(). Link: http://lkml.kernel.org/r/20190411110955.1430-1-david@redhat.com Fixes: d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug") Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Reviewed-by: Wei Yang Acked-by: Michal Hocko Acked-by: Pankaj Gupta Cc: David Hildenbrand Cc: Pavel Tatashin Cc: Qian Cai Cc: Arun KS Cc: Mathieu Malaterre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory_hotplug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 11593a03c051..7493f50ee880 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -858,6 +858,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ */ mem = find_memory_block(__pfn_to_section(pfn)); nid = mem->nid; + put_device(&mem->dev); /* associate pfn range with the zone */ zone = move_pfn_range(online_type, nid, pfn, nr_pages); -- cgit v1.2.3 From 073d8f286f34b50058ad8c3929a4421f4b0e449f Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 25 Apr 2019 22:23:58 -0700 Subject: mm/page_alloc.c: avoid potential NULL pointer dereference [ Upstream commit 8139ad043d632c0e9e12d760068a7a8e91659aa1 ] ac.preferred_zoneref->zone passed to alloc_flags_nofragment() can be NULL. 'zone' pointer unconditionally derefernced in alloc_flags_nofragment(). Bail out on NULL zone to avoid potential crash. Currently we don't see any crashes only because alloc_flags_nofragment() has another bug which allows compiler to optimize away all accesses to 'zone'. Link: http://lkml.kernel.org/r/20190423120806.3503-1-aryabinin@virtuozzo.com Fixes: 6bb154504f8b ("mm, page_alloc: spread allocations across zones before introducing fragmentation") Signed-off-by: Andrey Ryabinin Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/page_alloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eedb57f9b40b..d59be95ba45c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3385,6 +3385,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) alloc_flags |= ALLOC_KSWAPD; #ifdef CONFIG_ZONE_DMA32 + if (!zone) + return alloc_flags; + if (zone_idx(zone) != ZONE_NORMAL) goto out; -- cgit v1.2.3 From 4b84cde61ce9135b9c98641307b8a22f44609914 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 6 Mar 2019 14:35:15 -0500 Subject: bpf: only test gso type on gso packets [ Upstream commit 4c3024debf62de4c6ac6d3cb4c0063be21d4f652 ] BPF can adjust gso only for tcp bytestreams. Fail on other gso types. But only on gso packets. It does not touch this field if !gso_size. Fixes: b90efd225874 ("bpf: only adjust gso_size on bytestream protocols") Signed-off-by: Willem de Bruijn Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 4 ++-- net/core/filter.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bdb9563c64a0..b8679dcba96f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4212,10 +4212,10 @@ static inline bool skb_is_gso_sctp(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP; } +/* Note: Should be called only if skb_is_gso(skb) is true */ static inline bool skb_is_gso_tcp(const struct sk_buff *skb) { - return skb_is_gso(skb) && - skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6); + return skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6); } static inline void skb_gso_reset(struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index f7d0004fc160..ff07996515f2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2789,7 +2789,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2830,7 +2830,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2955,7 +2955,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2984,7 +2984,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); -- cgit v1.2.3 From 45e1075e04cb394d7d526bfaac87c14e9d6fd547 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Fri, 22 Mar 2019 12:37:35 +0000 Subject: net: sched: fix cleanup NULL pointer exception in act_mirr [ Upstream commit 064c5d6881e897077639e04973de26440ee205e6 ] A new mirred action is created by the tcf_mirred_init function. This contains a list head struct which is inserted into a global list on successful creation of a new action. However, after a creation, it is still possible to error out and call the tcf_idr_release function. This, in turn, calls the act_mirr cleanup function via __tcf_idr_release and __tcf_action_put. This cleanup function tries to delete the list entry which is as yet uninitialised, leading to a NULL pointer exception. Fix this by initialising the list entry on creation of a new action. Bug report: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 PGD 8000000840c73067 P4D 8000000840c73067 PUD 858dcc067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 32 PID: 5636 Comm: handler194 Tainted: G OE 5.0.0+ #186 Hardware name: Dell Inc. PowerEdge R730/0599V5, BIOS 1.3.6 06/03/2015 RIP: 0010:tcf_mirred_release+0x42/0xa7 [act_mirred] Code: f0 90 39 c0 e8 52 04 57 c8 48 c7 c7 b8 80 39 c0 e8 94 fa d4 c7 48 8b 93 d0 00 00 00 48 8b 83 d8 00 00 00 48 c7 c7 f0 90 39 c0 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 d0 00 RSP: 0018:ffffac4aa059f688 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff9dcd1b214d00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9dcd1fa165f8 RDI: ffffffffc03990f0 RBP: ffff9dccf9c7af80 R08: 0000000000000a3b R09: 0000000000000000 R10: ffff9dccfa11f420 R11: 0000000000000000 R12: 0000000000000001 R13: ffff9dcd16b433c0 R14: ffff9dcd1b214d80 R15: 0000000000000000 FS: 00007f441bfff700(0000) GS:ffff9dcd1fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000839e64004 CR4: 00000000001606e0 Call Trace: tcf_action_cleanup+0x59/0xca __tcf_action_put+0x54/0x6b __tcf_idr_release.cold.33+0x9/0x12 tcf_mirred_init.cold.20+0x22e/0x3b0 [act_mirred] tcf_action_init_1+0x3d0/0x4c0 tcf_action_init+0x9c/0x130 tcf_exts_validate+0xab/0xc0 fl_change+0x1ca/0x982 [cls_flower] tc_new_tfilter+0x647/0x8d0 ? load_balance+0x14b/0x9e0 rtnetlink_rcv_msg+0xe3/0x370 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1d4/0x2b0 ? rtnl_calcit.isra.31+0xf0/0xf0 netlink_rcv_skb+0x49/0x110 netlink_unicast+0x16f/0x210 netlink_sendmsg+0x1df/0x390 sock_sendmsg+0x36/0x40 ___sys_sendmsg+0x27b/0x2c0 ? futex_wake+0x80/0x140 ? do_futex+0x2b9/0xac0 ? ep_scan_ready_list.constprop.22+0x1f2/0x210 ? ep_poll+0x7a/0x430 __sys_sendmsg+0x47/0x80 do_syscall_64+0x55/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock") Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/act_mirred.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c8cf4d10c435..971dc03304f4 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -159,6 +159,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } m = to_mirred(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&m->tcfm_list); + spin_lock_bh(&m->tcf_lock); m->tcf_action = parm->action; m->tcfm_eaction = parm->eaction; -- cgit v1.2.3 From 5b4ef3c5faf9c4a08fb544addc1ae7dc483b83c5 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 1 Mar 2019 11:52:08 +0100 Subject: net: mvpp2: fix validate for PPv2.1 [ Upstream commit 8b318f30ab4ef9bbc1241e6f8c1db366dbd347f2 ] The Phylink validate function is the Marvell PPv2 driver makes a check on the GoP id. This is valid an has to be done when using PPv2.2 engines but makes no sense when using PPv2.1. The check done when using an RGMII interface makes sure the GoP id is not 0, but this breaks PPv2.1. Fixes it. Fixes: 0fb628f0f250 ("net: mvpp2: fix phylink handling of invalid PHY modes") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 931beac3359d..70031e2b2294 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4370,7 +4370,7 @@ static void mvpp2_phylink_validate(struct net_device *dev, case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - if (port->gop_id == 0) + if (port->priv->hw_version == MVPP22 && port->gop_id == 0) goto empty_set; break; default: -- cgit v1.2.3 From 92a9787bb38c3545e838d76b14e7e620d0450dc5 Mon Sep 17 00:00:00 2001 From: Damian Kos Date: Mon, 19 Nov 2018 15:14:14 +0000 Subject: drm/rockchip: fix for mailbox read validation. [ Upstream commit e4056bbb6719fe713bfc4030ac78e8e97ddf7574 ] This is basically the same fix as in commit fa68d4f8476b ("drm/rockchip: fix for mailbox read size") but for cdn_dp_mailbox_validate_receive function. See patchwork.kernel.org/patch/10671981/ for details. Signed-off-by: Damian Kos Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/1542640463-18332-1-git-send-email-dkos@cadence.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/cdn-dp-reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.c b/drivers/gpu/drm/rockchip/cdn-dp-reg.c index 5a485489a1e2..6c8b14fb1d2f 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-reg.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.c @@ -113,7 +113,7 @@ static int cdp_dp_mailbox_write(struct cdn_dp_device *dp, u8 val) static int cdn_dp_mailbox_validate_receive(struct cdn_dp_device *dp, u8 module_id, u8 opcode, - u8 req_size) + u16 req_size) { u32 mbox_size, i; u8 header[4]; -- cgit v1.2.3 From 202436fe081084781b2ef2bcbae907cc2ddb0634 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 22 Dec 2018 10:34:54 +0000 Subject: cw1200: fix missing unlock on error in cw1200_hw_scan() commit 51c8d24101c79ffce3e79137e2cee5dfeb956dd7 upstream. Add the missing unlock before return from function cw1200_hw_scan() in the error handling case. Fixes: 4f68ef64cd7f ("cw1200: Fix concurrency use-after-free bugs in cw1200_hw_scan()") Signed-off-by: Wei Yongjun Acked-by: Jia-Ju Bai Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/st/cw1200/scan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/st/cw1200/scan.c b/drivers/net/wireless/st/cw1200/scan.c index 0a9eac93dd01..71e9b91cf15b 100644 --- a/drivers/net/wireless/st/cw1200/scan.c +++ b/drivers/net/wireless/st/cw1200/scan.c @@ -84,8 +84,11 @@ int cw1200_hw_scan(struct ieee80211_hw *hw, frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0, req->ie_len); - if (!frame.skb) + if (!frame.skb) { + mutex_unlock(&priv->conf_mutex); + up(&priv->scan.lock); return -ENOMEM; + } if (req->ie_len) skb_put_data(frame.skb, req->ie, req->ie_len); -- cgit v1.2.3 From 14654a1f2e2bb9057aed4f6e668a65c81197a4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Thu, 11 Apr 2019 20:13:30 +0200 Subject: mwl8k: Fix rate_idx underflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b583201fa219b7b1b6aebd8966c8fd9357ef9f4 upstream. It was reported on OpenWrt bug tracking system[1], that several users are affected by the endless reboot of their routers if they configure 5GHz interface with channel 44 or 48. The reboot loop is caused by the following excessive number of WARN_ON messages: WARNING: CPU: 0 PID: 0 at backports-4.19.23-1/net/mac80211/rx.c:4516 ieee80211_rx_napi+0x1fc/0xa54 [mac80211] as the messages are being correctly emitted by the following guard: case RX_ENC_LEGACY: if (WARN_ON(status->rate_idx >= sband->n_bitrates)) as the rate_idx is in this case erroneously set to 251 (0xfb). This fix simply converts previously used magic number to proper constant and guards against substraction which is leading to the currently observed underflow. 1. https://bugs.openwrt.org/index.php?do=details&task_id=2218 Fixes: 854783444bab ("mwl8k: properly set receive status rate index on 5 GHz receive") Cc: Tested-by: Eubert Bao Reported-by: Eubert Bao Signed-off-by: Petr Štetiar Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwl8k.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 8e4e9b6919e0..ffc565ac2192 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -441,6 +441,9 @@ static const struct ieee80211_rate mwl8k_rates_50[] = { #define MWL8K_CMD_UPDATE_STADB 0x1123 #define MWL8K_CMD_BASTREAM 0x1125 +#define MWL8K_LEGACY_5G_RATE_OFFSET \ + (ARRAY_SIZE(mwl8k_rates_24) - ARRAY_SIZE(mwl8k_rates_50)) + static const char *mwl8k_cmd_name(__le16 cmd, char *buf, int bufsize) { u16 command = le16_to_cpu(cmd); @@ -1016,8 +1019,9 @@ mwl8k_rxd_ap_process(void *_rxd, struct ieee80211_rx_status *status, if (rxd->channel > 14) { status->band = NL80211_BAND_5GHZ; - if (!(status->encoding == RX_ENC_HT)) - status->rate_idx -= 5; + if (!(status->encoding == RX_ENC_HT) && + status->rate_idx >= MWL8K_LEGACY_5G_RATE_OFFSET) + status->rate_idx -= MWL8K_LEGACY_5G_RATE_OFFSET; } else { status->band = NL80211_BAND_2GHZ; } @@ -1124,8 +1128,9 @@ mwl8k_rxd_sta_process(void *_rxd, struct ieee80211_rx_status *status, if (rxd->channel > 14) { status->band = NL80211_BAND_5GHZ; - if (!(status->encoding == RX_ENC_HT)) - status->rate_idx -= 5; + if (!(status->encoding == RX_ENC_HT) && + status->rate_idx >= MWL8K_LEGACY_5G_RATE_OFFSET) + status->rate_idx -= MWL8K_LEGACY_5G_RATE_OFFSET; } else { status->band = NL80211_BAND_2GHZ; } -- cgit v1.2.3 From 843135c1ce1e566aeff8b2ecf053ee904a0b1cf0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Apr 2019 10:17:22 -0500 Subject: rtlwifi: rtl8723ae: Fix missing break in switch statement commit 84242b82d81c54e009a2aaa74d3d9eff70babf56 upstream. Add missing break statement in order to prevent the code from falling through to case 0x1025, and erroneously setting rtlhal->oem_id to RT_CID_819X_ACER when rtlefuse->eeprom_svid is equal to 0x10EC and none of the cases in switch (rtlefuse->eeprom_smid) match. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: 238ad2ddf34b ("rtlwifi: rtl8723ae: Clean up the hardware info routine") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index f783e4a8083d..0de7551ae14a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -1697,6 +1697,7 @@ static void _rtl8723e_read_adapter_info(struct ieee80211_hw *hw, rtlhal->oem_id = RT_CID_819X_LENOVO; break; } + break; case 0x1025: rtlhal->oem_id = RT_CID_819X_ACER; break; -- cgit v1.2.3 From d3ff0184ebd26e9ab985173a778415947ff9f225 Mon Sep 17 00:00:00 2001 From: Nigel Croxon Date: Fri, 29 Mar 2019 10:46:15 -0700 Subject: Don't jump to compute_result state from check_result state commit 4f4fd7c5798bbdd5a03a60f6269cf1177fbd11ef upstream. Changing state from check_state_check_result to check_state_compute_result not only is unsafe but also doesn't appear to serve a valid purpose. A raid6 check should only be pushing out extra writes if doing repair and a mis-match occurs. The stripe dev management will already try and do repair writes for failing sectors. This patch makes the raid6 check_state_check_result handling work more like raid5's. If somehow too many failures for a check, just quit the check operation for the stripe. When any checks pass, don't try and use check_state_compute_result for a purpose it isn't needed for and is unsafe for. Just mark the stripe as in sync for passing its parity checks and let the stripe dev read/write code and the bad blocks list do their job handling I/O errors. Repro steps from Xiao: These are the steps to reproduce this problem: 1. redefined OPT_MEDIUM_ERR_ADDR to 12000 in scsi_debug.c 2. insmod scsi_debug.ko dev_size_mb=11000 max_luns=1 num_tgts=1 3. mdadm --create /dev/md127 --level=6 --raid-devices=5 /dev/sde1 /dev/sde2 /dev/sde3 /dev/sde5 /dev/sde6 sde is the disk created by scsi_debug 4. echo "2" >/sys/module/scsi_debug/parameters/opts 5. raid-check It panic: [ 4854.730899] md: data-check of RAID array md127 [ 4854.857455] sd 5:0:0:0: [sdr] tag#80 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 4854.859246] sd 5:0:0:0: [sdr] tag#80 Sense Key : Medium Error [current] [ 4854.860694] sd 5:0:0:0: [sdr] tag#80 Add. Sense: Unrecovered read error [ 4854.862207] sd 5:0:0:0: [sdr] tag#80 CDB: Read(10) 28 00 00 00 2d 88 00 04 00 00 [ 4854.864196] print_req_error: critical medium error, dev sdr, sector 11656 flags 0 [ 4854.867409] sd 5:0:0:0: [sdr] tag#100 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 4854.869469] sd 5:0:0:0: [sdr] tag#100 Sense Key : Medium Error [current] [ 4854.871206] sd 5:0:0:0: [sdr] tag#100 Add. Sense: Unrecovered read error [ 4854.872858] sd 5:0:0:0: [sdr] tag#100 CDB: Read(10) 28 00 00 00 2e e0 00 00 08 00 [ 4854.874587] print_req_error: critical medium error, dev sdr, sector 12000 flags 4000 [ 4854.876456] sd 5:0:0:0: [sdr] tag#101 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 4854.878552] sd 5:0:0:0: [sdr] tag#101 Sense Key : Medium Error [current] [ 4854.880278] sd 5:0:0:0: [sdr] tag#101 Add. Sense: Unrecovered read error [ 4854.881846] sd 5:0:0:0: [sdr] tag#101 CDB: Read(10) 28 00 00 00 2e e8 00 00 08 00 [ 4854.883691] print_req_error: critical medium error, dev sdr, sector 12008 flags 4000 [ 4854.893927] sd 5:0:0:0: [sdr] tag#166 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [ 4854.896002] sd 5:0:0:0: [sdr] tag#166 Sense Key : Medium Error [current] [ 4854.897561] sd 5:0:0:0: [sdr] tag#166 Add. Sense: Unrecovered read error [ 4854.899110] sd 5:0:0:0: [sdr] tag#166 CDB: Read(10) 28 00 00 00 2e e0 00 00 10 00 [ 4854.900989] print_req_error: critical medium error, dev sdr, sector 12000 flags 0 [ 4854.902757] md/raid:md127: read error NOT corrected!! (sector 9952 on sdr1). [ 4854.904375] md/raid:md127: read error NOT corrected!! (sector 9960 on sdr1). [ 4854.906201] ------------[ cut here ]------------ [ 4854.907341] kernel BUG at drivers/md/raid5.c:4190! raid5.c:4190 above is this BUG_ON: handle_parity_checks6() ... BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ Cc: # v3.16+ OriginalAuthor: David Jeffery Cc: Xiao Ni Tested-by: David Jeffery Signed-off-by: David Jeffy Signed-off-by: Nigel Croxon Signed-off-by: Song Liu Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5b68f2d0da60..3ae13c06b200 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4233,26 +4233,15 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, case check_state_check_result: sh->check_state = check_state_idle; + if (s->failed > 1) + break; /* handle a successful check operation, if parity is correct * we are done. Otherwise update the mismatch count and repair * parity if !MD_RECOVERY_CHECK */ if (sh->ops.zero_sum_result == 0) { - /* both parities are correct */ - if (!s->failed) - set_bit(STRIPE_INSYNC, &sh->state); - else { - /* in contrast to the raid5 case we can validate - * parity, but still have a failure to write - * back - */ - sh->check_state = check_state_compute_result; - /* Returning at this point means that we may go - * off and bring p and/or q uptodate again so - * we make sure to check zero_sum_result again - * to verify if p or q need writeback - */ - } + /* Any parity checked was correct */ + set_bit(STRIPE_INSYNC, &sh->state); } else { atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { -- cgit v1.2.3 From 69798384ba38bbf5560b1e0554dd35c524850b0a Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 10 May 2019 17:57:09 -0400 Subject: bonding: fix arp_validate toggling in active-backup mode [ Upstream commit a9b8a2b39ce65df45687cf9ef648885c2a99fe75 ] There's currently a problem with toggling arp_validate on and off with an active-backup bond. At the moment, you can start up a bond, like so: modprobe bonding mode=1 arp_interval=100 arp_validate=0 arp_ip_targets=192.168.1.1 ip link set bond0 down echo "ens4f0" > /sys/class/net/bond0/bonding/slaves echo "ens4f1" > /sys/class/net/bond0/bonding/slaves ip link set bond0 up ip addr add 192.168.1.2/24 dev bond0 Pings to 192.168.1.1 work just fine. Now turn on arp_validate: echo 1 > /sys/class/net/bond0/bonding/arp_validate Pings to 192.168.1.1 continue to work just fine. Now when you go to turn arp_validate off again, the link falls flat on it's face: echo 0 > /sys/class/net/bond0/bonding/arp_validate dmesg ... [133191.911987] bond0: Setting arp_validate to none (0) [133194.257793] bond0: bond_should_notify_peers: slave ens4f0 [133194.258031] bond0: link status definitely down for interface ens4f0, disabling it [133194.259000] bond0: making interface ens4f1 the new active one [133197.330130] bond0: link status definitely down for interface ens4f1, disabling it [133197.331191] bond0: now running without any active interface! The problem lies in bond_options.c, where passing in arp_validate=0 results in bond->recv_probe getting set to NULL. This flies directly in the face of commit 3fe68df97c7f, which says we need to set recv_probe = bond_arp_recv, even if we're not using arp_validate. Said commit fixed this in bond_option_arp_interval_set, but missed that we can get to that same state in bond_option_arp_validate_set as well. One solution would be to universally set recv_probe = bond_arp_recv here as well, but I don't think bond_option_arp_validate_set has any business touching recv_probe at all, and that should be left to the arp_interval code, so we can just make things much tidier here. Fixes: 3fe68df97c7f ("bonding: always set recv_probe to bond_arp_rcv in arp monitor") CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: "David S. Miller" CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_options.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 4d5d01cb8141..80867bd8f44c 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1098,13 +1098,6 @@ static int bond_option_arp_validate_set(struct bonding *bond, { netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n", newval->string, newval->value); - - if (bond->dev->flags & IFF_UP) { - if (!newval->value) - bond->recv_probe = NULL; - else if (bond->params.arp_interval) - bond->recv_probe = bond_arp_rcv; - } bond->params.arp_validate = newval->value; return 0; -- cgit v1.2.3 From 430a64f6fa2c7ce59e5b51fd610349d4e71d1e21 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 10 May 2019 12:52:12 +1000 Subject: bridge: Fix error path for kobject_init_and_add() [ Upstream commit bdfad5aec1392b93495b77b864d58d7f101dc1c1 ] Currently error return from kobject_init_and_add() is not followed by a call to kobject_put(). This means there is a memory leak. We currently set p to NULL so that kfree() may be called on it as a noop, the code is arguably clearer if we move the kfree() up closer to where it is called (instead of after goto jump). Remove a goto label 'err1' and jump to call to kobject_put() in error return from kobject_init_and_add() fixing the memory leak. Re-name goto label 'put_back' to 'err1' now that we don't use err1, following current nomenclature (err1, err2 ...). Move call to kfree out of the error code at bottom of function up to closer to where memory was allocated. Add comment to clarify call to kfree(). Signed-off-by: Tobin C. Harding Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_if.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 41f0a696a65f..0cb0aa0313a8 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -602,13 +602,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, call_netdevice_notifiers(NETDEV_JOIN, dev); err = dev_set_allmulti(dev, 1); - if (err) - goto put_back; + if (err) { + kfree(p); /* kobject not yet init'd, manually free */ + goto err1; + } err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), SYSFS_BRIDGE_PORT_ATTR); if (err) - goto err1; + goto err2; err = br_sysfs_addif(p); if (err) @@ -700,12 +702,9 @@ err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: kobject_put(&p->kobj); - p = NULL; /* kobject_put frees */ -err1: dev_set_allmulti(dev, -1); -put_back: +err1: dev_put(dev); - kfree(p); return err; } -- cgit v1.2.3 From e4607de103badfd2b03e334f321342dcad9df414 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Fri, 3 May 2019 16:03:11 +0300 Subject: dpaa_eth: fix SG frame cleanup [ Upstream commit 17170e6570c082717c142733d9a638bcd20551f8 ] Fix issue with the entry indexing in the sg frame cleanup code being off-by-1. This problem showed up when doing some basic iperf tests and manifested in traffic coming to a halt. Signed-off-by: Laurentiu Tudor Acked-by: Madalin Bucur Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index dfebc30c4841..d3f2408dc9e8 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1648,7 +1648,7 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv, qm_sg_entry_get_len(&sgt[0]), dma_dir); /* remaining pages were mapped with skb_frag_dma_map() */ - for (i = 1; i < nr_frags; i++) { + for (i = 1; i <= nr_frags; i++) { WARN_ON(qm_sg_entry_is_ext(&sgt[i])); dma_unmap_page(dev, qm_sg_addr(&sgt[i]), -- cgit v1.2.3 From f8351176aed60c92a1335cc8c50960c2aa4f5f92 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 7 May 2019 17:11:18 +0800 Subject: fib_rules: return 0 directly if an exactly same rule exists when NLM_F_EXCL not supplied [ Upstream commit e9919a24d3022f72bcadc407e73a6ef17093a849 ] With commit 153380ec4b9 ("fib_rules: Added NLM_F_EXCL support to fib_nl_newrule") we now able to check if a rule already exists. But this only works with iproute2. For other tools like libnl, NetworkManager, it still could add duplicate rules with only NLM_F_CREATE flag, like [localhost ~ ]# ip rule 0: from all lookup local 32766: from all lookup main 32767: from all lookup default 100000: from 192.168.7.5 lookup 5 100000: from 192.168.7.5 lookup 5 As it doesn't make sense to create two duplicate rules, let's just return 0 if the rule exists. Fixes: 153380ec4b9 ("fib_rules: Added NLM_F_EXCL support to fib_nl_newrule") Reported-by: Thomas Haller Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/fib_rules.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ffbb827723a2..c49b752ea7eb 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -756,9 +756,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) goto errout; - if ((nlh->nlmsg_flags & NLM_F_EXCL) && - rule_exists(ops, frh, tb, rule)) { - err = -EEXIST; + if (rule_exists(ops, frh, tb, rule)) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + err = -EEXIST; goto errout_free; } -- cgit v1.2.3 From 2f679c41821f0b52d769062bd6182fd829d0d046 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 7 May 2019 20:44:59 -0700 Subject: ipv4: Fix raw socket lookup for local traffic [ Upstream commit 19e4e768064a87b073a4b4c138b55db70e0cfb9f ] inet_iif should be used for the raw socket lookup. inet_iif considers rt_iif which handles the case of local traffic. As it stands, ping to a local address with the '-I ' option fails ever since ping was changed to use SO_BINDTODEVICE instead of cmsg + IP_PKTINFO. IPv6 works fine. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/raw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c55a5432cf37..dc91c27bb788 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -173,6 +173,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { int sdif = inet_sdif(skb); + int dif = inet_iif(skb); struct sock *sk; struct hlist_head *head; int delivered = 0; @@ -185,8 +186,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) net = dev_net(skb->dev); sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, - iph->saddr, iph->daddr, - skb->dev->ifindex, sdif); + iph->saddr, iph->daddr, dif, sdif); while (sk) { delivered = 1; -- cgit v1.2.3 From 261a8958a57e3f9e0bd9c0cf7d7b637b2cb8def9 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 6 May 2019 23:25:29 +0800 Subject: net: dsa: Fix error cleanup path in dsa_init_module [ Upstream commit 68be930249d051fd54d3d99156b3dcadcb2a1f9b ] BUG: unable to handle kernel paging request at ffffffffa01c5430 PGD 3270067 P4D 3270067 PUD 3271063 PMD 230bc5067 PTE 0 Oops: 0000 [#1 CPU: 0 PID: 6159 Comm: modprobe Not tainted 5.1.0+ #33 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:raw_notifier_chain_register+0x16/0x40 Code: 63 f8 66 90 e9 5d ff ff ff 90 90 90 90 90 90 90 90 90 90 90 55 48 8b 07 48 89 e5 48 85 c0 74 1c 8b 56 10 3b 50 10 7e 07 eb 12 <39> 50 10 7c 0d 48 8d 78 08 48 8b 40 08 48 85 c0 75 ee 48 89 46 08 RSP: 0018:ffffc90001c33c08 EFLAGS: 00010282 RAX: ffffffffa01c5420 RBX: ffffffffa01db420 RCX: 4fcef45928070a8b RDX: 0000000000000000 RSI: ffffffffa01db420 RDI: ffffffffa01b0068 RBP: ffffc90001c33c08 R08: 000000003e0a33d0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000094443661 R12: ffff88822c320700 R13: ffff88823109be80 R14: 0000000000000000 R15: ffffc90001c33e78 FS: 00007fab8bd08540(0000) GS:ffff888237a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa01c5430 CR3: 00000002297ea000 CR4: 00000000000006f0 Call Trace: register_netdevice_notifier+0x43/0x250 ? 0xffffffffa01e0000 dsa_slave_register_notifier+0x13/0x70 [dsa_core ? 0xffffffffa01e0000 dsa_init_module+0x2e/0x1000 [dsa_core do_one_initcall+0x6c/0x3cc ? do_init_module+0x22/0x1f1 ? rcu_read_lock_sched_held+0x97/0xb0 ? kmem_cache_alloc_trace+0x325/0x3b0 do_init_module+0x5b/0x1f1 load_module+0x1db1/0x2690 ? m_show+0x1d0/0x1d0 __do_sys_finit_module+0xc5/0xd0 __x64_sys_finit_module+0x15/0x20 do_syscall_64+0x6b/0x1d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Cleanup allocated resourses if there are errors, otherwise it will trgger memleak. Fixes: c9eb3e0f8701 ("net: dsa: Add support for learning FDB through notification") Signed-off-by: YueHaibing Reviewed-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/dsa.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index aee909bcddc4..41d534d3f42b 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -342,15 +342,22 @@ static int __init dsa_init_module(void) rc = dsa_slave_register_notifier(); if (rc) - return rc; + goto register_notifier_fail; rc = dsa_legacy_register(); if (rc) - return rc; + goto legacy_register_fail; dev_add_pack(&dsa_pack_type); return 0; + +legacy_register_fail: + dsa_slave_unregister_notifier(); +register_notifier_fail: + destroy_workqueue(dsa_owq); + + return rc; } module_init(dsa_init_module); -- cgit v1.2.3 From a3bf31d7ffb276e6c6fbd0f51ca8a80dcb5d1b2e Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 13 May 2019 13:06:39 +0000 Subject: net: ethernet: stmmac: dwmac-sun8i: enable support of unicast filtering [ Upstream commit d4c26eb6e721683a0f93e346ce55bc8dc3cbb175 ] When adding more MAC addresses to a dwmac-sun8i interface, the device goes directly in promiscuous mode. This is due to IFF_UNICAST_FLT missing flag. So since the hardware support unicast filtering, let's add IFF_UNICAST_FLT. Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 0f660af01a4b..49a896a16391 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1015,6 +1015,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) mac->mac = &sun8i_dwmac_ops; mac->dma = &sun8i_dwmac_dma_ops; + priv->dev->priv_flags |= IFF_UNICAST_FLT; + /* The loopback bit seems to be re-set when link change * Simply mask it each time * Speed 10/100/1000 are set in BIT(2)/BIT(3) -- cgit v1.2.3 From 224b04c9e0284988a14d253e5663a77ffb5a5262 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Tue, 7 May 2019 19:59:10 +0530 Subject: net: macb: Change interrupt and napi enable order in open [ Upstream commit 0504453139ef5a593c9587e1e851febee859c7d8 ] Current order in open: -> Enable interrupts (macb_init_hw) -> Enable NAPI -> Start PHY Sequence of RX handling: -> RX interrupt occurs -> Interrupt is cleared and interrupt bits disabled in handler -> NAPI is scheduled -> In NAPI, RX budget is processed and RX interrupts are re-enabled With the above, on QEMU or fixed link setups (where PHY state doesn't matter), there's a chance macb RX interrupt occurs before NAPI is enabled. This will result in NAPI being scheduled before it is enabled. Fix this macb open by changing the order. Fixes: ae1f2a56d273 ("net: macb: Added support for many RX queues") Signed-off-by: Harini Katakam Acked-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6cbe515bfdeb..8a57888e9765 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2414,12 +2414,12 @@ static int macb_open(struct net_device *dev) return err; } - bp->macbgem_ops.mog_init_rings(bp); - macb_init_hw(bp); - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) napi_enable(&queue->napi); + bp->macbgem_ops.mog_init_rings(bp); + macb_init_hw(bp); + /* schedule a link state check */ phy_start(dev->phydev); -- cgit v1.2.3 From 57ee33b4819059a64c4ed7bf24b79c9649a50cf3 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 13 May 2019 13:15:17 +0200 Subject: net: seeq: fix crash caused by not set dev.parent [ Upstream commit 5afcd14cfc7fed1bcc8abcee2cef82732772bfc2 ] The old MIPS implementation of dma_cache_sync() didn't use the dev argument, but commit c9eb6172c328 ("dma-mapping: turn dma_cache_sync into a dma_map_ops method") changed that, so we now need to set dev.parent. Signed-off-by: Thomas Bogendoerfer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/seeq/sgiseeq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 70cce63a6081..696037d5ac3d 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -735,6 +735,7 @@ static int sgiseeq_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); sp = netdev_priv(dev); /* Make private data page aligned */ -- cgit v1.2.3 From 7bc936f4f226d744cc45a5b9ac45e3263ea9f80d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 3 May 2019 13:33:23 +0000 Subject: net: ucc_geth - fix Oops when changing number of buffers in the ring [ Upstream commit ee0df19305d9fabd9479b785918966f6e25b733b ] When changing the number of buffers in the RX ring while the interface is running, the following Oops is encountered due to the new number of buffers being taken into account immediately while their allocation is done when opening the device only. [ 69.882706] Unable to handle kernel paging request for data at address 0xf0000100 [ 69.890172] Faulting instruction address: 0xc033e164 [ 69.895122] Oops: Kernel access of bad area, sig: 11 [#1] [ 69.900494] BE PREEMPT CMPCPRO [ 69.907120] CPU: 0 PID: 0 Comm: swapper Not tainted 4.14.115-00006-g179ade8ce3-dirty #269 [ 69.915956] task: c0684310 task.stack: c06da000 [ 69.920470] NIP: c033e164 LR: c02e44d0 CTR: c02e41fc [ 69.925504] REGS: dfff1e20 TRAP: 0300 Not tainted (4.14.115-00006-g179ade8ce3-dirty) [ 69.934161] MSR: 00009032 CR: 22004428 XER: 20000000 [ 69.940869] DAR: f0000100 DSISR: 20000000 [ 69.940869] GPR00: c0352d70 dfff1ed0 c0684310 f00000a4 00000040 dfff1f68 00000000 0000001f [ 69.940869] GPR08: df53f410 1cc00040 00000021 c0781640 42004424 100c82b6 f00000a4 df53f5b0 [ 69.940869] GPR16: df53f6c0 c05daf84 00000040 00000000 00000040 c0782be4 00000000 00000001 [ 69.940869] GPR24: 00000000 df53f400 000001b0 df53f410 df53f000 0000003f df708220 1cc00044 [ 69.978348] NIP [c033e164] skb_put+0x0/0x5c [ 69.982528] LR [c02e44d0] ucc_geth_poll+0x2d4/0x3f8 [ 69.987384] Call Trace: [ 69.989830] [dfff1ed0] [c02e4554] ucc_geth_poll+0x358/0x3f8 (unreliable) [ 69.996522] [dfff1f20] [c0352d70] net_rx_action+0x248/0x30c [ 70.002099] [dfff1f80] [c04e93e4] __do_softirq+0xfc/0x310 [ 70.007492] [dfff1fe0] [c0021124] irq_exit+0xd0/0xd4 [ 70.012458] [dfff1ff0] [c000e7e0] call_do_irq+0x24/0x3c [ 70.017683] [c06dbe80] [c0006bac] do_IRQ+0x64/0xc4 [ 70.022474] [c06dbea0] [c001097c] ret_from_except+0x0/0x14 [ 70.027964] --- interrupt: 501 at rcu_idle_exit+0x84/0x90 [ 70.027964] LR = rcu_idle_exit+0x74/0x90 [ 70.037585] [c06dbf60] [20000000] 0x20000000 (unreliable) [ 70.042984] [c06dbf80] [c004bb0c] do_idle+0xb4/0x11c [ 70.047945] [c06dbfa0] [c004bd14] cpu_startup_entry+0x18/0x1c [ 70.053682] [c06dbfb0] [c05fb034] start_kernel+0x370/0x384 [ 70.059153] [c06dbff0] [00003438] 0x3438 [ 70.063062] Instruction dump: [ 70.066023] 38a00000 38800000 90010014 4bfff015 80010014 7c0803a6 3123ffff 7c691910 [ 70.073767] 38210010 4e800020 38600000 4e800020 <80e3005c> 80c30098 3107ffff 7d083910 [ 70.081690] ---[ end trace be7ccd9c1e1a9f12 ]--- This patch forbids the modification of the number of buffers in the ring while the interface is running. Fixes: ac421852b3a0 ("ucc_geth: add ethtool support") Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/ucc_geth_ethtool.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 0beee2cc2ddd..722b6de24816 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -252,14 +252,12 @@ uec_set_ringparam(struct net_device *netdev, return -EINVAL; } + if (netif_running(netdev)) + return -EBUSY; + ug_info->bdRingLenRx[queue] = ring->rx_pending; ug_info->bdRingLenTx[queue] = ring->tx_pending; - if (netif_running(netdev)) { - /* FIXME: restart automatically */ - netdev_info(netdev, "Please re-open the interface\n"); - } - return ret; } -- cgit v1.2.3 From c3954f8f5a7e0de2c6be51f6f65736cc2d193b86 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 9 May 2019 22:52:20 +0800 Subject: packet: Fix error path in packet_init [ Upstream commit 36096f2f4fa05f7678bc87397665491700bae757 ] kernel BUG at lib/list_debug.c:47! invalid opcode: 0000 [#1 CPU: 0 PID: 12914 Comm: rmmod Tainted: G W 5.1.0+ #47 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:__list_del_entry_valid+0x53/0x90 Code: 48 8b 32 48 39 fe 75 35 48 8b 50 08 48 39 f2 75 40 b8 01 00 00 00 5d c3 48 89 fe 48 89 c2 48 c7 c7 18 75 fe 82 e8 cb 34 78 ff <0f> 0b 48 89 fe 48 c7 c7 50 75 fe 82 e8 ba 34 78 ff 0f 0b 48 89 f2 RSP: 0018:ffffc90001c2fe40 EFLAGS: 00010286 RAX: 000000000000004e RBX: ffffffffa0184000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff888237a17788 RDI: 00000000ffffffff RBP: ffffc90001c2fe40 R08: 0000000000000000 R09: 0000000000000000 R10: ffffc90001c2fe10 R11: 0000000000000000 R12: 0000000000000000 R13: ffffc90001c2fe50 R14: ffffffffa0184000 R15: 0000000000000000 FS: 00007f3d83634540(0000) GS:ffff888237a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000555c350ea818 CR3: 0000000231677000 CR4: 00000000000006f0 Call Trace: unregister_pernet_operations+0x34/0x120 unregister_pernet_subsys+0x1c/0x30 packet_exit+0x1c/0x369 [af_packet __x64_sys_delete_module+0x156/0x260 ? lockdep_hardirqs_on+0x133/0x1b0 ? do_syscall_64+0x12/0x1f0 do_syscall_64+0x6e/0x1f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe When modprobe af_packet, register_pernet_subsys fails and does a cleanup, ops->list is set to LIST_POISON1, but the module init is considered to success, then while rmmod it, BUG() is triggered in __list_del_entry_valid which is called from unregister_pernet_subsys. This patch fix error handing path in packet_init to avoid possilbe issue if some error occur. Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index faa2bc50cfa0..b6c23af4a315 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4604,14 +4604,29 @@ static void __exit packet_exit(void) static int __init packet_init(void) { - int rc = proto_register(&packet_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&packet_proto, 0); + if (rc) goto out; + rc = sock_register(&packet_family_ops); + if (rc) + goto out_proto; + rc = register_pernet_subsys(&packet_net_ops); + if (rc) + goto out_sock; + rc = register_netdevice_notifier(&packet_netdev_notifier); + if (rc) + goto out_pernet; - sock_register(&packet_family_ops); - register_pernet_subsys(&packet_net_ops); - register_netdevice_notifier(&packet_netdev_notifier); + return 0; + +out_pernet: + unregister_pernet_subsys(&packet_net_ops); +out_sock: + sock_unregister(PF_PACKET); +out_proto: + proto_unregister(&packet_proto); out: return rc; } -- cgit v1.2.3 From be6a9818866dce876f65e7fba6030eabfca9721c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 8 May 2019 15:32:51 +0200 Subject: selinux: do not report error on connect(AF_UNSPEC) [ Upstream commit c7e0d6cca86581092cbbf2cd868b3601495554cf ] calling connect(AF_UNSPEC) on an already connected TCP socket is an established way to disconnect() such socket. After commit 68741a8adab9 ("selinux: Fix ltp test connect-syscall failure") it no longer works and, in the above scenario connect() fails with EAFNOSUPPORT. Fix the above falling back to the generic/old code when the address family is not AF_INET{4,6}, but leave the SCTP code path untouched, as it has specific constraints. Fixes: 68741a8adab9 ("selinux: Fix ltp test connect-syscall failure") Reported-by: Tom Deseyn Signed-off-by: Paolo Abeni Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b005283f0090..bc4aec97723a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4586,7 +4586,7 @@ static int selinux_socket_connect_helper(struct socket *sock, struct lsm_network_audit net = {0,}; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; - unsigned short snum; + unsigned short snum = 0; u32 sid, perm; /* sctp_connectx(3) calls via selinux_sctp_bind_connect() @@ -4609,12 +4609,12 @@ static int selinux_socket_connect_helper(struct socket *sock, break; default: /* Note that SCTP services expect -EINVAL, whereas - * others expect -EAFNOSUPPORT. + * others must handle this at the protocol level: + * connect(AF_UNSPEC) on a connected socket is + * a documented way disconnect the socket. */ if (sksec->sclass == SECCLASS_SCTP_SOCKET) return -EINVAL; - else - return -EAFNOSUPPORT; } err = sel_netport_sid(sk->sk_protocol, snum, &sid); -- cgit v1.2.3 From 83c25477e944b9f87ce0d32646ab131fd2139540 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Thu, 9 May 2019 07:13:42 +0200 Subject: tipc: fix hanging clients using poll with EPOLLOUT flag [ Upstream commit ff946833b70e0c7f93de9a3f5b329b5ae2287b38 ] commit 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets") introduced a regression for clients using non-blocking sockets. After the commit, we send EPOLLOUT event to the client even in TIPC_CONNECTING state. This causes the subsequent send() to fail with ENOTCONN, as the socket is still not in TIPC_ESTABLISHED state. In this commit, we: - improve the fix for hanging poll() by replacing sk_data_ready() with sk_state_change() to wake up all clients. - revert the faulty updates introduced by commit 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets"). Fixes: 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets") Signed-off-by: Parthasarathy Bhuvaragan Acked-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4dca9161f99b..020477ff91a2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -734,11 +734,11 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: - case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= EPOLLOUT; /* fall thru' */ case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) revents |= EPOLLIN | EPOLLRDNORM; break; @@ -2041,7 +2041,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) if (msg_data_sz(hdr)) return true; /* Empty ACK-, - wake up sleeping connect() and drop */ - sk->sk_data_ready(sk); + sk->sk_state_change(sk); msg_set_dest_droppable(hdr, 1); return false; } -- cgit v1.2.3 From 13d54150e74663a168c23ef7626cb4ef994c9f6d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 9 May 2019 14:55:07 +0800 Subject: vlan: disable SIOCSHWTSTAMP in container [ Upstream commit 873017af778439f2f8e3d87f28ddb1fcaf244a76 ] With NET_ADMIN enabled in container, a normal user could be mapped to root and is able to change the real device's rx filter via ioctl on vlan, which would affect the other ptp process on host. Fix it by disabling SIOCSHWTSTAMP in container. Fixes: a6111d3c93d0 ("vlan: Pass SIOC[SG]HWTSTAMP ioctls to real device") Signed-off-by: Hangbin Liu Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b2d9c8f27cd7..1991ce2eb268 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -368,10 +368,12 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ifrr.ifr_ifru = ifr->ifr_ifru; switch (cmd) { + case SIOCSHWTSTAMP: + if (!net_eq(dev_net(dev), &init_net)) + break; case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - case SIOCSHWTSTAMP: case SIOCGHWTSTAMP: if (netif_device_present(real_dev) && ops->ndo_do_ioctl) err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); -- cgit v1.2.3 From 92edcf205388b6bb211f0e56e91582f0cfa7c3f8 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Mon, 6 May 2019 15:00:01 -0400 Subject: vrf: sit mtu should not be updated when vrf netdev is the link [ Upstream commit ff6ab32bd4e073976e4d8797b4d514a172cfe6cb ] VRF netdev mtu isn't typically set and have an mtu of 65536. When the link of a tunnel is set, the tunnel mtu is changed from 1480 to the link mtu minus tunnel header. In the case of VRF netdev is the link, then the tunnel mtu becomes 65516. So, fix it by not setting the tunnel mtu in this case. Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b2109b74857d..971d60bf9640 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1084,7 +1084,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); - if (tdev) { + if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); -- cgit v1.2.3 From e0630246658aad0d220e2e73ade47d5639e9c350 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 8 May 2019 23:20:17 -0400 Subject: tuntap: fix dividing by zero in ebpf queue selection [ Upstream commit a35d310f03a692bf4798eb309a1950a06a150620 ] We need check if tun->numqueues is zero (e.g for the persist device) before trying to use it for modular arithmetic. Reported-by: Eric Dumazet Fixes: 96f84061620c6("tun: add eBPF based queue selection method") Signed-off-by: Jason Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 448d5439ff6a..58175dc8d1ff 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -596,13 +596,18 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) { struct tun_prog *prog; + u32 numqueues; u16 ret = 0; + numqueues = READ_ONCE(tun->numqueues); + if (!numqueues) + return 0; + prog = rcu_dereference(tun->steering_prog); if (prog) ret = bpf_prog_run_clear_cb(prog->prog, skb); - return ret % tun->numqueues; + return ret % numqueues; } static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, -- cgit v1.2.3 From 4a91e5e4c58f395e390472174448132fe7a87bf1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 8 May 2019 23:20:18 -0400 Subject: tuntap: synchronize through tfiles array instead of tun->numqueues [ Upstream commit 9871a9e47a2646fe30ae7fd2e67668a8d30912f6 ] When a queue(tfile) is detached through __tun_detach(), we move the last enabled tfile to the position where detached one sit but don't NULL out last position. We expect to synchronize the datapath through tun->numqueues. Unfortunately, this won't work since we're lacking sufficient mechanism to order or synchronize the access to tun->numqueues. To fix this, NULL out the last position during detaching and check RCU protected tfile against NULL instead of checking tun->numqueues in datapath. Cc: YueHaibing Cc: Cong Wang Cc: weiyongjun (A) Cc: Eric Dumazet Fixes: c8d68e6be1c3b ("tuntap: multiqueue support") Signed-off-by: Jason Wang Reviewed-by: Wei Yongjun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 58175dc8d1ff..8888c097375b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -705,6 +705,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; + rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], + NULL); --tun->numqueues; if (clean) { @@ -1087,7 +1089,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) tfile = rcu_dereference(tun->tfiles[txq]); /* Drop packet if interface is not attached */ - if (txq >= tun->numqueues) + if (!tfile) goto drop; if (!rcu_dereference(tun->steering_prog)) @@ -1310,6 +1312,7 @@ static int tun_xdp_xmit(struct net_device *dev, int n, rcu_read_lock(); +resample: numqueues = READ_ONCE(tun->numqueues); if (!numqueues) { rcu_read_unlock(); @@ -1318,6 +1321,8 @@ static int tun_xdp_xmit(struct net_device *dev, int n, tfile = rcu_dereference(tun->tfiles[smp_processor_id() % numqueues]); + if (unlikely(!tfile)) + goto resample; spin_lock(&tfile->tx_ring.producer_lock); for (i = 0; i < n; i++) { -- cgit v1.2.3 From 39f7b3941969e2303373963ffe1e588804a1f33c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 1 May 2019 21:54:28 +0200 Subject: net: phy: fix phy_validate_pause [ Upstream commit b4010af981ac8cdf1f7f58eb6b131c482e5dee02 ] We have valid scenarios where ETHTOOL_LINK_MODE_Pause_BIT doesn't need to be supported. Therefore extend the first check to check for rx_pause being set. See also phy_set_asym_pause: rx=0 and tx=1: advertise asym pause only rx=0 and tx=0: stop advertising both pause modes The fixed commit isn't wrong, it's just the one that introduced the linkmode bitmaps. Fixes: 3c1bcc8614db ("net: ethernet: Convert phydev advertize and supported from u32 to link mode") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index adf79614c2db..ff2426e00682 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2083,11 +2083,14 @@ bool phy_validate_pause(struct phy_device *phydev, struct ethtool_pauseparam *pp) { if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported) || - (!linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported) && - pp->rx_pause != pp->tx_pause)) + phydev->supported) && pp->rx_pause) return false; + + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->supported) && + pp->rx_pause != pp->tx_pause) + return false; + return true; } EXPORT_SYMBOL(phy_validate_pause); -- cgit v1.2.3 From 12786188dcf310db3666474408887504a387bda4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 May 2019 09:38:55 -0700 Subject: flow_dissector: disable preemption around BPF calls [ Upstream commit b1c17a9a353878602fd5bfe9103e4afe5e9a3f96 ] Various things in eBPF really require us to disable preemption before running an eBPF program. syzbot reported : BUG: assuming atomic context at net/core/flow_dissector.c:737 in_atomic(): 0, irqs_disabled(): 0, pid: 24710, name: syz-executor.3 2 locks held by syz-executor.3/24710: #0: 00000000e81a4bf1 (&tfile->napi_mutex){+.+.}, at: tun_get_user+0x168e/0x3ff0 drivers/net/tun.c:1850 #1: 00000000254afebd (rcu_read_lock){....}, at: __skb_flow_dissect+0x1e1/0x4bb0 net/core/flow_dissector.c:822 CPU: 1 PID: 24710 Comm: syz-executor.3 Not tainted 5.1.0+ #6 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 __cant_sleep kernel/sched/core.c:6165 [inline] __cant_sleep.cold+0xa3/0xbb kernel/sched/core.c:6142 bpf_flow_dissect+0xfe/0x390 net/core/flow_dissector.c:737 __skb_flow_dissect+0x362/0x4bb0 net/core/flow_dissector.c:853 skb_flow_dissect_flow_keys_basic include/linux/skbuff.h:1322 [inline] skb_probe_transport_header include/linux/skbuff.h:2500 [inline] skb_probe_transport_header include/linux/skbuff.h:2493 [inline] tun_get_user+0x2cfe/0x3ff0 drivers/net/tun.c:1940 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2037 call_write_iter include/linux/fs.h:1872 [inline] do_iter_readv_writev+0x5fd/0x900 fs/read_write.c:693 do_iter_write fs/read_write.c:970 [inline] do_iter_write+0x184/0x610 fs/read_write.c:951 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1015 do_writev+0x15b/0x330 fs/read_write.c:1058 __do_sys_writev fs/read_write.c:1131 [inline] __se_sys_writev fs/read_write.c:1128 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1128 do_syscall_64+0x103/0x670 arch/x86/entry/common.c:298 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Petar Penkov Cc: Stanislav Fomichev Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9f2840510e63..afc6e025c85c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -786,7 +786,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb, flow_keys.thoff = nhoff; bpf_compute_data_pointers((struct sk_buff *)skb); + + preempt_disable(); result = BPF_PROG_RUN(attached, skb); + preempt_enable(); /* Restore state */ memcpy(cb, &cb_saved, sizeof(cb_saved)); -- cgit v1.2.3 From 8b8fc62b6c6740b25ed77a268c0bbd077e28ead6 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 1 May 2019 23:19:03 +0200 Subject: isdn: bas_gigaset: use usb_fill_int_urb() properly [ Upstream commit 4014dfae3ccaaf3ec19c9ae0691a3f14e7132eae ] The switch to make bas_gigaset use usb_fill_int_urb() - instead of filling that urb "by hand" - missed the subtle ordering of the previous code. See, before the switch urb->dev was set to a member somewhere deep in a complicated structure and then supplied to usb_rcvisocpipe() and usb_sndisocpipe(). After that switch urb->dev wasn't set to anything specific before being supplied to those two macros. This triggers a nasty oops: BUG: unable to handle kernel NULL pointer dereference at 00000000 #PF error: [normal kernel read fault] *pde = 00000000 Oops: 0000 [#1] SMP CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.1.0-0.rc4.1.local0.fc28.i686 #1 Hardware name: IBM 2525FAG/2525FAG, BIOS 74ET64WW (2.09 ) 12/14/2006 EIP: gigaset_init_bchannel+0x89/0x320 [bas_gigaset] Code: 75 07 83 8b 84 00 00 00 40 8d 47 74 c7 07 01 00 00 00 89 45 f0 8b 44 b7 68 85 c0 0f 84 6a 02 00 00 8b 48 28 8b 93 88 00 00 00 <8b> 09 8d 54 12 03 c1 e2 0f c1 e1 08 09 ca 8b 8b 8c 00 00 00 80 ca EAX: f05ec200 EBX: ed404200 ECX: 00000000 EDX: 00000000 ESI: 00000000 EDI: f065a000 EBP: f30c9f40 ESP: f30c9f20 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010086 CR0: 80050033 CR2: 00000000 CR3: 0ddc7000 CR4: 000006d0 Call Trace: ? gigaset_isdn_connD+0xf6/0x140 [gigaset] gigaset_handle_event+0x173e/0x1b90 [gigaset] tasklet_action_common.isra.16+0x4e/0xf0 tasklet_action+0x1e/0x20 __do_softirq+0xb2/0x293 ? __irqentry_text_end+0x3/0x3 call_on_stack+0x45/0x50 ? irq_exit+0xb5/0xc0 ? do_IRQ+0x78/0xd0 ? acpi_idle_enter_s2idle+0x50/0x50 ? common_interrupt+0xd4/0xdc ? acpi_idle_enter_s2idle+0x50/0x50 ? sched_cpu_activate+0x1b/0xf0 ? acpi_fan_resume.cold.7+0x9/0x18 ? cpuidle_enter_state+0x152/0x4c0 ? cpuidle_enter+0x14/0x20 ? call_cpuidle+0x21/0x40 ? do_idle+0x1c8/0x200 ? cpu_startup_entry+0x25/0x30 ? rest_init+0x88/0x8a ? arch_call_rest_init+0xd/0x19 ? start_kernel+0x42f/0x448 ? i386_start_kernel+0xac/0xb0 ? startup_32_smp+0x164/0x168 Modules linked in: ppp_generic slhc capi bas_gigaset gigaset kernelcapi nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c iptable_mangle iptable_raw iptable_security ebtable_filter ebtables ip6table_filter ip6_tables sunrpc ipw2200 iTCO_wdt gpio_ich snd_intel8x0 libipw iTCO_vendor_support snd_ac97_codec lib80211 ppdev ac97_bus snd_seq cfg80211 snd_seq_device pcspkr thinkpad_acpi lpc_ich snd_pcm i2c_i801 snd_timer ledtrig_audio snd soundcore rfkill parport_pc parport pcc_cpufreq acpi_cpufreq i915 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sdhci_pci sysimgblt cqhci fb_sys_fops drm sdhci mmc_core tg3 ata_generic serio_raw yenta_socket pata_acpi video CR2: 0000000000000000 ---[ end trace 1fe07487b9200c73 ]--- EIP: gigaset_init_bchannel+0x89/0x320 [bas_gigaset] Code: 75 07 83 8b 84 00 00 00 40 8d 47 74 c7 07 01 00 00 00 89 45 f0 8b 44 b7 68 85 c0 0f 84 6a 02 00 00 8b 48 28 8b 93 88 00 00 00 <8b> 09 8d 54 12 03 c1 e2 0f c1 e1 08 09 ca 8b 8b 8c 00 00 00 80 ca EAX: f05ec200 EBX: ed404200 ECX: 00000000 EDX: 00000000 ESI: 00000000 EDI: f065a000 EBP: f30c9f40 ESP: cddcb3bc DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010086 CR0: 80050033 CR2: 00000000 CR3: 0ddc7000 CR4: 000006d0 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0xcc00000 from 0xc0400000 (relocation range: 0xc0000000-0xf6ffdfff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- No-one noticed because this Oops is apparently only triggered by setting up an ISDN data connection on a live ISDN line on a gigaset base (ie, the PBX that the gigaset driver support). Very few people do that running present day kernels. Anyhow, a little code reorganization makes this problem go away, while avoiding the subtle ordering that was used in the past. So let's do that. Fixes: 78c696c19578 ("isdn: gigaset: use usb_fill_int_urb()") Signed-off-by: Paul Bolle Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/gigaset/bas-gigaset.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index ecdeb89645d0..149b1aca52a2 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -958,6 +958,7 @@ static void write_iso_callback(struct urb *urb) */ static int starturbs(struct bc_state *bcs) { + struct usb_device *udev = bcs->cs->hw.bas->udev; struct bas_bc_state *ubc = bcs->hw.bas; struct urb *urb; int j, k; @@ -975,8 +976,8 @@ static int starturbs(struct bc_state *bcs) rc = -EFAULT; goto error; } - usb_fill_int_urb(urb, bcs->cs->hw.bas->udev, - usb_rcvisocpipe(urb->dev, 3 + 2 * bcs->channel), + usb_fill_int_urb(urb, udev, + usb_rcvisocpipe(udev, 3 + 2 * bcs->channel), ubc->isoinbuf + k * BAS_INBUFSIZE, BAS_INBUFSIZE, read_iso_callback, bcs, BAS_FRAMETIME); @@ -1006,8 +1007,8 @@ static int starturbs(struct bc_state *bcs) rc = -EFAULT; goto error; } - usb_fill_int_urb(urb, bcs->cs->hw.bas->udev, - usb_sndisocpipe(urb->dev, 4 + 2 * bcs->channel), + usb_fill_int_urb(urb, udev, + usb_sndisocpipe(udev, 4 + 2 * bcs->channel), ubc->isooutbuf->data, sizeof(ubc->isooutbuf->data), write_iso_callback, &ubc->isoouturbs[k], -- cgit v1.2.3 From 1a84219f73b2d9827a1a0612960c3d5c2c37d136 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 May 2019 15:47:00 -0700 Subject: drivers/virt/fsl_hypervisor.c: dereferencing error pointers in ioctl commit c8ea3663f7a8e6996d44500ee818c9330ac4fd88 upstream. strndup_user() returns error pointers on error, and then in the error handling we pass the error pointers to kfree(). It will cause an Oops. Link: http://lkml.kernel.org/r/20181218082003.GD32567@kadam Fixes: 6db7199407ca ("drivers/virt: introduce Freescale hypervisor management driver") Signed-off-by: Dan Carpenter Reviewed-by: Andrew Morton Cc: Timur Tabi Cc: Mihai Caraman Cc: Kumar Gala Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/virt/fsl_hypervisor.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c index 8ba726e600e9..7b7f8e9a2801 100644 --- a/drivers/virt/fsl_hypervisor.c +++ b/drivers/virt/fsl_hypervisor.c @@ -331,8 +331,8 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set) struct fsl_hv_ioctl_prop param; char __user *upath, *upropname; void __user *upropval; - char *path = NULL, *propname = NULL; - void *propval = NULL; + char *path, *propname; + void *propval; int ret = 0; /* Get the parameters from the user. */ @@ -344,32 +344,30 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set) upropval = (void __user *)(uintptr_t)param.propval; path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN); - if (IS_ERR(path)) { - ret = PTR_ERR(path); - goto out; - } + if (IS_ERR(path)) + return PTR_ERR(path); propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN); if (IS_ERR(propname)) { ret = PTR_ERR(propname); - goto out; + goto err_free_path; } if (param.proplen > FH_DTPROP_MAX_PROPLEN) { ret = -EINVAL; - goto out; + goto err_free_propname; } propval = kmalloc(param.proplen, GFP_KERNEL); if (!propval) { ret = -ENOMEM; - goto out; + goto err_free_propname; } if (set) { if (copy_from_user(propval, upropval, param.proplen)) { ret = -EFAULT; - goto out; + goto err_free_propval; } param.ret = fh_partition_set_dtprop(param.handle, @@ -388,7 +386,7 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set) if (copy_to_user(upropval, propval, param.proplen) || put_user(param.proplen, &p->proplen)) { ret = -EFAULT; - goto out; + goto err_free_propval; } } } @@ -396,10 +394,12 @@ static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set) if (put_user(param.ret, &p->ret)) ret = -EFAULT; -out: - kfree(path); +err_free_propval: kfree(propval); +err_free_propname: kfree(propname); +err_free_path: + kfree(path); return ret; } -- cgit v1.2.3 From 79e981a8503f662770ad038c7454f88421cfc882 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 May 2019 15:47:03 -0700 Subject: drivers/virt/fsl_hypervisor.c: prevent integer overflow in ioctl commit 6a024330650e24556b8a18cc654ad00cfecf6c6c upstream. The "param.count" value is a u64 thatcomes from the user. The code later in the function assumes that param.count is at least one and if it's not then it leads to an Oops when we dereference the ZERO_SIZE_PTR. Also the addition can have an integer overflow which would lead us to allocate a smaller "pages" array than required. I can't immediately tell what the possible run times implications are, but it's safest to prevent the overflow. Link: http://lkml.kernel.org/r/20181218082129.GE32567@kadam Fixes: 6db7199407ca ("drivers/virt: introduce Freescale hypervisor management driver") Signed-off-by: Dan Carpenter Reviewed-by: Andrew Morton Cc: Timur Tabi Cc: Mihai Caraman Cc: Kumar Gala Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/virt/fsl_hypervisor.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c index 7b7f8e9a2801..1bbd910d4ddb 100644 --- a/drivers/virt/fsl_hypervisor.c +++ b/drivers/virt/fsl_hypervisor.c @@ -215,6 +215,9 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) * hypervisor. */ lb_offset = param.local_vaddr & (PAGE_SIZE - 1); + if (param.count == 0 || + param.count > U64_MAX - lb_offset - PAGE_SIZE + 1) + return -EINVAL; num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT; /* Allocate the buffers we need */ -- cgit v1.2.3 From d314437d17adcbc696e61816972a9c35df084193 Mon Sep 17 00:00:00 2001 From: Rick Lindsley Date: Sun, 5 May 2019 17:20:43 -0700 Subject: powerpc/book3s/64: check for NULL pointer in pgd_alloc() commit f39356261c265a0689d7ee568132d516e8b6cecc upstream. When the memset code was added to pgd_alloc(), it failed to consider that kmem_cache_alloc() can return NULL. It's uncommon, but not impossible under heavy memory contention. Example oops: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc0000000000a4000 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA pSeries CPU: 70 PID: 48471 Comm: entrypoint.sh Kdump: loaded Not tainted 4.14.0-115.6.1.el7a.ppc64le #1 task: c000000334a00000 task.stack: c000000331c00000 NIP: c0000000000a4000 LR: c00000000012f43c CTR: 0000000000000020 REGS: c000000331c039c0 TRAP: 0300 Not tainted (4.14.0-115.6.1.el7a.ppc64le) MSR: 800000010280b033 CR: 44022840 XER: 20040000 CFAR: c000000000008874 DAR: 0000000000000000 DSISR: 42000000 SOFTE: 1 ... NIP [c0000000000a4000] memset+0x68/0x104 LR [c00000000012f43c] mm_init+0x27c/0x2f0 Call Trace: mm_init+0x260/0x2f0 (unreliable) copy_mm+0x11c/0x638 copy_process.isra.28.part.29+0x6fc/0x1080 _do_fork+0xdc/0x4c0 ppc_clone+0x8/0xc Instruction dump: 409e000c b0860000 38c60002 409d000c 90860000 38c60004 78a0d183 78a506a0 7c0903a6 41820034 60000000 60420000 f8860008 f8860010 f8860018 Fixes: fc5c2f4a55a2 ("powerpc/mm/hash64: Zero PGD pages on allocation") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Rick Lindsley Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 9c1173283b96..6c6211a82b9a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -81,6 +81,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL)); + if (unlikely(!pgd)) + return pgd; + /* * Don't scan the PGD for pointers, it contains references to PUDs but * those references are not full pointers and so can't be recognised by -- cgit v1.2.3 From 0da52ad69b37346335848e0811467a3ffe6f0c68 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Thu, 18 Apr 2019 16:51:16 +1000 Subject: powerpc/powernv/idle: Restore IAMR after idle commit a3f3072db6cad40895c585dce65e36aab997f042 upstream. Without restoring the IAMR after idle, execution prevention on POWER9 with Radix MMU is overwritten and the kernel can freely execute userspace without faulting. This is necessary when returning from any stop state that modifies user state, as well as hypervisor state. To test how this fails without this patch, load the lkdtm driver and do the following: $ echo EXEC_USERSPACE > /sys/kernel/debug/provoke-crash/DIRECT which won't fault, then boot the kernel with powersave=off, where it will fault. Applying this patch will fix this. Fixes: 3b10d0095a1e ("powerpc/mm/radix: Prevent kernel execution of user space") Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Russell Currey Reviewed-by: Akshay Adiga Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/idle_book3s.S | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 7f5ac2e8581b..36178000a2f2 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -170,6 +170,9 @@ core_idle_lock_held: bne- core_idle_lock_held blr +/* Reuse an unused pt_regs slot for IAMR */ +#define PNV_POWERSAVE_IAMR _DAR + /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 @@ -200,6 +203,12 @@ pnv_powersave_common: /* Continue saving state */ SAVE_GPR(2, r1) SAVE_NVGPRS(r1) + +BEGIN_FTR_SECTION + mfspr r5, SPRN_IAMR + std r5, PNV_POWERSAVE_IAMR(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + mfcr r5 std r5,_CCR(r1) std r1,PACAR1(r13) @@ -924,6 +933,17 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_NVGPRS(r1) REST_GPR(2, r1) + +BEGIN_FTR_SECTION + /* IAMR was saved in pnv_powersave_common() */ + ld r5, PNV_POWERSAVE_IAMR(r1) + mtspr SPRN_IAMR, r5 + /* + * We don't need an isync here because the upcoming mtmsrd is + * execution synchronizing. + */ +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + ld r4,PACAKMSR(r13) ld r5,_LINK(r1) ld r6,_CCR(r1) -- cgit v1.2.3 From ba81b50090a42542d48ba3395e5429ce689d6a59 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Mon, 15 Apr 2019 14:52:11 +0300 Subject: powerpc/booke64: set RI in default MSR commit 5266e58d6cd90ac85c187d673093ad9cb649e16d upstream. Set RI in the default kernel's MSR so that the architected way of detecting unrecoverable machine check interrupts has a chance to work. This is inline with the MSR setup of the rest of booke powerpc architectures configured here. Signed-off-by: Laurentiu Tudor Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/reg_booke.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index eb2a33d5df26..e382bd6ede84 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -41,7 +41,7 @@ #if defined(CONFIG_PPC_BOOK3E_64) #define MSR_64BIT MSR_CM -#define MSR_ (MSR_ME | MSR_CE) +#define MSR_ (MSR_ME | MSR_RI | MSR_CE) #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) -- cgit v1.2.3 From 7f9572e798ea8c7a6a7b846c8fb577905e3d4a6c Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 12 Mar 2019 15:06:53 +0800 Subject: virtio_ring: Fix potential mem leak in virtqueue_add_indirect_packed commit df0bfe7501e9319546ea380d39674a4179e059c3 upstream. 'desc' should be freed before leaving from err handing path. Fixes: 1ce9e6055fa0 ("virtio_ring: introduce packed ring support") Signed-off-by: YueHaibing Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a38b65b97be0..a659e52cf79c 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -993,6 +993,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); + kfree(desc); END_USE(vq); return -ENOSPC; } -- cgit v1.2.3 From d3a9cd23b172d1a873c05fb9d904538e58090f8b Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 4 Mar 2019 21:34:48 +0000 Subject: PCI: hv: Fix a memory leak in hv_eject_device_work() commit 05f151a73ec2b23ffbff706e5203e729a995cdc2 upstream. When a device is created in new_pcichild_device(), hpdev->refs is set to 2 (i.e. the initial value of 1 plus the get_pcichild()). When we hot remove the device from the host, in a Linux VM we first call hv_pci_eject_device(), which increases hpdev->refs by get_pcichild() and then schedules a work of hv_eject_device_work(), so hpdev->refs becomes 3 (let's ignore the paired get/put_pcichild() in other places). But in hv_eject_device_work(), currently we only call put_pcichild() twice, meaning the 'hpdev' struct can't be freed in put_pcichild(). Add one put_pcichild() to fix the memory leak. The device can also be removed when we run "rmmod pci-hyperv". On this path (hv_pci_remove() -> hv_pci_bus_exit() -> hv_pci_devices_present()), hpdev->refs is 2, and we do correctly call put_pcichild() twice in pci_devices_present_work(). Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") Signed-off-by: Dexuan Cui [lorenzo.pieralisi@arm.com: commit log rework] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Stephen Hemminger Reviewed-by: Michael Kelley Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pci-hyperv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 9ba4d12c179c..5b54d36077a9 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1905,6 +1905,9 @@ static void hv_eject_device_work(struct work_struct *work) sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt, VM_PKT_DATA_INBAND, 0); + /* For the get_pcichild() in hv_pci_eject_device() */ + put_pcichild(hpdev); + /* For the two refs got in new_pcichild_device() */ put_pcichild(hpdev); put_pcichild(hpdev); put_hvpcibus(hpdev->hbus); -- cgit v1.2.3 From f20f463b32bc09c2ef793765326798e5b0df2361 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 4 Mar 2019 21:34:48 +0000 Subject: PCI: hv: Add hv_pci_remove_slots() when we unload the driver commit 15becc2b56c6eda3d9bf5ae993bafd5661c1fad1 upstream. When we unload the pci-hyperv host controller driver, the host does not send us a PCI_EJECT message. In this case we also need to make sure the sysfs PCI slot directory is removed, otherwise a command on a slot file eg: "cat /sys/bus/pci/slots/2/address" will trigger a "BUG: unable to handle kernel paging request" and, if we unload/reload the driver several times we would end up with stale slot entries in PCI slot directories in /sys/bus/pci/slots/ root@localhost:~# ls -rtl /sys/bus/pci/slots/ total 0 drwxr-xr-x 2 root root 0 Feb 7 10:49 2 drwxr-xr-x 2 root root 0 Feb 7 10:49 2-1 drwxr-xr-x 2 root root 0 Feb 7 10:51 2-2 Add the missing code to remove the PCI slot and fix the current behaviour. Fixes: a15f2c08c708 ("PCI: hv: support reporting serial number as slot information") Signed-off-by: Dexuan Cui [lorenzo.pieralisi@arm.com: reformatted the log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Stephen Hemminger Reviewed-by: Michael Kelley Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pci-hyperv.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 5b54d36077a9..3f3136762fc5 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1491,6 +1491,21 @@ static void hv_pci_assign_slots(struct hv_pcibus_device *hbus) } } +/* + * Remove entries in sysfs pci slot directory. + */ +static void hv_pci_remove_slots(struct hv_pcibus_device *hbus) +{ + struct hv_pci_dev *hpdev; + + list_for_each_entry(hpdev, &hbus->children, list_entry) { + if (!hpdev->pci_slot) + continue; + pci_destroy_slot(hpdev->pci_slot); + hpdev->pci_slot = NULL; + } +} + /** * create_root_hv_pci_bus() - Expose a new root PCI bus * @hbus: Root PCI bus, as understood by this driver @@ -2685,6 +2700,7 @@ static int hv_pci_remove(struct hv_device *hdev) pci_lock_rescan_remove(); pci_stop_root_bus(hbus->pci_bus); pci_remove_root_bus(hbus->pci_bus); + hv_pci_remove_slots(hbus); pci_unlock_rescan_remove(); hbus->state = hv_pcibus_removed; } -- cgit v1.2.3 From 78b8c59eee72b4e854b8f1e58eae308f750f843c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 4 Mar 2019 21:34:49 +0000 Subject: PCI: hv: Add pci_destroy_slot() in pci_devices_present_work(), if necessary commit 340d455699400f2c2c0f9b3f703ade3085cdb501 upstream. When we hot-remove a device, usually the host sends us a PCI_EJECT message, and a PCI_BUS_RELATIONS message with bus_rel->device_count == 0. When we execute the quick hot-add/hot-remove test, the host may not send us the PCI_EJECT message if the guest has not fully finished the initialization by sending the PCI_RESOURCES_ASSIGNED* message to the host, so it's potentially unsafe to only depend on the pci_destroy_slot() in hv_eject_device_work() because the code path create_root_hv_pci_bus() -> hv_pci_assign_slots() is not called in this case. Note: in this case, the host still sends the guest a PCI_BUS_RELATIONS message with bus_rel->device_count == 0. In the quick hot-add/hot-remove test, we can have such a race before the code path pci_devices_present_work() -> new_pcichild_device() adds the new device into the hbus->children list, we may have already received the PCI_EJECT message, and since the tasklet handler hv_pci_onchannelcallback() may fail to find the "hpdev" by calling get_pcichild_wslot(hbus, dev_message->wslot.slot) hv_pci_eject_device() is not called; Later, by continuing execution create_root_hv_pci_bus() -> hv_pci_assign_slots() creates the slot and the PCI_BUS_RELATIONS message with bus_rel->device_count == 0 removes the device from hbus->children, and we end up being unable to remove the slot in hv_pci_remove() -> hv_pci_remove_slots() Remove the slot in pci_devices_present_work() when the device is removed to address this race. pci_devices_present_work() and hv_eject_device_work() run in the singled-threaded hbus->wq, so there is not a double-remove issue for the slot. We cannot offload hv_pci_eject_device() from hv_pci_onchannelcallback() to the workqueue, because we need the hv_pci_onchannelcallback() synchronously call hv_pci_eject_device() to poll the channel ringbuffer to work around the "hangs in hv_compose_msi_msg()" issue fixed in commit de0aa7b2f97d ("PCI: hv: Fix 2 hang issues in hv_compose_msi_msg()") Fixes: a15f2c08c708 ("PCI: hv: support reporting serial number as slot information") Signed-off-by: Dexuan Cui [lorenzo.pieralisi@arm.com: rewritten commit log] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Stephen Hemminger Reviewed-by: Michael Kelley Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pci-hyperv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 3f3136762fc5..808a182830e5 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1781,6 +1781,10 @@ static void pci_devices_present_work(struct work_struct *work) hpdev = list_first_entry(&removed, struct hv_pci_dev, list_entry); list_del(&hpdev->list_entry); + + if (hpdev->pci_slot) + pci_destroy_slot(hpdev->pci_slot); + put_pcichild(hpdev); } -- cgit v1.2.3 From ba686f90778b73e9e7e144914a7f88f8f88cbc3d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 16 Mar 2019 09:13:06 +0900 Subject: f2fs: Fix use of number of devices commit 0916878da355650d7e77104a7ac0fa1784eca852 upstream. For a single device mount using a zoned block device, the zone information for the device is stored in the sbi->devs single entry array and sbi->s_ndevs is set to 1. This differs from a single device mount using a regular block device which does not allocate sbi->devs and sets sbi->s_ndevs to 0. However, sbi->s_devs == 0 condition is used throughout the code to differentiate a single device mount from a multi-device mount where sbi->s_ndevs is always larger than 1. This results in problems with single zoned block device volumes as these are treated as multi-device mounts but do not have the start_blk and end_blk information set. One of the problem observed is skipping of zone discard issuing resulting in write commands being issued to full zones or unaligned to a zone write pointer. Fix this problem by simply treating the cases sbi->s_ndevs == 0 (single regular block device mount) and sbi->s_ndevs == 1 (single zoned block device mount) in the same manner. This is done by introducing the helper function f2fs_is_multi_device() and using this helper in place of direct tests of sbi->s_ndevs value, improving code readability. Fixes: 7bb3a371d199 ("f2fs: Fix zoned block device support") Cc: Signed-off-by: Damien Le Moal Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 17 +++++++++++------ fs/f2fs/f2fs.h | 13 ++++++++++++- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 13 +++++++------ 5 files changed, 32 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c99aab23efea..6e598dd85fb5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -218,12 +218,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, struct block_device *bdev = sbi->sb->s_bdev; int i; - for (i = 0; i < sbi->s_ndevs; i++) { - if (FDEV(i).start_blk <= blk_addr && - FDEV(i).end_blk >= blk_addr) { - blk_addr -= FDEV(i).start_blk; - bdev = FDEV(i).bdev; - break; + if (f2fs_is_multi_device(sbi)) { + for (i = 0; i < sbi->s_ndevs; i++) { + if (FDEV(i).start_blk <= blk_addr && + FDEV(i).end_blk >= blk_addr) { + blk_addr -= FDEV(i).start_blk; + bdev = FDEV(i).bdev; + break; + } } } if (bio) { @@ -237,6 +239,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) { int i; + if (!f2fs_is_multi_device(sbi)) + return 0; + for (i = 0; i < sbi->s_ndevs; i++) if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr) return i; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6d9186a6528c..48f1bbf3e87e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1364,6 +1364,17 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) } #endif +/* + * Test if the mounted volume is a multi-device volume. + * - For a single regular disk volume, sbi->s_ndevs is 0. + * - For a single zoned disk volume, sbi->s_ndevs is 1. + * - For a multi-device volume, sbi->s_ndevs is always 2 or more. + */ +static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) +{ + return sbi->s_ndevs > 1; +} + /* For write statistics. Suppose sector size is 512 bytes, * and the return value is in kbytes. s is of struct f2fs_sb_info. */ @@ -3612,7 +3623,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (f2fs_post_read_required(inode)) return true; - if (sbi->s_ndevs) + if (f2fs_is_multi_device(sbi)) return true; /* * for blkzoned device, fallback direct IO to buffered IO, so diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 30ed43bce110..bc078aea60ae 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2570,7 +2570,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) sizeof(range))) return -EFAULT; - if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || + if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || __is_large_section(sbi)) { f2fs_msg(sbi->sb, KERN_WARNING, "Can't flush %u in %d for segs_per_sec %u != 1\n", diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 195cf0f9d9ef..ab764bd106de 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1346,7 +1346,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ - if (sbi->s_ndevs && !__is_large_section(sbi)) + if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi)) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b6c8b0696ef6..2b809b54d81b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -576,7 +576,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) int ret = 0; int i; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return __submit_flush_wait(sbi, sbi->sb->s_bdev); for (i = 0; i < sbi->s_ndevs; i++) { @@ -644,7 +644,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return ret; } - if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) { + if (atomic_inc_return(&fcc->queued_flush) == 1 || + f2fs_is_multi_device(sbi)) { ret = submit_flush_wait(sbi, ino); atomic_dec(&fcc->queued_flush); @@ -750,7 +751,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) { int ret = 0, i; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return 0; for (i = 1; i < sbi->s_ndevs; i++) { @@ -1359,7 +1360,7 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, trace_f2fs_queue_discard(bdev, blkstart, blklen); - if (sbi->s_ndevs) { + if (f2fs_is_multi_device(sbi)) { int devi = f2fs_target_device_index(sbi, blkstart); blkstart -= FDEV(devi).start_blk; @@ -1714,7 +1715,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, block_t lblkstart = blkstart; int devi = 0; - if (sbi->s_ndevs) { + if (f2fs_is_multi_device(sbi)) { devi = f2fs_target_device_index(sbi, blkstart); blkstart -= FDEV(devi).start_blk; } @@ -3071,7 +3072,7 @@ static void update_device_state(struct f2fs_io_info *fio) struct f2fs_sb_info *sbi = fio->sbi; unsigned int devidx; - if (!sbi->s_ndevs) + if (!f2fs_is_multi_device(sbi)) return; devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); -- cgit v1.2.3 From d59f5a01fa438635ae098b2e170a18644df73c06 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2019 19:40:30 +0200 Subject: Linux 5.0.17 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 95670d520786..6325ac97c7e2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 0 -SUBLEVEL = 16 +SUBLEVEL = 17 EXTRAVERSION = NAME = Shy Crocodile -- cgit v1.2.3