From 6be388f4a35d2ce5ef7dbf635a8964a5da7f799f Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Sun, 25 Apr 2021 23:03:53 +0530 Subject: HID: usbhid: fix info leak in hid_submit_ctrl In hid_submit_ctrl(), the way of calculating the report length doesn't take into account that report->size can be zero. When running the syzkaller reproducer, a report of size 0 causes hid_submit_ctrl) to calculate transfer_buffer_length as 16384. When this urb is passed to the usb core layer, KMSAN reports an info leak of 16384 bytes. To fix this, first modify hid_report_len() to account for the zero report size case by using DIV_ROUND_UP for the division. Then, call it from hid_submit_ctrl(). Reported-by: syzbot+7c2bb71996f95a82524c@syzkaller.appspotmail.com Signed-off-by: Anirudh Rayabharam Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 271021e20a3f..10e922cee4eb 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1167,8 +1167,7 @@ static inline void hid_hw_wait(struct hid_device *hdev) */ static inline u32 hid_report_len(struct hid_report *report) { - /* equivalent to DIV_ROUND_UP(report->size, 8) + !!(report->id > 0) */ - return ((report->size - 1) >> 3) + 1 + (report->id > 0); + return DIV_ROUND_UP(report->size, 8) + (report->id > 0); } int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, -- cgit v1.2.3-71-gd317 From 28ec344bb8911bb0d4910456b22ba0dd4f662521 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 5 May 2021 17:44:22 -0700 Subject: usb: typec: tcpm: Don't block probing of consumers of "connector" nodes fw_devlink expects DT device nodes with "compatible" property to have struct devices created for them. Since the connector node might not be populated as a device, mark it as such so that fw_devlink knows not to wait on this fwnode being populated as a struct device. Without this patch, USB functionality can be broken on some boards. Fixes: f7514a663016 ("of: property: fw_devlink: Add support for remote-endpoint") Reported-by: John Stultz Tested-by: John Stultz Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210506004423.345199-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 ++- drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++ include/linux/fwnode.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 4a8bf8cda52b..628e33939aca 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -150,7 +150,7 @@ void fwnode_links_purge(struct fwnode_handle *fwnode) fwnode_links_purge_consumers(fwnode); } -static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) +void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) { struct fwnode_handle *child; @@ -164,6 +164,7 @@ static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) fwnode_for_each_available_child_node(fwnode, child) fw_devlink_purge_absent_suppliers(child); } +EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers); #ifdef CONFIG_SRCU static DEFINE_MUTEX(device_links_lock); diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index c4fdc00a3bc8..bffa342d4e38 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5754,6 +5754,15 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, if (!fwnode) return -EINVAL; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This it breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. + */ + fw_devlink_purge_absent_suppliers(fwnode); + /* USB data support is optional */ ret = fwnode_property_read_string(fwnode, "data-role", &cap_str); if (ret == 0) { diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index ed4e67a7ff1c..59828516ebaf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -187,5 +187,6 @@ extern u32 fw_devlink_get_flags(void); extern bool fw_devlink_is_strict(void); int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); void fwnode_links_purge(struct fwnode_handle *fwnode); +void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); #endif -- cgit v1.2.3-71-gd317 From c745253e2a691a40c66790defe85c104a887e14a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 5 May 2021 14:09:15 +0300 Subject: PM: runtime: Fix unpaired parent child_count for force_resume As pm_runtime_need_not_resume() relies also on usage_count, it can return a different value in pm_runtime_force_suspend() compared to when called in pm_runtime_force_resume(). Different return values can happen if anything calls PM runtime functions in between, and causes the parent child_count to increase on every resume. So far I've seen the issue only for omapdrm that does complicated things with PM runtime calls during system suspend for legacy reasons: omap_atomic_commit_tail() for omapdrm.0 dispc_runtime_get() wakes up 58000000.dss as it's the dispc parent dispc_runtime_resume() rpm_resume() increases parent child_count dispc_runtime_put() won't idle, PM runtime suspend blocked pm_runtime_force_suspend() for 58000000.dss, !pm_runtime_need_not_resume() __update_runtime_status() system suspended pm_runtime_force_resume() for 58000000.dss, pm_runtime_need_not_resume() pm_runtime_enable() only called because of pm_runtime_need_not_resume() omap_atomic_commit_tail() for omapdrm.0 dispc_runtime_get() wakes up 58000000.dss as it's the dispc parent dispc_runtime_resume() rpm_resume() increases parent child_count dispc_runtime_put() won't idle, PM runtime suspend blocked ... rpm_suspend for 58000000.dss but parent child_count is now unbalanced Let's fix the issue by adding a flag for needs_force_resume and use it in pm_runtime_force_resume() instead of pm_runtime_need_not_resume(). Additionally omapdrm system suspend could be simplified later on to avoid lots of unnecessary PM runtime calls and the complexity it adds. The driver can just use internal functions that are shared between the PM runtime and system suspend related functions. Fixes: 4918e1f87c5f ("PM / runtime: Rework pm_runtime_force_suspend/resume()") Signed-off-by: Tony Lindgren Reviewed-by: Ulf Hansson Tested-by: Tomi Valkeinen Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 10 +++++++--- include/linux/pm.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 1fc1a992f90c..b570848d23e0 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1637,6 +1637,7 @@ void pm_runtime_init(struct device *dev) dev->power.request_pending = false; dev->power.request = RPM_REQ_NONE; dev->power.deferred_resume = false; + dev->power.needs_force_resume = 0; INIT_WORK(&dev->power.work, pm_runtime_work); dev->power.timer_expires = 0; @@ -1804,10 +1805,12 @@ int pm_runtime_force_suspend(struct device *dev) * its parent, but set its status to RPM_SUSPENDED anyway in case this * function will be called again for it in the meantime. */ - if (pm_runtime_need_not_resume(dev)) + if (pm_runtime_need_not_resume(dev)) { pm_runtime_set_suspended(dev); - else + } else { __update_runtime_status(dev, RPM_SUSPENDED); + dev->power.needs_force_resume = 1; + } return 0; @@ -1834,7 +1837,7 @@ int pm_runtime_force_resume(struct device *dev) int (*callback)(struct device *); int ret = 0; - if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev)) + if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume) goto out; /* @@ -1853,6 +1856,7 @@ int pm_runtime_force_resume(struct device *dev) pm_runtime_mark_last_busy(dev); out: + dev->power.needs_force_resume = 0; pm_runtime_enable(dev); return ret; } diff --git a/include/linux/pm.h b/include/linux/pm.h index c9657408fee1..1d8209c09686 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -601,6 +601,7 @@ struct dev_pm_info { unsigned int idle_notification:1; unsigned int request_pending:1; unsigned int deferred_resume:1; + unsigned int needs_force_resume:1; unsigned int runtime_auto:1; bool ignore_children:1; unsigned int no_callbacks:1; -- cgit v1.2.3-71-gd317 From 2515dd6ce8e545b0b2eece84920048ef9ed846c4 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 19 Apr 2021 16:17:41 -0700 Subject: stack: Replace "o" output with "r" input constraint "o" isn't a common asm() constraint to use; it triggers an assertion in assert-enabled builds of LLVM that it's not recognized when targeting aarch64 (though it appears to fall back to "m"). It's fixed in LLVM 13 now, but there isn't really a good reason to use "o" in particular here. To avoid causing build issues for those using assert-enabled builds of earlier LLVM versions, the constraint needs changing. Instead, if the point is to retain the __builtin_alloca(), make ptr appear to "escape" via being an input to an empty inline asm block. This is preferable anyways, since otherwise this looks like a dead store. While the use of "r" was considered in https://lore.kernel.org/lkml/202104011447.2E7F543@keescook/ it was only tested as an output (which looks like a dead store, and wasn't sufficient). Use "r" as an input constraint instead, which behaves correctly across compilers and architectures. Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall") Signed-off-by: Nick Desaulniers Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Tested-by: Kees Cook Tested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Link: https://reviews.llvm.org/D100412 Link: https://bugs.llvm.org/show_bug.cgi?id=49956 Link: https://lore.kernel.org/r/20210419231741.4084415-1-keescook@chromium.org --- include/linux/randomize_kstack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index fd80fab663a9..bebc911161b6 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -38,7 +38,7 @@ void *__builtin_alloca(size_t size); u32 offset = raw_cpu_read(kstack_offset); \ u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ - asm volatile("" : "=o"(*ptr) :: "memory"); \ + asm volatile("" :: "r"(ptr) : "memory"); \ } \ } while (0) -- cgit v1.2.3-71-gd317 From 35f3f8504c3b60a1ae5576e178b27fc0ddd6157d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:42 +0300 Subject: spi: Switch to signed types for *_native_cs SPI controller fields While fixing undefined behaviour the commit f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs") missed the case when all CSs are GPIOs and thus unused_native_cs will be evaluated to -1 in unsigned representation. This will falsely trigger a condition in the spi_get_gpio_descs(). Switch to signed types for *_native_cs SPI controller fields to fix above. Fixes: f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131242.49455-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 360a3bc767ca..74239d65c7fd 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -644,8 +644,8 @@ struct spi_controller { int *cs_gpios; struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; - u8 unused_native_cs; - u8 max_native_cs; + s8 unused_native_cs; + s8 max_native_cs; /* statistics */ struct spi_statistics statistics; -- cgit v1.2.3-71-gd317 From efed9a3337e341bd0989161b97453b52567bc59d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 May 2021 17:05:35 -0700 Subject: kyber: fix out of bounds access when preempted __blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx for the current CPU again and uses that to get the corresponding Kyber context in the passed hctx. However, the thread may be preempted between the two calls to blk_mq_get_ctx(), and the ctx returned the second time may no longer correspond to the passed hctx. This "works" accidentally most of the time, but it can cause us to read garbage if the second ctx came from an hctx with more ctx's than the first one (i.e., if ctx->index_hw[hctx->type] > hctx->nr_ctx). This manifested as this UBSAN array index out of bounds error reported by Jakub: UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9 index 13106 is out of range for type 'long unsigned int [128]' Call Trace: dump_stack+0xa4/0xe5 ubsan_epilogue+0x5/0x40 __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34 queued_spin_lock_slowpath+0x476/0x480 do_raw_spin_lock+0x1c2/0x1d0 kyber_bio_merge+0x112/0x180 blk_mq_submit_bio+0x1f5/0x1100 submit_bio_noacct+0x7b0/0x870 submit_bio+0xc2/0x3a0 btrfs_map_bio+0x4f0/0x9d0 btrfs_submit_data_bio+0x24e/0x310 submit_one_bio+0x7f/0xb0 submit_extent_page+0xc4/0x440 __extent_writepage_io+0x2b8/0x5e0 __extent_writepage+0x28d/0x6e0 extent_write_cache_pages+0x4d7/0x7a0 extent_writepages+0xa2/0x110 do_writepages+0x8f/0x180 __writeback_single_inode+0x99/0x7f0 writeback_sb_inodes+0x34e/0x790 __writeback_inodes_wb+0x9e/0x120 wb_writeback+0x4d2/0x660 wb_workfn+0x64d/0xa10 process_one_work+0x53a/0xa80 worker_thread+0x69/0x5b0 kthread+0x20b/0x240 ret_from_fork+0x1f/0x30 Only Kyber uses the hctx, so fix it by passing the request_queue to ->bio_merge() instead. BFQ and mq-deadline just use that, and Kyber can map the queues itself to avoid the mismatch. Fixes: a6088845c2bf ("block: kyber: make kyber more friendly with merging") Reported-by: Jakub Kicinski Signed-off-by: Omar Sandoval Link: https://lore.kernel.org/r/c7598605401a48d5cfeadebb678abd10af22b83f.1620691329.git.osandov@fb.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 3 +-- block/blk-mq-sched.c | 8 +++++--- block/kyber-iosched.c | 5 +++-- block/mq-deadline.c | 3 +-- include/linux/elevator.h | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0270cd7ca165..59b2499d3f8b 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2263,10 +2263,9 @@ static void bfq_remove_request(struct request_queue *q, } -static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { - struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; struct request *free = NULL; /* diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 42a365b1b9c0..996a4b2f73aa 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -358,14 +358,16 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { struct elevator_queue *e = q->elevator; - struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); + struct blk_mq_ctx *ctx; + struct blk_mq_hw_ctx *hctx; bool ret = false; enum hctx_type type; if (e && e->type->ops.bio_merge) - return e->type->ops.bio_merge(hctx, bio, nr_segs); + return e->type->ops.bio_merge(q, bio, nr_segs); + ctx = blk_mq_get_ctx(q); + hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); type = hctx->type; if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) || list_empty_careful(&ctx->rq_lists[type])) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 8969e122f081..81e3279ecd57 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -561,11 +561,12 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) } } -static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool kyber_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); struct kyber_hctx_data *khd = hctx->sched_data; - struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]]; unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); struct list_head *rq_list = &kcq->rq_list[sched_domain]; diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 04aded71ead2..8eea2cbf2bf4 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -461,10 +461,9 @@ static int dd_request_merge(struct request_queue *q, struct request **rq, return ELEVATOR_NO_MERGE; } -static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool dd_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { - struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; struct request *free = NULL; bool ret; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 1fe8e105b83b..dcb2f9022c1d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -34,7 +34,7 @@ struct elevator_mq_ops { void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); - bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int); + bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int); int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); -- cgit v1.2.3-71-gd317 From 190515f610946db025cdedebde93958b725fb583 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Wed, 12 May 2021 18:01:24 +0800 Subject: blkdev.h: remove unused codes blk_account_rq Last users of blk_account_rq gone with patch commit a1ce35fa49852db ("block: remove dead elevator code") and now it gets no caller, it can be safely removed. Signed-off-by: Lin Feng Link: https://lore.kernel.org/r/20210512100124.173769-1-linf@wangsu.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b91ba6207365..26c3e368656f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -677,11 +677,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -static inline bool blk_account_rq(struct request *rq) -{ - return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); -} - #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) -- cgit v1.2.3-71-gd317 From 681865a03d3ec6ac3dda147044ed2a1a0f49f7bf Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 19 Apr 2021 19:27:25 +0800 Subject: libnvdimm: Remove duplicate struct declaration struct device is declared at 133rd line. The second declaration is unnecessary, remove it. Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20210419112725.42145-1-wanjiabing@vivo.com Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 01f251b6e36c..89b69e645ac7 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -141,7 +141,6 @@ static inline void __iomem *devm_nvdimm_ioremap(struct device *dev, struct nvdimm_bus; struct module; -struct device; struct nd_blk_region; struct nd_blk_region_desc { int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); -- cgit v1.2.3-71-gd317 From 860dafa902595fb5f1d23bbcce1215188c3341e6 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Thu, 13 May 2021 11:51:50 +0200 Subject: vt: Fix character height handling with VT_RESIZEX Restore the original intent of the VT_RESIZEX ioctl's `v_clin' parameter which is the number of pixel rows per character (cell) rather than the height of the font used. For framebuffer devices the two values are always the same, because the former is inferred from the latter one. For VGA used as a true text mode device these two parameters are independent from each other: the number of pixel rows per character is set in the CRT controller, while font height is in fact hardwired to 32 pixel rows and fonts of heights below that value are handled by padding their data with blanks when loaded to hardware for use by the character generator. One can change the setting in the CRT controller and it will update the screen contents accordingly regardless of the font loaded. The `v_clin' parameter is used by the `vgacon' driver to set the height of the character cell and then the cursor position within. Make the parameter explicit then, by defining a new `vc_cell_height' struct member of `vc_data', set it instead of `vc_font.height' from `v_clin' in the VT_RESIZEX ioctl, and then use it throughout the `vgacon' driver except where actual font data is accessed which as noted above is independent from the CRTC setting. This way the framebuffer console driver is free to ignore the `v_clin' parameter as irrelevant, as it always should have, avoiding any issues attempts to give the parameter a meaning there could have caused, such as one that has led to commit 988d0763361b ("vt_ioctl: make VT_RESIZEX behave like VT_RESIZE"): "syzbot is reporting UAF/OOB read at bit_putcs()/soft_cursor() [1][2], for vt_resizex() from ioctl(VT_RESIZEX) allows setting font height larger than actual font height calculated by con_font_set() from ioctl(PIO_FONT). Since fbcon_set_font() from con_font_set() allocates minimal amount of memory based on actual font height calculated by con_font_set(), use of vt_resizex() can cause UAF/OOB read for font data." The problem first appeared around Linux 2.5.66 which predates our repo history, but the origin could be identified with the old MIPS/Linux repo also at: as commit 9736a3546de7 ("Merge with Linux 2.5.66."), where VT_RESIZEX code in `vt_ioctl' was updated as follows: if (clin) - video_font_height = clin; + vc->vc_font.height = clin; making the parameter apply to framebuffer devices as well, perhaps due to the use of "font" in the name of the original `video_font_height' variable. Use "cell" in the new struct member then to avoid ambiguity. References: [1] https://syzkaller.appspot.com/bug?id=32577e96d88447ded2d3b76d71254fb855245837 [2] https://syzkaller.appspot.com/bug?id=6b8355d27b2b94fb5cedf4655e3a59162d9e48e3 Signed-off-by: Maciej W. Rozycki Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org # v2.6.12+ Signed-off-by: Linus Torvalds --- drivers/tty/vt/vt_ioctl.c | 6 +++--- drivers/video/console/vgacon.c | 44 +++++++++++++++++++++--------------------- include/linux/console_struct.h | 1 + 3 files changed, 26 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 95d10197566b..0e0cd9e9e589 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -706,17 +706,17 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs) if (vcp) { int ret; int save_scan_lines = vcp->vc_scan_lines; - int save_font_height = vcp->vc_font.height; + int save_cell_height = vcp->vc_cell_height; if (v.v_vlin) vcp->vc_scan_lines = v.v_vlin; if (v.v_clin) - vcp->vc_font.height = v.v_clin; + vcp->vc_cell_height = v.v_clin; vcp->vc_resize_user = 1; ret = vc_resize(vcp, v.v_cols, v.v_rows); if (ret) { vcp->vc_scan_lines = save_scan_lines; - vcp->vc_font.height = save_font_height; + vcp->vc_cell_height = save_cell_height; console_unlock(); return ret; } diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 511e7d06b148..631eb918f8e1 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -383,7 +383,7 @@ static void vgacon_init(struct vc_data *c, int init) vc_resize(c, vga_video_num_columns, vga_video_num_lines); c->vc_scan_lines = vga_scan_lines; - c->vc_font.height = vga_video_font_height; + c->vc_font.height = c->vc_cell_height = vga_video_font_height; c->vc_complement_mask = 0x7700; if (vga_512_chars) c->vc_hi_font_mask = 0x0800; @@ -518,32 +518,32 @@ static void vgacon_cursor(struct vc_data *c, int mode) switch (CUR_SIZE(c->vc_cursor_type)) { case CUR_UNDERLINE: vgacon_set_cursor_size(c->state.x, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height - + (c->vc_cell_height < 10 ? 2 : 3), - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_TWO_THIRDS: vgacon_set_cursor_size(c->state.x, - c->vc_font.height / 3, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height / 3, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_LOWER_THIRD: vgacon_set_cursor_size(c->state.x, - (c->vc_font.height * 2) / 3, - c->vc_font.height - - (c->vc_font.height < + (c->vc_cell_height * 2) / 3, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_LOWER_HALF: vgacon_set_cursor_size(c->state.x, - c->vc_font.height / 2, - c->vc_font.height - - (c->vc_font.height < + c->vc_cell_height / 2, + c->vc_cell_height - + (c->vc_cell_height < 10 ? 1 : 2)); break; case CUR_NONE: @@ -554,7 +554,7 @@ static void vgacon_cursor(struct vc_data *c, int mode) break; default: vgacon_set_cursor_size(c->state.x, 1, - c->vc_font.height); + c->vc_cell_height); break; } break; @@ -565,13 +565,13 @@ static int vgacon_doresize(struct vc_data *c, unsigned int width, unsigned int height) { unsigned long flags; - unsigned int scanlines = height * c->vc_font.height; + unsigned int scanlines = height * c->vc_cell_height; u8 scanlines_lo = 0, r7 = 0, vsync_end = 0, mode, max_scan; raw_spin_lock_irqsave(&vga_lock, flags); vgacon_xres = width * VGA_FONTWIDTH; - vgacon_yres = height * c->vc_font.height; + vgacon_yres = height * c->vc_cell_height; if (vga_video_type >= VIDEO_TYPE_VGAC) { outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg); max_scan = inb_p(vga_video_port_val); @@ -626,9 +626,9 @@ static int vgacon_doresize(struct vc_data *c, static int vgacon_switch(struct vc_data *c) { int x = c->vc_cols * VGA_FONTWIDTH; - int y = c->vc_rows * c->vc_font.height; + int y = c->vc_rows * c->vc_cell_height; int rows = screen_info.orig_video_lines * vga_default_font_height/ - c->vc_font.height; + c->vc_cell_height; /* * We need to save screen size here as it's the only way * we can spot the screen has been resized and we need to @@ -1041,7 +1041,7 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight) cursor_size_lastto = 0; c->vc_sw->con_cursor(c, CM_DRAW); } - c->vc_font.height = fontheight; + c->vc_font.height = c->vc_cell_height = fontheight; vc_resize(c, 0, rows); /* Adjust console size */ } } @@ -1096,12 +1096,12 @@ static int vgacon_resize(struct vc_data *c, unsigned int width, */ screen_info.orig_video_cols = width; screen_info.orig_video_lines = height; - vga_default_font_height = c->vc_font.height; + vga_default_font_height = c->vc_cell_height; return 0; } if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ - c->vc_font.height) + c->vc_cell_height) return -EINVAL; if (con_is_visible(c) && !vga_is_gfx) /* who knows */ diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 153734816b49..d5b9c8d40c18 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -101,6 +101,7 @@ struct vc_data { unsigned int vc_rows; unsigned int vc_size_row; /* Bytes per row */ unsigned int vc_scan_lines; /* # of scan lines */ + unsigned int vc_cell_height; /* CRTC character cell height */ unsigned long vc_origin; /* [!] Start of real screen */ unsigned long vc_scr_end; /* [!] End of real screen */ unsigned long vc_visible_origin; /* [!] Top of visible window */ -- cgit v1.2.3-71-gd317 From 640d1eaff2c09e382a23bd831094ebbfaa16fef5 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Tue, 4 May 2021 16:22:34 -0600 Subject: dyndbg: avoid calling dyndbg_emit_prefix when it has no work Wrap function in a static-inline one, which checks flags to avoid calling the function unnecessarily. And hoist its output-buffer initialization to the grand-caller, which is already allocating the buffer on the stack, and can trivially initialize it too. Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20210504222235.1033685-2-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/dynamic_debug.h | 5 +++++ lib/dynamic_debug.c | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index a57ee75342cf..dce631e678dd 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -32,6 +32,11 @@ struct _ddebug { #define _DPRINTK_FLAGS_INCL_FUNCNAME (1<<2) #define _DPRINTK_FLAGS_INCL_LINENO (1<<3) #define _DPRINTK_FLAGS_INCL_TID (1<<4) + +#define _DPRINTK_FLAGS_INCL_ANY \ + (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\ + _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID) + #if defined DEBUG #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT #else diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 921d0a654243..398920403321 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -586,13 +586,11 @@ static int remaining(int wrote) return 0; } -static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf) +static char *__dynamic_emit_prefix(const struct _ddebug *desc, char *buf) { int pos_after_tid; int pos = 0; - *buf = '\0'; - if (desc->flags & _DPRINTK_FLAGS_INCL_TID) { if (in_interrupt()) pos += snprintf(buf + pos, remaining(pos), " "); @@ -618,11 +616,18 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf) return buf; } +static inline char *dynamic_emit_prefix(struct _ddebug *desc, char *buf) +{ + if (unlikely(desc->flags & _DPRINTK_FLAGS_INCL_ANY)) + return __dynamic_emit_prefix(desc, buf); + return buf; +} + void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...) { va_list args; struct va_format vaf; - char buf[PREFIX_SIZE]; + char buf[PREFIX_SIZE] = ""; BUG_ON(!descriptor); BUG_ON(!fmt); @@ -655,7 +660,7 @@ void __dynamic_dev_dbg(struct _ddebug *descriptor, if (!dev) { printk(KERN_DEBUG "(NULL device *): %pV", &vaf); } else { - char buf[PREFIX_SIZE]; + char buf[PREFIX_SIZE] = ""; dev_printk_emit(LOGLEVEL_DEBUG, dev, "%s%s %s: %pV", dynamic_emit_prefix(descriptor, buf), @@ -684,7 +689,7 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, vaf.va = &args; if (dev && dev->dev.parent) { - char buf[PREFIX_SIZE]; + char buf[PREFIX_SIZE] = ""; dev_printk_emit(LOGLEVEL_DEBUG, dev->dev.parent, "%s%s %s %s%s: %pV", @@ -720,7 +725,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, vaf.va = &args; if (ibdev && ibdev->dev.parent) { - char buf[PREFIX_SIZE]; + char buf[PREFIX_SIZE] = ""; dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent, "%s%s %s %s: %pV", -- cgit v1.2.3-71-gd317 From 22247efd822e6d263f3c8bd327f3f769aea9b1d9 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 14 May 2021 17:27:04 -0700 Subject: mm/hugetlb: fix F_SEAL_FUTURE_WRITE Patch series "mm/hugetlb: Fix issues on file sealing and fork", v2. Hugh reported issue with F_SEAL_FUTURE_WRITE not applied correctly to hugetlbfs, which I can easily verify using the memfd_test program, which seems that the program is hardly run with hugetlbfs pages (as by default shmem). Meanwhile I found another probably even more severe issue on that hugetlb fork won't wr-protect child cow pages, so child can potentially write to parent private pages. Patch 2 addresses that. After this series applied, "memfd_test hugetlbfs" should start to pass. This patch (of 2): F_SEAL_FUTURE_WRITE is missing for hugetlb starting from the first day. There is a test program for that and it fails constantly. $ ./memfd_test hugetlbfs memfd-hugetlb: CREATE memfd-hugetlb: BASIC memfd-hugetlb: SEAL-WRITE memfd-hugetlb: SEAL-FUTURE-WRITE mmap() didn't fail as expected Aborted (core dumped) I think it's probably because no one is really running the hugetlbfs test. Fix it by checking FUTURE_WRITE also in hugetlbfs_file_mmap() as what we do in shmem_mmap(). Generalize a helper for that. Link: https://lkml.kernel.org/r/20210503234356.9097-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210503234356.9097-2-peterx@redhat.com Fixes: ab3948f58ff84 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd") Signed-off-by: Peter Xu Reported-by: Hugh Dickins Reviewed-by: Mike Kravetz Cc: Joel Fernandes (Google) Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 5 +++++ include/linux/mm.h | 32 ++++++++++++++++++++++++++++++++ mm/shmem.c | 22 ++++------------------ 3 files changed, 41 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a2a42335e8fd..9d9e0097c1d3 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -131,6 +131,7 @@ static void huge_pagevec_release(struct pagevec *pvec) static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); @@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; + ret = seal_check_future_write(info->seals, vma); + if (ret) + return ret; + /* * page based offset in vm_pgoff could be sufficiently large to * overflow a loff_t when converted to byte offset. This can diff --git a/include/linux/mm.h b/include/linux/mm.h index 322ec61d0da7..c274f75efcf9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3216,5 +3216,37 @@ void mem_dump_obj(void *object); static inline void mem_dump_obj(void *object) {} #endif +/** + * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it + * @seals: the seals to check + * @vma: the vma to operate on + * + * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on + * the vma flags. Return 0 if check pass, or <0 for errors. + */ +static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) +{ + if (seals & F_SEAL_FUTURE_WRITE) { + /* + * New PROT_WRITE and MAP_SHARED mmaps are not allowed when + * "future write" seal active. + */ + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) + return -EPERM; + + /* + * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as + * MAP_SHARED and read-only, take care to not allow mprotect to + * revert protections on such mappings. Do this only for shared + * mappings. For private mappings, don't need to mask + * VM_MAYWRITE as we still want them to be COW-writable. + */ + if (vma->vm_flags & VM_SHARED) + vma->vm_flags &= ~(VM_MAYWRITE); + } + + return 0; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/shmem.c b/mm/shmem.c index a08cedefbfaa..eb131b9fb190 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2258,25 +2258,11 @@ out_nomem: static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { struct shmem_inode_info *info = SHMEM_I(file_inode(file)); + int ret; - if (info->seals & F_SEAL_FUTURE_WRITE) { - /* - * New PROT_WRITE and MAP_SHARED mmaps are not allowed when - * "future write" seal active. - */ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) - return -EPERM; - - /* - * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as - * MAP_SHARED and read-only, take care to not allow mprotect to - * revert protections on such mappings. Do this only for shared - * mappings. For private mappings, don't need to mask - * VM_MAYWRITE as we still want them to be COW-writable. - */ - if (vma->vm_flags & VM_SHARED) - vma->vm_flags &= ~(VM_MAYWRITE); - } + ret = seal_check_future_write(info->seals, vma); + if (ret) + return ret; /* arm64 - allow memory tagging on RAM-based files */ vma->vm_flags |= VM_MTE_ALLOWED; -- cgit v1.2.3-71-gd317 From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 May 2021 17:27:24 -0700 Subject: mm: fix struct page layout on 32-bit systems 32-bit architectures which expect 8-byte alignment for 8-byte integers and need 64-bit DMA addresses (arm, mips, ppc) had their struct page inadvertently expanded in 2019. When the dma_addr_t was added, it forced the alignment of the union to 8 bytes, which inserted a 4 byte gap between 'flags' and the union. Fix this by storing the dma_addr_t in one or two adjacent unsigned longs. This restores the alignment to that of an unsigned long. We always store the low bits in the first word to prevent the PageTail bit from being inadvertently set on a big endian platform. If that happened, get_user_pages_fast() racing against a page which was freed and reallocated to the page_pool could dereference a bogus compound_head(), which would be hard to trace back to this cause. Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org Fixes: c25fff7171be ("mm: add dma_addr_t to struct page") Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Ilias Apalodimas Acked-by: Jesper Dangaard Brouer Acked-by: Vlastimil Babka Tested-by: Matteo Croce Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 4 ++-- include/net/page_pool.h | 12 +++++++++++- net/core/page_pool.c | 12 +++++++----- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6613b26a8894..5aacc1c10a45 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -97,10 +97,10 @@ struct page { }; struct { /* page_pool used by netstack */ /** - * @dma_addr: might require a 64-bit value even on + * @dma_addr: might require a 64-bit value on * 32-bit architectures. */ - dma_addr_t dma_addr; + unsigned long dma_addr[2]; }; struct { /* slab, slob and slub */ union { diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 6d517a37c18b..b4b6de909c93 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - return page->dma_addr; + dma_addr_t ret = page->dma_addr[0]; + if (sizeof(dma_addr_t) > sizeof(unsigned long)) + ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; + return ret; +} + +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +{ + page->dma_addr[0] = addr; + if (sizeof(dma_addr_t) > sizeof(unsigned long)) + page->dma_addr[1] = upper_32_bits(addr); } static inline bool is_page_pool_compiled_in(void) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 9ec1aa9640ad..3c4c4c7a0402 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool, struct page *page, unsigned int dma_sync_size) { + dma_addr_t dma_addr = page_pool_get_dma_addr(page); + dma_sync_size = min(dma_sync_size, pool->p.max_len); - dma_sync_single_range_for_device(pool->p.dev, page->dma_addr, + dma_sync_single_range_for_device(pool->p.dev, dma_addr, pool->p.offset, dma_sync_size, pool->p.dma_dir); } @@ -195,7 +197,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) if (dma_mapping_error(pool->p.dev, dma)) return false; - page->dma_addr = dma; + page_pool_set_dma_addr(page, dma); if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) page_pool_dma_sync_for_device(pool, page, pool->p.max_len); @@ -331,13 +333,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page) */ goto skip_dma_unmap; - dma = page->dma_addr; + dma = page_pool_get_dma_addr(page); - /* When page is unmapped, it cannot be returned our pool */ + /* When page is unmapped, it cannot be returned to our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); - page->dma_addr = 0; + page_pool_set_dma_addr(page, 0); skip_dma_unmap: /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. -- cgit v1.2.3-71-gd317 From 076171a67789ad0107de44c2964f2e46a7d0d7b8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 May 2021 17:27:30 -0700 Subject: mm/filemap: fix readahead return types A readahead request will not allocate more memory than can be represented by a size_t, even on systems that have HIGHMEM available. Change the length functions from returning an loff_t to a size_t. Link: https://lkml.kernel.org/r/20210510201201.1558972-1-willy@infradead.org Fixes: 32c0a6bcaa1f57 ("btrfs: add and use readahead_batch_length") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reported-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/iomap/buffered-io.c | 4 ++-- include/linux/pagemap.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f2cd2034a87b..9023717c5188 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -394,7 +394,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) { struct inode *inode = rac->mapping->host; loff_t pos = readahead_pos(rac); - loff_t length = readahead_length(rac); + size_t length = readahead_length(rac); struct iomap_readpage_ctx ctx = { .rac = rac, }; @@ -402,7 +402,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) trace_iomap_readahead(inode, readahead_count(rac)); while (length > 0) { - loff_t ret = iomap_apply(inode, pos, length, 0, ops, + ssize_t ret = iomap_apply(inode, pos, length, 0, ops, &ctx, iomap_readahead_actor); if (ret <= 0) { WARN_ON_ONCE(ret == 0); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a4bd41128bf3..e89df447fae3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -997,9 +997,9 @@ static inline loff_t readahead_pos(struct readahead_control *rac) * readahead_length - The number of bytes in this readahead request. * @rac: The readahead request. */ -static inline loff_t readahead_length(struct readahead_control *rac) +static inline size_t readahead_length(struct readahead_control *rac) { - return (loff_t)rac->_nr_pages * PAGE_SIZE; + return rac->_nr_pages * PAGE_SIZE; } /** @@ -1024,7 +1024,7 @@ static inline unsigned int readahead_count(struct readahead_control *rac) * readahead_batch_length - The number of bytes in the current batch. * @rac: The readahead request. */ -static inline loff_t readahead_batch_length(struct readahead_control *rac) +static inline size_t readahead_batch_length(struct readahead_control *rac) { return rac->_batch_count * PAGE_SIZE; } -- cgit v1.2.3-71-gd317 From 0cfe5a6e758fb20be8ad3e8f10cb087cc8033eeb Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 1 Apr 2021 17:41:04 +0200 Subject: gpu: host1x: Split up client initalization and registration In some cases we may need to initialize the host1x client first before registering it. This commit adds a new helper that will do nothing but the initialization of the data structure. At the same time, the initialization is removed from the registration function. Note, however, that for simplicity we explicitly initialize the client when the host1x_client_register() function is called, as opposed to the low-level __host1x_client_register() function. This allows existing callers to remain unchanged. Signed-off-by: Thierry Reding --- drivers/gpu/host1x/bus.c | 30 ++++++++++++++++++++++++------ include/linux/host1x.h | 30 ++++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 46f69c532b6b..218e3718fd68 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -735,6 +735,29 @@ void host1x_driver_unregister(struct host1x_driver *driver) } EXPORT_SYMBOL(host1x_driver_unregister); +/** + * __host1x_client_init() - initialize a host1x client + * @client: host1x client + * @key: lock class key for the client-specific mutex + */ +void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key) +{ + INIT_LIST_HEAD(&client->list); + __mutex_init(&client->lock, "host1x client lock", key); + client->usecount = 0; +} +EXPORT_SYMBOL(__host1x_client_init); + +/** + * host1x_client_exit() - uninitialize a host1x client + * @client: host1x client + */ +void host1x_client_exit(struct host1x_client *client) +{ + mutex_destroy(&client->lock); +} +EXPORT_SYMBOL(host1x_client_exit); + /** * __host1x_client_register() - register a host1x client * @client: host1x client @@ -747,16 +770,11 @@ EXPORT_SYMBOL(host1x_driver_unregister); * device and call host1x_device_init(), which will in turn call each client's * &host1x_client_ops.init implementation. */ -int __host1x_client_register(struct host1x_client *client, - struct lock_class_key *key) +int __host1x_client_register(struct host1x_client *client) { struct host1x *host1x; int err; - INIT_LIST_HEAD(&client->list); - __mutex_init(&client->lock, "host1x client lock", key); - client->usecount = 0; - mutex_lock(&devices_lock); list_for_each_entry(host1x, &devices, list) { diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 232e1bd507a7..9b0487c88571 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -332,12 +332,30 @@ static inline struct host1x_device *to_host1x_device(struct device *dev) int host1x_device_init(struct host1x_device *device); int host1x_device_exit(struct host1x_device *device); -int __host1x_client_register(struct host1x_client *client, - struct lock_class_key *key); -#define host1x_client_register(class) \ - ({ \ - static struct lock_class_key __key; \ - __host1x_client_register(class, &__key); \ +void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key); +void host1x_client_exit(struct host1x_client *client); + +#define host1x_client_init(client) \ + ({ \ + static struct lock_class_key __key; \ + __host1x_client_init(client, &__key); \ + }) + +int __host1x_client_register(struct host1x_client *client); + +/* + * Note that this wrapper calls __host1x_client_init() for compatibility + * with existing callers. Callers that want to separately initialize and + * register a host1x client must first initialize using either of the + * __host1x_client_init() or host1x_client_init() functions and then use + * the low-level __host1x_client_register() function to avoid the client + * getting reinitialized. + */ +#define host1x_client_register(client) \ + ({ \ + static struct lock_class_key __key; \ + __host1x_client_init(client, &__key); \ + __host1x_client_register(client); \ }) int host1x_client_unregister(struct host1x_client *client); -- cgit v1.2.3-71-gd317 From 4d7b324e231366ea772ab10df46be31273ca39af Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 18 May 2021 09:47:23 +0300 Subject: bus: ti-sysc: Fix am335x resume hang for usb otg module On am335x, suspend and resume only works once, and the system hangs if suspend is attempted again. However, turns out suspend and resume works fine multiple times if the USB OTG driver for musb controller is loaded. The issue is caused my the interconnect target module losing context during suspend, and it needs a restore on resume to be reconfigure again as debugged earlier by Dave Gerlach . There are also other modules that need a restore on resume, like gpmc as noted by Dave. So let's add a common way to restore an interconnect target module based on a quirk flag. For now, let's enable the quirk for am335x otg only to fix the suspend and resume issue. As gpmc is not causing hangs based on tests with BeagleBone, let's patch gpmc separately. For gpmc, we also need a hardware reset done before restore according to Dave. To reinit the modules, we decouple system suspend from PM runtime. We replace calls to pm_runtime_force_suspend() and pm_runtime_force_resume() with direct calls to internal functions and rely on the driver internal state. There no point trying to handle complex system suspend and resume quirks via PM runtime. This is issue should have already been noticed with commit 1819ef2e2d12 ("bus: ti-sysc: Use swsup quirks also for am335x musb") when quirk handling was added for am335x otg for swsup. But the issue went unnoticed as having musb driver loaded hides the issue, and suspend and resume works once without the driver loaded. Fixes: 1819ef2e2d12 ("bus: ti-sysc: Use swsup quirks also for am335x musb") Suggested-by: Dave Gerlach Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 53 +++++++++++++++++++++++++++++++++-- include/linux/platform_data/ti-sysc.h | 1 + 2 files changed, 51 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 50a9f34b9e6c..4ff319863be2 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1334,6 +1334,34 @@ err_allow_idle: return error; } +static int sysc_reinit_module(struct sysc *ddata, bool leave_enabled) +{ + struct device *dev = ddata->dev; + int error; + + /* Disable target module if it is enabled */ + if (ddata->enabled) { + error = sysc_runtime_suspend(dev); + if (error) + dev_warn(dev, "reinit suspend failed: %i\n", error); + } + + /* Enable target module */ + error = sysc_runtime_resume(dev); + if (error) + dev_warn(dev, "reinit resume failed: %i\n", error); + + if (leave_enabled) + return error; + + /* Disable target module if no leave_enabled was set */ + error = sysc_runtime_suspend(dev); + if (error) + dev_warn(dev, "reinit suspend failed: %i\n", error); + + return error; +} + static int __maybe_unused sysc_noirq_suspend(struct device *dev) { struct sysc *ddata; @@ -1344,12 +1372,18 @@ static int __maybe_unused sysc_noirq_suspend(struct device *dev) (SYSC_QUIRK_LEGACY_IDLE | SYSC_QUIRK_NO_IDLE)) return 0; - return pm_runtime_force_suspend(dev); + if (!ddata->enabled) + return 0; + + ddata->needs_resume = 1; + + return sysc_runtime_suspend(dev); } static int __maybe_unused sysc_noirq_resume(struct device *dev) { struct sysc *ddata; + int error = 0; ddata = dev_get_drvdata(dev); @@ -1357,7 +1391,19 @@ static int __maybe_unused sysc_noirq_resume(struct device *dev) (SYSC_QUIRK_LEGACY_IDLE | SYSC_QUIRK_NO_IDLE)) return 0; - return pm_runtime_force_resume(dev); + if (ddata->cfg.quirks & SYSC_QUIRK_REINIT_ON_RESUME) { + error = sysc_reinit_module(ddata, ddata->needs_resume); + if (error) + dev_warn(dev, "noirq_resume failed: %i\n", error); + } else if (ddata->needs_resume) { + error = sysc_runtime_resume(dev); + if (error) + dev_warn(dev, "noirq_resume failed: %i\n", error); + } + + ddata->needs_resume = 0; + + return error; } static const struct dev_pm_ops sysc_pm_ops = { @@ -1468,7 +1514,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050, 0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY), SYSC_QUIRK("usb_otg_hs", 0, 0, 0x10, -ENODEV, 0x4ea2080d, 0xffffffff, - SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY), + SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY | + SYSC_QUIRK_REINIT_ON_RESUME), SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0, SYSC_MODULE_QUIRK_WDT), /* PRUSS on am3, am4 and am5 */ diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index fafc1beea504..9837fb011f2f 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_QUIRK_REINIT_ON_RESUME BIT(27) #define SYSC_QUIRK_GPMC_DEBUG BIT(26) #define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) #define SYSC_MODULE_QUIRK_PRUSS BIT(24) -- cgit v1.2.3-71-gd317 From add0b32ef9146a8559a60aed54c37692a5f9d34f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 30 Apr 2021 17:06:01 -0500 Subject: siginfo: Move si_trapno inside the union inside _si_fault It turns out that linux uses si_trapno very sparingly, and as such it can be considered extra information for a very narrow selection of signals, rather than information that is present with every fault reported in siginfo. As such move si_trapno inside the union inside of _si_fault. This results in no change in placement, and makes it eaiser to extend _si_fault in the future as this reduces the number of special cases. In particular with si_trapno included in the union it is no longer a concern that the union must be pointer aligned on most architectures because the union follows immediately after si_addr which is a pointer. This change results in a difference in siginfo field placement on sparc and alpha for the fields si_addr_lsb, si_lower, si_upper, si_pkey, and si_perf. These architectures do not implement the signals that would use si_addr_lsb, si_lower, si_upper, si_pkey, and si_perf. Further these architecture have not yet implemented the userspace that would use si_perf. The point of this change is in fact to correct these placement issues before sparc or alpha grow userspace that cares. This change was discussed[1] and the agreement is that this change is currently safe. [1]: https://lkml.kernel.org/r/CAK8P3a0+uKYwL1NhY6Hvtieghba2hKYGD6hcKx5n8=4Gtt+pHA@mail.gmail.com Acked-by: Marco Elver v1: https://lkml.kernel.org/r/m1tunns7yf.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210505141101.11519-5-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-1-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/x86/kernel/signal_compat.c | 3 +++ include/linux/compat.h | 5 ++--- include/uapi/asm-generic/siginfo.h | 7 ++----- kernel/signal.c | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 0e5d0a7e203b..a9fcabd8a5e5 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -127,6 +127,9 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_trapno) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_trapno) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); diff --git a/include/linux/compat.h b/include/linux/compat.h index f0d2dd35d408..6af7bef15e94 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -214,12 +214,11 @@ typedef struct compat_siginfo { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { compat_uptr_t _addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif #define __COMPAT_ADDR_BND_PKEY_PAD (__alignof__(compat_uptr_t) < sizeof(short) ? \ sizeof(short) : __alignof__(compat_uptr_t)) union { + /* used on alpha and sparc */ + int _trapno; /* TRAP # which caused the signal */ /* * used when si_code=BUS_MCEERR_AR or * used when si_code=BUS_MCEERR_AO diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 03d6f6d2c1fe..e663bf117b46 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -63,9 +63,6 @@ union __sifields { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { void __user *_addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif #ifdef __ia64__ int _imm; /* immediate value for "break" */ unsigned int _flags; /* see ia64 si_flags */ @@ -75,6 +72,8 @@ union __sifields { #define __ADDR_BND_PKEY_PAD (__alignof__(void *) < sizeof(short) ? \ sizeof(short) : __alignof__(void *)) union { + /* used on alpha and sparc */ + int _trapno; /* TRAP # which caused the signal */ /* * used when si_code=BUS_MCEERR_AR or * used when si_code=BUS_MCEERR_AO @@ -150,9 +149,7 @@ typedef struct siginfo { #define si_int _sifields._rt._sigval.sival_int #define si_ptr _sifields._rt._sigval.sival_ptr #define si_addr _sifields._sigfault._addr -#ifdef __ARCH_SI_TRAPNO #define si_trapno _sifields._sigfault._trapno -#endif #define si_addr_lsb _sifields._sigfault._addr_lsb #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper diff --git a/kernel/signal.c b/kernel/signal.c index c3017aa8024a..65888aec65a0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4607,6 +4607,7 @@ static inline void siginfo_buildtime_checks(void) /* sigfault */ CHECK_OFFSET(si_addr); + CHECK_OFFSET(si_trapno); CHECK_OFFSET(si_addr_lsb); CHECK_OFFSET(si_lower); CHECK_OFFSET(si_upper); -- cgit v1.2.3-71-gd317 From 9abcabe3111811aeae0f3a14e159b14248631875 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 30 Apr 2021 17:29:36 -0500 Subject: signal: Implement SIL_FAULT_TRAPNO Now that si_trapno is part of the union in _si_fault and available on all architectures, add SIL_FAULT_TRAPNO and update siginfo_layout to return SIL_FAULT_TRAPNO when the code assumes si_trapno is valid. There is room for future changes to reduce when si_trapno is valid but this is all that is needed to make si_trapno and the other members of the the union in _sigfault mutually exclusive. Update the code that uses siginfo_layout to deal with SIL_FAULT_TRAPNO and have the same code ignore si_trapno in in all other cases. v1: https://lkml.kernel.org/r/m1o8dvs7s7.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210505141101.11519-6-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-2-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- fs/signalfd.c | 8 +++----- include/linux/signal.h | 1 + kernel/signal.c | 34 ++++++++++++---------------------- 3 files changed, 16 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/signalfd.c b/fs/signalfd.c index 040a1142915f..e87e59581653 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -123,15 +123,13 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, */ case SIL_FAULT: new.ssi_addr = (long) kinfo->si_addr; -#ifdef __ARCH_SI_TRAPNO + break; + case SIL_FAULT_TRAPNO: + new.ssi_addr = (long) kinfo->si_addr; new.ssi_trapno = kinfo->si_trapno; -#endif break; case SIL_FAULT_MCEERR: new.ssi_addr = (long) kinfo->si_addr; -#ifdef __ARCH_SI_TRAPNO - new.ssi_trapno = kinfo->si_trapno; -#endif new.ssi_addr_lsb = (short) kinfo->si_addr_lsb; break; case SIL_PERF_EVENT: diff --git a/include/linux/signal.h b/include/linux/signal.h index 1e98548d7cf6..5160fd45e5ca 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -40,6 +40,7 @@ enum siginfo_layout { SIL_TIMER, SIL_POLL, SIL_FAULT, + SIL_FAULT_TRAPNO, SIL_FAULT_MCEERR, SIL_FAULT_BNDERR, SIL_FAULT_PKUERR, diff --git a/kernel/signal.c b/kernel/signal.c index 65888aec65a0..597594ee72de 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1194,6 +1194,7 @@ static inline bool has_si_pid_and_uid(struct kernel_siginfo *info) case SIL_TIMER: case SIL_POLL: case SIL_FAULT: + case SIL_FAULT_TRAPNO: case SIL_FAULT_MCEERR: case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: @@ -2527,6 +2528,7 @@ static void hide_si_addr_tag_bits(struct ksignal *ksig) { switch (siginfo_layout(ksig->sig, ksig->info.si_code)) { case SIL_FAULT: + case SIL_FAULT_TRAPNO: case SIL_FAULT_MCEERR: case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: @@ -3214,6 +3216,10 @@ enum siginfo_layout siginfo_layout(unsigned sig, int si_code) #endif else if ((sig == SIGTRAP) && (si_code == TRAP_PERF)) layout = SIL_PERF_EVENT; +#ifdef __ARCH_SI_TRAPNO + else if (layout == SIL_FAULT) + layout = SIL_FAULT_TRAPNO; +#endif } else if (si_code <= NSIGPOLL) layout = SIL_POLL; @@ -3317,30 +3323,22 @@ void copy_siginfo_to_external32(struct compat_siginfo *to, break; case SIL_FAULT: to->si_addr = ptr_to_compat(from->si_addr); -#ifdef __ARCH_SI_TRAPNO + break; + case SIL_FAULT_TRAPNO: + to->si_addr = ptr_to_compat(from->si_addr); to->si_trapno = from->si_trapno; -#endif break; case SIL_FAULT_MCEERR: to->si_addr = ptr_to_compat(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: to->si_addr = ptr_to_compat(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_lower = ptr_to_compat(from->si_lower); to->si_upper = ptr_to_compat(from->si_upper); break; case SIL_FAULT_PKUERR: to->si_addr = ptr_to_compat(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_pkey = from->si_pkey; break; case SIL_PERF_EVENT: @@ -3401,30 +3399,22 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to, break; case SIL_FAULT: to->si_addr = compat_ptr(from->si_addr); -#ifdef __ARCH_SI_TRAPNO + break; + case SIL_FAULT_TRAPNO: + to->si_addr = compat_ptr(from->si_addr); to->si_trapno = from->si_trapno; -#endif break; case SIL_FAULT_MCEERR: to->si_addr = compat_ptr(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: to->si_addr = compat_ptr(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_lower = compat_ptr(from->si_lower); to->si_upper = compat_ptr(from->si_upper); break; case SIL_FAULT_PKUERR: to->si_addr = compat_ptr(from->si_addr); -#ifdef __ARCH_SI_TRAPNO - to->si_trapno = from->si_trapno; -#endif to->si_pkey = from->si_pkey; break; case SIL_PERF_EVENT: -- cgit v1.2.3-71-gd317 From af5eeab7e8e8c2f0fad10e4ab8cc8092012a2d5b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 May 2021 14:27:24 -0500 Subject: signal: Factor force_sig_perf out of perf_sigtrap Separate filling in siginfo for TRAP_PERF from deciding that siginal needs to be sent. There are enough little details that need to be correct when properly filling in siginfo_t that it is easy to make mistakes if filling in the siginfo_t is in the same function with other logic. So factor out force_sig_perf to reduce the cognative load of on reviewers, maintainers and implementors. v1: https://lkml.kernel.org/r/m17dkjqqxz.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210505141101.11519-10-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-3-ebiederm@xmission.com Reviewed-by: Marco Elver Acked-by: Peter Zijlstra (Intel) Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 1 + kernel/events/core.c | 11 ++--------- kernel/signal.c | 13 +++++++++++++ 3 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 3f6a0fcaa10c..7f4278fa21fe 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -326,6 +326,7 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); int force_sig_pkuerr(void __user *addr, u32 pkey); +int force_sig_perf(void __user *addr, u32 type, u64 sig_data); int force_sig_ptrace_errno_trap(int errno, void __user *addr); diff --git a/kernel/events/core.c b/kernel/events/core.c index 928b166d888e..48ea8863183b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6394,8 +6394,6 @@ void perf_event_wakeup(struct perf_event *event) static void perf_sigtrap(struct perf_event *event) { - struct kernel_siginfo info; - /* * We'd expect this to only occur if the irq_work is delayed and either * ctx->task or current has changed in the meantime. This can be the @@ -6410,13 +6408,8 @@ static void perf_sigtrap(struct perf_event *event) if (current->flags & PF_EXITING) return; - clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_code = TRAP_PERF; - info.si_errno = event->attr.type; - info.si_perf = event->attr.sig_data; - info.si_addr = (void __user *)event->pending_addr; - force_sig_info(&info); + force_sig_perf((void __user *)event->pending_addr, + event->attr.type, event->attr.sig_data); } static void perf_pending_event_disable(struct perf_event *event) diff --git a/kernel/signal.c b/kernel/signal.c index 597594ee72de..3a18d13c39b2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1763,6 +1763,19 @@ int force_sig_pkuerr(void __user *addr, u32 pkey) } #endif +int force_sig_perf(void __user *addr, u32 type, u64 sig_data) +{ + struct kernel_siginfo info; + + clear_siginfo(&info); + info.si_signo = SIGTRAP; + info.si_errno = type; + info.si_code = TRAP_PERF; + info.si_addr = addr; + info.si_perf = sig_data; + return force_sig_info(&info); +} + /* For the crazy architectures that include trap information in * the errno field, instead of an actual errno value. */ -- cgit v1.2.3-71-gd317 From 0683b53197b55343a166f1507086823030809a19 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 May 2021 17:28:31 -0500 Subject: signal: Deliver all of the siginfo perf data in _perf Don't abuse si_errno and deliver all of the perf data in _perf member of siginfo_t. Note: The data field in the perf data structures in a u64 to allow a pointer to be encoded without needed to implement a 32bit and 64bit version of the same structure. There already exists a 32bit and 64bit versions siginfo_t, and the 32bit version can not include a 64bit member as it only has 32bit alignment. So unsigned long is used in siginfo_t instead of a u64 as unsigned long can encode a pointer on all architectures linux supports. v1: https://lkml.kernel.org/r/m11rarqqx2.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210503203814.25487-10-ebiederm@xmission.com v3: https://lkml.kernel.org/r/20210505141101.11519-11-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-4-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W. Biederman" --- arch/m68k/kernel/signal.c | 3 ++- arch/x86/kernel/signal_compat.c | 6 ++++-- fs/signalfd.c | 3 ++- include/linux/compat.h | 5 ++++- include/uapi/asm-generic/siginfo.h | 8 ++++++-- include/uapi/linux/perf_event.h | 2 +- include/uapi/linux/signalfd.h | 4 ++-- kernel/signal.c | 21 +++++++++++++-------- .../testing/selftests/perf_events/sigtrap_threads.c | 14 +++++++------- 9 files changed, 41 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index a4b7ee1df211..8f215e79e70e 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -623,7 +623,8 @@ static inline void siginfo_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12); /* _sigfault._perf */ - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14); /* _sigpoll */ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c); diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index a9fcabd8a5e5..06743ec054d2 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -141,8 +141,10 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18); - BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14); CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); diff --git a/fs/signalfd.c b/fs/signalfd.c index e87e59581653..373df2f12415 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -134,7 +134,8 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, break; case SIL_PERF_EVENT: new.ssi_addr = (long) kinfo->si_addr; - new.ssi_perf = kinfo->si_perf; + new.ssi_perf_type = kinfo->si_perf_type; + new.ssi_perf_data = kinfo->si_perf_data; break; case SIL_CHLD: new.ssi_pid = kinfo->si_pid; diff --git a/include/linux/compat.h b/include/linux/compat.h index 6af7bef15e94..a27fffaae121 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -236,7 +236,10 @@ typedef struct compat_siginfo { u32 _pkey; } _addr_pkey; /* used when si_code=TRAP_PERF */ - compat_ulong_t _perf; + struct { + compat_ulong_t _data; + u32 _type; + } _perf; }; } _sigfault; diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index e663bf117b46..5a3c221f4c9d 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -91,7 +91,10 @@ union __sifields { __u32 _pkey; } _addr_pkey; /* used when si_code=TRAP_PERF */ - unsigned long _perf; + struct { + unsigned long _data; + __u32 _type; + } _perf; }; } _sigfault; @@ -154,7 +157,8 @@ typedef struct siginfo { #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper #define si_pkey _sifields._sigfault._addr_pkey._pkey -#define si_perf _sifields._sigfault._perf +#define si_perf_data _sifields._sigfault._perf._data +#define si_perf_type _sifields._sigfault._perf._type #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd #define si_call_addr _sifields._sigsys._call_addr diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e54e639248c8..7b14753b3d38 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -464,7 +464,7 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via - * siginfo_t::si_perf, e.g. to permit user to identify the event. + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. */ __u64 sig_data; }; diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index 7e333042c7e3..e78dddf433fc 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,8 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 __pad3; - __u64 ssi_perf; + __u32 ssi_perf_type; + __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the diff --git a/kernel/signal.c b/kernel/signal.c index 3a18d13c39b2..dca53515ae3f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1768,11 +1768,13 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data) struct kernel_siginfo info; clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_errno = type; - info.si_code = TRAP_PERF; - info.si_addr = addr; - info.si_perf = sig_data; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_PERF; + info.si_addr = addr; + info.si_perf_data = sig_data; + info.si_perf_type = type; + return force_sig_info(&info); } @@ -3356,7 +3358,8 @@ void copy_siginfo_to_external32(struct compat_siginfo *to, break; case SIL_PERF_EVENT: to->si_addr = ptr_to_compat(from->si_addr); - to->si_perf = from->si_perf; + to->si_perf_data = from->si_perf_data; + to->si_perf_type = from->si_perf_type; break; case SIL_CHLD: to->si_pid = from->si_pid; @@ -3432,7 +3435,8 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to, break; case SIL_PERF_EVENT: to->si_addr = compat_ptr(from->si_addr); - to->si_perf = from->si_perf; + to->si_perf_data = from->si_perf_data; + to->si_perf_type = from->si_perf_type; break; case SIL_CHLD: to->si_pid = from->si_pid; @@ -4615,7 +4619,8 @@ static inline void siginfo_buildtime_checks(void) CHECK_OFFSET(si_lower); CHECK_OFFSET(si_upper); CHECK_OFFSET(si_pkey); - CHECK_OFFSET(si_perf); + CHECK_OFFSET(si_perf_data); + CHECK_OFFSET(si_perf_type); /* sigpoll */ CHECK_OFFSET(si_band); diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c index 78ddf5e11625..8e83cf91513a 100644 --- a/tools/testing/selftests/perf_events/sigtrap_threads.c +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -43,7 +43,7 @@ static struct { siginfo_t first_siginfo; /* First observed siginfo_t. */ } ctx; -/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */ +/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */ #define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) @@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress) EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); } TEST_HARNESS_MAIN -- cgit v1.2.3-71-gd317 From 3410fbcd47dc6479af4309febf760ccaa5efb472 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 12 May 2021 13:52:27 +0300 Subject: {net, RDMA}/mlx5: Fix override of log_max_qp by other device mlx5_core_dev holds pointer to static profile, hence when the log_max_qp of the profile is override by some device, then it effect all other mlx5 devices that share the same profile. Fix it by having a profile instance for every mlx5 device. Fixes: 883371c453b9 ("net/mlx5: Check FW limitations on log_max_qp before setting it") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/mr.c | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 +++---- include/linux/mlx5/driver.h | 44 +++++++++++++------------- 3 files changed, 29 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4388afeff251..9662cd39c7ff 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -743,10 +743,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && !dev->is_rep && mlx5_core_is_pf(dev->mdev) && mlx5_ib_can_load_pas_with_umr(dev, 0)) - ent->limit = dev->mdev->profile->mr_cache[i].limit; + ent->limit = dev->mdev->profile.mr_cache[i].limit; else ent->limit = 0; spin_lock_irq(&ent->lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c114365eb126..a1d67bd7fb43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -503,7 +503,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) { - struct mlx5_profile *prof = dev->profile; + struct mlx5_profile *prof = &dev->profile; void *set_hca_cap; int err; @@ -524,11 +524,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) to_fw_pkey_sz(dev, 128)); /* Check log_max_qp from HCA caps to set in current profile */ - if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", - profile[prof_sel].log_max_qp, + prof->log_max_qp, MLX5_CAP_GEN_MAX(dev, log_max_qp)); - profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, @@ -1381,8 +1381,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) struct mlx5_priv *priv = &dev->priv; int err; - dev->profile = &profile[profile_idx]; - + memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&dev->intf_state_mutex); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f8e8d7e90616..020a8f7fdbdd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -703,6 +703,27 @@ struct mlx5_hv_vhca; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) +enum { + MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, +}; + +enum { + MR_CACHE_LAST_STD_ENTRY = 20, + MLX5_IMR_MTT_CACHE_ENTRY, + MLX5_IMR_KSM_CACHE_ENTRY, + MAX_MR_CACHE_ENTRIES +}; + +struct mlx5_profile { + u64 mask; + u8 log_max_qp; + struct { + int size; + int limit; + } mr_cache[MAX_MR_CACHE_ENTRIES]; +}; + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -731,7 +752,7 @@ struct mlx5_core_dev { struct mutex intf_state_mutex; unsigned long intf_state; struct mlx5_priv priv; - struct mlx5_profile *profile; + struct mlx5_profile profile; u32 issi; struct mlx5e_resources mlx5e_res; struct mlx5_dm *dm; @@ -1083,18 +1104,6 @@ static inline u8 mlx5_mkey_variant(u32 mkey) return mkey & 0xff; } -enum { - MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, - MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, -}; - -enum { - MR_CACHE_LAST_STD_ENTRY = 20, - MLX5_IMR_MTT_CACHE_ENTRY, - MLX5_IMR_KSM_CACHE_ENTRY, - MAX_MR_CACHE_ENTRIES -}; - /* Async-atomic event notifier used by mlx5 core to forward FW * evetns recived from event queue to mlx5 consumers. * Optimise event queue dipatching. @@ -1148,15 +1157,6 @@ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, struct ib_device *device, struct rdma_netdev_alloc_params *params); -struct mlx5_profile { - u64 mask; - u8 log_max_qp; - struct { - int size; - int limit; - } mr_cache[MAX_MR_CACHE_ENTRIES]; -}; - enum { MLX5_PCI_DEV_IS_VF = 1 << 0, }; -- cgit v1.2.3-71-gd317 From 7c9f131f366ab414691907fa0407124ea2b2f3bc Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 22 Apr 2021 15:48:10 +0300 Subject: {net,vdpa}/mlx5: Configure interface MAC into mpfs L2 table net/mlx5: Expose MPFS configuration API MPFS is the multi physical function switch that bridges traffic between the physical port and any physical functions associated with it. The driver is required to add or remove MAC entries to properly forward incoming traffic to the correct physical function. We export the API to control MPFS so that other drivers, such as mlx5_vdpa are able to add MAC addresses of their network interfaces. The MAC address of the vdpa interface must be configured into the MPFS L2 address. Failing to do so could cause, in some NIC configurations, failure to forward packets to the vdpa network device instance. Fix this by adding calls to update the MPFS table. CC: CC: CC: Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h | 5 +---- drivers/vdpa/mlx5/net/mlx5_vnet.c | 19 ++++++++++++++++++- include/linux/mlx5/mpfs.h | 18 ++++++++++++++++++ 6 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 include/linux/mlx5/mpfs.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 0d571a0c76d9..0b75fab41ae8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "en.h" #include "en_rep.h" #include "lib/mpfs.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 570f2280823c..b88705a3a1a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "esw/acl/lgcy.h" #include "esw/legacy.h" #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index fd8449ff9e17..839a01da110f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "mlx5_core.h" #include "lib/mpfs.h" @@ -175,6 +176,7 @@ out: mutex_unlock(&mpfs->lock); return err; } +EXPORT_SYMBOL(mlx5_mpfs_add_mac); int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { @@ -206,3 +208,4 @@ unlock: mutex_unlock(&mpfs->lock); return err; } +EXPORT_SYMBOL(mlx5_mpfs_del_mac); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h index 4a7b2c3203a7..4a293542a7aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -84,12 +84,9 @@ struct l2addr_node { #ifdef CONFIG_MLX5_MPFS int mlx5_mpfs_init(struct mlx5_core_dev *dev); void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); -int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); -int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); #else /* #ifndef CONFIG_MLX5_MPFS */ static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} -static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } -static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } #endif + #endif diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 189e4385df40..dda5dc6f7737 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "mlx5_vdpa.h" MODULE_AUTHOR("Eli Cohen "); @@ -1859,11 +1860,16 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb static void mlx5_vdpa_free(struct vdpa_device *vdev) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_core_dev *pfmdev; struct mlx5_vdpa_net *ndev; ndev = to_mlx5_vdpa_ndev(mvdev); free_resources(ndev); + if (!is_zero_ether_addr(ndev->config.mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); + mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); + } mlx5_vdpa_free_resources(&ndev->mvdev); mutex_destroy(&ndev->reslock); } @@ -1990,6 +1996,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) { struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct virtio_net_config *config; + struct mlx5_core_dev *pfmdev; struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_net *ndev; struct mlx5_core_dev *mdev; @@ -2023,10 +2030,17 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) if (err) goto err_mtu; + if (!is_zero_ether_addr(config->mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); + err = mlx5_mpfs_add_mac(pfmdev, config->mac); + if (err) + goto err_mtu; + } + mvdev->vdev.dma_dev = mdev->device; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) - goto err_mtu; + goto err_mpfs; err = alloc_resources(ndev); if (err) @@ -2044,6 +2058,9 @@ err_reg: free_resources(ndev); err_res: mlx5_vdpa_free_resources(&ndev->mvdev); +err_mpfs: + if (!is_zero_ether_addr(config->mac)) + mlx5_mpfs_del_mac(pfmdev, config->mac); err_mtu: mutex_destroy(&ndev->reslock); put_device(&mvdev->vdev.dev); diff --git a/include/linux/mlx5/mpfs.h b/include/linux/mlx5/mpfs.h new file mode 100644 index 000000000000..bf700c8d5516 --- /dev/null +++ b/include/linux/mlx5/mpfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2021 Mellanox Technologies Ltd. + */ + +#ifndef _MLX5_MPFS_ +#define _MLX5_MPFS_ + +struct mlx5_core_dev; + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +#endif + +#endif -- cgit v1.2.3-71-gd317 From ba6e1d8422bd476ad79da409639a773c02f0cbad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 May 2021 22:04:36 +0200 Subject: platform/surface: aggregator: avoid clang -Wconstant-conversion warning Clang complains about the assignment of SSAM_ANY_IID to ssam_device_uid->instance: drivers/platform/surface/surface_aggregator_registry.c:478:25: error: implicit conversion from 'int' to '__u8' (aka 'unsigned char') changes value from 65535 to 255 [-Werror,-Wconstant-conversion] { SSAM_VDEV(HUB, 0x02, SSAM_ANY_IID, 0x00) }, ~ ^~~~~~~~~~~~ include/linux/surface_aggregator/device.h:71:23: note: expanded from macro 'SSAM_ANY_IID' #define SSAM_ANY_IID 0xffff ^~~~~~ include/linux/surface_aggregator/device.h:126:63: note: expanded from macro 'SSAM_VDEV' SSAM_DEVICE(SSAM_DOMAIN_VIRTUAL, SSAM_VIRTUAL_TC_##cat, tid, iid, fun) ^~~ include/linux/surface_aggregator/device.h:102:41: note: expanded from macro 'SSAM_DEVICE' .instance = ((iid) != SSAM_ANY_IID) ? (iid) : 0, \ ^~~ The assignment doesn't actually happen, but clang checks the type limits before checking whether this assignment is reached. Replace the ?: operator with a __builtin_choose_expr() invocation that avoids the warning for the untaken part. Fixes: eb0e90a82098 ("platform/surface: aggregator: Add dedicated bus and device type") Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20210514200453.1542978-1-arnd@kernel.org Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/device.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 4441ad667c3f..6ff9c58b3e17 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -98,9 +98,9 @@ struct ssam_device_uid { | (((fun) != SSAM_ANY_FUN) ? SSAM_MATCH_FUNCTION : 0), \ .domain = d, \ .category = cat, \ - .target = ((tid) != SSAM_ANY_TID) ? (tid) : 0, \ - .instance = ((iid) != SSAM_ANY_IID) ? (iid) : 0, \ - .function = ((fun) != SSAM_ANY_FUN) ? (fun) : 0 \ + .target = __builtin_choose_expr((tid) != SSAM_ANY_TID, (tid), 0), \ + .instance = __builtin_choose_expr((iid) != SSAM_ANY_IID, (iid), 0), \ + .function = __builtin_choose_expr((fun) != SSAM_ANY_FUN, (fun), 0) /** * SSAM_VDEV() - Initialize a &struct ssam_device_id as virtual device with -- cgit v1.2.3-71-gd317 From 6c60ff048ca1e0739f39aa25996543c6e662a46c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 May 2021 15:18:41 +0200 Subject: block: prevent block device lookups at the beginning of del_gendisk As an artifact of how gendisk lookup used to work in earlier kernels, GENHD_FL_UP is only cleared very late in del_gendisk, and a global lock is used to prevent opens from succeeding while del_gendisk is tearing down the gendisk. Switch to clearing the flag early and under bd_mutex so that callers can use bd_mutex to stabilize the flag, which removes the need for the global mutex. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210514131842.1600568-2-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 11 +---------- fs/block_dev.c | 15 +++++---------- include/linux/genhd.h | 2 -- 3 files changed, 6 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 39ca97b0edc6..9f8cb7beaad1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -29,8 +29,6 @@ static struct kobject *block_depr; -DECLARE_RWSEM(bdev_lookup_sem); - /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) static DEFINE_IDA(ext_devt_ida); @@ -609,13 +607,8 @@ void del_gendisk(struct gendisk *disk) blk_integrity_del(disk); disk_del_events(disk); - /* - * Block lookups of the disk until all bdevs are unhashed and the - * disk is marked as dead (GENHD_FL_UP cleared). - */ - down_write(&bdev_lookup_sem); - mutex_lock(&disk->part0->bd_mutex); + disk->flags &= ~GENHD_FL_UP; blk_drop_partitions(disk); mutex_unlock(&disk->part0->bd_mutex); @@ -629,8 +622,6 @@ void del_gendisk(struct gendisk *disk) remove_inode_hash(disk->part0->bd_inode); set_capacity(disk, 0); - disk->flags &= ~GENHD_FL_UP; - up_write(&bdev_lookup_sem); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); diff --git a/fs/block_dev.c b/fs/block_dev.c index eb265d72fce8..580bae995b87 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1298,6 +1298,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) struct gendisk *disk = bdev->bd_disk; int ret = 0; + if (!(disk->flags & GENHD_FL_UP)) + return -ENXIO; + if (!bdev->bd_openers) { if (!bdev_is_partition(bdev)) { ret = 0; @@ -1332,8 +1335,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode) whole->bd_part_count++; mutex_unlock(&whole->bd_mutex); - if (!(disk->flags & GENHD_FL_UP) || - !bdev_nr_sectors(bdev)) { + if (!bdev_nr_sectors(bdev)) { __blkdev_put(whole, mode, 1); bdput(whole); return -ENXIO; @@ -1364,16 +1366,12 @@ struct block_device *blkdev_get_no_open(dev_t dev) struct block_device *bdev; struct gendisk *disk; - down_read(&bdev_lookup_sem); bdev = bdget(dev); if (!bdev) { - up_read(&bdev_lookup_sem); blk_request_module(dev); - down_read(&bdev_lookup_sem); - bdev = bdget(dev); if (!bdev) - goto unlock; + return NULL; } disk = bdev->bd_disk; @@ -1383,14 +1381,11 @@ struct block_device *blkdev_get_no_open(dev_t dev) goto put_disk; if (!try_module_get(bdev->bd_disk->fops->owner)) goto put_disk; - up_read(&bdev_lookup_sem); return bdev; put_disk: put_disk(disk); bdput: bdput(bdev); -unlock: - up_read(&bdev_lookup_sem); return NULL; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7e9660ea967d..6fc26f7bdf71 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -306,8 +306,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ -extern struct rw_semaphore bdev_lookup_sem; - dev_t blk_lookup_devt(const char *name, int partno); void blk_request_module(dev_t devt); #ifdef CONFIG_BLOCK -- cgit v1.2.3-71-gd317 From 80dd33cf72d1ab4f0af303f1fa242c6d6c8d328f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 14 May 2021 14:10:15 +0200 Subject: drivers: base: Fix device link removal When device_link_free() drops references to the supplier and consumer devices of the device link going away and the reference being dropped turns out to be the last one for any of those device objects, its ->release callback will be invoked and it may sleep which goes against the SRCU callback execution requirements. To address this issue, make the device link removal code carry out the device_link_free() actions preceded by SRCU synchronization from a separate work item (the "long" workqueue is used for that, because it does not matter when the device link memory is released and it may take time to get to that point) instead of using SRCU callbacks. While at it, make the code work analogously when SRCU is not enabled to reduce the differences between the SRCU and non-SRCU cases. Fixes: 843e600b8a2b ("driver core: Fix sleeping in invalid context during device link deletion") Cc: stable Reported-by: chenxiang (M) Tested-by: chenxiang (M) Reviewed-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/5722787.lOV4Wx5bFT@kreacher Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 37 +++++++++++++++++++++++-------------- include/linux/device.h | 6 ++---- 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 628e33939aca..61c19641e1d0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -194,6 +194,11 @@ int device_links_read_lock_held(void) { return srcu_read_lock_held(&device_links_srcu); } + +static void device_link_synchronize_removal(void) +{ + synchronize_srcu(&device_links_srcu); +} #else /* !CONFIG_SRCU */ static DECLARE_RWSEM(device_links_lock); @@ -224,6 +229,10 @@ int device_links_read_lock_held(void) return lockdep_is_held(&device_links_lock); } #endif + +static inline void device_link_synchronize_removal(void) +{ +} #endif /* !CONFIG_SRCU */ static bool device_is_ancestor(struct device *dev, struct device *target) @@ -445,8 +454,13 @@ static struct attribute *devlink_attrs[] = { }; ATTRIBUTE_GROUPS(devlink); -static void device_link_free(struct device_link *link) +static void device_link_release_fn(struct work_struct *work) { + struct device_link *link = container_of(work, struct device_link, rm_work); + + /* Ensure that all references to the link object have been dropped. */ + device_link_synchronize_removal(); + while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier); @@ -455,24 +469,19 @@ static void device_link_free(struct device_link *link) kfree(link); } -#ifdef CONFIG_SRCU -static void __device_link_free_srcu(struct rcu_head *rhead) -{ - device_link_free(container_of(rhead, struct device_link, rcu_head)); -} - static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev); - call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu); -} -#else -static void devlink_dev_release(struct device *dev) -{ - device_link_free(to_devlink(dev)); + INIT_WORK(&link->rm_work, device_link_release_fn); + /* + * It may take a while to complete this work because of the SRCU + * synchronization in device_link_release_fn() and if the consumer or + * supplier devices get deleted when it runs, so put it into the "long" + * workqueue. + */ + queue_work(system_long_wq, &link->rm_work); } -#endif static struct class devlink_class = { .name = "devlink", diff --git a/include/linux/device.h b/include/linux/device.h index 38a2071cf776..f1a00040fa53 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -570,7 +570,7 @@ struct device { * @flags: Link flags. * @rpm_active: Whether or not the consumer device is runtime-PM-active. * @kref: Count repeated addition of the same link. - * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks. + * @rm_work: Work structure used for removing the link. * @supplier_preactivated: Supplier has been made active before consumer probe. */ struct device_link { @@ -583,9 +583,7 @@ struct device_link { u32 flags; refcount_t rpm_active; struct kref kref; -#ifdef CONFIG_SRCU - struct rcu_head rcu_head; -#endif + struct work_struct rm_work; bool supplier_preactivated; /* Owned by consumer probe. */ }; -- cgit v1.2.3-71-gd317 From f747e6667ebb2ffb8133486c9cd19800d72b0d98 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sat, 22 May 2021 17:42:02 -0700 Subject: linux/bits.h: fix compilation error with GENMASK GENMASK() has an input check which uses __builtin_choose_expr() to enable a compile time sanity check of its inputs if they are known at compile time. However, it turns out that __builtin_constant_p() does not always return a compile time constant [0]. It was thought this problem was fixed with gcc 4.9 [1], but apparently this is not the case [2]. Switch to use __is_constexpr() instead which always returns a compile time constant, regardless of its inputs. Link: https://lore.kernel.org/lkml/42b4342b-aefc-a16a-0d43-9f9c0d63ba7a@rasmusvillemoes.dk [0] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19449 [1] Link: https://lore.kernel.org/lkml/1ac7bbc2-45d9-26ed-0b33-bf382b8d858b@I-love.SAKURA.ne.jp [2] Link: https://lkml.kernel.org/r/20210511203716.117010-1-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Reported-by: Tetsuo Handa Acked-by: Arnd Bergmann Reviewed-by: Andy Shevchenko Cc: Ard Biesheuvel Cc: Yury Norov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bits.h | 2 +- include/linux/const.h | 8 ++++++++ include/linux/minmax.h | 10 ++-------- tools/include/linux/bits.h | 2 +- tools/include/linux/const.h | 8 ++++++++ 5 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bits.h b/include/linux/bits.h index 7f475d59a097..87d112650dfb 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -22,7 +22,7 @@ #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/include/linux/const.h b/include/linux/const.h index 81b8aae5a855..435ddd72d2c4 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -3,4 +3,12 @@ #include +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #endif /* _LINUX_CONST_H */ diff --git a/include/linux/minmax.h b/include/linux/minmax.h index c0f57b0c64d9..5433c08fcc68 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,6 +2,8 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include + /* * min()/max()/clamp() macros must accomplish three things: * @@ -17,14 +19,6 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - #define __no_side_effects(x, y) \ (__is_constexpr(x) && __is_constexpr(y)) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7f475d59a097..87d112650dfb 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -22,7 +22,7 @@ #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h index 81b8aae5a855..435ddd72d2c4 100644 --- a/tools/include/linux/const.h +++ b/tools/include/linux/const.h @@ -3,4 +3,12 @@ #include +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #endif /* _LINUX_CONST_H */ -- cgit v1.2.3-71-gd317 From 08b2b6fdf6b26032f025084ce2893924a0cdb4a2 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 24 May 2021 16:29:43 +0800 Subject: cgroup: fix spelling mistakes Fix some spelling mistakes in comments: hierarhcy ==> hierarchy automtically ==> automatically overriden ==> overridden In absense of .. or ==> In absence of .. and assocaited ==> associated taget ==> target initate ==> initiate succeded ==> succeeded curremt ==> current udpated ==> updated Signed-off-by: Zhen Lei Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 6 +++--- include/linux/cgroup.h | 2 +- kernel/cgroup/cgroup-v1.c | 2 +- kernel/cgroup/cgroup.c | 8 ++++---- kernel/cgroup/cpuset.c | 2 +- kernel/cgroup/rdma.c | 2 +- kernel/cgroup/rstat.c | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 559ee05f86b2..fb8f6d2cd104 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -232,7 +232,7 @@ struct css_set { struct list_head task_iters; /* - * On the default hierarhcy, ->subsys[ssid] may point to a css + * On the default hierarchy, ->subsys[ssid] may point to a css * attached to an ancestor instead of the cgroup this css_set is * associated with. The following node is anchored at * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to @@ -668,7 +668,7 @@ struct cgroup_subsys { */ bool threaded:1; - /* the following two fields are initialized automtically during boot */ + /* the following two fields are initialized automatically during boot */ int id; const char *name; @@ -757,7 +757,7 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. * On boot, sock_cgroup_data records the cgroup that the sock was created * in so that cgroup2 matches can be made; however, once either net_prio or - * net_cls starts being used, the area is overriden to carry prioidx and/or + * net_cls starts being used, the area is overridden to carry prioidx and/or * classid. The two modes are distinguished by whether the lowest bit is * set. Clear bit indicates cgroup pointer while set bit prioidx and * classid. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4f2f79de083e..6bc9c76680b2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -32,7 +32,7 @@ struct kernel_clone_args; #ifdef CONFIG_CGROUPS /* - * All weight knobs on the default hierarhcy should use the following min, + * All weight knobs on the default hierarchy should use the following min, * default and max values. The default value is the logarithmic center of * MIN and MAX and allows 100x to be expressed in both directions. */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 391aa570369b..8190b6bfc978 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1001,7 +1001,7 @@ static int check_cgroupfs_options(struct fs_context *fc) ctx->subsys_mask &= enabled; /* - * In absense of 'none', 'name=' or subsystem name options, + * In absence of 'none', 'name=' and subsystem name options, * let's default to 'all'. */ if (!ctx->subsys_mask && !ctx->none && !ctx->name) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e7a9a2998245..21ecc6ee6a6d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -468,7 +468,7 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, * @cgrp: the cgroup of interest * @ss: the subsystem of interest * - * Find and get @cgrp's css assocaited with @ss. If the css doesn't exist + * Find and get @cgrp's css associated with @ss. If the css doesn't exist * or is offline, %NULL is returned. */ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, @@ -1633,7 +1633,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) /** * css_clear_dir - remove subsys files in a cgroup directory - * @css: taget css + * @css: target css */ static void css_clear_dir(struct cgroup_subsys_state *css) { @@ -5350,7 +5350,7 @@ out_unlock: /* * This is called when the refcnt of a css is confirmed to be killed. * css_tryget_online() is now guaranteed to fail. Tell the subsystem to - * initate destruction and put the css ref from kill_css(). + * initiate destruction and put the css ref from kill_css(). */ static void css_killed_work_fn(struct work_struct *work) { @@ -6052,7 +6052,7 @@ out_revert: * @kargs: the arguments passed to create the child process * * This calls the cancel_fork() callbacks if a fork failed *after* - * cgroup_can_fork() succeded and cleans up references we took to + * cgroup_can_fork() succeeded and cleans up references we took to * prepare a new css_set for the child process in cgroup_can_fork(). */ void cgroup_cancel_fork(struct task_struct *child, diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a945504c0ae7..adb5190c4429 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3376,7 +3376,7 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) } /** - * cpuset_nodemask_valid_mems_allowed - check nodemask vs. curremt mems_allowed + * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed * @nodemask: the nodemask to be checked * * Are any of the nodes in the nodemask allowed in current->mems_allowed? diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index ae042c347c64..3135406608c7 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdmacg_uncharge); * This function follows charging resource in hierarchical way. * It will fail if the charge would cause the new value to exceed the * hierarchical limit. - * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. + * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. * Returns pointer to rdmacg for this resource when charging is successful. * * Charger needs to account resources on two criteria. diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 3a3fd2993a65..cee265cb535c 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -75,7 +75,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) * @root: root of the tree to traversal * @cpu: target cpu * - * Walks the udpated rstat_cpu tree on @cpu from @root. %NULL @pos starts + * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts * the traversal and %NULL return indicates the end. During traversal, * each returned cgroup is unlinked from the tree. Must be called with the * matching cgroup_rstat_cpu_lock held. -- cgit v1.2.3-71-gd317 From 1cb61759d40716643281b8e0f8c7afebc8699249 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 21 May 2021 09:26:10 +0200 Subject: init: verify that function is initcall_t at compile-time In the spirit of making it hard to misuse an interface, add a compile-time assertion in the CONFIG_HAVE_ARCH_PREL32_RELOCATIONS case to verify the initcall function matches initcall_t, because the inline asm bypasses any type-checking the compiler would otherwise do. This will help developers catch incorrect API use in all configurations. A recent example of this is: https://lkml.kernel.org/r/20210514140015.2944744-1-arnd@kernel.org Signed-off-by: Marco Elver Reviewed-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Sami Tolvanen Tested-by: Paul E. McKenney Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210521072610.2880286-1-elver@google.com --- include/linux/init.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 045ad1650ed1..d82b4b2e1d25 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -242,7 +242,8 @@ extern bool initcall_debug; asm(".section \"" __sec "\", \"a\" \n" \ __stringify(__name) ": \n" \ ".long " __stringify(__stub) " - . \n" \ - ".previous \n"); + ".previous \n"); \ + static_assert(__same_type(initcall_t, &fn)); #else #define ____define_initcall(fn, __unused, __name, __sec) \ static initcall_t __name __used \ -- cgit v1.2.3-71-gd317 From a8b98c808eab3ec8f1b5a64be967b0f4af4cae43 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 24 May 2021 16:53:21 +0300 Subject: fanotify: fix permission model of unprivileged group Reporting event->pid should depend on the privileges of the user that initialized the group, not the privileges of the user reading the events. Use an internal group flag FANOTIFY_UNPRIV to record the fact that the group was initialized by an unprivileged user. To be on the safe side, the premissions to setup filesystem and mount marks now require that both the user that initialized the group and the user setting up the mark have CAP_SYS_ADMIN. Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxiA77_P5vtv7e83g0+9d7B5W9ZTE4GfQEYbWmfT1rA=VA@mail.gmail.com/ Fixes: 7cea2a3c505e ("fanotify: support limited functionality for unprivileged users") Cc: # v5.12+ Link: https://lore.kernel.org/r/20210524135321.2190062-1-amir73il@gmail.com Reviewed-by: Matthew Bobrowski Acked-by: Christian Brauner Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 30 ++++++++++++++++++++++++------ fs/notify/fdinfo.c | 2 +- include/linux/fanotify.h | 4 ++++ 3 files changed, 29 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 71fefb30e015..be5b6d2c01e7 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -424,11 +424,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, * events generated by the listener process itself, without disclosing * the pids of other processes. */ - if (!capable(CAP_SYS_ADMIN) && + if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && task_tgid(current) != event->pid) metadata.pid = 0; - if (path && path->mnt && path->dentry) { + /* + * For now, fid mode is required for an unprivileged listener and + * fid mode does not report fd in events. Keep this check anyway + * for safety in case fid mode requirement is relaxed in the future + * to allow unprivileged listener to get events with no fd and no fid. + */ + if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && + path && path->mnt && path->dentry) { fd = create_fd(group, path, &f); if (fd < 0) return fd; @@ -1040,6 +1047,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) int f_flags, fd; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; + unsigned int internal_flags = 0; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -1053,6 +1061,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) */ if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) return -EPERM; + + /* + * Setting the internal flag FANOTIFY_UNPRIV on the group + * prevents setting mount/filesystem marks on this group and + * prevents reporting pid and open fd in events. + */ + internal_flags |= FANOTIFY_UNPRIV; } #ifdef CONFIG_AUDITSYSCALL @@ -1105,7 +1120,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_destroy_group; } - group->fanotify_data.flags = flags; + group->fanotify_data.flags = flags | internal_flags; group->memcg = get_mem_cgroup_from_mm(current->mm); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); @@ -1305,11 +1320,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, group = f.file->private_data; /* - * An unprivileged user is not allowed to watch a mount point nor - * a filesystem. + * An unprivileged user is not allowed to setup mount nor filesystem + * marks. This also includes setting up such marks by a group that + * was initialized by an unprivileged user. */ ret = -EPERM; - if (!capable(CAP_SYS_ADMIN) && + if ((!capable(CAP_SYS_ADMIN) || + FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && mark_type != FAN_MARK_INODE) goto fput_and_out; @@ -1460,6 +1477,7 @@ static int __init fanotify_user_setup(void) max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS, FANOTIFY_DEFAULT_MAX_USER_MARKS); + BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index a712b2aaa9ac..57f0d5d9f934 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -144,7 +144,7 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f) struct fsnotify_group *group = f->private_data; seq_printf(m, "fanotify flags:%x event-flags:%x\n", - group->fanotify_data.flags, + group->fanotify_data.flags & FANOTIFY_INIT_FLAGS, group->fanotify_data.f_flags); show_fdinfo(m, f, fanotify_fdinfo); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index bad41bcb25df..a16dbeced152 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -51,6 +51,10 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \ FANOTIFY_USER_INIT_FLAGS) +/* Internal group flags */ +#define FANOTIFY_UNPRIV 0x80000000 +#define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV) + #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ FAN_MARK_FILESYSTEM) -- cgit v1.2.3-71-gd317 From 85aabbd7b315c65673084b6227bee92c00405239 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 10 May 2021 19:31:30 +0200 Subject: PCI/MSI: Fix MSIs for generic hosts that use device-tree's "msi-map" Since commit 9ec37efb8783 ("PCI/MSI: Make pci_host_common_probe() declare its reliance on MSI domains"), platforms that rely on the "msi-map" device-tree property don't get MSIs anymore. On the Arm Fast Model for example [1], the host bridge doesn't have a "msi-parent" property since it doesn't itself generate MSIs, and so doesn't get a MSI domain. It has an "msi-map" property instead to describe MSI controllers of child devices. As a result, due to the new msi_domain check in pci_register_host_bridge(), the whole bus gets PCI_BUS_FLAGS_NO_MSI. Check whether the root complex has an "msi-map" property before giving up on MSIs. [1] arch/arm64/boot/dts/arm/fvp-base-revc.dts Fixes: 9ec37efb8783 ("PCI/MSI: Make pci_host_common_probe() declare its reliance on MSI domains") Link: https://lore.kernel.org/r/20210510173129.750496-1-jean-philippe@linaro.org Signed-off-by: Jean-Philippe Brucker Signed-off-by: Bjorn Helgaas Acked-by: Marc Zyngier --- drivers/pci/of.c | 7 +++++++ drivers/pci/probe.c | 3 ++- include/linux/pci.h | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/of.c b/drivers/pci/of.c index da5b414d585a..85dcb7097da4 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -103,6 +103,13 @@ struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus) #endif } +bool pci_host_of_has_msi_map(struct device *dev) +{ + if (dev && dev->of_node) + return of_get_property(dev->of_node, "msi-map", NULL); + return false; +} + static inline int __of_pci_pci_compare(struct device_node *node, unsigned int data) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 3a62d09b8869..275204646c68 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -925,7 +925,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) device_enable_async_suspend(bus->bridge); pci_set_bus_of_node(bus); pci_set_bus_msi_domain(bus); - if (bridge->msi_domain && !dev_get_msi_domain(&bus->dev)) + if (bridge->msi_domain && !dev_get_msi_domain(&bus->dev) && + !pci_host_of_has_msi_map(parent)) bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI; if (!parent) diff --git a/include/linux/pci.h b/include/linux/pci.h index c20211e59a57..24306504226a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2344,6 +2344,7 @@ int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off, struct device_node; struct irq_domain; struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus); +bool pci_host_of_has_msi_map(struct device *dev); /* Arch may override this (weak) */ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus); @@ -2351,6 +2352,7 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus); #else /* CONFIG_OF */ static inline struct irq_domain * pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; } +static inline bool pci_host_of_has_msi_map(struct device *dev) { return false; } #endif /* CONFIG_OF */ static inline struct device_node * -- cgit v1.2.3-71-gd317 From e86be3a04bc4aeaf12f93af35f08f8d4385bcd98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 May 2021 18:43:38 -0400 Subject: SUNRPC: More fixes for backlog congestion Ensure that we fix the XPRT_CONGESTED starvation issue for RDMA as well as socket based transports. Ensure we always initialise the request after waking up from the backlog list. Fixes: e877a88d1f06 ("SUNRPC in case of backlog, hand free slots directly to waiting task") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/xprt.c | 58 ++++++++++++++++++++--------------------- net/sunrpc/xprtrdma/transport.c | 12 ++++----- net/sunrpc/xprtrdma/verbs.c | 18 ++++++++++--- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 5 files changed, 52 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d81fe8b364d0..61b622e334ee 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -368,6 +368,8 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int num_prealloc, unsigned int max_req); void xprt_free(struct rpc_xprt *); +void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task); +bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req); static inline int xprt_enable_swap(struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5b3981fd3783..3509a7f139b9 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1607,11 +1607,18 @@ xprt_transmit(struct rpc_task *task) spin_unlock(&xprt->queue_lock); } -static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) +static void xprt_complete_request_init(struct rpc_task *task) +{ + if (task->tk_rqstp) + xprt_request_init(task); +} + +void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) { set_bit(XPRT_CONGESTED, &xprt->state); - rpc_sleep_on(&xprt->backlog, task, NULL); + rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init); } +EXPORT_SYMBOL_GPL(xprt_add_backlog); static bool __xprt_set_rq(struct rpc_task *task, void *data) { @@ -1619,14 +1626,13 @@ static bool __xprt_set_rq(struct rpc_task *task, void *data) if (task->tk_rqstp == NULL) { memset(req, 0, sizeof(*req)); /* mark unused */ - task->tk_status = -EAGAIN; task->tk_rqstp = req; return true; } return false; } -static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req) +bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req) { if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) { clear_bit(XPRT_CONGESTED, &xprt->state); @@ -1634,6 +1640,7 @@ static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req) } return true; } +EXPORT_SYMBOL_GPL(xprt_wake_up_backlog); static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -1643,7 +1650,7 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task goto out; spin_lock(&xprt->reserve_lock); if (test_bit(XPRT_CONGESTED, &xprt->state)) { - rpc_sleep_on(&xprt->backlog, task, NULL); + xprt_add_backlog(xprt, task); ret = true; } spin_unlock(&xprt->reserve_lock); @@ -1812,10 +1819,6 @@ xprt_request_init(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; - if (req->rq_task) - /* Already initialized */ - return; - req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; @@ -1876,10 +1879,8 @@ void xprt_retry_reserve(struct rpc_task *task) struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = 0; - if (task->tk_rqstp != NULL) { - xprt_request_init(task); + if (task->tk_rqstp != NULL) return; - } task->tk_status = -EAGAIN; xprt_do_reserve(xprt, task); @@ -1904,24 +1905,21 @@ void xprt_release(struct rpc_task *task) } xprt = req->rq_xprt; - if (xprt) { - xprt_request_dequeue_xprt(task); - spin_lock(&xprt->transport_lock); - xprt->ops->release_xprt(xprt, task); - if (xprt->ops->release_request) - xprt->ops->release_request(task); - xprt_schedule_autodisconnect(xprt); - spin_unlock(&xprt->transport_lock); - if (req->rq_buffer) - xprt->ops->buf_free(task); - xdr_free_bvec(&req->rq_rcv_buf); - xdr_free_bvec(&req->rq_snd_buf); - if (req->rq_cred != NULL) - put_rpccred(req->rq_cred); - if (req->rq_release_snd_buf) - req->rq_release_snd_buf(req); - } else - xprt = task->tk_xprt; + xprt_request_dequeue_xprt(task); + spin_lock(&xprt->transport_lock); + xprt->ops->release_xprt(xprt, task); + if (xprt->ops->release_request) + xprt->ops->release_request(task); + xprt_schedule_autodisconnect(xprt); + spin_unlock(&xprt->transport_lock); + if (req->rq_buffer) + xprt->ops->buf_free(task); + xdr_free_bvec(&req->rq_rcv_buf); + xdr_free_bvec(&req->rq_snd_buf); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); + if (req->rq_release_snd_buf) + req->rq_release_snd_buf(req); task->tk_rqstp = NULL; if (likely(!bc_prealloc(req))) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 09953597d055..19a49d26b1e4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -520,9 +520,8 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) return; out_sleep: - set_bit(XPRT_CONGESTED, &xprt->state); - rpc_sleep_on(&xprt->backlog, task, NULL); task->tk_status = -EAGAIN; + xprt_add_backlog(xprt, task); } /** @@ -537,10 +536,11 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, rx_xprt); - memset(rqst, 0, sizeof(*rqst)); - rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); - if (unlikely(!rpc_wake_up_next(&xprt->backlog))) - clear_bit(XPRT_CONGESTED, &xprt->state); + rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); + if (!xprt_wake_up_backlog(xprt, rqst)) { + memset(rqst, 0, sizeof(*rqst)); + rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); + } } static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1e965a380896..649c23518ec0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1200,6 +1200,20 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) return mr; } +/** + * rpcrdma_reply_put - Put reply buffers back into pool + * @buffers: buffer pool + * @req: object to return + * + */ +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) +{ + if (req->rl_reply) { + rpcrdma_rep_put(buffers, req->rl_reply); + req->rl_reply = NULL; + } +} + /** * rpcrdma_buffer_get - Get a request buffer * @buffers: Buffer pool from which to obtain a buffer @@ -1228,9 +1242,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) */ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) { - if (req->rl_reply) - rpcrdma_rep_put(buffers, req->rl_reply); - req->rl_reply = NULL; + rpcrdma_reply_put(buffers, req); spin_lock(&buffers->rb_lock); list_add(&req->rl_list, &buffers->rb_send_bufs); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 436ad7312614..5d231d94e944 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -479,6 +479,7 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req); void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep); +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req); bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags); -- cgit v1.2.3-71-gd317 From 62f3415db237b8d2aa9a804ff84ce2efa87df179 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 May 2021 11:46:17 -0700 Subject: net: phy: Document phydev::dev_flags bits allocation Document the phydev::dev_flags bit allocation to allow bits 15:0 to define PHY driver specific behavior, bits 23:16 to be reserved for now, and bits 31:24 to hold generic PHY driver flags. Signed-off-by: Florian Fainelli Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20210526184617.3105012-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 60d2b26026a2..852743f07e3e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -496,6 +496,11 @@ struct macsec_ops; * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. + * Bits [15:0] are free to use by the PHY driver to communicate + * driver specific behavior. + * Bits [23:16] are currently reserved for future use. + * Bits [31:24] are reserved for defining generic + * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY -- cgit v1.2.3-71-gd317 From 6bd5b743686243dae7351d5dcceeb7f171201bb4 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 18 May 2021 05:00:31 -0700 Subject: KVM: PPC: exit halt polling on need_resched() This is inspired by commit 262de4102c7bb8 (kvm: exit halt polling on need_resched() as well). Due to PPC implements an arch specific halt polling logic, we have to the need_resched() check there as well. This patch adds a helper function that can be shared between book3s and generic halt-polling loops. Reviewed-by: David Matlack Reviewed-by: Venkatesh Srinivas Cc: Ben Segall Cc: Venkatesh Srinivas Cc: Jim Mattson Cc: David Matlack Cc: Paul Mackerras Cc: Suraj Jitindar Singh Signed-off-by: Wanpeng Li Message-Id: <1621339235-11131-1-git-send-email-wanpengli@tencent.com> [Make the function inline. - Paolo] Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv.c | 2 +- include/linux/kvm_host.h | 6 ++++++ virt/kvm/kvm_main.c | 3 +-- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 28a80d240b76..7360350e66ff 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3936,7 +3936,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) break; } cur = ktime_get(); - } while (single_task_running() && ktime_before(cur, stop)); + } while (kvm_vcpu_can_poll(cur, stop)); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2f34487e21f2..5d4b96b36ec0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -265,6 +266,11 @@ static inline bool kvm_vcpu_mapped(struct kvm_host_map *map) return !!map->hva; } +static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop) +{ + return single_task_running() && !need_resched() && ktime_before(cur, stop); +} + /* * Sometimes a large or cross-page mmio needs to be broken up into separate * exits for userspace servicing. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6b4feb92dc79..5f40725144f5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2973,8 +2973,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) goto out; } poll_end = cur = ktime_get(); - } while (single_task_running() && !need_resched() && - ktime_before(cur, stop)); + } while (kvm_vcpu_can_poll(cur, stop)); } prepare_to_rcuwait(&vcpu->wait); -- cgit v1.2.3-71-gd317 From 084071d5e9226add45a6031928bf10e6afc855fd Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 25 May 2021 10:41:17 -0300 Subject: KVM: rename KVM_REQ_PENDING_TIMER to KVM_REQ_UNBLOCK KVM_REQ_UNBLOCK will be used to exit a vcpu from its inner vcpu halt emulation loop. Rename KVM_REQ_PENDING_TIMER to KVM_REQ_UNBLOCK, switch PowerPC to arch specific request bit. Signed-off-by: Marcelo Tosatti Message-Id: <20210525134321.303768132@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/vcpu-requests.rst | 8 +++++--- arch/powerpc/include/asm/kvm_host.h | 1 + arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 2 ++ 6 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/virt/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst index 5feb3706a7ae..af1b37441e0a 100644 --- a/Documentation/virt/kvm/vcpu-requests.rst +++ b/Documentation/virt/kvm/vcpu-requests.rst @@ -118,10 +118,12 @@ KVM_REQ_MMU_RELOAD necessary to inform each VCPU to completely refresh the tables. This request is used for that. -KVM_REQ_PENDING_TIMER +KVM_REQ_UNBLOCK - This request may be made from a timer handler run on the host on behalf - of a VCPU. It informs the VCPU thread to inject a timer interrupt. + This request informs the vCPU to exit kvm_vcpu_block. It is used for + example from timer handlers that run on the host on behalf of a vCPU, + or in order to update the interrupt routing and ensure that assigned + devices will wake up the vCPU. KVM_REQ_UNHALT diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1e83359f286b..7f2e90db2050 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -51,6 +51,7 @@ /* PPC-specific vcpu->requests bit members */ #define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0) #define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1) +#define KVM_REQ_PENDING_TIMER KVM_ARCH_REQ(2) #include diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5d91f2367c31..8120e8614b92 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1669,7 +1669,7 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) } atomic_inc(&apic->lapic_timer.pending); - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_make_request(KVM_REQ_UNBLOCK, vcpu); if (from_timer_fn) kvm_vcpu_kick(vcpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 98538b1cb453..fe464b66898f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9501,7 +9501,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (r <= 0) break; - kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_clear_request(KVM_REQ_UNBLOCK, vcpu); if (kvm_cpu_has_pending_timer(vcpu)) kvm_inject_pending_timer_irqs(vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5d4b96b36ec0..76102efbf079 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -147,7 +147,7 @@ static inline bool is_error_page(struct page *page) */ #define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_PENDING_TIMER 2 +#define KVM_REQ_UNBLOCK 2 #define KVM_REQ_UNHALT 3 #define KVM_REQUEST_ARCH_BASE 8 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5f40725144f5..37a2d500a148 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2929,6 +2929,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) goto out; if (signal_pending(current)) goto out; + if (kvm_check_request(KVM_REQ_UNBLOCK, vcpu)) + goto out; ret = 0; out: -- cgit v1.2.3-71-gd317 From 0b78f8bcf4951af30b0ae83ea4fad27d641ab617 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 1 Jun 2021 15:30:30 +0100 Subject: Revert "fb_defio: Remove custom address_space_operations" Commit ccf953d8f3d6 makes framebuffers which use deferred I/O stop displaying updates after the first one. This is because the pages handled by fb_defio no longer have a page_mapping(). That prevents page_mkclean() from marking the PTEs as clean, and so writes are only noticed the first time. Reported-by: Andy Shevchenko Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/YLZEhv0cpZp8uVE3@casper.infradead.org --- drivers/video/fbdev/core/fb_defio.c | 35 +++++++++++++++++++++++++++++++++++ drivers/video/fbdev/core/fbmem.c | 4 ++++ include/linux/fb.h | 3 +++ 3 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index b292887a2481..a591d291b231 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -52,6 +52,13 @@ static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; get_page(page); + + if (vmf->vma->vm_file) + page->mapping = vmf->vma->vm_file->f_mapping; + else + printk(KERN_ERR "no mapping available\n"); + + BUG_ON(!page->mapping); page->index = vmf->pgoff; vmf->page = page; @@ -144,6 +151,17 @@ static const struct vm_operations_struct fb_deferred_io_vm_ops = { .page_mkwrite = fb_deferred_io_mkwrite, }; +static int fb_deferred_io_set_page_dirty(struct page *page) +{ + if (!PageDirty(page)) + SetPageDirty(page); + return 0; +} + +static const struct address_space_operations fb_deferred_io_aops = { + .set_page_dirty = fb_deferred_io_set_page_dirty, +}; + int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma) { vma->vm_ops = &fb_deferred_io_vm_ops; @@ -194,12 +212,29 @@ void fb_deferred_io_init(struct fb_info *info) } EXPORT_SYMBOL_GPL(fb_deferred_io_init); +void fb_deferred_io_open(struct fb_info *info, + struct inode *inode, + struct file *file) +{ + file->f_mapping->a_ops = &fb_deferred_io_aops; +} +EXPORT_SYMBOL_GPL(fb_deferred_io_open); + void fb_deferred_io_cleanup(struct fb_info *info) { struct fb_deferred_io *fbdefio = info->fbdefio; + struct page *page; + int i; BUG_ON(!fbdefio); cancel_delayed_work_sync(&info->deferred_work); + + /* clear out the mapping that we setup */ + for (i = 0 ; i < info->fix.smem_len; i += PAGE_SIZE) { + page = fb_deferred_io_page(info, i); + page->mapping = NULL; + } + mutex_destroy(&fbdefio->lock); } EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 072780b0e570..98f193078c05 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1415,6 +1415,10 @@ __releases(&info->lock) if (res) module_put(info->fbops->owner); } +#ifdef CONFIG_FB_DEFERRED_IO + if (info->fbdefio) + fb_deferred_io_open(info, inode, file); +#endif out: unlock_fb_info(info); if (res) diff --git a/include/linux/fb.h b/include/linux/fb.h index a8dccd23c249..ecfbcc0553a5 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -659,6 +659,9 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, /* drivers/video/fb_defio.c */ int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma); extern void fb_deferred_io_init(struct fb_info *info); +extern void fb_deferred_io_open(struct fb_info *info, + struct inode *inode, + struct file *file); extern void fb_deferred_io_cleanup(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync); -- cgit v1.2.3-71-gd317 From 216214c64a8c1cb9078c2c0aec7bb4a2f8e75397 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 9 Dec 2020 16:40:38 +0200 Subject: net/mlx5: DR, Create multi-destination flow table with level less than 64 Flow table that contains flow pointing to multiple flow tables or multiple TIRs must have a level lower than 64. In our case it applies to muli- destination flow table. Fix the level of the created table to comply with HW Spec definitions, and still make sure that its level lower than SW-owned tables, so that it would be possible to point from the multi-destination FW table to SW tables. Fixes: 34583beea4b7 ("net/mlx5: DR, Create multi-destination table for SW-steering use") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c | 3 ++- include/linux/mlx5/mlx5_ifc.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c index 1fbcd012bb85..7ccfd40586ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -112,7 +112,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, int ret; ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; - ft_attr.level = dmn->info.caps.max_ft_level - 2; + ft_attr.level = min_t(int, dmn->info.caps.max_ft_level - 2, + MLX5_FT_MAX_MULTIPATH_LEVEL); ft_attr.reformat_en = reformat_req; ft_attr.decap_en = reformat_req; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6d16eed6850e..eb86e80e4643 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1289,6 +1289,8 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) +#define MLX5_FT_MAX_MULTIPATH_LEVEL 63 + enum { MLX5_STEERING_FORMAT_CONNECTX_5 = 0, MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, -- cgit v1.2.3-71-gd317 From 519d8ab17682da5f2fae5941d906d85b9fd3593a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 May 2021 21:43:50 +0200 Subject: virtchnl: Add missing padding to virtchnl_proto_hdrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On m68k (Coldfire M547x): CC drivers/net/ethernet/intel/i40e/i40e_main.o In file included from drivers/net/ethernet/intel/i40e/i40e_prototype.h:9, from drivers/net/ethernet/intel/i40e/i40e.h:41, from drivers/net/ethernet/intel/i40e/i40e_main.c:12: include/linux/avf/virtchnl.h:153:36: warning: division by zero [-Wdiv-by-zero] 153 | { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } | ^ include/linux/avf/virtchnl.h:844:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ 844 | VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); | ^~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/avf/virtchnl.h:844:33: error: enumerator value for ‘virtchnl_static_assert_virtchnl_proto_hdrs’ is not an integer constant 844 | VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); | ^~~~~~~~~~~~~~~~~~~ On m68k, integers are aligned on addresses that are multiples of two, not four, bytes. Hence the size of a structure containing integers may not be divisible by 4. Fix this by adding explicit padding. Fixes: 1f7ea1cd6a374842 ("ice: Enable FDIR Configure for AVF") Reported-by: kernel test robot Signed-off-by: Geert Uytterhoeven Acked-by: Jesse Brandeburg Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 565deea6ffe8..8612f8fc86c1 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -830,6 +830,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr); struct virtchnl_proto_hdrs { u8 tunnel_level; + u8 pad[3]; /** * specify where protocol header start from. * 0 - from the outer layer -- cgit v1.2.3-71-gd317 From 50c25ee97cf6ab011542167ab590c17012cea4ed Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 4 Jun 2021 20:01:08 -0700 Subject: Revert "MIPS: make userspace mapping young by default" This reverts commit f685a533a7fab35c5d069dcd663f59c8e4171a75. The MIPS cache flush logic needs to know whether the mapping was already established to decide how to flush caches. This is done by checking the valid bit in the PTE. The commit above breaks this logic by setting the valid in the PTE in new mappings, which causes kernel crashes. Link: https://lkml.kernel.org/r/20210526094335.92948-1-tsbogend@alpha.franken.de Fixes: f685a533a7f ("MIPS: make userspace mapping young by default") Reported-by: Zhou Yanjie Signed-off-by: Thomas Bogendoerfer Cc: Huang Pei Cc: Nicholas Piggin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/cache.c | 30 ++++++++++++++---------------- include/linux/pgtable.h | 8 ++++++++ mm/memory.c | 4 ++++ 3 files changed, 26 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index a7bf0c80371c..830ab91e574f 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -158,31 +158,29 @@ unsigned long _page_cachable_default; EXPORT_SYMBOL(_page_cachable_default); #define PM(p) __pgprot(_page_cachable_default | (p)) -#define PVA(p) PM(_PAGE_VALID | _PAGE_ACCESSED | (p)) static inline void setup_protection_map(void) { protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[1] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[2] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[3] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[4] = PVA(_PAGE_PRESENT); - protection_map[5] = PVA(_PAGE_PRESENT); - protection_map[6] = PVA(_PAGE_PRESENT); - protection_map[7] = PVA(_PAGE_PRESENT); + protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[4] = PM(_PAGE_PRESENT); + protection_map[5] = PM(_PAGE_PRESENT); + protection_map[6] = PM(_PAGE_PRESENT); + protection_map[7] = PM(_PAGE_PRESENT); protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[9] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | + protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); - protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); - protection_map[12] = PVA(_PAGE_PRESENT); - protection_map[13] = PVA(_PAGE_PRESENT); - protection_map[14] = PVA(_PAGE_PRESENT); - protection_map[15] = PVA(_PAGE_PRESENT); + protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[12] = PM(_PAGE_PRESENT); + protection_map[13] = PM(_PAGE_PRESENT); + protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE); } -#undef _PVA #undef PM void cpu_cache_init(void) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 46b13780c2c8..a43047b1030d 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -432,6 +432,14 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres * To be differentiate with macro pte_mkyoung, this macro is used on platforms * where software maintains page access bit. */ +#ifndef pte_sw_mkyoung +static inline pte_t pte_sw_mkyoung(pte_t pte) +{ + return pte; +} +#define pte_sw_mkyoung pte_sw_mkyoung +#endif + #ifndef pte_savedwrite #define pte_savedwrite pte_write #endif diff --git a/mm/memory.c b/mm/memory.c index 730daa00952b..f3ffab9b9e39 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2939,6 +2939,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); + entry = pte_sw_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* @@ -3602,6 +3603,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) __SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); + entry = pte_sw_mkyoung(entry); if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); @@ -3786,6 +3788,8 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) if (prefault && arch_wants_old_prefaulted_pte()) entry = pte_mkold(entry); + else + entry = pte_sw_mkyoung(entry); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); -- cgit v1.2.3-71-gd317