From 55037ed7bdc62151a726f5685f88afa6a82959b1 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 29 Mar 2022 10:12:52 -0700 Subject: uapi/linux/stddef.h: Add include guards Add include guard wrapper define to uapi/linux/stddef.h to prevent macro redefinition errors when stddef.h is included more than once. This was not needed before since the only contents already used a redefinition test. Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220329171252.57279-1-tadeusz.struk@linaro.org Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 3021ea25a284..7837ba4fe728 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_STDDEF_H +#define _UAPI_LINUX_STDDEF_H + #include <linux/compiler_types.h> #ifndef __always_inline @@ -41,3 +44,4 @@ struct { } __empty_ ## NAME; \ TYPE NAME[]; \ } +#endif -- cgit v1.2.3-71-gd317 From f56b919fa4f1b27c589e71f7d90e9785f9196bf1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Feb 2022 09:39:22 +0100 Subject: linux/fb.h: Spelling s/pallette/palette/ Fix a misspelling of "palette" in a comment. Signed-off-by: Geert Uytterhoeven Reviewed-by: Pekka Paalanen Signed-off-by: Helge Deller --- include/uapi/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h index 4c14e8be7267..3a49913d006c 100644 --- a/include/uapi/linux/fb.h +++ b/include/uapi/linux/fb.h @@ -182,7 +182,7 @@ struct fb_fix_screeninfo { * * For pseudocolor: offset and length should be the same for all color * components.
Offset specifies the position of the least significant bit - * of the pallette index in a pixel value. Length indicates the number + * of the palette index in a pixel value. Length indicates the number * of available palette entries (i.e. # of entries = 1 << length). */ struct fb_bitfield { -- cgit v1.2.3-71-gd317 From c4212f3eb89fd5654f0a6ed2ee1d13fcb86cb664 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 10 Apr 2022 15:13:24 -0600 Subject: io_uring: flag the fact that linked file assignment is sane Give applications a way to tell if the kernel supports sane linked files, as in files being assigned at the right time to be able to reliably do <open file direct into slot X><read file from slot X> while using IOSQE_IO_LINK to order them. Not really a bug fix, but flag it as such so that it gets pulled in with backports of the deferred file assignment. Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- include/uapi/linux/io_uring.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/fs/io_uring.c b/fs/io_uring.c index 659f8ecba5b7..f060ad018ba4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -11178,7 +11178,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | - IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP; + IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | + IORING_FEAT_LINKED_FILE; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 784adc6f6ed2..1845cf7c80ba 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -296,6 +296,7 @@ struct io_uring_params { #define IORING_FEAT_NATIVE_WORKERS (1U << 9) #define IORING_FEAT_RSRC_TAGS (1U << 10) #define IORING_FEAT_CQE_SKIP (1U << 11) +#define IORING_FEAT_LINKED_FILE
(1U << 12) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3-71-gd317 From 470776c6b03491a3e82c644737a6da5466b8b3eb Mon Sep 17 00:00:00 2001 From: Shelby Heffron Date: Sun, 17 Apr 2022 13:05:08 -0700 Subject: Input: add Marine Navigation Keycodes Add keycodes that are used by marine navigation devices. Signed-off-by: Shelby Heffron Link: https://lore.kernel.org/r/20220414015356.1619310-1-Shelby.Heffron@garmin.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 7989d9483ea7..dff8e7f17074 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -662,6 +662,27 @@ /* Select an area of screen to be copied */ #define KEY_SELECTIVE_SCREENSHOT 0x27a +/* Move the focus to the next or previous user controllable element within a UI container */ +#define KEY_NEXT_ELEMENT 0x27b +#define KEY_PREVIOUS_ELEMENT 0x27c + +/* Toggle Autopilot engagement */ +#define KEY_AUTOPILOT_ENGAGE_TOGGLE 0x27d + +/* Shortcut Keys */ +#define KEY_MARK_WAYPOINT 0x27e +#define KEY_SOS 0x27f +#define KEY_NAV_CHART 0x280 +#define KEY_FISHING_CHART 0x281 +#define KEY_SINGLE_RANGE_RADAR 0x282 +#define KEY_DUAL_RANGE_RADAR 0x283 +#define KEY_RADAR_OVERLAY 0x284 +#define KEY_TRADITIONAL_SONAR 0x285 +#define KEY_CLEARVU_SONAR 0x286 +#define KEY_SIDEVU_SONAR 0x287 +#define KEY_NAV_INFO 0x288 +#define KEY_BRIGHTNESS_MENU 0x289 + /* * Some keyboards have keys which do not have a defined meaning, these keys * are intended to be programmed / bound to macros by the user. 
For most -- cgit v1.2.3-71-gd317 From 9e4ab6c89109472082616f8d2f6ada7deaffe161 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:19 +0100 Subject: arm64/sme: Implement vector length configuration prctl()s As for SVE provide a prctl() interface which allows processes to configure their SME vector length. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-12-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 4 ++++ arch/arm64/include/asm/processor.h | 4 +++- arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/fpsimd.c | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/prctl.h | 9 +++++++++ kernel/sys.c | 12 ++++++++++++ 6 files changed, 61 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 32cd682258d9..38fd6aab7feb 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -288,6 +288,8 @@ static inline int sme_max_virtualisable_vl(void) } extern unsigned int sme_get_vl(void); +extern int sme_set_current_vl(unsigned long arg); +extern int sme_get_current_vl(void); #else @@ -299,6 +301,8 @@ static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } +static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } +static inline int sme_get_current_vl(void) { return -EINVAL; } #endif /* ! 
CONFIG_ARM64_SME */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index abf34a9c2eab..7a57cbff8a03 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -355,9 +355,11 @@ extern void __init minsigstksz_setup(void); */ #include -/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ +/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() +#define SME_SET_VL(arg) sme_set_current_vl(arg) +#define SME_GET_VL() sme_get_current_vl() /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e1317b7c4525..4e6b58dcd6f9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -82,6 +82,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ +#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 754a96563f6f..39f44fcb9b99 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -149,6 +149,8 @@ static unsigned int vec_vl_inherit_flag(enum vec_type type) switch (type) { case ARM64_VEC_SVE: return TIF_SVE_VL_INHERIT; + case ARM64_VEC_SME: + return TIF_SME_VL_INHERIT; default: WARN_ON_ONCE(1); return 0; @@ -807,6 +809,36 @@ int sve_get_current_vl(void) return vec_prctl_status(ARM64_VEC_SVE, 0); } +#ifdef CONFIG_ARM64_SME +/* PR_SME_SET_VL */ +int sme_set_current_vl(unsigned long arg) +{ + unsigned long vl, flags; + int ret; + + vl = arg & 
PR_SME_VL_LEN_MASK; + flags = arg & ~vl; + + if (!system_supports_sme() || is_compat_task()) + return -EINVAL; + + ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags); + if (ret) + return ret; + + return vec_prctl_status(ARM64_VEC_SME, flags); +} + +/* PR_SME_GET_VL */ +int sme_get_current_vl(void) +{ + if (!system_supports_sme() || is_compat_task()) + return -EINVAL; + + return vec_prctl_status(ARM64_VEC_SME, 0); +} +#endif /* CONFIG_ARM64_SME */ + static void vec_probe_vqs(struct vl_info *info, DECLARE_BITMAP(map, SVE_VQ_MAX)) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index e998764f0262..a5e06dcbba13 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -272,6 +272,15 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +/* arm64 Scalable Matrix Extension controls */ +/* Flag values must be in sync with SVE versions */ +#define PR_SME_SET_VL 63 /* set task vector length */ +# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SME_GET_VL 64 /* get task vector length */ +/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */ +# define PR_SME_VL_LEN_MASK 0xffff +# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ + #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 diff --git a/kernel/sys.c b/kernel/sys.c index 374f83e95239..b911fa6d81ab 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -117,6 +117,12 @@ #ifndef SVE_GET_VL # define SVE_GET_VL() (-EINVAL) #endif +#ifndef SME_SET_VL +# define SME_SET_VL(a) (-EINVAL) +#endif +#ifndef SME_GET_VL +# define SME_GET_VL() (-EINVAL) +#endif #ifndef PAC_RESET_KEYS # define PAC_RESET_KEYS(a, b) (-EINVAL) #endif @@ -2541,6 +2547,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SVE_GET_VL: error = SVE_GET_VL(); break; + case PR_SME_SET_VL: + error = SME_SET_VL(arg2); + break; + case PR_SME_GET_VL: + error = 
SME_GET_VL(); + break; case PR_GET_SPECULATION_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; -- cgit v1.2.3-71-gd317 From e12310a0d30f260b26297bc8d7c95769489af038 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:28 +0100 Subject: arm64/sme: Implement ptrace support for streaming mode SVE registers The streaming mode SVE registers are represented using the same data structures as for SVE but since the vector lengths supported and in use may not be the same as SVE we represent them with a new type NT_ARM_SSVE. Unfortunately we only have a single 16 bit reserved field available in the header so there is no space to fit the current and maximum vector length for both standard and streaming SVE mode without redefining the structure in a way that creates a complicated and fragile ABI. Since FFR is not present in streaming mode it is read and written as zero. Setting NT_ARM_SSVE registers will put the task into streaming mode, similarly setting NT_ARM_SVE registers will exit it. Reads that do not correspond to the current mode of the task will return the header with no register data. For compatibility reasons on write setting no flag for the register type will be interpreted as setting SVE registers, though users can provide no register data as an alternative mechanism for doing so.
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-21-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/include/uapi/asm/ptrace.h | 13 ++- arch/arm64/kernel/fpsimd.c | 31 +++-- arch/arm64/kernel/ptrace.c | 214 +++++++++++++++++++++++++++-------- include/uapi/linux/elf.h | 1 + 5 files changed, 201 insertions(+), 59 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 6c33bc832ed4..5afcd0709aae 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -144,6 +144,7 @@ struct vl_info { extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); +extern void fpsimd_force_sync_to_sve(struct task_struct *task); extern void sve_sync_to_fpsimd(struct task_struct *task); extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task); diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 758ae984ff97..522b925a78c1 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -109,7 +109,7 @@ struct user_hwdebug_state { } dbg_regs[16]; }; -/* SVE/FP/SIMD state (NT_ARM_SVE) */ +/* SVE/FP/SIMD state (NT_ARM_SVE & NT_ARM_SSVE) */ struct user_sve_header { __u32 size; /* total meaningful regset content in bytes */ @@ -220,6 +220,7 @@ struct user_sve_header { (SVE_PT_SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - \ SVE_PT_SVE_PREGS_OFFSET(vq)) +/* For streaming mode SVE (SSVE) FFR must be read and written as zero */ #define SVE_PT_SVE_FFR_OFFSET(vq) \ (SVE_PT_REGS_OFFSET + __SVE_FFR_OFFSET(vq)) @@ -240,10 +241,12 @@ struct user_sve_header { - SVE_PT_SVE_OFFSET + (__SVE_VQ_BYTES - 1)) \ / __SVE_VQ_BYTES * __SVE_VQ_BYTES) -#define SVE_PT_SIZE(vq, flags) \ - (((flags) & SVE_PT_REGS_MASK) == 
SVE_PT_REGS_SVE ? \ - SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ - : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags)) +#define SVE_PT_SIZE(vq, flags) \ + (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ + SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ + : ((((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD ? \ + SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags) \ + : SVE_PT_REGS_OFFSET))) /* pointer authentication masks (NT_ARM_PAC_MASK) */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 80f7ca12f855..94f06e9d37cf 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -643,7 +643,7 @@ static void fpsimd_to_sve(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); __fpsimd_to_sve(sst, fst, vq); } @@ -660,7 +660,7 @@ static void fpsimd_to_sve(struct task_struct *task) */ static void sve_to_fpsimd(struct task_struct *task) { - unsigned int vq; + unsigned int vq, vl; void const *sst = task->thread.sve_state; struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; @@ -669,7 +669,8 @@ static void sve_to_fpsimd(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vl = thread_get_cur_vl(&task->thread); + vq = sve_vq_from_vl(vl); for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); @@ -717,6 +718,19 @@ void sve_alloc(struct task_struct *task) } +/* + * Force the FPSIMD state shared with SVE to be updated in the SVE state + * even if the SVE state is the current active state. + * + * This should only be called by ptrace. task must be non-runnable. + * task->thread.sve_state must point to at least sve_state_size(task) + * bytes of allocated kernel memory. 
+ */ +void fpsimd_force_sync_to_sve(struct task_struct *task) +{ + fpsimd_to_sve(task); +} + /* * Ensure that task->thread.sve_state is up to date with respect to * the user task, irrespective of when SVE is in use or not. @@ -727,7 +741,8 @@ void sve_alloc(struct task_struct *task) */ void fpsimd_sync_to_sve(struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SVE)) + if (!test_tsk_thread_flag(task, TIF_SVE) && + !thread_sm_enabled(&task->thread)) fpsimd_to_sve(task); } @@ -741,7 +756,8 @@ void fpsimd_sync_to_sve(struct task_struct *task) */ void sve_sync_to_fpsimd(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_SVE)) + if (test_tsk_thread_flag(task, TIF_SVE) || + thread_sm_enabled(&task->thread)) sve_to_fpsimd(task); } @@ -766,7 +782,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) if (!test_tsk_thread_flag(task, TIF_SVE)) return; - vq = sve_vq_from_vl(task_get_sve_vl(task)); + vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); __fpsimd_to_sve(sst, fst, vq); @@ -810,8 +826,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, /* * To ensure the FPSIMD bits of the SVE vector registers are preserved, * write any live register state back to task_struct, and convert to a - * regular FPSIMD thread. Since the vector length can only be changed - * with a syscall we can't be in streaming mode while reconfiguring. + * regular FPSIMD thread. 
*/ if (task == current) { get_cpu_fpsimd_context(); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 230a47b9189e..60185c27b394 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -713,21 +713,51 @@ static int system_call_set(struct task_struct *target, #ifdef CONFIG_ARM64_SVE static void sve_init_header_from_task(struct user_sve_header *header, - struct task_struct *target) + struct task_struct *target, + enum vec_type type) { unsigned int vq; + bool active; + bool fpsimd_only; + enum vec_type task_type; memset(header, 0, sizeof(*header)); - header->flags = test_tsk_thread_flag(target, TIF_SVE) ? - SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD; - if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) - header->flags |= SVE_PT_VL_INHERIT; + /* Check if the requested registers are active for the task */ + if (thread_sm_enabled(&target->thread)) + task_type = ARM64_VEC_SME; + else + task_type = ARM64_VEC_SVE; + active = (task_type == type); + + switch (type) { + case ARM64_VEC_SVE: + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); + break; + case ARM64_VEC_SME: + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; + fpsimd_only = false; + break; + default: + WARN_ON_ONCE(1); + return; + } - header->vl = task_get_sve_vl(target); + if (active) { + if (fpsimd_only) { + header->flags |= SVE_PT_REGS_FPSIMD; + } else { + header->flags |= SVE_PT_REGS_SVE; + } + } + + header->vl = task_get_vl(target, type); vq = sve_vq_from_vl(header->vl); - header->max_vl = sve_max_vl(); + header->max_vl = vec_max_vl(type); header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); @@ -738,19 +768,17 @@ static unsigned int sve_size_from_header(struct user_sve_header const *header) return ALIGN(header->size, SVE_VQ_BYTES); } -static int 
sve_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) +static int sve_get_common(struct task_struct *target, + const struct user_regset *regset, + struct membuf to, + enum vec_type type) { struct user_sve_header header; unsigned int vq; unsigned long start, end; - if (!system_supports_sve()) - return -EINVAL; - /* Header */ - sve_init_header_from_task(&header, target); + sve_init_header_from_task(&header, target, type); vq = sve_vq_from_vl(header.vl); membuf_write(&to, &header, sizeof(header)); @@ -758,49 +786,61 @@ static int sve_get(struct task_struct *target, if (target == current) fpsimd_preserve_current_state(); - /* Registers: FPSIMD-only case */ - BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); - if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) + BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); + + switch ((header.flags & SVE_PT_REGS_MASK)) { + case SVE_PT_REGS_FPSIMD: return __fpr_get(target, regset, to); - /* Otherwise: full SVE case */ + case SVE_PT_REGS_SVE: + start = SVE_PT_SVE_OFFSET; + end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); + membuf_write(&to, target->thread.sve_state, end - start); - BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); - start = SVE_PT_SVE_OFFSET; - end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); - membuf_write(&to, target->thread.sve_state, end - start); + start = end; + end = SVE_PT_SVE_FPSR_OFFSET(vq); + membuf_zero(&to, end - start); - start = end; - end = SVE_PT_SVE_FPSR_OFFSET(vq); - membuf_zero(&to, end - start); + /* + * Copy fpsr, and fpcr which must follow contiguously in + * struct fpsimd_state: + */ + start = end; + end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; + membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, + end - start); - /* - * Copy fpsr, and fpcr which must follow contiguously in - * struct fpsimd_state: - */ - start = end; - end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; - membuf_write(&to, 
&target->thread.uw.fpsimd_state.fpsr, end - start); + start = end; + end = sve_size_from_header(&header); + return membuf_zero(&to, end - start); - start = end; - end = sve_size_from_header(&header); - return membuf_zero(&to, end - start); + default: + return 0; + } } -static int sve_set(struct task_struct *target, +static int sve_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + struct membuf to) +{ + if (!system_supports_sve()) + return -EINVAL; + + return sve_get_common(target, regset, to, ARM64_VEC_SVE); +} + +static int sve_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + enum vec_type type) { int ret; struct user_sve_header header; unsigned int vq; unsigned long start, end; - if (!system_supports_sve()) - return -EINVAL; - /* Header */ if (count < sizeof(header)) return -EINVAL; @@ -813,13 +853,37 @@ static int sve_set(struct task_struct *target, * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by * vec_set_vector_length(), which will also validate them for us: */ - ret = vec_set_vector_length(target, ARM64_VEC_SVE, header.vl, + ret = vec_set_vector_length(target, type, header.vl, ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); if (ret) goto out; /* Actual VL set may be less than the user asked for: */ - vq = sve_vq_from_vl(task_get_sve_vl(target)); + vq = sve_vq_from_vl(task_get_vl(target, type)); + + /* Enter/exit streaming mode */ + if (system_supports_sme()) { + u64 old_svcr = target->thread.svcr; + + switch (type) { + case ARM64_VEC_SVE: + target->thread.svcr &= ~SYS_SVCR_EL0_SM_MASK; + break; + case ARM64_VEC_SME: + target->thread.svcr |= SYS_SVCR_EL0_SM_MASK; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + /* + * If we switched then invalidate any existing SVE + * state and ensure there's storage. 
+ */ + if (target->thread.svcr != old_svcr) + sve_alloc(target); + } /* Registers: FPSIMD-only case */ @@ -828,10 +892,15 @@ static int sve_set(struct task_struct *target, ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, SVE_PT_FPSIMD_OFFSET); clear_tsk_thread_flag(target, TIF_SVE); + if (type == ARM64_VEC_SME) + fpsimd_force_sync_to_sve(target); goto out; } - /* Otherwise: full SVE case */ + /* + * Otherwise: no registers or full SVE case. For backwards + * compatibility reasons we treat empty flags as SVE registers. + */ /* * If setting a different VL from the requested VL and there is @@ -852,8 +921,9 @@ static int sve_set(struct task_struct *target, /* * Ensure target->thread.sve_state is up to date with target's - * FPSIMD regs, so that a short copyin leaves trailing registers - * unmodified. + * FPSIMD regs, so that a short copyin leaves trailing + * registers unmodified. Always enable SVE even if going into + * streaming mode. */ fpsimd_sync_to_sve(target); set_tsk_thread_flag(target, TIF_SVE); @@ -889,8 +959,46 @@ out: return ret; } +static int sve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (!system_supports_sve()) + return -EINVAL; + + return sve_set_common(target, regset, pos, count, kbuf, ubuf, + ARM64_VEC_SVE); +} + #endif /* CONFIG_ARM64_SVE */ +#ifdef CONFIG_ARM64_SME + +static int ssve_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_sme()) + return -EINVAL; + + return sve_get_common(target, regset, to, ARM64_VEC_SME); +} + +static int ssve_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (!system_supports_sme()) + return -EINVAL; + + return sve_set_common(target, regset, pos, count, kbuf, ubuf, + ARM64_VEC_SME); +} + +#endif /* CONFIG_ARM64_SME */ + 
#ifdef CONFIG_ARM64_PTR_AUTH static int pac_mask_get(struct task_struct *target, const struct user_regset *regset, @@ -1108,6 +1216,9 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SVE, #endif +#ifdef CONFIG_ARM64_SVE + REGSET_SSVE, +#endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, REGSET_PAC_ENABLED_KEYS, @@ -1188,6 +1299,17 @@ static const struct user_regset aarch64_regsets[] = { .set = sve_set, }, #endif +#ifdef CONFIG_ARM64_SME + [REGSET_SSVE] = { /* Streaming mode SVE */ + .core_note_type = NT_ARM_SSVE, + .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .regset_get = ssve_get, + .set = ssve_set, + }, +#endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { .core_note_type = NT_ARM_PAC_MASK, diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 787c657bfae8..a8dc688e1826 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -431,6 +431,7 @@ typedef struct elf64_shdr { #define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ +#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3-71-gd317 From 776b4a1cf36411e96972455ca72906b722b80ea1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:29 +0100 Subject: arm64/sme: Add ptrace support for ZA The ZA array can be read and written with the NT_ARM_ZA. 
Similarly to our interface for the SVE vector registers the regset consists of a header with information on the current vector length followed by an optional register data payload, represented as for signals as a series of horizontal vectors from 0 to VL/8 in the endianness independent format used for vectors. On get if ZA is enabled then register data will be provided, otherwise it will be omitted. On set if register data is provided then ZA is enabled and initialized using the provided data, otherwise it is disabled. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-22-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/ptrace.h | 56 ++++++++++++++ arch/arm64/kernel/ptrace.c | 144 +++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 3 files changed, 201 insertions(+) (limited to 'include/uapi/linux') diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 522b925a78c1..7fa2f7036aa7 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -268,6 +268,62 @@ struct user_pac_generic_keys { __uint128_t apgakey; }; +/* ZA state (NT_ARM_ZA) */ + +struct user_za_header { + __u32 size; /* total meaningful regset content in bytes */ + __u32 max_size; /* maximum possible size for this thread */ + __u16 vl; /* current vector length */ + __u16 max_vl; /* maximum possible vector length */ + __u16 flags; + __u16 __reserved; +}; + +/* + * Common ZA_PT_* flags: + * These must be kept in sync with prctl interface in <linux/prctl.h> + */ +#define ZA_PT_VL_INHERIT ((1 << 17) /* PR_SME_VL_INHERIT */ >> 16) +#define ZA_PT_VL_ONEXEC ((1 << 18) /* PR_SME_SET_VL_ONEXEC */ >> 16) + + +/* + * The remainder of the ZA state follows struct user_za_header.
The + * total size of the ZA state (including header) depends on the + * metadata in the header: ZA_PT_SIZE(vq) gives the total size + * of the state in bytes, including the header. + * + * Refer to <asm/sigcontext.h> for details of how to pass the correct + * "vq" argument to these macros. + */ + +/* Offset from the start of struct user_za_header to the register data */ +#define ZA_PT_ZA_OFFSET \ + ((sizeof(struct user_za_header) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +/* + * The payload starts at offset ZA_PT_ZA_OFFSET, and is of size + * ZA_PT_ZA_SIZE(vq). + * + * The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8 + * bytes each, starting from vector 0. + * + * Additional data might be appended in the future. + * + * The ZA matrix is represented in memory in an endianness-invariant layout + * which differs from the layout used for the FPSIMD V-registers on big-endian + * systems: see sigcontext.h for more explanation. + */ + +#define ZA_PT_ZAV_OFFSET(vq, n) \ + (ZA_PT_ZA_OFFSET + ((vq * __SVE_VQ_BYTES) * n)) + +#define ZA_PT_ZA_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES)) + +#define ZA_PT_SIZE(vq) \ + (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 60185c27b394..47d8a7472171 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -997,6 +997,141 @@ static int ssve_set(struct task_struct *target, ARM64_VEC_SME); } +static int za_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_za_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sme()) + return -EINVAL; + + /* Header */ + memset(&header, 0, sizeof(header)); + + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header.flags |= ZA_PT_VL_INHERIT; + + header.vl = task_get_sme_vl(target); + vq = sve_vq_from_vl(header.vl); +
header.max_vl = sme_max_vl(); + header.max_size = ZA_PT_SIZE(vq); + + /* If ZA is not active there is only the header */ + if (thread_za_enabled(&target->thread)) + header.size = ZA_PT_SIZE(vq); + else + header.size = ZA_PT_ZA_OFFSET; + + membuf_write(&to, &header, sizeof(header)); + + BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header)); + end = ZA_PT_ZA_OFFSET; + + if (target == current) + fpsimd_preserve_current_state(); + + /* Any register data to include? */ + if (thread_za_enabled(&target->thread)) { + start = end; + end = ZA_PT_SIZE(vq); + membuf_write(&to, target->thread.za_state, end - start); + } + + /* Zero any trailing padding */ + start = end; + end = ALIGN(header.size, SVE_VQ_BYTES); + return membuf_zero(&to, end - start); +} + +static int za_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_za_header header; + unsigned int vq; + unsigned long start, end; + + if (!system_supports_sme()) + return -EINVAL; + + /* Header */ + if (count < sizeof(header)) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, + 0, sizeof(header)); + if (ret) + goto out; + + /* + * All current ZA_PT_* flags are consumed by + * vec_set_vector_length(), which will also validate them for + * us: + */ + ret = vec_set_vector_length(target, ARM64_VEC_SME, header.vl, + ((unsigned long)header.flags) << 16); + if (ret) + goto out; + + /* Actual VL set may be less than the user asked for: */ + vq = sve_vq_from_vl(task_get_sme_vl(target)); + + /* Ensure there is some SVE storage for streaming mode */ + if (!target->thread.sve_state) { + sve_alloc(target); + if (!target->thread.sve_state) { + clear_thread_flag(TIF_SME); + ret = -ENOMEM; + goto out; + } + } + + /* Allocate/reinit ZA storage */ + sme_alloc(target); + if (!target->thread.za_state) { + ret = -ENOMEM; + clear_tsk_thread_flag(target, TIF_SME); + goto out; + } + + /* If there 
is no data then disable ZA */ + if (!count) { + target->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK; + goto out; + } + + /* + * If setting a different VL from the requested VL and there is + * register data, the data layout will be wrong: don't even + * try to set the registers in this case. + */ + if (vq != sve_vq_from_vl(header.vl)) { + ret = -EIO; + goto out; + } + + BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header)); + start = ZA_PT_ZA_OFFSET; + end = ZA_PT_SIZE(vq); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.za_state, + start, end); + if (ret) + goto out; + + /* Mark ZA as active and let userspace use it */ + set_tsk_thread_flag(target, TIF_SME); + target->thread.svcr |= SYS_SVCR_EL0_ZA_MASK; + +out: + fpsimd_flush_task_state(target); + return ret; +} + #endif /* CONFIG_ARM64_SME */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -1218,6 +1353,7 @@ enum aarch64_regset { #endif #ifdef CONFIG_ARM64_SVE REGSET_SSVE, + REGSET_ZA, #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, @@ -1309,6 +1445,14 @@ static const struct user_regset aarch64_regsets[] = { .regset_get = ssve_get, .set = ssve_set, }, + [REGSET_ZA] = { /* SME ZA */ + .core_note_type = NT_ARM_ZA, + .n = DIV_ROUND_UP(ZA_PT_ZA_SIZE(SVE_VQ_MAX), SVE_VQ_BYTES), + .size = SVE_VQ_BYTES, + .align = SVE_VQ_BYTES, + .regset_get = za_get, + .set = za_set, + }, #endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index a8dc688e1826..97808f958903 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -432,6 +432,7 @@ typedef struct elf64_shdr { #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ #define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ +#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device 
Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3-71-gd317 From c35fe2a68f29a0bda15ae994154cacaae5f69791 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 25 Apr 2022 16:18:33 +0100 Subject: elf: Fix the arm64 MTE ELF segment name and value Unfortunately, the name/value choice for the MTE ELF segment type (PT_ARM_MEMTAG_MTE) was pretty poor: LOPROC+1 is already in use by PT_AARCH64_UNWIND, as defined in the AArch64 ELF ABI (https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst). Update the ELF segment type value to LOPROC+2 and also change the define to PT_AARCH64_MEMTAG_MTE to match the AArch64 ELF ABI namespace. The AArch64 ELF ABI document is being updated accordingly (segment type not previously mentioned in the document). Signed-off-by: Catalin Marinas Fixes: 761b9b366cec ("elf: Introduce the ARM MTE ELF segment type") Cc: Will Deacon Cc: Jonathan Corbet Cc: Eric Biederman Cc: Kees Cook Cc: Luis Machado Cc: Richard Earnshaw Link: https://lore.kernel.org/r/20220425151833.2603830-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/memory-tagging-extension.rst | 4 ++-- arch/arm64/kernel/elfcore.c | 2 +- include/uapi/linux/elf.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index dd27f78d7608..dbae47bba25e 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -228,10 +228,10 @@ Core dump support ----------------- The allocation tags for user memory mapped with ``PROT_MTE`` are dumped -in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The +in the core file as additional ``PT_AARCH64_MEMTAG_MTE`` segments.
The program header for such segment is defined as: -:``p_type``: ``PT_ARM_MEMTAG_MTE`` +:``p_type``: ``PT_AARCH64_MEMTAG_MTE`` :``p_flags``: 0 :``p_offset``: segment file offset :``p_vaddr``: segment virtual address, same as the corresponding diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 2b3f3d0544b9..98d67444a5b6 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -95,7 +95,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) for_each_mte_vma(current, vma) { struct elf_phdr phdr; - phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_type = PT_AARCH64_MEMTAG_MTE; phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 787c657bfae8..7ce993e6786c 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -42,7 +42,7 @@ typedef __s64 Elf64_Sxword; /* ARM MTE memory tag segment type */ -#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) +#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2) /* * Extended Numbering -- cgit v1.2.3-71-gd317 From 7b33a09d036ffd9a04506122840629c7e870cf08 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:40 +0000 Subject: KVM: arm64: Add support for userspace to suspend a vCPU Introduce a new MP state, KVM_MP_STATE_SUSPENDED, which indicates a vCPU is in a suspended state. In the suspended state the vCPU will block until a wakeup event (pending interrupt) is recognized. Add a new system event type, KVM_SYSTEM_EVENT_WAKEUP, to indicate to userspace that KVM has recognized one such wakeup event. It is the responsibility of userspace to then make the vCPU runnable, or leave it suspended until the next wakeup event. 
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-7-oupton@google.com --- Documentation/virt/kvm/api.rst | 37 ++++++++++++++++++++++++++-- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 51 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 2 ++ 4 files changed, 89 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 4a900cdbc62e..46ca84600dca 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1476,14 +1476,43 @@ Possible values are: [s390] KVM_MP_STATE_LOAD the vcpu is in a special load/startup state [s390] + KVM_MP_STATE_SUSPENDED the vcpu is in a suspend state and is waiting + for a wakeup event [arm64] ========================== =============================================== On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. -For arm64/riscv: -^^^^^^^^^^^^^^^^ +For arm64: +^^^^^^^^^^ + +If a vCPU is in the KVM_MP_STATE_SUSPENDED state, KVM will emulate the +architectural execution of a WFI instruction. + +If a wakeup event is recognized, KVM will exit to userspace with a +KVM_SYSTEM_EVENT exit, where the event type is KVM_SYSTEM_EVENT_WAKEUP. If +userspace wants to honor the wakeup, it must set the vCPU's MP state to +KVM_MP_STATE_RUNNABLE. If it does not, KVM will continue to await a wakeup +event in subsequent calls to KVM_RUN. + +.. warning:: + + If userspace intends to keep the vCPU in a SUSPENDED state, it is + strongly recommended that userspace take action to suppress the + wakeup event (such as masking an interrupt). Otherwise, subsequent + calls to KVM_RUN will immediately exit with a KVM_SYSTEM_EVENT_WAKEUP + event and inadvertently waste CPU cycles. 
+ + Additionally, if userspace takes action to suppress a wakeup event, + it is strongly recommended that it also restores the vCPU to its + original state when the vCPU is made RUNNABLE again. For example, + if userspace masked a pending interrupt to suppress the wakeup, + the interrupt should be unmasked before returning control to the + guest. + +For riscv: +^^^^^^^^^^ The only states that are valid are KVM_MP_STATE_STOPPED and KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not. @@ -5985,6 +6014,7 @@ should put the acknowledged interrupt vector into the 'epr' field. #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 + #define KVM_SYSTEM_EVENT_WAKEUP 4 __u32 type; __u32 ndata; __u64 data[16]; @@ -6009,6 +6039,9 @@ Valid values for 'type' are: has requested a crash condition maintenance. Userspace can choose to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. + - KVM_SYSTEM_EVENT_WAKEUP -- the exiting vCPU is in a suspended state and + KVM has recognized a wakeup event. Userspace may honor this event by + marking the exiting vCPU as runnable, or deny it and call KVM_RUN again. If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain architecture specific information for the system-level event. 
Only diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f3f93d48e21a..46027b9b80ca 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) +#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index efe54aba5cce..abd32a84ed7a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -444,6 +444,18 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; } +static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED; + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + kvm_vcpu_kick(vcpu); +} + +static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -464,6 +476,9 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, case KVM_MP_STATE_STOPPED: kvm_arm_vcpu_power_off(vcpu); break; + case KVM_MP_STATE_SUSPENDED: + kvm_arm_vcpu_suspend(vcpu); + break; default: ret = -EINVAL; } @@ -648,6 +663,39 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_enable(); } +static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_vcpu_suspended(vcpu)) + return 1; + + kvm_vcpu_wfi(vcpu); + + /* + * The suspend state is sticky; we do not leave it until userspace + * explicitly marks the vCPU as runnable. Request that we suspend again + * later. + */ + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + + /* + * Check to make sure the vCPU is actually runnable. If so, exit to + * userspace informing it of the wakeup condition. 
+ */ + if (kvm_arch_vcpu_runnable(vcpu)) { + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + return 0; + } + + /* + * Otherwise, we were unblocked to process a different event, such as a + * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to + * process the event. + */ + return 1; +} + /** * check_vcpu_requests - check and handle pending vCPU requests * @vcpu: the VCPU pointer @@ -686,6 +734,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) kvm_pmu_handle_pmcr(vcpu, __vcpu_sys_reg(vcpu, PMCR_EL0)); + + if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) + return kvm_vcpu_suspend(vcpu); } return 1; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6a184d260c7f..7f72fb7b05f2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -444,6 +444,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_WAKEUP 4 __u32 type; __u32 ndata; union { @@ -646,6 +647,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 #define KVM_MP_STATE_AP_RESET_HOLD 9 +#define KVM_MP_STATE_SUSPENDED 10 struct kvm_mp_state { __u32 mp_state; -- cgit v1.2.3-71-gd317 From bfbab44568779e1682bc6f63688bb9c965f0e74a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:41 +0000 Subject: KVM: arm64: Implement PSCI SYSTEM_SUSPEND ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND" describes a PSCI call that allows software to request that a system be placed in the deepest possible low-power state. Effectively, software can use this to suspend itself to RAM. Unfortunately, there really is no good way to implement a system-wide PSCI call in KVM. 
Any precondition checks done in the kernel will need to be repeated by userspace since there is no good way to protect a critical section that spans an exit to userspace. SYSTEM_RESET and SYSTEM_OFF are equally plagued by this issue, although no users have seemingly cared for the relatively long time these calls have been supported. The solution is to just make the whole implementation userspace's problem. Introduce a new system event, KVM_SYSTEM_EVENT_SUSPEND, that indicates to userspace a calling vCPU has invoked PSCI SYSTEM_SUSPEND. Additionally, add a CAP to get buy-in from userspace for this new exit type. Only advertise the SYSTEM_SUSPEND PSCI call if userspace has opted in. If a vCPU calls SYSTEM_SUSPEND, punt straight to userspace. Provide explicit documentation of userspace's responsibilities for the exit and point to the PSCI specification to describe the actual PSCI call. Reviewed-by: Reiji Watanabe Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-8-oupton@google.com --- Documentation/virt/kvm/api.rst | 39 +++++++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/arm.c | 5 +++++ arch/arm64/kvm/psci.c | 29 +++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 2 ++ 5 files changed, 77 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 46ca84600dca..d8d7859fc556 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6015,6 +6015,7 @@ should put the acknowledged interrupt vector into the 'epr' field. #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 #define KVM_SYSTEM_EVENT_WAKEUP 4 + #define KVM_SYSTEM_EVENT_SUSPEND 5 __u32 type; __u32 ndata; __u64 data[16]; @@ -6042,6 +6043,34 @@ Valid values for 'type' are: - KVM_SYSTEM_EVENT_WAKEUP -- the exiting vCPU is in a suspended state and KVM has recognized a wakeup event.
Userspace may honor this event by marking the exiting vCPU as runnable, or deny it and call KVM_RUN again. + - KVM_SYSTEM_EVENT_SUSPEND -- the guest has requested a suspension of + the VM. + +For arm/arm64: +^^^^^^^^^^^^^^ + + KVM_SYSTEM_EVENT_SUSPEND exits are enabled with the + KVM_CAP_ARM_SYSTEM_SUSPEND VM capability. If a guest invokes the PSCI + SYSTEM_SUSPEND function, KVM will exit to userspace with this event + type. + + It is the sole responsibility of userspace to implement the PSCI + SYSTEM_SUSPEND call according to ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND". + KVM does not change the vCPU's state before exiting to userspace, so + the call parameters are left in-place in the vCPU registers. + + Userspace is _required_ to take action for such an exit. It must + either: + + - Honor the guest request to suspend the VM. Userspace can request + in-kernel emulation of suspension by setting the calling vCPU's + state to KVM_MP_STATE_SUSPENDED. Userspace must configure the vCPU's + state according to the parameters passed to the PSCI function when + the calling vCPU is resumed. See ARM DEN0022D.b 5.19.1 "Intended use" + for details on the function parameters. + + - Deny the guest request to suspend the VM. See ARM DEN0022D.b 5.19.2 + "Caller responsibilities" for possible return values. If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain architecture specific information for the system-level event. Only @@ -7767,6 +7796,16 @@ At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting this capability will disable PMU virtualization for that VM. Usermode should adjust CPUID leaf 0xA to reflect that the PMU is disabled. +8.36 KVM_CAP_ARM_SYSTEM_SUSPEND +------------------------------- + +:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND +:Architectures: arm64 +:Type: vm + +When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of +type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request. + 9. 
Known KVM API problems ========================= diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 46027b9b80ca..d9df81949f76 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -137,6 +137,8 @@ struct kvm_arch { */ #define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3 #define KVM_ARCH_FLAG_EL1_32BIT 4 + /* PSCI SYSTEM_SUSPEND enabled for the guest */ +#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 unsigned long flags; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index abd32a84ed7a..f8a89ae52710 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -97,6 +97,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } mutex_unlock(&kvm->lock); break; + case KVM_CAP_ARM_SYSTEM_SUSPEND: + r = 0; + set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); + break; default: r = -EINVAL; break; @@ -210,6 +214,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: + case KVM_CAP_ARM_SYSTEM_SUSPEND: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 2e6f060214a7..5de30e72ad40 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -195,6 +195,15 @@ static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu) KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2); } +static void kvm_psci_system_suspend(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + memset(&run->system_event, 0, sizeof(vcpu->run->system_event)); + run->system_event.type = KVM_SYSTEM_EVENT_SUSPEND; + run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) { int i; @@ -300,6 +309,7 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) { unsigned long val = PSCI_RET_NOT_SUPPORTED; u32 psci_fn = smccc_get_function(vcpu); + struct kvm *kvm = vcpu->kvm; u32 arg; int ret = 1; @@ -331,6 +341,11 @@ static 
int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) case ARM_SMCCC_VERSION_FUNC_ID: val = 0; break; + case PSCI_1_0_FN_SYSTEM_SUSPEND: + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) + val = 0; + break; case PSCI_1_1_FN_SYSTEM_RESET2: case PSCI_1_1_FN64_SYSTEM_RESET2: if (minor >= 1) @@ -338,6 +353,20 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) break; } break; + case PSCI_1_0_FN_SYSTEM_SUSPEND: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + /* + * Return directly to userspace without changing the vCPU's + * registers. Userspace depends on reading the SMCCC parameters + * to implement SYSTEM_SUSPEND. + */ + if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) { + kvm_psci_system_suspend(vcpu); + return 0; + } + break; case PSCI_1_1_FN_SYSTEM_RESET2: kvm_psci_narrow_to_32bit(vcpu); fallthrough; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7f72fb7b05f2..32c56384fd08 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -445,6 +445,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 #define KVM_SYSTEM_EVENT_WAKEUP 4 +#define KVM_SYSTEM_EVENT_SUSPEND 5 __u32 type; __u32 ndata; union { @@ -1154,6 +1155,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DISABLE_QUIRKS2 213 /* #define KVM_CAP_VM_TSC_CONTROL 214 */ #define KVM_CAP_SYSTEM_EVENT_DATA 215 +#define KVM_CAP_ARM_SYSTEM_SUSPEND 216 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317