From 7a4fa29106d9a38ef005f5ab15d493c259f269c0 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 14 Dec 2017 15:54:29 +0200 Subject: net: sched: Add TCA_HW_OFFLOAD Qdiscs can be offloaded to HW, but current implementation isn't uniform. Instead, qdiscs either pass information about offload status via their TCA_OPTIONS or omit it altogether. Introduce a new attribute - TCA_HW_OFFLOAD that would form a uniform uAPI for the offloading status of qdiscs. Signed-off-by: Yuval Mintz Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d8b5f80c2ea6..843e29aa3cac 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -557,6 +557,7 @@ enum { TCA_PAD, TCA_DUMP_INVISIBLE, TCA_CHAIN, + TCA_HW_OFFLOAD, __TCA_MAX }; -- cgit v1.2.3-71-gd317 From 4a98795bc8ea148b1ebbbf001283e06430cffe36 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 14 Dec 2017 15:54:31 +0200 Subject: pkt_sched: Remove TC_RED_OFFLOADED from uapi Following the previous patch, RED is now using the new uniform uapi for indicating it's offloaded. As a result, TC_RED_OFFLOADED is no longer utilized by kernel and can be removed [as it's still not part of any stable release]. Fixes: 602f3baf2218 ("net_sch: red: Add offload ability to RED qdisc") Signed-off-by: Yuval Mintz Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index af3cc2f4e1ad..37b5096ae97b 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -256,7 +256,6 @@ struct tc_red_qopt { #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 -#define TC_RED_OFFLOADED 8 }; struct tc_red_xstats { -- cgit v1.2.3-71-gd317 From 4c82fd0abb87e20d0d68ef5237e74732352806c8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Dec 2017 12:08:33 +0100 Subject: netfilter: uapi: correct UNTRACKED conntrack state bit number nft_ct exposes this bit to userspace. This used to be #define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) (IP_CT_NUMBER is 5, so this was 0x40) .. but this got changed to 8 (0x100) when the untracked object got removed. Replace this with a literal 6 to prevent further incompatible changes in case IP_CT_NUMBER ever increases. Fixes: cc41c84b7e7f2 ("netfilter: kill the fake untracked conntrack objects") Reported-by: Li Shuang Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 3fea7709a441..57ccfb32e87f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -36,7 +36,7 @@ enum ip_conntrack_info { #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << 6) /* Bitset representing status of connection. */ enum ip_conntrack_status { -- cgit v1.2.3-71-gd317 From c0bace798436bca0fdc221ff61143f1376a9c3de Mon Sep 17 00:00:00 2001 From: Felix Janda Date: Mon, 1 Jan 2018 19:33:20 +0100 Subject: uapi libc compat: add fallback for unsupported libcs libc-compat.h aims to prevent symbol collisions between uapi and libc headers for each supported libc. This requires continuous coordination between them. The goal of this commit is to improve the situation for libcs (such as musl) which are not yet supported and/or do not wish to be explicitly supported, while not affecting supported libcs. More precisely, with this commit, unsupported libcs can request the suppression of any specific uapi definition by defining the correspondings _UAPI_DEF_* macro as 0. This can fix symbol collisions for them, as long as the libc headers are included before the uapi headers. Inclusion in the other order is outside the scope of this commit. All infrastructure in order to enable this fallback for unsupported libcs is already in place, except that libc-compat.h unconditionally defines all _UAPI_DEF_* macros to 1 for all unsupported libcs so that any previous definitions are ignored. In order to fix this, this commit merely makes these definitions conditional. This commit together with the musl libc commit http://git.musl-libc.org/cgit/musl/commit/?id=04983f2272382af92eb8f8838964ff944fbb8258 fixes for example the following compiler errors when is included after musl's : ./linux/in6.h:32:8: error: redefinition of 'struct in6_addr' ./linux/in6.h:49:8: error: redefinition of 'struct sockaddr_in6' ./linux/in6.h:59:8: error: redefinition of 'struct ipv6_mreq' The comments referencing glibc are still correct, but this file is not only used for glibc any more. Signed-off-by: Felix Janda Reviewed-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/uapi/linux/libc-compat.h | 55 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 282875cf8056..8254c937c9f4 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -168,46 +168,99 @@ /* If we did not see any headers from any supported C libraries, * or we are being included in the kernel, then define everything - * that we need. */ + * that we need. Check for previous __UAPI_* definitions to give + * unsupported C libraries a way to opt out of any kernel definition. */ #else /* !defined(__GLIBC__) */ /* Definitions for if.h */ +#ifndef __UAPI_DEF_IF_IFCONF #define __UAPI_DEF_IF_IFCONF 1 +#endif +#ifndef __UAPI_DEF_IF_IFMAP #define __UAPI_DEF_IF_IFMAP 1 +#endif +#ifndef __UAPI_DEF_IF_IFNAMSIZ #define __UAPI_DEF_IF_IFNAMSIZ 1 +#endif +#ifndef __UAPI_DEF_IF_IFREQ #define __UAPI_DEF_IF_IFREQ 1 +#endif /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +#endif /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* Definitions for in.h */ +#ifndef __UAPI_DEF_IN_ADDR #define __UAPI_DEF_IN_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN_IPPROTO #define __UAPI_DEF_IN_IPPROTO 1 +#endif +#ifndef __UAPI_DEF_IN_PKTINFO #define __UAPI_DEF_IN_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP_MREQ #define __UAPI_DEF_IP_MREQ 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN #define __UAPI_DEF_SOCKADDR_IN 1 +#endif +#ifndef __UAPI_DEF_IN_CLASS #define __UAPI_DEF_IN_CLASS 1 +#endif /* Definitions for in6.h */ +#ifndef __UAPI_DEF_IN6_ADDR #define __UAPI_DEF_IN6_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN6_ADDR_ALT #define __UAPI_DEF_IN6_ADDR_ALT 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN6 #define __UAPI_DEF_SOCKADDR_IN6 1 +#endif +#ifndef __UAPI_DEF_IPV6_MREQ #define __UAPI_DEF_IPV6_MREQ 1 +#endif +#ifndef __UAPI_DEF_IPPROTO_V6 #define __UAPI_DEF_IPPROTO_V6 1 +#endif +#ifndef __UAPI_DEF_IPV6_OPTIONS #define __UAPI_DEF_IPV6_OPTIONS 1 +#endif +#ifndef __UAPI_DEF_IN6_PKTINFO #define __UAPI_DEF_IN6_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP6_MTUINFO #define __UAPI_DEF_IP6_MTUINFO 1 +#endif /* Definitions for ipx.h */ +#ifndef __UAPI_DEF_SOCKADDR_IPX #define __UAPI_DEF_SOCKADDR_IPX 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION #define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION #define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_CONFIG_DATA #define __UAPI_DEF_IPX_CONFIG_DATA 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEF #define __UAPI_DEF_IPX_ROUTE_DEF 1 +#endif /* Definitions for xattr.h */ +#ifndef __UAPI_DEF_XATTR #define __UAPI_DEF_XATTR 1 +#endif #endif /* __GLIBC__ */ -- cgit v1.2.3-71-gd317 From 6926e041a8920c8ec27e4e155efa760aa01551fd Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 3 Jan 2018 23:14:21 +0100 Subject: uapi/if_ether.h: prevent redefinition of struct ethhdr Musl provides its own ethhdr struct definition. Add a guard to prevent its definition of the appropriate musl header has already been included. glibc does not implement this header, but when glibc will implement this they can just define __UAPI_DEF_ETHHDR 0 to make it work with the kernel. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 3 +++ include/uapi/linux/libc-compat.h | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 3ee3bf7c8526..144de4d2f385 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,6 +23,7 @@ #define _UAPI_LINUX_IF_ETHER_H #include +#include /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -149,11 +150,13 @@ * This is an Ethernet frame header. */ +#if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ } __attribute__((packed)); +#endif #endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 8254c937c9f4..fc29efaa918c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -264,4 +264,10 @@ #endif /* __GLIBC__ */ +/* Definitions for if_ether.h */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #endif /* _UAPI_LIBC_COMPAT_H */ -- cgit v1.2.3-71-gd317 From e53927393b9987b7c986b6364c27111077f0ea3e Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 5 Jan 2018 14:16:14 +0100 Subject: lightnvm: set target over-provision on create ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow to set the over-provision percentage on target creation. In case that the value is not provided, fall back to the default value set by the target. In pblk, set the default OP to 11% of the total size of the device Signed-off-by: Javier González Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 106 +++++++++++++++++++++++++++++++++--------- drivers/lightnvm/pblk-init.c | 5 +- drivers/lightnvm/pblk.h | 2 + include/linux/lightnvm.h | 6 +++ include/uapi/linux/lightnvm.h | 9 ++++ 5 files changed, 104 insertions(+), 24 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index d5f231c9339e..dcc9e621e651 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -140,7 +140,8 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear) } static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, - int lun_begin, int lun_end) + u16 lun_begin, u16 lun_end, + u16 op) { struct nvm_tgt_dev *tgt_dev = NULL; struct nvm_dev_map *dev_rmap = dev->rmap; @@ -219,6 +220,7 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, tgt_dev->geo.nr_chnls = nr_chnls; tgt_dev->geo.all_luns = nr_luns; tgt_dev->geo.nr_luns = (lun_balanced) ? prev_nr_luns : -1; + tgt_dev->geo.op = op; tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; tgt_dev->q = dev->q; tgt_dev->map = dev_map; @@ -266,9 +268,57 @@ static struct nvm_tgt_type *nvm_find_target_type(const char *name) return tt; } +static int nvm_config_check_luns(struct nvm_geo *geo, int lun_begin, + int lun_end) +{ + if (lun_begin > lun_end || lun_end >= geo->all_luns) { + pr_err("nvm: lun out of bound (%u:%u > %u)\n", + lun_begin, lun_end, geo->all_luns - 1); + return -EINVAL; + } + + return 0; +} + +static int __nvm_config_simple(struct nvm_dev *dev, + struct nvm_ioctl_create_simple *s) +{ + struct nvm_geo *geo = &dev->geo; + + if (s->lun_begin == -1 && s->lun_end == -1) { + s->lun_begin = 0; + s->lun_end = geo->all_luns - 1; + } + + return nvm_config_check_luns(geo, s->lun_begin, s->lun_end); +} + +static int __nvm_config_extended(struct nvm_dev *dev, + struct nvm_ioctl_create_extended *e) +{ + struct nvm_geo *geo = &dev->geo; + + if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) { + e->lun_begin = 0; + e->lun_end = dev->geo.all_luns - 1; + } + + /* op not set falls into target's default */ + if (e->op == 0xFFFF) + e->op = NVM_TARGET_DEFAULT_OP; + + if (e->op < NVM_TARGET_MIN_OP || + e->op > NVM_TARGET_MAX_OP) { + pr_err("nvm: invalid over provisioning value\n"); + return -EINVAL; + } + + return nvm_config_check_luns(geo, e->lun_begin, e->lun_end); +} + static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) { - struct nvm_ioctl_create_simple *s = &create->conf.s; + struct nvm_ioctl_create_extended e; struct request_queue *tqueue; struct gendisk *tdisk; struct nvm_tgt_type *tt; @@ -277,6 +327,28 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) void *targetdata; int ret; + switch (create->conf.type) { + case NVM_CONFIG_TYPE_SIMPLE: + ret = __nvm_config_simple(dev, &create->conf.s); + if (ret) + return ret; + + e.lun_begin = create->conf.s.lun_begin; + e.lun_end = create->conf.s.lun_end; + e.op = NVM_TARGET_DEFAULT_OP; + break; + case NVM_CONFIG_TYPE_EXTENDED: + ret = __nvm_config_extended(dev, &create->conf.e); + if (ret) + return ret; + + e = create->conf.e; + break; + default: + pr_err("nvm: config type not valid\n"); + return -EINVAL; + } + tt = nvm_find_target_type(create->tgttype); if (!tt) { pr_err("nvm: target type %s not found\n", create->tgttype); @@ -289,7 +361,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return -EINVAL; } - ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end); + ret = nvm_reserve_luns(dev, e.lun_begin, e.lun_end); if (ret) return ret; @@ -299,7 +371,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_reserve; } - tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end); + tgt_dev = nvm_create_tgt_dev(dev, e.lun_begin, e.lun_end, e.op); if (!tgt_dev) { pr_err("nvm: could not create target device\n"); ret = -ENOMEM; @@ -369,7 +441,7 @@ err_dev: err_t: kfree(t); err_reserve: - nvm_release_luns_err(dev, s->lun_begin, s->lun_end); + nvm_release_luns_err(dev, e.lun_begin, e.lun_end); return ret; } @@ -949,7 +1021,6 @@ EXPORT_SYMBOL(nvm_unregister); static int __nvm_configure_create(struct nvm_ioctl_create *create) { struct nvm_dev *dev; - struct nvm_ioctl_create_simple *s; down_write(&nvm_lock); dev = nvm_find_nvm_dev(create->dev); @@ -960,23 +1031,6 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) return -EINVAL; } - if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { - pr_err("nvm: config type not valid\n"); - return -EINVAL; - } - s = &create->conf.s; - - if (s->lun_begin == -1 && s->lun_end == -1) { - s->lun_begin = 0; - s->lun_end = dev->geo.all_luns - 1; - } - - if (s->lun_begin > s->lun_end || s->lun_end >= dev->geo.all_luns) { - pr_err("nvm: lun out of bound (%u:%u > %u)\n", - s->lun_begin, s->lun_end, dev->geo.all_luns - 1); - return -EINVAL; - } - return nvm_create_tgt(dev, create); } @@ -1076,6 +1130,12 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg) if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create))) return -EFAULT; + if (create.conf.type == NVM_CONFIG_TYPE_EXTENDED && + create.conf.e.rsv != 0) { + pr_err("nvm: reserved config field in use\n"); + return -EINVAL; + } + create.dev[DISK_NAME_LEN - 1] = '\0'; create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0'; create.tgtname[DISK_NAME_LEN - 1] = '\0'; diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index c8a718249e26..533f6908e238 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -585,7 +585,10 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) sector_t provisioned; int sec_meta, blk_meta; - pblk->op = 20; + if (geo->op == NVM_TARGET_DEFAULT_OP) + pblk->op = PBLK_DEFAULT_OP; + else + pblk->op = geo->op; provisioned = nr_free_blks; provisioned *= (100 - pblk->op); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 1e719d4181ce..19e622c65e92 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -54,6 +54,8 @@ /* Static pool sizes */ #define PBLK_GEN_WS_POOL_SIZE (2) +#define PBLK_DEFAULT_OP (11) + enum { PBLK_READ = READ, PBLK_WRITE = WRITE,/* Write from write buffer */ diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 8e43bfebd38d..7f4b60abdf27 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -218,6 +218,10 @@ struct nvm_target { #define ADDR_EMPTY (~0ULL) +#define NVM_TARGET_DEFAULT_OP (101) +#define NVM_TARGET_MIN_OP (3) +#define NVM_TARGET_MAX_OP (80) + #define NVM_VERSION_MAJOR 1 #define NVM_VERSION_MINOR 0 #define NVM_VERSION_PATCH 0 @@ -291,6 +295,8 @@ struct nvm_geo { int max_rq_size; + int op; + struct nvm_addr_format ppaf; /* Legacy 1.2 specific geometry */ diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index 42d1a434af29..f9a1be7fc696 100644 --- a/include/uapi/linux/lightnvm.h +++ b/include/uapi/linux/lightnvm.h @@ -75,14 +75,23 @@ struct nvm_ioctl_create_simple { __u32 lun_end; }; +struct nvm_ioctl_create_extended { + __u16 lun_begin; + __u16 lun_end; + __u16 op; + __u16 rsv; +}; + enum { NVM_CONFIG_TYPE_SIMPLE = 0, + NVM_CONFIG_TYPE_EXTENDED = 1, }; struct nvm_ioctl_create_conf { __u32 type; union { struct nvm_ioctl_create_simple s; + struct nvm_ioctl_create_extended e; }; }; -- cgit v1.2.3-71-gd317 From 81df978c49379481716aef591de77313c286d747 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:48 +0100 Subject: perf: Add sample_id to PERF_RECORD_ITRACE_START event comment Adding missing sample_id line into PERF_RECORD_ITRACE_START event comment. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-5-jolsa@kernel.org [ Update the tools/include/uapi/linux copy ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 1 + tools/include/uapi/linux/perf_event.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b9a4953018ed..8bb66e8da945 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -864,6 +864,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b9a4953018ed..8bb66e8da945 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -864,6 +864,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, -- cgit v1.2.3-71-gd317 From 972c14884728bf5f69ec69cfb1beeec1a9cd29ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:51 +0100 Subject: perf: Update PERF_RECORD_MISC_* comment for perf_event_header::misc bit 13 The perf_event_header::misc bit 13 is shared on different events and next patch is adding yet another bit 13 user. Updating the comment to make it more structured and clear which events use bit 13. Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20180107160356.28203-8-jolsa@kernel.org [ Update the tools/include/uapi/linux copy ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 9 ++++++--- tools/include/uapi/linux/perf_event.h | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 8bb66e8da945..c77c9a2ebbbb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 8bb66e8da945..c77c9a2ebbbb 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) -- cgit v1.2.3-71-gd317 From 34be39305a77b8b1ec9f279163c7cdb6cc719b91 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 12 Dec 2017 12:10:24 +0100 Subject: sched/deadline: Implement "runtime overrun signal" support This patch adds the possibility of getting the delivery of a SIGXCPU signal whenever there is a runtime overrun. The request is done through the sched_flags field within the sched_attr structure. Forward port of https://lkml.org/lkml/2009/10/16/170 Tested-by: Mathieu Poirier Signed-off-by: Juri Lelli Signed-off-by: Claudio Scordino Signed-off-by: Luca Abeni Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/1513077024-25461-1-git-send-email-claudio@evidence.eu.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ include/uapi/linux/sched.h | 5 +++++ kernel/sched/core.c | 3 +-- kernel/sched/deadline.c | 7 +++++++ kernel/time/posix-cpu-timers.c | 18 ++++++++++++++++++ 5 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d2588263a989..274a449c805a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -472,11 +472,15 @@ struct sched_dl_entity { * has not been executed yet. This flag is useful to avoid race * conditions between the inactive timer handler and the wakeup * code. + * + * @dl_overrun tells if the task asked to be informed about runtime + * overruns. */ unsigned int dl_throttled : 1; unsigned int dl_boosted : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; + unsigned int dl_overrun : 1; /* * Bandwidth enforcement timer. Each -deadline task has its diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 30a9e51bbb1e..22627f80063e 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -49,5 +49,10 @@ */ #define SCHED_FLAG_RESET_ON_FORK 0x01 #define SCHED_FLAG_RECLAIM 0x02 +#define SCHED_FLAG_DL_OVERRUN 0x04 + +#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \ + SCHED_FLAG_RECLAIM | \ + SCHED_FLAG_DL_OVERRUN) #endif /* _UAPI_LINUX_SCHED_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a794f8155cd5..e28391bf8b04 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4085,8 +4085,7 @@ recheck: return -EINVAL; } - if (attr->sched_flags & - ~(SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_RECLAIM)) + if (attr->sched_flags & ~SCHED_FLAG_ALL) return -EINVAL; /* diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 2473736c7616..4c666dbe5038 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1155,6 +1155,12 @@ static void update_curr_dl(struct rq *rq) throttle: if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) { dl_se->dl_throttled = 1; + + /* If requested, inform the user about runtime overruns. */ + if (dl_runtime_exceeded(dl_se) && + (dl_se->flags & SCHED_FLAG_DL_OVERRUN)) + dl_se->dl_overrun = 1; + __dequeue_task_dl(rq, curr, 0); if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); @@ -2566,6 +2572,7 @@ void __dl_clear_params(struct task_struct *p) dl_se->dl_throttled = 0; dl_se->dl_yielded = 0; dl_se->dl_non_contending = 0; + dl_se->dl_overrun = 0; } bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 1f27887aa194..cf50ea34dbd1 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "posix-timers.h" @@ -791,6 +792,14 @@ check_timers_list(struct list_head *timers, return 0; } +static inline void check_dl_overrun(struct task_struct *tsk) +{ + if (tsk->dl.dl_overrun) { + tsk->dl.dl_overrun = 0; + __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); + } +} + /* * Check for any per-thread CPU timers that have fired and move them off * the tsk->cpu_timers[N] list onto the firing list. Here we update the @@ -804,6 +813,9 @@ static void check_thread_timers(struct task_struct *tsk, u64 expires; unsigned long soft; + if (dl_task(tsk)) + check_dl_overrun(tsk); + /* * If cputime_expires is zero, then there are no active * per thread CPU timers. @@ -906,6 +918,9 @@ static void check_process_timers(struct task_struct *tsk, struct task_cputime cputime; unsigned long soft; + if (dl_task(tsk)) + check_dl_overrun(tsk); + /* * If cputimer is not running, then there are no active * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU). @@ -1111,6 +1126,9 @@ static inline int fastpath_timer_check(struct task_struct *tsk) return 1; } + if (dl_task(tsk) && tsk->dl.dl_overrun) + return 1; + return 0; } -- cgit v1.2.3-71-gd317 From 95a332088ecb113c2e8753fa3f1df9b0dda9beec Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 12 Jan 2018 12:29:22 -0800 Subject: Revert "openvswitch: Add erspan tunnel support." This reverts commit ceaa001a170e43608854d5290a48064f57b565ed. The OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS attr should be designed as a nested attribute to support all ERSPAN v1 and v2's fields. The current attr is a be32 supporting only one field. Thus, this patch reverts it and later patch will redo it using nested attr. Signed-off-by: William Tu Cc: Jiri Benc Cc: Pravin Shelar Acked-by: Jiri Benc Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 - net/openvswitch/flow_netlink.c | 51 +--------------------------------------- 2 files changed, 1 insertion(+), 51 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 4265d7f9e1f2..dcfab5e3b55c 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -363,7 +363,6 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */ OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ OVS_TUNNEL_KEY_ATTR_PAD, - OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* be32 ERSPAN index. */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 624ea74353dd..f143908b651d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,7 +49,6 @@ #include #include #include -#include #include "flow_netlink.h" @@ -334,8 +333,7 @@ size_t ovs_tun_key_attr_size(void) * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ - + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */ - + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ + + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } static size_t ovs_nsh_key_attr_size(void) @@ -402,7 +400,6 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] .next = ovs_vxlan_ext_key_lens }, [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, - [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) }, }; static const struct ovs_len_tbl @@ -634,33 +631,6 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, return 0; } -static int erspan_tun_opt_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask, - bool log) -{ - unsigned long opt_key_offset; - struct erspan_metadata opts; - - BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); - - memset(&opts, 0, sizeof(opts)); - opts.index = nla_get_be32(attr); - - /* Index has only 20-bit */ - if (ntohl(opts.index) & ~INDEX_MASK) { - OVS_NLERR(log, "ERSPAN index number %x too large.", - ntohl(opts.index)); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask); - opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); - SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), - is_mask); - - return 0; -} - static int ip_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -768,19 +738,6 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_PAD: break; - case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: - if (opts_type) { - OVS_NLERR(log, "Multiple metadata blocks provided"); - return -EINVAL; - } - - err = erspan_tun_opt_from_nlattr(a, match, is_mask, log); - if (err) - return err; - - tun_flags |= TUNNEL_ERSPAN_OPT; - opts_type = type; - break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); @@ -905,10 +862,6 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, else if (output->tun_flags & TUNNEL_VXLAN_OPT && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; - else if (output->tun_flags & TUNNEL_ERSPAN_OPT && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, - ((struct erspan_metadata *)tun_opts)->index)) - return -EMSGSIZE; } return 0; @@ -2533,8 +2486,6 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: break; - case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: - break; } }; -- cgit v1.2.3-71-gd317 From 1ff2775a32ef105d9bdbb5f00f20293244a2accc Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Fix up header split Due to the '#ifdef __KERNEL__' being located in the wrong place, some definitions from the kernel API were placed in the UAPI header during the scripted header split. Fix this. Also, remove the duplicate comment which is only relevant to the UAPI header. Fixes: 607ca46e97a1 ("UAPI: (Scripted) Disintegrate include/linux") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- include/linux/nubus.h | 27 +++++++++++++++++++++++---- include/uapi/linux/nubus.h | 23 ----------------------- 2 files changed, 23 insertions(+), 27 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/nubus.h b/include/linux/nubus.h index d8d63370a28c..55b9a4569a69 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -5,16 +5,28 @@ Originally written by Alan Cox. Hacked to death by C. Scott Ananian and David Huggins-Daines. - - Some of the constants in here are from the corresponding - NetBSD/OpenBSD header file, by Allen Briggs. We figured out the - rest of them on our own. */ +*/ + #ifndef LINUX_NUBUS_H #define LINUX_NUBUS_H #include #include +struct nubus_dir { + unsigned char *base; + unsigned char *ptr; + int done; + int mask; +}; + +struct nubus_dirent { + unsigned char *base; + unsigned char type; + __u32 data; /* Actually 24 bits used */ + int mask; +}; + struct nubus_board { struct nubus_board* next; struct nubus_dev* first_dev; @@ -130,4 +142,11 @@ void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent *dirent, unsigned int len); void nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, unsigned int maxlen); + +/* Returns a pointer to the "standard" slot space. */ +static inline void *nubus_slot_addr(int slot) +{ + return (void *)(0xF0000000 | (slot << 24)); +} + #endif /* LINUX_NUBUS_H */ diff --git a/include/uapi/linux/nubus.h b/include/uapi/linux/nubus.h index f3776cc80f4d..48031e7858f1 100644 --- a/include/uapi/linux/nubus.h +++ b/include/uapi/linux/nubus.h @@ -221,27 +221,4 @@ enum nubus_display_res_id { NUBUS_RESID_SIXTHMODE = 0x0085 }; -struct nubus_dir -{ - unsigned char *base; - unsigned char *ptr; - int done; - int mask; -}; - -struct nubus_dirent -{ - unsigned char *base; - unsigned char type; - __u32 data; /* Actually 24bits used */ - int mask; -}; - - -/* We'd like to get rid of this eventually. Only daynaport.c uses it now. */ -static inline void *nubus_slot_addr(int slot) -{ - return (void *)(0xF0000000|(slot<<24)); -} - #endif /* _UAPILINUX_NUBUS_H */ -- cgit v1.2.3-71-gd317 From 3214d01f139b7544e870fc0b7fcce8da13c1cb51 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jan 2018 16:06:47 +1100 Subject: KVM: PPC: Book3S: Provide information about hardware/firmware CVE workarounds This adds a new ioctl, KVM_PPC_GET_CPU_CHAR, that gives userspace information about the underlying machine's level of vulnerability to the recently announced vulnerabilities CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754, and whether the machine provides instructions to assist software to work around the vulnerabilities. The ioctl returns two u64 words describing characteristics of the CPU and required software behaviour respectively, plus two mask words which indicate which bits have been filled in by the kernel, for extensibility. The bit definitions are the same as for the new H_GET_CPU_CHARACTERISTICS hypercall. There is also a new capability, KVM_CAP_PPC_GET_CPU_CHAR, which indicates whether the new ioctl is available. Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 46 +++++++++++++ arch/powerpc/include/uapi/asm/kvm.h | 25 +++++++ arch/powerpc/kvm/powerpc.c | 131 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 3 + 4 files changed, 205 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 57d3ee9e4bde..fc3ae951bc07 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3403,6 +3403,52 @@ invalid, if invalid pages are written to (e.g. after the end of memory) or if no page table is present for the addresses (e.g. when using hugepages). +4.108 KVM_PPC_GET_CPU_CHAR + +Capability: KVM_CAP_PPC_GET_CPU_CHAR +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_ppc_cpu_char (out) +Returns: 0 on successful completion + -EFAULT if struct kvm_ppc_cpu_char cannot be written + +This ioctl gives userspace information about certain characteristics +of the CPU relating to speculative execution of instructions and +possible information leakage resulting from speculative execution (see +CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). The information is +returned in struct kvm_ppc_cpu_char, which looks like this: + +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +For extensibility, the character_mask and behaviour_mask fields +indicate which bits of character and behaviour have been filled in by +the kernel. If the set of defined bits is extended in future then +userspace will be able to tell whether it is running on a kernel that +knows about the new bits. + +The character field describes attributes of the CPU which can help +with preventing inadvertent information disclosure - specifically, +whether there is an instruction to flash-invalidate the L1 data cache +(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set +to a mode where entries can only be used by the thread that created +them, whether the bcctr[l] instruction prevents speculation, and +whether a speculation barrier instruction (ori 31,31,0) is provided. + +The behaviour field describes actions that software should take to +prevent inadvertent information disclosure, and thus describes which +vulnerabilities the hardware is subject to; specifically whether the +L1 data cache should be flushed when returning to user mode from the +kernel, and whether a speculation barrier should be placed between an +array bounds check and the array access. + +These fields use the same bit definitions as the new +H_GET_CPU_CHARACTERISTICS hypercall. + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 61d6049f4c1e..637b7263cb86 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info { __u32 ap_encodings[8]; }; +/* For KVM_PPC_GET_CPU_CHAR */ +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +/* + * Values for character and character_mask. + * These are identical to the values used by H_GET_CPU_CHARACTERISTICS. + */ +#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63) +#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60) +#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59) +#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) +#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) +#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) + +#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) +#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) +#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1915e86cef6f..0a7c88786ec0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -39,6 +39,10 @@ #include #include #include +#ifdef CONFIG_PPC_PSERIES +#include +#include +#endif #include "timing.h" #include "irq.h" @@ -548,6 +552,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif + case KVM_CAP_PPC_GET_CPU_CHAR: r = 1; break; @@ -1759,6 +1764,124 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return r; } +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * These functions check whether the underlying hardware is safe + * against attacks based on observing the effects of speculatively + * executed instructions, and whether it supplies instructions for + * use in workarounds. The information comes from firmware, either + * via the device tree on powernv platforms or from an hcall on + * pseries platforms. + */ +#ifdef CONFIG_PPC_PSERIES +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct h_cpu_char_result c; + unsigned long rc; + + if (!machine_is(pseries)) + return -ENOTTY; + + rc = plpar_get_cpu_characteristics(&c); + if (rc == H_SUCCESS) { + cp->character = c.character; + cp->behaviour = c.behaviour; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | + KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + } + return 0; +} +#else +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + return -ENOTTY; +} +#endif + +static inline bool have_fw_feat(struct device_node *fw_features, + const char *state, const char *name) +{ + struct device_node *np; + bool r = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + r = of_property_read_bool(np, state); + of_node_put(np); + } + return r; +} + +static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct device_node *np, *fw_features; + int r; + + memset(cp, 0, sizeof(*cp)); + r = pseries_get_cpu_char(cp); + if (r != -ENOTTY) + return r; + + np = of_find_node_by_name(NULL, "ibm,opal"); + if (np) { + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + if (!fw_features) + return 0; + if (have_fw_feat(fw_features, "enabled", + "inst-spec-barrier-ori31,31,0")) + cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31; + if (have_fw_feat(fw_features, "enabled", + "fw-bcctrl-serialized")) + cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-ori30,30,0")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-trig2")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2; + if (have_fw_feat(fw_features, "enabled", + "fw-l1d-thread-split")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV; + if (have_fw_feat(fw_features, "enabled", + "fw-count-cache-disabled")) + cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + + if (have_fw_feat(fw_features, "enabled", + "speculation-policy-favor-security")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY; + if (!have_fw_feat(fw_features, "disabled", + "needs-l1d-flush-msr-pr-0-to-1")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR; + if (!have_fw_feat(fw_features, "disabled", + "needs-spec-barrier-for-bound-checks")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + + of_node_put(fw_features); + } + + return 0; +} +#endif + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1861,6 +1984,14 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_GET_CPU_CHAR: { + struct kvm_ppc_cpu_char cpuchar; + + r = kvmppc_get_cpu_char(&cpuchar); + if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar))) + r = -EFAULT; + break; + } default: { struct kvm *kvm = filp->private_data; r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 496e59a2738b..7a99b98cf88e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_AIS_MIGRATION 150 +#define KVM_CAP_PPC_GET_CPU_CHAR 151 #ifdef KVM_CAP_IRQ_ROUTING @@ -1261,6 +1262,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) /* Available with KVM_CAP_PPC_RADIX_MMU */ #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) +/* Available with KVM_CAP_PPC_GET_CPU_CHAR */ +#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3-71-gd317 From 35b3fde6203b932b2b1a5b53b3d8808abc9c4f60 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 17 Jan 2018 14:44:34 +0100 Subject: KVM: s390: wire up bpb feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new firmware interfaces for branch prediction behaviour changes are transparently available for the guest. Nevertheless, there is new state attached that should be migrated and properly resetted. Provide a mechanism for handling reset, migration and VSIE. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck [Changed capability number to 152. - Radim] Signed-off-by: Radim Krčmář --- arch/s390/include/asm/kvm_host.h | 3 ++- arch/s390/include/uapi/asm/kvm.h | 5 ++++- arch/s390/kvm/kvm-s390.c | 12 ++++++++++++ arch/s390/kvm/vsie.c | 10 ++++++++++ include/uapi/linux/kvm.h | 1 + 5 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index e14f381757f6..c1b0a9ac1dc8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -207,7 +207,8 @@ struct kvm_s390_sie_block { __u16 ipa; /* 0x0056 */ __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ - __u8 reserved60; /* 0x0060 */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ #define ECB_GS 0x40 #define ECB_TE 0x10 #define ECB_SRSI 0x04 diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 38535a57fef8..4cdaa55fabfe 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -224,6 +224,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_GSCB (1UL << 9) +#define KVM_SYNC_BPBC (1UL << 10) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) @@ -247,7 +248,9 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding1[52]; /* riccb needs to be 64byte aligned */ + __u8 bpbc : 1; /* bp mode */ + __u8 reserved2 : 7; + __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ union { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2c93cbbcd15e..2598cf243b86 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -421,6 +421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_GS: r = test_facility(133); break; + case KVM_CAP_S390_BPB: + r = test_facility(82); + break; default: r = 0; } @@ -2198,6 +2201,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 82)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; if (test_kvm_facility(vcpu->kvm, 133)) vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; /* fprs can be synchronized via vrs, even if the guest has no vx. With @@ -2339,6 +2344,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) @@ -3298,6 +3304,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; vcpu->arch.gs_enabled = 1; } + if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && + test_kvm_facility(vcpu->kvm, 82)) { + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; + vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; + } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); /* save host (userspace) fprs/vrs */ @@ -3344,6 +3355,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->s.regs.pft = vcpu->arch.pfault_token; kvm_run->s.regs.pfs = vcpu->arch.pfault_select; kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; + kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); /* Save guest register state */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 5d6ae0326d9e..751348348477 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -223,6 +223,12 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) memcpy(scb_o->gcr, scb_s->gcr, 128); scb_o->pp = scb_s->pp; + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) { + scb_o->fpf &= ~FPF_BPBC; + scb_o->fpf |= scb_s->fpf & FPF_BPBC; + } + /* interrupt intercept */ switch (scb_s->icptcode) { case ICPT_PROGI: @@ -265,6 +271,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ecb3 = 0; scb_s->ecd = 0; scb_s->fac = 0; + scb_s->fpf = 0; rc = prepare_cpuflags(vcpu, vsie_page); if (rc) @@ -324,6 +331,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix_unmapped(vsie_page); scb_s->ecb |= scb_o->ecb & ECB_TE; } + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) + scb_s->fpf |= scb_o->fpf & FPF_BPBC; /* SIMD */ if (test_kvm_facility(vcpu->kvm, 129)) { scb_s->eca |= scb_o->eca & ECA_VX; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7a99b98cf88e..8fb90a0819c3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -933,6 +933,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_AIS_MIGRATION 150 #define KVM_CAP_PPC_GET_CPU_CHAR 151 +#define KVM_CAP_S390_BPB 152 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-71-gd317 From ad8bc4d005576e3f380ba2dab24c183519f4e9fa Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 6 Dec 2017 11:40:10 +0800 Subject: btrfs: put btrfs_ioctl_vol_args_v2 related defines together Just a code spatial rearrangement, no functional change. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index ce615b75e855..c8d99b9ca550 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -33,7 +33,12 @@ struct btrfs_ioctl_vol_args { char name[BTRFS_PATH_NAME_MAX + 1]; }; -#define BTRFS_DEVICE_PATH_NAME_MAX 1024 +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 +#define BTRFS_SUBVOL_NAME_MAX 4039 + +#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) #define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3) @@ -101,11 +106,7 @@ struct btrfs_ioctl_qgroup_limit_args { * - BTRFS_IOC_SUBVOL_GETFLAGS * - BTRFS_IOC_SUBVOL_SETFLAGS */ -#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) -#define BTRFS_SUBVOL_RDONLY (1ULL << 1) -#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) -#define BTRFS_SUBVOL_NAME_MAX 4039 struct btrfs_ioctl_vol_args_v2 { __s64 fd; __u64 transid; -- cgit v1.2.3-71-gd317 From e2731e55884f2138a252b0a3d7b24d57e49c3c59 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 9 Jan 2018 09:05:41 +0800 Subject: btrfs: define SUPER_FLAG_METADUMP_V2 btrfs-progs uses super flag bit BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34). So just define that in kernel so that we know its been used. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 ++- include/uapi/linux/btrfs_tree.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 73ab44159d82..1916016e9fbb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -61,7 +61,8 @@ BTRFS_HEADER_FLAG_RELOC |\ BTRFS_SUPER_FLAG_ERROR |\ BTRFS_SUPER_FLAG_SEEDING |\ - BTRFS_SUPER_FLAG_METADUMP) + BTRFS_SUPER_FLAG_METADUMP |\ + BTRFS_SUPER_FLAG_METADUMP_V2) static const struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 6d6e5da51527..38ab0e06259a 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -456,6 +456,7 @@ struct btrfs_free_space_header { #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) +#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) /* -- cgit v1.2.3-71-gd317 From 98820a7e244b17b8a4d9e9d1ff9d3b4e5bfca58b Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 9 Jan 2018 09:05:42 +0800 Subject: btrfs: add support for SUPER_FLAG_CHANGING_FSID The UUID change by btrfstune sets SUPER_FLAG_CHANGING_FSID and resets it only when changing fsid is complete. Its not a good idea to mount the device anything in between, reading metadata blocks would fail with UUID mismatch. This patch doesn't add SUPER_FLAG_CHANGING_FSID into BTRFS_SUPER_FLAG_SUPP list, so mount will fail (along with the fix in the next patch) when SUPER_FLAG_CHANGING_FSID is set. Signed-off-by: Anand Jain Reviewed-by: Qu Wenruo Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 38ab0e06259a..aff1356c2bb8 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -457,6 +457,7 @@ struct btrfs_free_space_header { #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) #define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) +#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) /* -- cgit v1.2.3-71-gd317