From 2bbbd96357ce76cc45ec722c00f654aa7b189112 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Mon, 28 Aug 2017 17:25:16 +0200 Subject: bus: mbus: fix window size calculation for 4GB windows At least the Armada XP SoC supports 4GB on a single DRAM window. Because the size register values contain the actual size - 1, the MSB is set in that case. For example, the SDRAM window's control register's value is 0xffffffe1 for 4GB (bits 31 to 24 contain the size). The MBUS driver reads back each window's size from registers and calculates the actual size as (control_reg | ~DDR_SIZE_MASK) + 1, which overflows for 32 bit values, resulting in other miscalculations further on (a bad RAM window for the CESA crypto engine calculated by mvebu_mbus_setup_cpu_target_nooverlap() in my case). This patch changes the type in 'struct mbus_dram_window' from u32 to u64, which allows us to keep using the same register calculation code in most MBUS-using drivers (which calculate ->size - 1 again). Fixes: fddddb52a6c4 ("bus: introduce an Marvell EBU MBus driver") CC: stable@vger.kernel.org Signed-off-by: Jan Luebbe Signed-off-by: Gregory CLEMENT --- include/linux/mbus.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 0d3f14fd2621..4773145246ed 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -31,8 +31,8 @@ struct mbus_dram_target_info struct mbus_dram_window { u8 cs_index; u8 mbus_attr; - u32 base; - u32 size; + u64 base; + u64 size; } cs[4]; }; -- cgit v1.2.3-71-gd317 From 30ae9610d275f8f03f5bf7612ce71d8af6fc400b Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 9 Oct 2017 11:46:55 -0500 Subject: irqchip/gic-v3-its: Add missing changes to support 52bit physical address The current ITS driver works fine as long as normal memory and GICR regions are located within the lower 48bit (>=0 && <2^48) physical address space. Some of the registers GICR_PEND/PROP, GICR_VPEND/VPROP and GITS_CBASER are handled properly but not all when configuring the hardware with 52bit physical address. This patch does the following changes to support 52bit PA. -Handle 52bit PA in GITS_BASERn. -Fix ITT_addr width to 52bits, bits[51:8]. -Fix RDbase width to 52bits, bits[51:16]. -Fix VPT_addr width to 52bits, bits[51:16]. Definition of the GITS_BASERn register when ITS PageSize is 64KB: -Bits[47:16] of the register provide bits[47:16] of the table PA. -Bits[15:12] of the register provide bits[51:48] of the table PA. -Bits[15:00] of the base physical address are 0. Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 26 +++++++++++++++++++++----- include/linux/irqchip/arm-gic-v3.h | 2 ++ 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 991cf33750c6..e88395605e32 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -312,7 +312,7 @@ static void its_encode_size(struct its_cmd_block *cmd, u8 size) static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) { - its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 50, 8); + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); } static void its_encode_valid(struct its_cmd_block *cmd, int valid) @@ -322,7 +322,7 @@ static void its_encode_valid(struct its_cmd_block *cmd, int valid) static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) { - its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 50, 16); + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); } static void its_encode_collection(struct its_cmd_block *cmd, u16 col) @@ -362,7 +362,7 @@ static void its_encode_its_list(struct its_cmd_block *cmd, u16 its_list) static void its_encode_vpt_addr(struct its_cmd_block *cmd, u64 vpt_pa) { - its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 50, 16); + its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 51, 16); } static void its_encode_vpt_size(struct its_cmd_block *cmd, u8 vpt_size) @@ -1482,9 +1482,9 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, u64 val = its_read_baser(its, baser); u64 esz = GITS_BASER_ENTRY_SIZE(val); u64 type = GITS_BASER_TYPE(val); + u64 baser_phys, tmp; u32 alloc_pages; void *base; - u64 tmp; retry_alloc_baser: alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); @@ -1500,8 +1500,24 @@ retry_alloc_baser: if (!base) return -ENOMEM; + baser_phys = virt_to_phys(base); + + /* Check if the physical address of the memory is above 48bits */ + if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && (baser_phys >> 48)) { + + /* 52bit PA is supported only when PageSize=64K */ + if (psz != SZ_64K) { + pr_err("ITS: no 52bit PA support when psz=%d\n", psz); + free_pages((unsigned long)base, order); + return -ENXIO; + } + + /* Convert 52bit PA to 48bit field */ + baser_phys = GITS_BASER_PHYS_52_to_48(baser_phys); + } + retry_baser: - val = (virt_to_phys(base) | + val = (baser_phys | (type << GITS_BASER_TYPE_SHIFT) | ((esz - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | ((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT) | diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1ea576c8126f..14b74f22d43c 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -372,6 +372,8 @@ #define GITS_BASER_ENTRY_SIZE_SHIFT (48) #define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) #define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) -- cgit v1.2.3-71-gd317 From 20608924cc2e6bdeaf6f58ccbe9ddfe12dbfa082 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 4 Oct 2017 14:26:26 +0200 Subject: genirq: generic chip: Add irq_gc_mask_disable_and_ack_set() The irq_gc_mask_disable_reg_and_ack() function name implies that it provides the combined functions of irq_gc_mask_disable_reg() and irq_gc_ack(). However, the implementation does not actually do that since it writes the mask instead of the disable register. It also does not maintain the mask cache which makes it inappropriate to use with other masking functions. In addition, commit 659fb32d1b67 ("genirq: replace irq_gc_ack() with {set,clr}_bit variants (fwd)") effectively renamed irq_gc_ack() to irq_gc_ack_set_bit() so this function probably should have also been renamed at that time. The generic chip code currently provides three functions for use with the irq_mask member of the irq_chip structure and two functions for use with the irq_ack member of the irq_chip structure. These functions could be combined into six functions for use with the irq_mask_ack member of the irq_chip structure. However, since only one of the combinations is currently used, only the function irq_gc_mask_disable_and_ack_set() is added by this commit. The '_reg' and '_bit' portions of the base function name were left out of the new combined function name in an attempt to keep the function name length manageable with the 80 character source code line length while still allowing the distinct aspects of each combination to be captured by the name. If other combinations are desired in the future please add them to the irq generic chip library at that time. Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 + kernel/irq/generic-chip.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index d4728bf6a537..494d328f7051 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1010,6 +1010,7 @@ void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); +void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 5270a54b9fa4..ec5fe9a0cb05 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -150,6 +150,31 @@ void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) irq_gc_unlock(gc); } +/** + * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt + * @d: irq_data + * + * This generic implementation of the irq_mask_ack method is for chips + * with separate enable/disable registers instead of a single mask + * register and where a pending interrupt is acknowledged by setting a + * bit. + * + * Note: This is the only permutation currently used. Similar generic + * functions should be added here if other permutations are required. + */ +void irq_gc_mask_disable_and_ack_set(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, mask, ct->regs.ack); + irq_gc_unlock(gc); +} + /** * irq_gc_eoi - EOI interrupt * @d: irq_data -- cgit v1.2.3-71-gd317 From 0d08af35f16a0cc418ad2afde3bc5f70ace82705 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 4 Oct 2017 14:28:17 +0200 Subject: genirq: generic chip: remove irq_gc_mask_disable_reg_and_ack() Any usage of the irq_gc_mask_disable_reg_and_ack() function has been replaced with the desired functionality. The incorrect and ambiguously named function is removed here to prevent accidental misuse. Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 - kernel/irq/generic-chip.c | 16 ---------------- 2 files changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 494d328f7051..5ad10948ea95 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1009,7 +1009,6 @@ void irq_gc_mask_clr_bit(struct irq_data *d); void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); -void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index ec5fe9a0cb05..c26c5bb6b491 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -134,22 +134,6 @@ void irq_gc_ack_clr_bit(struct irq_data *d) irq_gc_unlock(gc); } -/** - * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt - * @d: irq_data - */ -void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - struct irq_chip_type *ct = irq_data_get_chip_type(d); - u32 mask = d->mask; - - irq_gc_lock(gc); - irq_reg_writel(gc, mask, ct->regs.mask); - irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); -} - /** * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt * @d: irq_data -- cgit v1.2.3-71-gd317 From 0ad646c81b2182f7fa67ec0c8c825e0ee165696d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Oct 2017 11:58:53 -0700 Subject: tun: call dev_get_valid_name() before register_netdevice() register_netdevice() could fail early when we have an invalid dev name, in which case ->ndo_uninit() is not called. For tun device, this is a problem because a timer etc. are already initialized and it expects ->ndo_uninit() to clean them up. We could move these initializations into a ->ndo_init() so that register_netdevice() knows better, however this is still complicated due to the logic in tun_detach(). Therefore, I choose to just call dev_get_valid_name() before register_netdevice(), which is quicker and much easier to audit. And for this specific case, it is already enough. Fixes: 96442e42429e ("tuntap: choose the txq based on rxq") Reported-by: Dmitry Alexeev Cc: Jason Wang Cc: "Michael S. Tsirkin" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 +++ include/linux/netdevice.h | 3 +++ net/core/dev.c | 6 +++--- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 5ce580f413b9..e21bf90b819f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2027,6 +2027,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!dev) return -ENOMEM; + err = dev_get_valid_name(net, dev, name); + if (err) + goto err_free_dev; dev_net_set(dev, net); dev->rtnl_link_ops = &tun_link_ops; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f535779d9dc1..2eaac7d75af4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3694,6 +3694,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); +int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name); + #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) diff --git a/net/core/dev.c b/net/core/dev.c index 588b473194a8..11596a302a26 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1147,9 +1147,8 @@ static int dev_alloc_name_ns(struct net *net, return ret; } -static int dev_get_valid_name(struct net *net, - struct net_device *dev, - const char *name) +int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name) { BUG_ON(!net); @@ -1165,6 +1164,7 @@ static int dev_get_valid_name(struct net *net, return 0; } +EXPORT_SYMBOL(dev_get_valid_name); /** * dev_change_name - change name of a device -- cgit v1.2.3-71-gd317 From 363b02dab09b3226f3bd1420dad9c72b79a42a76 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Oct 2017 16:43:25 +0100 Subject: KEYS: Fix race between updating and finding a negative key Consolidate KEY_FLAG_INSTANTIATED, KEY_FLAG_NEGATIVE and the rejection error into one field such that: (1) The instantiation state can be modified/read atomically. (2) The error can be accessed atomically with the state. (3) The error isn't stored unioned with the payload pointers. This deals with the problem that the state is spread over three different objects (two bits and a separate variable) and reading or updating them atomically isn't practical, given that not only can uninstantiated keys change into instantiated or rejected keys, but rejected keys can also turn into instantiated keys - and someone accessing the key might not be using any locking. The main side effect of this problem is that what was held in the payload may change, depending on the state. For instance, you might observe the key to be in the rejected state. You then read the cached error, but if the key semaphore wasn't locked, the key might've become instantiated between the two reads - and you might now have something in hand that isn't actually an error code. The state is now KEY_IS_UNINSTANTIATED, KEY_IS_POSITIVE or a negative error code if the key is negatively instantiated. The key_is_instantiated() function is replaced with key_is_positive() to avoid confusion as negative keys are also 'instantiated'. Additionally, barriering is included: (1) Order payload-set before state-set during instantiation. (2) Order state-read before payload-read when using the key. Further separate barriering is necessary if RCU is being used to access the payload content after reading the payload pointers. Fixes: 146aa8b1453b ("KEYS: Merge the type-specific data with the payload data") Cc: stable@vger.kernel.org # v4.4+ Reported-by: Eric Biggers Signed-off-by: David Howells Reviewed-by: Eric Biggers --- include/linux/key.h | 47 ++++++++++++++++++++------------ net/dns_resolver/dns_key.c | 2 +- security/keys/big_key.c | 4 +-- security/keys/encrypted-keys/encrypted.c | 2 +- security/keys/gc.c | 8 +++--- security/keys/key.c | 31 +++++++++++++-------- security/keys/keyctl.c | 9 +++--- security/keys/keyring.c | 10 +++---- security/keys/proc.c | 7 +++-- security/keys/process_keys.c | 2 +- security/keys/request_key.c | 7 ++--- security/keys/request_key_auth.c | 2 +- security/keys/trusted.c | 2 +- security/keys/user_defined.c | 4 +-- 14 files changed, 80 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index e315e16b6ff8..8a15cabe928d 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -138,6 +138,11 @@ struct key_restriction { struct key_type *keytype; }; +enum key_state { + KEY_IS_UNINSTANTIATED, + KEY_IS_POSITIVE, /* Positively instantiated */ +}; + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -169,6 +174,7 @@ struct key { * - may not match RCU dereferenced payload * - payload should contain own length */ + short state; /* Key state (+) or rejection error (-) */ #ifdef KEY_DEBUGGING unsigned magic; @@ -176,18 +182,16 @@ struct key { #endif unsigned long flags; /* status flags (change with bitops) */ -#define KEY_FLAG_INSTANTIATED 0 /* set if key has been instantiated */ -#define KEY_FLAG_DEAD 1 /* set if key type has been deleted */ -#define KEY_FLAG_REVOKED 2 /* set if key had been revoked */ -#define KEY_FLAG_IN_QUOTA 3 /* set if key consumes quota */ -#define KEY_FLAG_USER_CONSTRUCT 4 /* set if key is being constructed in userspace */ -#define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ -#define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ -#define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ -#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ -#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_KEEP 10 /* set if key should not be removed */ -#define KEY_FLAG_UID_KEYRING 11 /* set if key is a user or user session keyring */ +#define KEY_FLAG_DEAD 0 /* set if key type has been deleted */ +#define KEY_FLAG_REVOKED 1 /* set if key had been revoked */ +#define KEY_FLAG_IN_QUOTA 2 /* set if key consumes quota */ +#define KEY_FLAG_USER_CONSTRUCT 3 /* set if key is being constructed in userspace */ +#define KEY_FLAG_ROOT_CAN_CLEAR 4 /* set if key can be cleared by root without permission */ +#define KEY_FLAG_INVALIDATED 5 /* set if key has been invalidated */ +#define KEY_FLAG_BUILTIN 6 /* set if key is built in to the kernel */ +#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_KEEP 8 /* set if key should not be removed */ +#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -213,7 +217,6 @@ struct key { struct list_head name_link; struct assoc_array keys; }; - int reject_error; }; /* This is set on a keyring to restrict the addition of a link to a key @@ -353,17 +356,27 @@ extern void key_set_timeout(struct key *, unsigned); #define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */ #define KEY_NEED_ALL 0x3f /* All the above permissions */ +static inline short key_read_state(const struct key *key) +{ + /* Barrier versus mark_key_instantiated(). */ + return smp_load_acquire(&key->state); +} + /** - * key_is_instantiated - Determine if a key has been positively instantiated + * key_is_positive - Determine if a key has been positively instantiated * @key: The key to check. * * Return true if the specified key has been positively instantiated, false * otherwise. */ -static inline bool key_is_instantiated(const struct key *key) +static inline bool key_is_positive(const struct key *key) +{ + return key_read_state(key) == KEY_IS_POSITIVE; +} + +static inline bool key_is_negative(const struct key *key) { - return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags); + return key_read_state(key) < 0; } #define dereference_key_rcu(KEY) \ diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 8737412c7b27..e1d4d898a007 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data) static void dns_resolver_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) { + if (key_is_positive(key)) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) diff --git a/security/keys/big_key.c b/security/keys/big_key.c index e607830b6154..929e14978c42 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -247,7 +247,7 @@ void big_key_revoke(struct key *key) /* clear the quota */ key_payload_reserve(key, 0); - if (key_is_instantiated(key) && + if (key_is_positive(key) && (size_t)key->payload.data[big_key_len] > BIG_KEY_FILE_THRESHOLD) vfs_truncate(path, 0); } @@ -279,7 +279,7 @@ void big_key_describe(const struct key *key, struct seq_file *m) seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, ": %zu [%s]", datalen, datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 535db141f4da..d92cbf9687c3 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -854,7 +854,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) size_t datalen = prep->datalen; int ret = 0; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_negative(key)) return -ENOKEY; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; diff --git a/security/keys/gc.c b/security/keys/gc.c index 87cb260e4890..f01d48cb3de1 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -129,15 +129,15 @@ static noinline void key_gc_unused_keys(struct list_head *keys) while (!list_empty(keys)) { struct key *key = list_entry(keys->next, struct key, graveyard_link); + short state = key->state; + list_del(&key->graveyard_link); kdebug("- %u", key->serial); key_check(key); /* Throw away the key data if the key is instantiated */ - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags) && - key->type->destroy) + if (state == KEY_IS_POSITIVE && key->type->destroy) key->type->destroy(key); security_key_free(key); @@ -151,7 +151,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys) } atomic_dec(&key->user->nkeys); - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) + if (state != KEY_IS_UNINSTANTIATED) atomic_dec(&key->user->nikeys); key_user_put(key->user); diff --git a/security/keys/key.c b/security/keys/key.c index eb914a838840..9385e7cc710f 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -401,6 +401,18 @@ int key_payload_reserve(struct key *key, size_t datalen) } EXPORT_SYMBOL(key_payload_reserve); +/* + * Change the key state to being instantiated. + */ +static void mark_key_instantiated(struct key *key, int reject_error) +{ + /* Commit the payload before setting the state; barrier versus + * key_read_state(). + */ + smp_store_release(&key->state, + (reject_error < 0) ? reject_error : KEY_IS_POSITIVE); +} + /* * Instantiate a key and link it into the target keyring atomically. Must be * called with the target keyring's semaphore writelocked. The target key's @@ -424,14 +436,14 @@ static int __key_instantiate_and_link(struct key *key, mutex_lock(&key_construction_mutex); /* can't instantiate twice */ - if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state == KEY_IS_UNINSTANTIATED) { /* instantiate the key */ ret = key->type->instantiate(key, prep); if (ret == 0) { /* mark the key as being instantiated */ atomic_inc(&key->user->nikeys); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); + mark_key_instantiated(key, 0); if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) awaken = 1; @@ -577,13 +589,10 @@ int key_reject_and_link(struct key *key, mutex_lock(&key_construction_mutex); /* can't instantiate twice */ - if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state == KEY_IS_UNINSTANTIATED) { /* mark the key as being negatively instantiated */ atomic_inc(&key->user->nikeys); - key->reject_error = -error; - smp_wmb(); - set_bit(KEY_FLAG_NEGATIVE, &key->flags); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); + mark_key_instantiated(key, -error); now = current_kernel_time(); key->expiry = now.tv_sec + timeout; key_schedule_gc(key->expiry + key_gc_delay); @@ -752,8 +761,8 @@ static inline key_ref_t __key_update(key_ref_t key_ref, ret = key->type->update(key, prep); if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + /* Updating a negative key positively instantiates it */ + mark_key_instantiated(key, 0); up_write(&key->sem); @@ -986,8 +995,8 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) ret = key->type->update(key, &prep); if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + /* Updating a negative key positively instantiates it */ + mark_key_instantiated(key, 0); up_write(&key->sem); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 365ff85d7e27..76d22f726ae4 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -766,10 +766,9 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) key = key_ref_to_ptr(key_ref); - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { - ret = -ENOKEY; - goto error2; - } + ret = key_read_state(key); + if (ret < 0) + goto error2; /* Negatively instantiated */ /* see if we can read it directly */ ret = key_permission(key_ref, KEY_NEED_READ); @@ -901,7 +900,7 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) atomic_dec(&key->user->nkeys); atomic_inc(&newowner->nkeys); - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state != KEY_IS_UNINSTANTIATED) { atomic_dec(&key->user->nikeys); atomic_inc(&newowner->nikeys); } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 4fa82a8a9c0e..06173b091a74 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -414,7 +414,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) else seq_puts(m, "[anon]"); - if (key_is_instantiated(keyring)) { + if (key_is_positive(keyring)) { if (keyring->keys.nr_leaves_on_tree != 0) seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); else @@ -553,7 +553,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) { struct keyring_search_context *ctx = iterator_data; const struct key *key = keyring_ptr_to_key(object); - unsigned long kflags = key->flags; + unsigned long kflags = READ_ONCE(key->flags); + short state = READ_ONCE(key->state); kenter("{%d}", key->serial); @@ -597,9 +598,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { /* we set a different error code if we pass a negative key */ - if (kflags & (1 << KEY_FLAG_NEGATIVE)) { - smp_rmb(); - ctx->result = ERR_PTR(key->reject_error); + if (state < 0) { + ctx->result = ERR_PTR(state); kleave(" = %d [neg]", ctx->skipped_ret); goto skipped; } diff --git a/security/keys/proc.c b/security/keys/proc.c index de834309d100..4089ce1f7757 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -182,6 +182,7 @@ static int proc_keys_show(struct seq_file *m, void *v) unsigned long timo; key_ref_t key_ref, skey_ref; char xbuf[16]; + short state; int rc; struct keyring_search_context ctx = { @@ -236,17 +237,19 @@ static int proc_keys_show(struct seq_file *m, void *v) sprintf(xbuf, "%luw", timo / (60*60*24*7)); } + state = key_read_state(key); + #define showflag(KEY, LETTER, FLAG) \ (test_bit(FLAG, &(KEY)->flags) ? LETTER : '-') seq_printf(m, "%08x %c%c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, - showflag(key, 'I', KEY_FLAG_INSTANTIATED), + state != KEY_IS_UNINSTANTIATED ? 'I' : '-', showflag(key, 'R', KEY_FLAG_REVOKED), showflag(key, 'D', KEY_FLAG_DEAD), showflag(key, 'Q', KEY_FLAG_IN_QUOTA), showflag(key, 'U', KEY_FLAG_USER_CONSTRUCT), - showflag(key, 'N', KEY_FLAG_NEGATIVE), + state < 0 ? 'N' : '-', showflag(key, 'i', KEY_FLAG_INVALIDATED), refcount_read(&key->usage), xbuf, diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 293d3598153b..740affd65ee9 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -730,7 +730,7 @@ try_again: ret = -EIO; if (!(lflags & KEY_LOOKUP_PARTIAL) && - !test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) + key_read_state(key) == KEY_IS_UNINSTANTIATED) goto invalid_key; /* check the permissions */ diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 63e63a42db3c..e8036cd0ad54 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -595,10 +595,9 @@ int wait_for_key_construction(struct key *key, bool intr) intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret) return -ERESTARTSYS; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { - smp_rmb(); - return key->reject_error; - } + ret = key_read_state(key); + if (ret < 0) + return ret; return key_validate(key); } EXPORT_SYMBOL(wait_for_key_construction); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 6ebf1af8fce9..424e1d90412e 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -73,7 +73,7 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } diff --git a/security/keys/trusted.c b/security/keys/trusted.c index ddfaebf60fc8..bd85315cbfeb 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1066,7 +1066,7 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) char *datablob; int ret = 0; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_negative(key)) return -ENOKEY; p = key->payload.data[0]; if (!p->migratable) diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 3d8c68eba516..9f558bedba23 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -114,7 +114,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) /* attach the new data, displacing the old */ key->expiry = prep->expiry; - if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_positive(key)) zap = dereference_key_locked(key); rcu_assign_keypointer(key, prep->payload.data[0]); prep->payload.data[0] = NULL; @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(user_destroy); void user_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, ": %u", key->datalen); } -- cgit v1.2.3-71-gd317 From 55dfce873dca46df00304c44a568d7933bffff89 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 9 Oct 2017 11:09:33 -0700 Subject: Input: factor out and export input_device_id matching code Factor out and export input_match_device_id() so that modules may use it. It will be needed by joydev to blacklist accelerometers in composite devices. Tested-by: Roderick Colenbrander Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 83 +++++++++++++++++++++++---------------------------- include/linux/input.h | 3 ++ 2 files changed, 41 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index d268fdc23c64..02e6ea7955fe 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -933,58 +933,51 @@ int input_set_keycode(struct input_dev *dev, } EXPORT_SYMBOL(input_set_keycode); +bool input_match_device_id(const struct input_dev *dev, + const struct input_device_id *id) +{ + if (id->flags & INPUT_DEVICE_ID_MATCH_BUS) + if (id->bustype != dev->id.bustype) + return false; + + if (id->flags & INPUT_DEVICE_ID_MATCH_VENDOR) + if (id->vendor != dev->id.vendor) + return false; + + if (id->flags & INPUT_DEVICE_ID_MATCH_PRODUCT) + if (id->product != dev->id.product) + return false; + + if (id->flags & INPUT_DEVICE_ID_MATCH_VERSION) + if (id->version != dev->id.version) + return false; + + if (!bitmap_subset(id->evbit, dev->evbit, EV_MAX) || + !bitmap_subset(id->keybit, dev->keybit, KEY_MAX) || + !bitmap_subset(id->relbit, dev->relbit, REL_MAX) || + !bitmap_subset(id->absbit, dev->absbit, ABS_MAX) || + !bitmap_subset(id->mscbit, dev->mscbit, MSC_MAX) || + !bitmap_subset(id->ledbit, dev->ledbit, LED_MAX) || + !bitmap_subset(id->sndbit, dev->sndbit, SND_MAX) || + !bitmap_subset(id->ffbit, dev->ffbit, FF_MAX) || + !bitmap_subset(id->swbit, dev->swbit, SW_MAX)) { + return false; + } + + return true; +} +EXPORT_SYMBOL(input_match_device_id); + static const struct input_device_id *input_match_device(struct input_handler *handler, struct input_dev *dev) { const struct input_device_id *id; for (id = handler->id_table; id->flags || id->driver_info; id++) { - - if (id->flags & INPUT_DEVICE_ID_MATCH_BUS) - if (id->bustype != dev->id.bustype) - continue; - - if (id->flags & INPUT_DEVICE_ID_MATCH_VENDOR) - if (id->vendor != dev->id.vendor) - continue; - - if (id->flags & INPUT_DEVICE_ID_MATCH_PRODUCT) - if (id->product != dev->id.product) - continue; - - if (id->flags & INPUT_DEVICE_ID_MATCH_VERSION) - if (id->version != dev->id.version) - continue; - - if (!bitmap_subset(id->evbit, dev->evbit, EV_MAX)) - continue; - - if (!bitmap_subset(id->keybit, dev->keybit, KEY_MAX)) - continue; - - if (!bitmap_subset(id->relbit, dev->relbit, REL_MAX)) - continue; - - if (!bitmap_subset(id->absbit, dev->absbit, ABS_MAX)) - continue; - - if (!bitmap_subset(id->mscbit, dev->mscbit, MSC_MAX)) - continue; - - if (!bitmap_subset(id->ledbit, dev->ledbit, LED_MAX)) - continue; - - if (!bitmap_subset(id->sndbit, dev->sndbit, SND_MAX)) - continue; - - if (!bitmap_subset(id->ffbit, dev->ffbit, FF_MAX)) - continue; - - if (!bitmap_subset(id->swbit, dev->swbit, SW_MAX)) - continue; - - if (!handler->match || handler->match(handler, dev)) + if (input_match_device_id(dev, id) && + (!handler->match || handler->match(handler, dev))) { return id; + } } return NULL; diff --git a/include/linux/input.h b/include/linux/input.h index fb5e23c7ed98..2a44650e449d 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -469,6 +469,9 @@ int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, const struct input_keymap_entry *ke); +bool input_match_device_id(const struct input_dev *dev, + const struct input_device_id *id); + void input_enable_softrepeat(struct input_dev *dev, int delay, int period); extern struct class input_class; -- cgit v1.2.3-71-gd317 From 8724ecb072293f109a6f5dc93be8a98bf61fe14f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 9 Oct 2017 12:01:14 -0700 Subject: Input: allow matching device IDs on property bits Let's allow matching input devices on their property bits, both in-kernel and when generating module aliases. Tested-by: Roderick Colenbrander Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 3 ++- include/linux/input.h | 4 ++++ include/linux/mod_devicetable.h | 3 +++ scripts/mod/devicetable-offsets.c | 1 + scripts/mod/file2alias.c | 6 +++++- 5 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 02e6ea7955fe..762bfb9487dc 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -960,7 +960,8 @@ bool input_match_device_id(const struct input_dev *dev, !bitmap_subset(id->ledbit, dev->ledbit, LED_MAX) || !bitmap_subset(id->sndbit, dev->sndbit, SND_MAX) || !bitmap_subset(id->ffbit, dev->ffbit, FF_MAX) || - !bitmap_subset(id->swbit, dev->swbit, SW_MAX)) { + !bitmap_subset(id->swbit, dev->swbit, SW_MAX) || + !bitmap_subset(id->propbit, dev->propbit, INPUT_PROP_MAX)) { return false; } diff --git a/include/linux/input.h b/include/linux/input.h index 2a44650e449d..7c7516eb7d76 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -234,6 +234,10 @@ struct input_dev { #error "SW_MAX and INPUT_DEVICE_ID_SW_MAX do not match" #endif +#if INPUT_PROP_MAX != INPUT_DEVICE_ID_PROP_MAX +#error "INPUT_PROP_MAX and INPUT_DEVICE_ID_PROP_MAX do not match" +#endif + #define INPUT_DEVICE_ID_MATCH_DEVICE \ (INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT) #define INPUT_DEVICE_ID_MATCH_DEVICE_AND_VERSION \ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 3f74ef2281e8..72f0b7f19c59 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -293,6 +293,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_SND_MAX 0x07 #define INPUT_DEVICE_ID_FF_MAX 0x7f #define INPUT_DEVICE_ID_SW_MAX 0x0f +#define INPUT_DEVICE_ID_PROP_MAX 0x1f #define INPUT_DEVICE_ID_MATCH_BUS 1 #define INPUT_DEVICE_ID_MATCH_VENDOR 2 @@ -308,6 +309,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_MATCH_SNDBIT 0x0400 #define INPUT_DEVICE_ID_MATCH_FFBIT 0x0800 #define INPUT_DEVICE_ID_MATCH_SWBIT 0x1000 +#define INPUT_DEVICE_ID_MATCH_PROPBIT 0x2000 struct input_device_id { @@ -327,6 +329,7 @@ struct input_device_id { kernel_ulong_t sndbit[INPUT_DEVICE_ID_SND_MAX / BITS_PER_LONG + 1]; kernel_ulong_t ffbit[INPUT_DEVICE_ID_FF_MAX / BITS_PER_LONG + 1]; kernel_ulong_t swbit[INPUT_DEVICE_ID_SW_MAX / BITS_PER_LONG + 1]; + kernel_ulong_t propbit[INPUT_DEVICE_ID_PROP_MAX / BITS_PER_LONG + 1]; kernel_ulong_t driver_info; }; diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e4d90e50f6fe..812657ab5aa3 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -105,6 +105,7 @@ int main(void) DEVID_FIELD(input_device_id, sndbit); DEVID_FIELD(input_device_id, ffbit); DEVID_FIELD(input_device_id, swbit); + DEVID_FIELD(input_device_id, propbit); DEVID(eisa_device_id); DEVID_FIELD(eisa_device_id, sig); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 29d6699d5a06..bc25898f6df0 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -761,7 +761,7 @@ static void do_input(char *alias, sprintf(alias + strlen(alias), "%X,*", i); } -/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */ +/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwXprX where X is comma-separated %02X. */ static int do_input_entry(const char *filename, void *symval, char *alias) { @@ -779,6 +779,7 @@ static int do_input_entry(const char *filename, void *symval, DEF_FIELD_ADDR(symval, input_device_id, sndbit); DEF_FIELD_ADDR(symval, input_device_id, ffbit); DEF_FIELD_ADDR(symval, input_device_id, swbit); + DEF_FIELD_ADDR(symval, input_device_id, propbit); sprintf(alias, "input:"); @@ -816,6 +817,9 @@ static int do_input_entry(const char *filename, void *symval, sprintf(alias + strlen(alias), "w*"); if (flags & INPUT_DEVICE_ID_MATCH_SWBIT) do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX); + sprintf(alias + strlen(alias), "pr*"); + if (flags & INPUT_DEVICE_ID_MATCH_PROPBIT) + do_input(alias, *propbit, 0, INPUT_DEVICE_ID_PROP_MAX); return 1; } ADD_TO_DEVTABLE("input", input_device_id, do_input_entry); -- cgit v1.2.3-71-gd317 From a961e40917fb14614d368d8bc9782ca4d6a8cd11 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 19 Oct 2017 13:30:15 -0400 Subject: membarrier: Provide register expedited private command This introduces a "register private expedited" membarrier command which allows eventual removal of important memory barrier constraints on the scheduler fast-paths. It changes how the "private expedited" membarrier command (new to 4.14) is used from user-space. This new command allows processes to register their intent to use the private expedited command. This affects how the expedited private command introduced in 4.14-rc is meant to be used, and should be merged before 4.14 final. Processes are now required to register before using MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM. This fixes a problem that arose when designing requested extensions to sys_membarrier() to allow JITs to efficiently flush old code from instruction caches. Several potential algorithms are much less painful if the user register intent to use this functionality early on, for example, before the process spawns the second thread. Registering at this time removes the need to interrupt each and every thread in that process at the first expedited sys_membarrier() system call. Signed-off-by: Mathieu Desnoyers Acked-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Viro Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + include/linux/mm_types.h | 3 +++ include/linux/sched/mm.h | 16 ++++++++++++++++ include/uapi/linux/membarrier.h | 23 ++++++++++++++++------- kernel/sched/membarrier.c | 34 ++++++++++++++++++++++++++++++---- 5 files changed, 66 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 5470d3c1892a..3e14ba25f678 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; + membarrier_execve(current); acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 46f4ecf5479a..1861ea8dba77 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -445,6 +445,9 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ +#ifdef CONFIG_MEMBARRIER + atomic_t membarrier_state; +#endif #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index ae53e413fb13..ab9bf7b73954 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC) | flags; } +#ifdef CONFIG_MEMBARRIER +enum { + MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), + MEMBARRIER_STATE_SWITCH_MM = (1U << 1), +}; + +static inline void membarrier_execve(struct task_struct *t) +{ + atomic_set(&t->mm->membarrier_state, 0); +} +#else +static inline void membarrier_execve(struct task_struct *t) +{ +} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h index 6d47b3249d8a..4e01ad7ffe98 100644 --- a/include/uapi/linux/membarrier.h +++ b/include/uapi/linux/membarrier.h @@ -52,21 +52,30 @@ * (non-running threads are de facto in such a * state). This only covers threads from the * same processes as the caller thread. This - * command returns 0. The "expedited" commands - * complete faster than the non-expedited ones, - * they never block, but have the downside of - * causing extra overhead. + * command returns 0 on success. The + * "expedited" commands complete faster than + * the non-expedited ones, they never block, + * but have the downside of causing extra + * overhead. A process needs to register its + * intent to use the private expedited command + * prior to using it, otherwise this command + * returns -EPERM. + * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + * Register the process intent to use + * MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always + * returns 0. * * Command to be passed to the membarrier system call. The commands need to * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to * the value 0. */ enum membarrier_cmd { - MEMBARRIER_CMD_QUERY = 0, - MEMBARRIER_CMD_SHARED = (1 << 0), + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ - MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4), }; #endif /* _UAPI_LINUX_MEMBARRIER_H */ diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index a92fddc22747..dd7908743dab 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "sched.h" /* for cpu_rq(). */ @@ -26,21 +27,26 @@ * except MEMBARRIER_CMD_QUERY. */ #define MEMBARRIER_CMD_BITMASK \ - (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED) + (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED) static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ } -static void membarrier_private_expedited(void) +static int membarrier_private_expedited(void) { int cpu; bool fallback = false; cpumask_var_t tmpmask; + if (!(atomic_read(¤t->mm->membarrier_state) + & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)) + return -EPERM; + if (num_online_cpus() == 1) - return; + return 0; /* * Matches memory barriers around rq->curr modification in @@ -94,6 +100,24 @@ static void membarrier_private_expedited(void) * rq->curr modification in scheduler. */ smp_mb(); /* exit from system call is not a mb */ + return 0; +} + +static void membarrier_register_private_expedited(void) +{ + struct task_struct *p = current; + struct mm_struct *mm = p->mm; + + /* + * We need to consider threads belonging to different thread + * groups, which use the same mm. (CLONE_VM but not + * CLONE_THREAD). + */ + if (atomic_read(&mm->membarrier_state) + & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY) + return; + atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, + &mm->membarrier_state); } /** @@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) synchronize_sched(); return 0; case MEMBARRIER_CMD_PRIVATE_EXPEDITED: - membarrier_private_expedited(); + return membarrier_private_expedited(); + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + membarrier_register_private_expedited(); return 0; default: return -EINVAL; -- cgit v1.2.3-71-gd317 From 27fdb35fe99011d86bcc54f62fe84712c53f4d05 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Oct 2017 14:26:21 -0700 Subject: doc: Fix various RCU docbook comment-header problems Because many of RCU's files have not been included into docbook, a number of errors have accumulated. This commit fixes them. Signed-off-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- include/linux/rculist.h | 2 +- include/linux/rcupdate.h | 22 ++++++++++++++-------- include/linux/srcu.h | 1 + kernel/rcu/srcutree.c | 2 +- kernel/rcu/sync.c | 9 ++++++--- kernel/rcu/tree.c | 18 ++++++++++-------- 6 files changed, 33 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index b1fd8bf85fdc..2bea1d5e9930 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -276,7 +276,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, #define list_entry_rcu(ptr, type, member) \ container_of(lockless_dereference(ptr), type, member) -/** +/* * Where are list_empty_rcu() and list_first_entry_rcu()? * * Implementing those functions following their counterparts list_empty() and diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de50d8a4cf41..1a9f70d44af9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -523,7 +523,7 @@ static inline void rcu_preempt_sleep_check(void) { } * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the READ_ONCE(). This * is useful in cases where update-side locks prevent the value of the - * pointer from changing. Please note that this primitive does -not- + * pointer from changing. Please note that this primitive does *not* * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. @@ -568,7 +568,7 @@ static inline void rcu_preempt_sleep_check(void) { } * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to * kill_dependency(). It could be used as follows: - * + * `` * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); @@ -579,6 +579,7 @@ static inline void rcu_preempt_sleep_check(void) { } * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); + *`` */ #define rcu_pointer_handoff(p) (p) @@ -778,18 +779,21 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_INIT_POINTER() - initialize an RCU protected pointer + * @p: The pointer to be initialized. + * @v: The value to initialized the pointer to. * * Initialize an RCU-protected pointer in special cases where readers * do not need ordering constraints on the CPU or the compiler. These * special cases are: * - * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- + * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer *or* * 2. The caller has taken whatever steps are required to prevent - * RCU readers from concurrently accessing this pointer -or- + * RCU readers from concurrently accessing this pointer *or* * 3. The referenced data structure has already been exposed to - * readers either at compile time or via rcu_assign_pointer() -and- - * a. You have not made -any- reader-visible changes to - * this structure since then -or- + * readers either at compile time or via rcu_assign_pointer() *and* + * + * a. You have not made *any* reader-visible changes to + * this structure since then *or* * b. It is OK for readers accessing this structure from its * new location to see the old state of the structure. (For * example, the changes were to statistical counters or to @@ -805,7 +809,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * by a single external-to-structure RCU-protected pointer, then you may * use RCU_INIT_POINTER() to initialize the internal RCU-protected * pointers, but you must use rcu_assign_pointer() to initialize the - * external-to-structure pointer -after- you have completely initialized + * external-to-structure pointer *after* you have completely initialized * the reader-accessible portions of the linked structure. * * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no @@ -819,6 +823,8 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer + * @p: The pointer to be initialized. + * @v: The value to initialized the pointer to. * * GCC-style initialization for an RCU-protected pointer in a structure field. */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 39af9bc0f653..62be8966e837 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -78,6 +78,7 @@ void synchronize_srcu(struct srcu_struct *sp); /** * srcu_read_lock_held - might we be in SRCU read-side critical section? + * @sp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 729a8706751d..6d5880089ff6 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -854,7 +854,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, /** * call_srcu() - Queue a callback for invocation after an SRCU grace period * @sp: srcu_struct in queue the callback - * @head: structure to be used for queueing the SRCU callback. + * @rhp: structure to be used for queueing the SRCU callback. * @func: function to be invoked after the SRCU grace period * * The callback function will be invoked some time after a full SRCU diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 50d1861f7759..3f943efcf61c 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -85,6 +85,9 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) } /** + * rcu_sync_enter_start - Force readers onto slow path for multiple updates + * @rsp: Pointer to rcu_sync structure to use for synchronization + * * Must be called after rcu_sync_init() and before first use. * * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}() @@ -142,7 +145,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) /** * rcu_sync_func() - Callback function managing reader access to fastpath - * @rsp: Pointer to rcu_sync structure to use for synchronization + * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization * * This function is passed to one of the call_rcu() functions by * rcu_sync_exit(), so that it is invoked after a grace period following the @@ -158,9 +161,9 @@ void rcu_sync_enter(struct rcu_sync *rsp) * rcu_sync_exit(). Otherwise, set all state back to idle so that readers * can again use their fastpaths. */ -static void rcu_sync_func(struct rcu_head *rcu) +static void rcu_sync_func(struct rcu_head *rhp) { - struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head); + struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); unsigned long flags; BUG_ON(rsp->gp_state != GP_PASSED); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b0ad62b0e7b8..3e3650e94ae6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3097,9 +3097,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, * read-side critical sections have completed. call_rcu_sched() assumes * that the read-side critical sections end on enabling of preemption * or on voluntary preemption. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR - * - anything that disables preemption. + * RCU read-side critical sections are delimited by: + * + * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR + * - anything that disables preemption. * * These may be nested. * @@ -3124,11 +3125,12 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); * handler. This means that read-side critical sections in process * context must not be interrupted by softirqs. This interface is to be * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. - * OR - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. - * These may be nested. + * RCU read-side critical sections are delimited by: + * + * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR + * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. + * + * These may be nested. * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. -- cgit v1.2.3-71-gd317 From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:36 -0700 Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function. This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- include/net/tcp.h | 5 ++++ kernel/bpf/sockmap.c | 19 +++++++------- net/core/filter.c | 29 +++++++++++----------- samples/sockmap/sockmap_kern.c | 2 +- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 2 +- tools/testing/selftests/bpf/sockmap_verdict_prog.c | 4 +-- 8 files changed, 36 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..818a0b26249e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -728,7 +728,7 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); -struct sock *do_sk_redirect_map(void); +struct sock *do_sk_redirect_map(struct sk_buff *skb); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/net/tcp.h b/include/net/tcp.h index 89974c5286d8..b1ef98ebce53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,11 @@ struct tcp_skb_cb { struct inet6_skb_parm h6; #endif } header; /* For incoming skbs */ + struct { + __u32 key; + __u32 flags; + struct bpf_map *map; + } bpf; }; }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index c68899d5b246..beaabb21c3a3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -39,6 +39,7 @@ #include #include #include +#include struct bpf_stab { struct bpf_map map; @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) return SK_DROP; skb_orphan(skb); + /* We need to ensure that BPF metadata for maps is also cleared + * when we orphan the skb so that we don't have the possibility + * to reference a stale map. + */ + TCP_SKB_CB(skb)->bpf.map = NULL; skb->sk = psock->sock; bpf_compute_data_end(skb); + preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); + preempt_enable(); skb->sk = NULL; return rc; @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) struct sock *sk; int rc; - /* Because we use per cpu values to feed input from sock redirect - * in BPF program to do_sk_redirect_map() call we need to ensure we - * are not preempted. RCU read lock is not sufficient in this case - * with CONFIG_PREEMPT_RCU enabled so we must be explicit here. - */ - preempt_disable(); rc = smap_verdict_func(psock, skb); switch (rc) { case SK_REDIRECT: - sk = do_sk_redirect_map(); - preempt_enable(); + sk = do_sk_redirect_map(skb); if (likely(sk)) { struct smap_psock *peer = smap_psock_sk(sk); @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) /* Fall through and free skb otherwise */ case SK_DROP: default: - if (rc != SK_REDIRECT) - preempt_enable(); kfree_skb(skb); } } diff --git a/net/core/filter.c b/net/core/filter.c index 74b8c91fb5f4..ca1ba0bbfbc2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags) +BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, + struct bpf_map *, map, u32, key, u64, flags) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); if (unlikely(flags)) return SK_ABORTED; - ri->ifindex = key; - ri->flags = flags; - ri->map = map; + tcb->bpf.key = key; + tcb->bpf.flags = flags; + tcb->bpf.map = map; return SK_REDIRECT; } -struct sock *do_sk_redirect_map(void) +struct sock *do_sk_redirect_map(struct sk_buff *skb) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); struct sock *sk = NULL; - if (ri->map) { - sk = __sock_map_lookup_elem(ri->map, ri->ifindex); + if (tcb->bpf.map) { + sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key); - ri->ifindex = 0; - ri->map = NULL; - /* we do not clear flags for future lookup */ + tcb->bpf.key = 0; + tcb->bpf.map = NULL; } return sk; @@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { .func = bpf_sk_redirect_map, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_ANYTHING, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c index f9b38ef82dc2..52b0053274f4 100644 --- a/samples/sockmap/sockmap_kern.c +++ b/samples/sockmap/sockmap_kern.c @@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb) ret = 1; bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); - return bpf_sk_redirect_map(&sock_map, ret, 0); + return bpf_sk_redirect_map(skb, &sock_map, ret, 0); } SEC("sockops") diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 43ab5c402f98..be9a631a69f7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -569,9 +569,10 @@ union bpf_attr { * @flags: reserved for future use * Return: 0 on success or negative error code * - * int bpf_sk_redirect_map(map, key, flags) + * int bpf_sk_redirect_map(skb, map, key, flags) * Redirect skb to a sock in map using key as a lookup key for the * sock in map. + * @skb: pointer to skb * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 36fb9161b34a..b2e02bdcd098 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; -static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = +static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c index 9b99bd10807d..2cd2d552938b 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb) bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); if (!map) - return bpf_sk_redirect_map(&sock_map_rx, sk, 0); - return bpf_sk_redirect_map(&sock_map_tx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0); } char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-71-gd317