From 59aabfc7e959f5f213e4e5cc7567ab4934da2adf Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 30 Apr 2015 17:12:16 -0400 Subject: locking/rwsem: Reduce spinlock contention in wakeup after up_read()/up_write() In up_write()/up_read(), rwsem_wake() will be called whenever it detects that some writers/readers are waiting. The rwsem_wake() function will take the wait_lock and call __rwsem_do_wake() to do the real wakeup. For a heavily contended rwsem, doing a spin_lock() on wait_lock will cause further contention on the heavily contended rwsem cacheline resulting in delay in the completion of the up_read/up_write operations. This patch makes the wait_lock taking and the call to __rwsem_do_wake() optional if at least one spinning writer is present. The spinning writer will be able to take the rwsem and call rwsem_wake() later when it calls up_write(). With the presence of a spinning writer, rwsem_wake() will now try to acquire the lock using trylock. If that fails, it will just quit. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Davidlohr Bueso Acked-by: Jason Low Cc: Andrew Morton Cc: Borislav Petkov Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430428337-16802-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/linux/osq_lock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 3a6490e81b28..703ea5c30a33 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock) extern bool osq_lock(struct optimistic_spin_queue *lock); extern void osq_unlock(struct optimistic_spin_queue *lock); +static inline bool osq_is_locked(struct optimistic_spin_queue *lock) +{ + return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL; +} + #endif -- cgit v1.2.3-71-gd317 From 663fdcbee0a656cdaef934e7f50e6c2670373bc9 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Thu, 30 Apr 2015 17:27:21 +0530 Subject: kernel: Replace reference to ASSIGN_ONCE() with WRITE_ONCE() in comment Looks like commit : 43239cbe79fc ("kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)") left behind a reference to ASSIGN_ONCE(). Update this to WRITE_ONCE(). Signed-off-by: Preeti U Murthy Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: borntraeger@de.ibm.com Cc: dave@stgolabs.net Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20150430115721.22278.94082.stgit@preeti.in.ibm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2..a7c0941d10da 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -450,7 +450,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * with an explicit memory barrier or atomic instruction that provides the * required ordering. * - * If possible use READ_ONCE/ASSIGN_ONCE instead. + * If possible use READ_ONCE()/WRITE_ONCE() instead. */ #define __ACCESS_ONCE(x) ({ \ __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \ -- cgit v1.2.3-71-gd317 From 0cd3be6e9a46f84ef7a42e1a5645d32ad547b12e Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Mon, 4 May 2015 23:04:16 +0200 Subject: clk: si5351: Do not pass struct clk in platform_data When registering clk-si5351 by platform_data, we should not pass struct clk for the reference clocks. Drop struct clk from platform_data and rework the driver to use devm_clk_get of named clock references. While at it, check for at least one valid input clock and properly prepare/ enable valid reference clocks. Signed-off-by: Sebastian Hesselbarth Reported-by: Michael Welling Reported-by: Jean-Francois Moine Reported-by: Russell King Tested-by: Michael Welling Tested-by: Jean-Francois Moine Signed-off-by: Michael Turquette --- drivers/clk/clk-si5351.c | 63 +++++++++++++++++++++++++----------- include/linux/platform_data/si5351.h | 4 --- 2 files changed, 45 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c index 44ea107cfc67..30335d3b99af 100644 --- a/drivers/clk/clk-si5351.c +++ b/drivers/clk/clk-si5351.c @@ -1128,13 +1128,6 @@ static int si5351_dt_parse(struct i2c_client *client, if (!pdata) return -ENOMEM; - pdata->clk_xtal = of_clk_get(np, 0); - if (!IS_ERR(pdata->clk_xtal)) - clk_put(pdata->clk_xtal); - pdata->clk_clkin = of_clk_get(np, 1); - if (!IS_ERR(pdata->clk_clkin)) - clk_put(pdata->clk_clkin); - /* * property silabs,pll-source : , [<..>] * allow to selectively set pll source @@ -1328,8 +1321,22 @@ static int si5351_i2c_probe(struct i2c_client *client, i2c_set_clientdata(client, drvdata); drvdata->client = client; drvdata->variant = variant; - drvdata->pxtal = pdata->clk_xtal; - drvdata->pclkin = pdata->clk_clkin; + drvdata->pxtal = devm_clk_get(&client->dev, "xtal"); + drvdata->pclkin = devm_clk_get(&client->dev, "clkin"); + + if (PTR_ERR(drvdata->pxtal) == -EPROBE_DEFER || + PTR_ERR(drvdata->pclkin) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + /* + * Check for valid parent clock: VARIANT_A and VARIANT_B need XTAL, + * VARIANT_C can have CLKIN instead. + */ + if (IS_ERR(drvdata->pxtal) && + (drvdata->variant != SI5351_VARIANT_C || IS_ERR(drvdata->pclkin))) { + dev_err(&client->dev, "missing parent clock\n"); + return -EINVAL; + } drvdata->regmap = devm_regmap_init_i2c(client, &si5351_regmap_config); if (IS_ERR(drvdata->regmap)) { @@ -1393,6 +1400,11 @@ static int si5351_i2c_probe(struct i2c_client *client, } } + if (!IS_ERR(drvdata->pxtal)) + clk_prepare_enable(drvdata->pxtal); + if (!IS_ERR(drvdata->pclkin)) + clk_prepare_enable(drvdata->pclkin); + /* register xtal input clock gate */ memset(&init, 0, sizeof(init)); init.name = si5351_input_names[0]; @@ -1407,7 +1419,8 @@ static int si5351_i2c_probe(struct i2c_client *client, clk = devm_clk_register(&client->dev, &drvdata->xtal); if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return PTR_ERR(clk); + ret = PTR_ERR(clk); + goto err_clk; } /* register clkin input clock gate */ @@ -1425,7 +1438,8 @@ static int si5351_i2c_probe(struct i2c_client *client, if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return PTR_ERR(clk); + ret = PTR_ERR(clk); + goto err_clk; } } @@ -1447,7 +1461,8 @@ static int si5351_i2c_probe(struct i2c_client *client, clk = devm_clk_register(&client->dev, &drvdata->pll[0].hw); if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return -EINVAL; + ret = PTR_ERR(clk); + goto err_clk; } /* register PLLB or VXCO (Si5351B) */ @@ -1471,7 +1486,8 @@ static int si5351_i2c_probe(struct i2c_client *client, clk = devm_clk_register(&client->dev, &drvdata->pll[1].hw); if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return -EINVAL; + ret = PTR_ERR(clk); + goto err_clk; } /* register clk multisync and clk out divider */ @@ -1492,8 +1508,10 @@ static int si5351_i2c_probe(struct i2c_client *client, num_clocks * sizeof(*drvdata->onecell.clks), GFP_KERNEL); if (WARN_ON(!drvdata->msynth || !drvdata->clkout || - !drvdata->onecell.clks)) - return -ENOMEM; + !drvdata->onecell.clks)) { + ret = -ENOMEM; + goto err_clk; + } for (n = 0; n < num_clocks; n++) { drvdata->msynth[n].num = n; @@ -1511,7 +1529,8 @@ static int si5351_i2c_probe(struct i2c_client *client, if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return -EINVAL; + ret = PTR_ERR(clk); + goto err_clk; } } @@ -1538,7 +1557,8 @@ static int si5351_i2c_probe(struct i2c_client *client, if (IS_ERR(clk)) { dev_err(&client->dev, "unable to register %s\n", init.name); - return -EINVAL; + ret = PTR_ERR(clk); + goto err_clk; } drvdata->onecell.clks[n] = clk; @@ -1557,10 +1577,17 @@ static int si5351_i2c_probe(struct i2c_client *client, &drvdata->onecell); if (ret) { dev_err(&client->dev, "unable to add clk provider\n"); - return ret; + goto err_clk; } return 0; + +err_clk: + if (!IS_ERR(drvdata->pxtal)) + clk_disable_unprepare(drvdata->pxtal); + if (!IS_ERR(drvdata->pclkin)) + clk_disable_unprepare(drvdata->pclkin); + return ret; } static const struct i2c_device_id si5351_i2c_ids[] = { diff --git a/include/linux/platform_data/si5351.h b/include/linux/platform_data/si5351.h index a947ab8b441a..533d9807e543 100644 --- a/include/linux/platform_data/si5351.h +++ b/include/linux/platform_data/si5351.h @@ -5,8 +5,6 @@ #ifndef __LINUX_PLATFORM_DATA_SI5351_H__ #define __LINUX_PLATFORM_DATA_SI5351_H__ -struct clk; - /** * enum si5351_pll_src - Si5351 pll clock source * @SI5351_PLL_SRC_DEFAULT: default, do not change eeprom config @@ -107,8 +105,6 @@ struct si5351_clkout_config { * @clkout: array of clkout configuration */ struct si5351_platform_data { - struct clk *clk_xtal; - struct clk *clk_clkin; enum si5351_pll_src pll_src[2]; struct si5351_clkout_config clkout[8]; }; -- cgit v1.2.3-71-gd317 From 2d94e5224e81c58986a8cf44a3bf4830ce5cb96e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 May 2015 14:26:34 -0700 Subject: HID: hid-sensor-hub: Fix debug lock warning When CONFIG_DEBUG_LOCK_ALLOC is defined, mutex magic is compared and warned for (l->magic != l), here l is the address of mutex pointer. In hid-sensor-hub as part of hsdev creation, a per hsdev mutex is initialized during MFD cell creation. This hsdev, which contains, mutex is part of platform data for the a cell. But platform_data is copied in platform_device_add_data() in platform.c. This copy will copy the whole hsdev structure including mutex. But once copied the magic will no longer match. So when client driver call sensor_hub_input_attr_get_raw_value, this will trigger mutex warning. So to avoid this allocate mutex dynamically. This will be same even after copy. Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 13 ++++++++++--- include/linux/hid-sensor-hub.h | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index c3f6f1e311ea..090a1ba0abb6 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -294,7 +294,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, if (!report) return -EINVAL; - mutex_lock(&hsdev->mutex); + mutex_lock(hsdev->mutex_ptr); if (flag == SENSOR_HUB_SYNC) { memset(&hsdev->pending, 0, sizeof(hsdev->pending)); init_completion(&hsdev->pending.ready); @@ -328,7 +328,7 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, kfree(hsdev->pending.raw_data); hsdev->pending.status = false; } - mutex_unlock(&hsdev->mutex); + mutex_unlock(hsdev->mutex_ptr); return ret_val; } @@ -667,7 +667,14 @@ static int sensor_hub_probe(struct hid_device *hdev, hsdev->vendor_id = hdev->vendor; hsdev->product_id = hdev->product; hsdev->usage = collection->usage; - mutex_init(&hsdev->mutex); + hsdev->mutex_ptr = devm_kzalloc(&hdev->dev, + sizeof(struct mutex), + GFP_KERNEL); + if (!hsdev->mutex_ptr) { + ret = -ENOMEM; + goto err_stop_hw; + } + mutex_init(hsdev->mutex_ptr); hsdev->start_collection_index = i; if (last_hsdev) last_hsdev->end_collection_index = i; diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0408421d885f..0042bf330b99 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -74,7 +74,7 @@ struct sensor_hub_pending { * @usage: Usage id for this hub device instance. * @start_collection_index: Starting index for a phy type collection * @end_collection_index: Last index for a phy type collection - * @mutex: synchronizing mutex. + * @mutex_ptr: synchronizing mutex pointer. * @pending: Holds information of pending sync read request. */ struct hid_sensor_hub_device { @@ -84,7 +84,7 @@ struct hid_sensor_hub_device { u32 usage; int start_collection_index; int end_collection_index; - struct mutex mutex; + struct mutex *mutex_ptr; struct sensor_hub_pending pending; }; -- cgit v1.2.3-71-gd317 From 336b7e1f230912cd8df2497be8dd7be4647d8fc8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 11 May 2015 14:06:32 -0400 Subject: block: remove export for blk_queue_bio With commit ff36ab345 ("dm: remove request-based logic from make_request_fn wrapper") DM no longer calls blk_queue_bio() directly, so remove its export. Doing so required a forward declaration in blk-core.c. Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++-- include/linux/blkdev.h | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 7871603f0a29..03b5f8d77f37 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -734,6 +734,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) } EXPORT_SYMBOL(blk_init_queue_node); +static void blk_queue_bio(struct request_queue *q, struct bio *bio); + struct request_queue * blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, spinlock_t *lock) @@ -1578,7 +1580,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -void blk_queue_bio(struct request_queue *q, struct bio *bio) +static void blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1686,7 +1688,6 @@ out_unlock: spin_unlock_irq(q->queue_lock); } } -EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */ /* * If bio->bi_dev is a partition, remap the location diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516f24de..5d93a6645e88 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -821,8 +821,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern void blk_queue_bio(struct request_queue *q, struct bio *bio); - /* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be -- cgit v1.2.3-71-gd317 From f7bcb70ebae0dcdb5a2d859b09e4465784d99029 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 8 May 2015 13:47:23 -0700 Subject: ktime: Fix ktime_divns to do signed division It was noted that the 32bit implementation of ktime_divns() was doing unsigned division and didn't properly handle negative values. And when a ktime helper was changed to utilize ktime_divns, it caused a regression on some IR blasters. See the following bugzilla for details: https://bugzilla.redhat.com/show_bug.cgi?id=1200353 This patch fixes the problem in ktime_divns by checking and preserving the sign bit, and then reapplying it if appropriate after the division, it also changes the return type to a s64 to make it more obvious this is expected. Nicolas also pointed out that negative dividers would cause infinite loops on 32bit systems, negative dividers is unlikely for users of this function, but out of caution this patch adds checks for negative dividers for both 32-bit (BUG_ON) and 64-bit(WARN_ON) versions to make sure no such use cases creep in. [ tglx: Hand an u64 to do_div() to avoid the compiler warning ] Fixes: 166afb64511e 'ktime: Sanitize ktime_to_us/ms conversion' Reported-and-tested-by: Trevor Cordes Signed-off-by: John Stultz Acked-by: Nicolas Pitre Cc: Ingo Molnar Cc: Josh Boyer Cc: One Thousand Gnomes Cc: Link: http://lkml.kernel.org/r/1431118043-23452-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/ktime.h | 27 +++++++++++++++++++++------ kernel/time/hrtimer.c | 14 ++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 5fc3d1083071..2b6a204bd8d4 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -166,19 +166,34 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) } #if BITS_PER_LONG < 64 -extern u64 __ktime_divns(const ktime_t kt, s64 div); -static inline u64 ktime_divns(const ktime_t kt, s64 div) +extern s64 __ktime_divns(const ktime_t kt, s64 div); +static inline s64 ktime_divns(const ktime_t kt, s64 div) { + /* + * Negative divisors could cause an inf loop, + * so bug out here. + */ + BUG_ON(div < 0); if (__builtin_constant_p(div) && !(div >> 32)) { - u64 ns = kt.tv64; - do_div(ns, div); - return ns; + s64 ns = kt.tv64; + u64 tmp = ns < 0 ? -ns : ns; + + do_div(tmp, div); + return ns < 0 ? -tmp : tmp; } else { return __ktime_divns(kt, div); } } #else /* BITS_PER_LONG < 64 */ -# define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) +static inline s64 ktime_divns(const ktime_t kt, s64 div) +{ + /* + * 32-bit implementation cannot handle negative divisors, + * so catch them on 64bit as well. + */ + WARN_ON(div < 0); + return kt.tv64 / div; +} #endif static inline s64 ktime_to_us(const ktime_t kt) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 76d4bd962b19..93ef7190bdea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -266,21 +266,23 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) /* * Divide a ktime value by a nanosecond value */ -u64 __ktime_divns(const ktime_t kt, s64 div) +s64 __ktime_divns(const ktime_t kt, s64 div) { - u64 dclc; int sft = 0; + s64 dclc; + u64 tmp; dclc = ktime_to_ns(kt); + tmp = dclc < 0 ? -dclc : dclc; + /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; div >>= 1; } - dclc >>= sft; - do_div(dclc, (unsigned long) div); - - return dclc; + tmp >>= sft; + do_div(tmp, (unsigned long) div); + return dclc < 0 ? -tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); #endif /* BITS_PER_LONG >= 64 */ -- cgit v1.2.3-71-gd317 From 07ee0722bf941960fb3888f9c9b5839473372fd1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 May 2015 11:30:47 +0800 Subject: rhashtable: Add cap on number of elements in hash table We currently have no limit on the number of elements in a hash table. This is a problem because some users (tipc) set a ceiling on the maximum table size and when that is reached the hash table may degenerate. Others may encounter OOM when growing and if we allow insertions when that happens the hash table perofrmance may also suffer. This patch adds a new paramater insecure_max_entries which becomes the cap on the table. If unset it defaults to max_size * 2. If it is also zero it means that there is no cap on the number of elements in the table. However, the table will grow whenever the utilisation hits 100% and if that growth fails, you will get ENOMEM on insertion. As allowing oversubscription is potentially dangerous, the name contains the word insecure. Note that the cap is not a hard limit. This is done for performance reasons as enforcing a hard limit will result in use of atomic ops that are heavier than the ones we currently use. The reasoning is that we're only guarding against a gross over- subscription of the table, rather than a small breach of the limit. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 19 +++++++++++++++++++ lib/rhashtable.c | 11 +++++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index dbcbcc59aa92..843ceca9a21e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -17,6 +17,7 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H +#include #include #include #include @@ -100,6 +101,7 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed + * @insecure_max_entries: Maximum number of entries (may be exceeded) * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker @@ -115,6 +117,7 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; + unsigned int insecure_max_entries; unsigned int max_size; unsigned int min_size; u32 nulls_base; @@ -286,6 +289,18 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht, (!ht->p.max_size || tbl->size < ht->p.max_size); } +/** + * rht_grow_above_max - returns true if table is above maximum + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_max(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return ht->p.insecure_max_entries && + atomic_read(&ht->nelems) >= ht->p.insecure_max_entries; +} + /* The bucket lock is selected based on the hash and protects mutations * on a group of hash buckets. * @@ -589,6 +604,10 @@ restart: goto out; } + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto out; + if (unlikely(rht_grow_above_100(ht, tbl))) { slow_path: spin_unlock_bh(lock); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b28df4019ade..4396434e4715 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -14,6 +14,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -446,6 +447,10 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, if (key && rhashtable_lookup_fast(ht, key, ht->p)) goto exit; + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto exit; + err = -EAGAIN; if (rhashtable_check_elasticity(ht, tbl, hash) || rht_grow_above_100(ht, tbl)) @@ -738,6 +743,12 @@ int rhashtable_init(struct rhashtable *ht, if (params->max_size) ht->p.max_size = rounddown_pow_of_two(params->max_size); + if (params->insecure_max_entries) + ht->p.insecure_max_entries = + rounddown_pow_of_two(params->insecure_max_entries); + else + ht->p.insecure_max_entries = ht->p.max_size * 2; + ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); /* The maximum (not average) chain length grows with the -- cgit v1.2.3-71-gd317 From ab3f02fc237211f0583c1e7ba3bf504747be9b8d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 May 2015 10:52:27 +0200 Subject: locking/arch: Add WRITE_ONCE() to set_mb() Since we assume set_mb() to result in a single store followed by a full memory barrier, employ WRITE_ONCE(). Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/barrier.h | 2 +- arch/arm64/include/asm/barrier.h | 2 +- arch/ia64/include/asm/barrier.h | 2 +- arch/metag/include/asm/barrier.h | 2 +- arch/mips/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/barrier.h | 2 +- arch/s390/include/asm/barrier.h | 2 +- arch/sparc/include/asm/barrier_64.h | 2 +- arch/x86/include/asm/barrier.h | 2 +- arch/x86/um/asm/barrier.h | 2 +- include/asm-generic/barrier.h | 2 +- include/linux/compiler.h | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index d2f81e6b8c1c..993150aea681 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 71f19c4dc0de..ff7de78d01b8 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -114,7 +114,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define nop() asm volatile("nop"); #define smp_mb__before_atomic() smp_mb() diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index f6769eb2bbf9..03117e7b2ab8 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -82,7 +82,7 @@ do { \ * acquire vs release semantics but we can't discuss this stuff with * Linus just yet. Grrr... */ -#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index d703d8e26a65..97eb018a2933 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -84,7 +84,7 @@ static inline void fence(void) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 2b8bbbcb9be0..cff1bbdaa74a 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -113,7 +113,7 @@ #endif #define set_mb(var, value) \ - do { var = value; smp_mb(); } while (0) + do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index a3bf5be111ff..2a072e48780d 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 8d724718ec21..b66cd53d35fc 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 76648941fea7..125fec7512f4 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -41,7 +41,7 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define dma_wmb() wmb() #define set_mb(__var, __value) \ - do { __var = __value; membar_safe("#StoreLoad"); } while(0) + do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0) #ifdef CONFIG_SMP #define smp_mb() mb() diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 959e45b81fe2..9de5cde133a1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -40,7 +40,7 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #endif /* SMP */ #define read_barrier_depends() do { } while (0) diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 7e8a1a650435..cc0cb01f346d 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -39,7 +39,7 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index f5c40b0fadc2..3938716b44d7 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -67,7 +67,7 @@ #endif #ifndef set_mb -#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #endif #ifndef smp_mb__before_atomic diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a7c0941d10da..03e227ba481c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #define WRITE_ONCE(x, val) \ - ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #endif /* __KERNEL__ */ -- cgit v1.2.3-71-gd317 From b92b8b35a2e38bde319fd1d68ec84628c1f1b0fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 May 2015 10:51:55 +0200 Subject: locking/arch: Rename set_mb() to smp_store_mb() Since set_mb() is really about an smp_mb() -- not a IO/DMA barrier like mb() rename it to match the recent smp_load_acquire() and smp_store_release(). Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/memory-barriers.txt | 6 +++--- arch/arm/include/asm/barrier.h | 2 +- arch/arm64/include/asm/barrier.h | 2 +- arch/ia64/include/asm/barrier.h | 7 +------ arch/metag/include/asm/barrier.h | 2 +- arch/mips/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/barrier.h | 2 +- arch/s390/include/asm/barrier.h | 2 +- arch/sh/include/asm/barrier.h | 2 +- arch/sparc/include/asm/barrier_64.h | 2 +- arch/x86/include/asm/barrier.h | 4 ++-- arch/x86/um/asm/barrier.h | 3 ++- fs/select.c | 6 +++--- include/asm-generic/barrier.h | 4 ++-- include/linux/sched.h | 8 ++++---- kernel/futex.c | 2 +- kernel/locking/qspinlock_paravirt.h | 2 +- kernel/sched/wait.c | 4 ++-- 18 files changed, 29 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f95746189b5d..fe4020e4b468 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1662,7 +1662,7 @@ CPU from reordering them. There are some more advanced barrier functions: - (*) set_mb(var, value) + (*) smp_store_mb(var, value) This assigns the value to the variable and then inserts a full memory barrier after it, depending on the function. It isn't guaranteed to @@ -1975,7 +1975,7 @@ after it has altered the task state: CPU 1 =============================== set_current_state(); - set_mb(); + smp_store_mb(); STORE current->state LOAD event_indicated @@ -2016,7 +2016,7 @@ between the STORE to indicate the event and the STORE to set TASK_RUNNING: CPU 1 CPU 2 =============================== =============================== set_current_state(); STORE event_indicated - set_mb(); wake_up(); + smp_store_mb(); wake_up(); STORE current->state STORE current->state LOAD event_indicated diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 993150aea681..6c2327e1c732 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index ff7de78d01b8..0fa47c4275cb 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -114,7 +114,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define nop() asm volatile("nop"); #define smp_mb__before_atomic() smp_mb() diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index 03117e7b2ab8..843ba435e43b 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -77,12 +77,7 @@ do { \ ___p1; \ }) -/* - * XXX check on this ---I suspect what Linus really wants here is - * acquire vs release semantics but we can't discuss this stuff with - * Linus just yet. Grrr... - */ -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index 97eb018a2933..5a696e507930 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -84,7 +84,7 @@ static inline void fence(void) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index cff1bbdaa74a..7ecba84656d4 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -112,7 +112,7 @@ #define __WEAK_LLSC_MB " \n" #endif -#define set_mb(var, value) \ +#define smp_store_mb(var, value) \ do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 2a072e48780d..39505d660a70 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index b66cd53d35fc..e6f8615a11eb 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h index 43715308b068..bf91037db4e0 100644 --- a/arch/sh/include/asm/barrier.h +++ b/arch/sh/include/asm/barrier.h @@ -32,7 +32,7 @@ #define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") #endif -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #include diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 125fec7512f4..809941e33e12 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -40,7 +40,7 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define dma_rmb() rmb() #define dma_wmb() wmb() -#define set_mb(__var, __value) \ +#define smp_store_mb(__var, __value) \ do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0) #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 9de5cde133a1..e51a8f803f55 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -35,12 +35,12 @@ #define smp_mb() mb() #define smp_rmb() dma_rmb() #define smp_wmb() barrier() -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #endif /* SMP */ #define read_barrier_depends() do { } while (0) diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index cc0cb01f346d..b9531d343134 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -39,7 +39,8 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) + +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) diff --git a/fs/select.c b/fs/select.c index f684c750e08a..015547330e88 100644 --- a/fs/select.c +++ b/fs/select.c @@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) * doesn't imply write barrier and the users expect write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with set_mb() in poll_schedule_timeout. + * and is paired with smp_store_mb() in poll_schedule_timeout. */ smp_wmb(); pwq->triggered = 1; @@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, /* * Prepare for the next iteration. * - * The following set_mb() serves two purposes. First, it's + * The following smp_store_mb() serves two purposes. First, it's * the counterpart rmb of the wmb in pollwake() such that data * written before wake up is always visible after wake up. * Second, the full barrier guarantees that triggered clearing @@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, * this problem doesn't exist for the first iteration as * add_wait_queue() has full barrier semantics. */ - set_mb(pwq->triggered, 0); + smp_store_mb(pwq->triggered, 0); return rc; } diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 3938716b44d7..e6a83d712ef6 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -66,8 +66,8 @@ #define smp_read_barrier_depends() do { } while (0) #endif -#ifndef set_mb -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#ifndef smp_store_mb +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #endif #ifndef smp_mb__before_atomic diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..18f197223ebd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -252,7 +252,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_task_state(tsk, state_value) \ do { \ (tsk)->task_state_change = _THIS_IP_; \ - set_mb((tsk)->state, (state_value)); \ + smp_store_mb((tsk)->state, (state_value)); \ } while (0) /* @@ -274,7 +274,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ - set_mb(current->state, (state_value)); \ + smp_store_mb(current->state, (state_value)); \ } while (0) #else @@ -282,7 +282,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) #define set_task_state(tsk, state_value) \ - set_mb((tsk)->state, (state_value)) + smp_store_mb((tsk)->state, (state_value)) /* * set_current_state() includes a barrier so that the write of current->state @@ -298,7 +298,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_current_state(state_value) \ do { current->state = (state_value); } while (0) #define set_current_state(state_value) \ - set_mb(current->state, (state_value)) + smp_store_mb(current->state, (state_value)) #endif diff --git a/kernel/futex.c b/kernel/futex.c index 2579e407ff67..55ca63ad9622 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2055,7 +2055,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, { /* * The task state is guaranteed to be set before another task can - * wake it. set_current_state() is implemented using set_mb() and + * wake it. set_current_state() is implemented using smp_store_mb() and * queue_me() calls spin_unlock() upon completion, both serializing * access to the hash list and forcing another memory barrier. */ diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 27ab96dca68c..04ab18151cc8 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -175,7 +175,7 @@ static void pv_wait_node(struct mcs_spinlock *node) * * Matches the xchg() from pv_kick_node(). */ - set_mb(pn->state, vcpu_halted); + smp_store_mb(pn->state, vcpu_halted); if (!READ_ONCE(node->locked)) pv_wait(&pn->state, vcpu_halted); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 852143a79f36..9bc82329eaad 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout) * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss * an event. */ - set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ + smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ return timeout; } @@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) * doesn't imply write barrier and the users expects write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with set_mb() in wait_woken(). + * and is paired with smp_store_mb() in wait_woken(). */ smp_wmb(); /* C */ wait->flags |= WQ_FLAG_WOKEN; -- cgit v1.2.3-71-gd317 From faecbb45ebefb20260ad4a631e011e93c896cb73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 May 2015 13:42:25 +0200 Subject: Revert "netfilter: bridge: query conntrack about skb dnat" This reverts commit c055d5b03bb4cb69d349d787c9787c0383abd8b2. There are two issues: 'dnat_took_place' made me think that this is related to -j DNAT/MASQUERADE. But thats only one part of the story. This is also relevant for SNAT when we undo snat translation in reverse/reply direction. Furthermore, I originally wanted to do this mainly to avoid storing ipv6 addresses once we make DNAT/REDIRECT work for ipv6 on bridges. However, I forgot about SNPT/DNPT which is stateless. So we can't escape storing address for ipv6 anyway. Might as well do it for ipv4 too. Reported-and-tested-by: Bernhard Thaler Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 1 + net/bridge/br_netfilter.c | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66e374d62f64..f15154a879c7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -176,6 +176,7 @@ struct nf_bridge_info { struct net_device *physindev; struct net_device *physoutdev; char neigh_header[8]; + __be32 ipv4_daddr; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ab55e2472beb..60ddfbeb47f5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -37,10 +37,6 @@ #include #include -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#include -#endif - #include #include "br_private.h" #ifdef CONFIG_SYSCTL @@ -350,24 +346,15 @@ free_skb: return 0; } -static bool dnat_took_place(const struct sk_buff *skb) +static bool daddr_was_changed(const struct sk_buff *skb, + const struct nf_bridge_info *nf_bridge) { -#if IS_ENABLED(CONFIG_NF_CONNTRACK) - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (!ct || nf_ct_is_untracked(ct)) - return false; - - return test_bit(IPS_DST_NAT_BIT, &ct->status); -#else - return false; -#endif + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; } /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. + * This is also true when SNAT takes place (for the reply direction). * * There are two cases to consider: * 1. The packet was DNAT'ed to a device in the same bridge @@ -421,7 +408,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - if (dnat_took_place(skb)) { + if (daddr_was_changed(skb, nf_bridge)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -632,6 +619,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { + struct nf_bridge_info *nf_bridge; struct net_bridge_port *p; struct net_bridge *br; __u32 len = nf_bridge_encap_header_len(skb); @@ -669,6 +657,9 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, if (!setup_pre_routing(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; + skb->protocol = htons(ETH_P_IP); NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, -- cgit v1.2.3-71-gd317 From d654976cbf852ee20612ee10dbe57cdacda9f452 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 May 2015 21:51:19 -0700 Subject: tcp: fix a potential deadlock in tcp_get_info() Taking socket spinlock in tcp_get_info() can deadlock, as inet_diag_dump_icsk() holds the &hashinfo->ehash_locks[i], while packet processing can use the reverse locking order. We could avoid this locking for TCP_LISTEN states, but lockdep would certainly get confused as all TCP sockets share same lockdep classes. [ 523.722504] ====================================================== [ 523.728706] [ INFO: possible circular locking dependency detected ] [ 523.734990] 4.1.0-dbg-DEV #1676 Not tainted [ 523.739202] ------------------------------------------------------- [ 523.745474] ss/18032 is trying to acquire lock: [ 523.750002] (slock-AF_INET){+.-...}, at: [] tcp_get_info+0x2c4/0x360 [ 523.758129] [ 523.758129] but task is already holding lock: [ 523.763968] (&(&hashinfo->ehash_locks[i])->rlock){+.-...}, at: [] inet_diag_dump_icsk+0x1d5/0x6c0 [ 523.774661] [ 523.774661] which lock already depends on the new lock. [ 523.774661] [ 523.782850] [ 523.782850] the existing dependency chain (in reverse order) is: [ 523.790326] -> #1 (&(&hashinfo->ehash_locks[i])->rlock){+.-...}: [ 523.796599] [] lock_acquire+0xbb/0x270 [ 523.802565] [] _raw_spin_lock+0x38/0x50 [ 523.808628] [] __inet_hash_nolisten+0x78/0x110 [ 523.815273] [] tcp_v4_syn_recv_sock+0x24b/0x350 [ 523.822067] [] tcp_check_req+0x3c1/0x500 [ 523.828199] [] tcp_v4_do_rcv+0x239/0x3d0 [ 523.834331] [] tcp_v4_rcv+0xa8e/0xc10 [ 523.840202] [] ip_local_deliver_finish+0x133/0x3e0 [ 523.847214] [] ip_local_deliver+0xaa/0xc0 [ 523.853440] [] ip_rcv_finish+0x168/0x5c0 [ 523.859624] [] ip_rcv+0x307/0x420 Lets use u64_sync infrastructure instead. As a bonus, 64bit arches get optimized, as these are nop for them. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 11 +++++++---- net/ipv4/tcp_fastopen.c | 4 ++++ net/ipv4/tcp_input.c | 4 ++++ 4 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e8bbf403618f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -158,6 +158,8 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ + u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46efa03d2b11..f1377f2a0472 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -402,6 +402,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; + u64_stats_init(&tp->syncp); tp->reordering = sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2598,6 +2599,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; + unsigned int start; u32 rate; memset(info, 0, sizeof(*info)); @@ -2665,10 +2667,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) rate = READ_ONCE(sk->sk_max_pacing_rate); info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; - spin_lock_bh(&sk->sk_lock.slock); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; - spin_unlock_bh(&sk->sk_lock.slock); + do { + start = u64_stats_fetch_begin_irq(&tp->syncp); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 3c673d5e6cff..46b087a27503 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -206,6 +206,10 @@ static bool tcp_fastopen_create_child(struct sock *sk, skb_set_owner_r(skb2, child); __skb_queue_tail(&child->sk_receive_queue, skb2); tp->syn_data_acked = 1; + + /* u64_stats_update_begin(&tp->syncp) not needed here, + * as we certainly are not changing upper 32bit value (0) + */ tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; } else { end_seq = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 243d674b3ef5..c9ab964189a0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3286,7 +3286,9 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { u32 delta = ack - tp->snd_una; + u64_stats_update_begin(&tp->syncp); tp->bytes_acked += delta; + u64_stats_update_end(&tp->syncp); tp->snd_una = ack; } @@ -3295,7 +3297,9 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { u32 delta = seq - tp->rcv_nxt; + u64_stats_update_begin(&tp->syncp); tp->bytes_received += delta; + u64_stats_update_end(&tp->syncp); tp->rcv_nxt = seq; } -- cgit v1.2.3-71-gd317 From cc4a84c3da6f9dd6a297dc81fe4437643a60fe03 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 May 2015 14:07:30 -0700 Subject: net: phy: bcm7xxx: Fix 7425 PHY ID and flags While adding support for 7425 PHY in the 7xxx PHY driver, the ID that was used was actually coming from an external PHY: a BCM5461x. Fix this by using the proper ID for the internal 7425 PHY and set the PHY_IS_INTERNAL flag, otherwise consumers of this PHY driver would not be able to properly identify it as such. Fixes: d068b02cfdfc2 ("net: phy: add BCM7425 and BCM7429 PHYs") Signed-off-by: Florian Fainelli Reviewed-by: Petri Gynther Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 2 +- include/linux/brcmphy.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 64c74c6a4828..b5dc59de094e 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -404,7 +404,7 @@ static struct phy_driver bcm7xxx_driver[] = { .name = "Broadcom BCM7425", .features = PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = 0, + .flags = PHY_IS_INTERNAL, .config_init = bcm7xxx_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ae2982c0f7a6..656da2a12ffe 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -17,7 +17,7 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 -#define PHY_ID_BCM7425 0x03625e60 +#define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 -- cgit v1.2.3-71-gd317 From 7d010fdf299929f9583ce5e17da629dcd83c36ef Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:13 +0200 Subject: x86/mm/mtrr: Avoid #ifdeffery with phys_wc_to_mtrr_index() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only one user but since we're going to bury MTRR next out of access to drivers, expose this last piece of API to drivers in a general fashion only needing io.h for access to helpers. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Abhilash Kesavan Cc: Andrew Morton Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Borislav Petkov Cc: Brian Gerst Cc: Catalin Marinas Cc: Cristian Stoica Cc: Daniel Vetter Cc: Dave Airlie Cc: Dave Hansen Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Matthias Brugger Cc: Mel Gorman Cc: Peter Zijlstra Cc: Suresh Siddha Cc: Thierry Reding Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Toshi Kani Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: Will Deacon Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1429722736-4473-1-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 3 +++ arch/x86/include/asm/mtrr.h | 5 ----- arch/x86/kernel/cpu/mtrr/main.c | 6 +++--- drivers/gpu/drm/drm_ioctl.c | 14 +------------- include/linux/io.h | 7 +++++++ 5 files changed, 14 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 4afc05ffa566..a2b97404922d 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -339,6 +339,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, #define IO_SPACE_LIMIT 0xffff #ifdef CONFIG_MTRR +extern int __must_check arch_phys_wc_index(int handle); +#define arch_phys_wc_index arch_phys_wc_index + extern int __must_check arch_phys_wc_add(unsigned long base, unsigned long size); extern void arch_phys_wc_del(int handle); diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index a31759e1edd9..b94f6f64e23d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -48,7 +48,6 @@ extern void mtrr_aps_init(void); extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); -extern int phys_wc_to_mtrr_index(int handle); # else static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { @@ -84,10 +83,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) { } -static inline int phys_wc_to_mtrr_index(int handle) -{ - return -1; -} #define mtrr_ap_init() do {} while (0) #define mtrr_bp_init() do {} while (0) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 04aceb7e6443..81baf5fee0e1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -580,7 +580,7 @@ void arch_phys_wc_del(int handle) EXPORT_SYMBOL(arch_phys_wc_del); /* - * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value + * arch_phys_wc_index - translates arch_phys_wc_add's return value * @handle: Return value from arch_phys_wc_add * * This will turn the return value from arch_phys_wc_add into an mtrr @@ -590,14 +590,14 @@ EXPORT_SYMBOL(arch_phys_wc_del); * in printk line. Alas there is an illegitimate use in some ancient * drm ioctls. */ -int phys_wc_to_mtrr_index(int handle) +int arch_phys_wc_index(int handle) { if (handle < MTRR_TO_PHYS_WC_OFFSET) return -1; else return handle - MTRR_TO_PHYS_WC_OFFSET; } -EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index); +EXPORT_SYMBOL_GPL(arch_phys_wc_index); /* * HACK ALERT! diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 266dcd6cdf3b..0a957828b3bd 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -36,9 +36,6 @@ #include #include -#ifdef CONFIG_X86 -#include -#endif static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -197,16 +194,7 @@ static int drm_getmap(struct drm_device *dev, void *data, map->type = r_list->map->type; map->flags = r_list->map->flags; map->handle = (void *)(unsigned long) r_list->user_token; - -#ifdef CONFIG_X86 - /* - * There appears to be exactly one user of the mtrr index: dritest. - * It's easy enough to keep it working on non-PAT systems. - */ - map->mtrr = phys_wc_to_mtrr_index(r_list->map->mtrr); -#else - map->mtrr = -1; -#endif + map->mtrr = arch_phys_wc_index(r_list->map->mtrr); mutex_unlock(&dev->struct_mutex); diff --git a/include/linux/io.h b/include/linux/io.h index 986f2bffea1e..04cce4da3685 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -111,6 +111,13 @@ static inline void arch_phys_wc_del(int handle) } #define arch_phys_wc_add arch_phys_wc_add +#ifndef arch_phys_wc_index +static inline int arch_phys_wc_index(int handle) +{ + return -1; +} +#define arch_phys_wc_index arch_phys_wc_index +#endif #endif #endif /* _LINUX_IO_H */ -- cgit v1.2.3-71-gd317 From f36963c9d3f6f415732710da3acdd8608a9fa0e5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 9 May 2015 03:14:13 +0930 Subject: cpumask_set_cpu_local_first => cpumask_local_spread, lament da91309e0a7e (cpumask: Utility function to set n'th cpu...) created a genuinely weird function. I never saw it before, it went through DaveM. (He only does this to make us other maintainers feel better about our own mistakes.) cpumask_set_cpu_local_first's purpose is say "I need to spread things across N online cpus, choose the ones on this numa node first"; you call it in a loop. It can fail. One of the two callers ignores this, the other aborts and fails the device open. It can fail in two ways: allocating the off-stack cpumask, or through a convoluted codepath which AFAICT can only occur if cpu_online_mask changes. Which shouldn't happen, because if cpu_online_mask can change while you call this, it could return a now-offline cpu anyway. It contains a nonsensical test "!cpumask_of_node(numa_node)". This was drawn to my attention by Geert, who said this causes a warning on Sparc. It sets a single bit in a cpumask instead of returning a cpu number, because that's what the callers want. It could be made more efficient by passing the previous cpu rather than an index, but that would be more invasive to the callers. Fixes: da91309e0a7e8966d916a74cce42ed170fde06bf Signed-off-by: Rusty Russell (then rebased) Tested-by: Amir Vadai Acked-by: Amir Vadai Acked-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 +-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 10 ++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 +-- include/linux/cpumask.h | 6 +-- lib/cpumask.c | 74 +++++++++----------------- 5 files changed, 37 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a6dcbf850c1f..6f9ffb9026cd 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + int numa_node = dev_to_node(&adapter->pdev->dev); if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) return -ENOMEM; - cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), - eqo->affinity_mask); - + cpumask_set_cpu(cpumask_local_spread(i, numa_node), + eqo->affinity_mask); netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 32f5ec737472..cf467a9f6cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) { struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx]; int numa_node = priv->mdev->dev->numa_node; - int ret = 0; if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) return -ENOMEM; - ret = cpumask_set_cpu_local_first(ring_idx, numa_node, - ring->affinity_mask); - if (ret) - free_cpumask_var(ring->affinity_mask); - - return ret; + cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node), + ring->affinity_mask); + return 0; } static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index f7bf312fb443..7bed3a88579f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->queue_index = queue_index; if (queue_index < priv->num_tx_rings_p_up) - cpumask_set_cpu_local_first(queue_index, - priv->mdev->dev->numa_node, - &ring->affinity_mask); + cpumask_set_cpu(cpumask_local_spread(queue_index, + priv->mdev->dev->numa_node), + &ring->affinity_mask); *pring = ring; return 0; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 27e285b92b5f..59915ea5373c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, return 1; } -static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +static inline unsigned int cpumask_local_spread(unsigned int i, int node) { - set_bit(0, cpumask_bits(dstp)); - return 0; } @@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); +unsigned int cpumask_local_spread(unsigned int i, int node); /** * for_each_cpu - iterate over every cpu in a mask diff --git a/lib/cpumask.c b/lib/cpumask.c index 830dd5dec40f..5f627084f2e9 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -139,64 +139,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first - * + * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number - * @numa_node: local numa_node - * @dstp: cpumask with the relevant cpu bit set according to the policy + * @node: local numa_node * - * This function sets the cpumask according to a numa aware policy. - * cpumask could be used as an affinity hint for the IRQ related to a - * queue. When the policy is to spread queues across cores - local cores - * first. + * This function selects an online CPU according to a numa aware policy; + * local cpus are returned first, followed by non-local ones, then it + * wraps around. * - * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set - * the cpu bit and need to re-call the function. + * It's not very efficient, but useful for setup. */ -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +unsigned int cpumask_local_spread(unsigned int i, int node) { - cpumask_var_t mask; int cpu; - int ret = 0; - - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (numa_node == -1 || !cpumask_of_node(numa_node)) { - /* Use all online cpu's for non numa aware system */ - cpumask_copy(mask, cpu_online_mask); + if (node == -1) { + for_each_cpu(cpu, cpu_online_mask) + if (i-- == 0) + return cpu; } else { - int n; - - cpumask_and(mask, - cpumask_of_node(numa_node), cpu_online_mask); - - n = cpumask_weight(mask); - if (i >= n) { - i -= n; - - /* If index > number of local cpu's, mask out local - * cpu's - */ - cpumask_andnot(mask, cpu_online_mask, mask); + /* NUMA first. */ + for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) + if (i-- == 0) + return cpu; + + for_each_cpu(cpu, cpu_online_mask) { + /* Skip NUMA nodes, done above. */ + if (cpumask_test_cpu(cpu, cpumask_of_node(node))) + continue; + + if (i-- == 0) + return cpu; } } - - for_each_cpu(cpu, mask) { - if (--i < 0) - goto out; - } - - ret = -EAGAIN; - -out: - free_cpumask_var(mask); - - if (!ret) - cpumask_set_cpu(cpu, dstp); - - return ret; + BUG(); } -EXPORT_SYMBOL(cpumask_set_cpu_local_first); +EXPORT_SYMBOL(cpumask_local_spread); -- cgit v1.2.3-71-gd317 From 80188b0d77d7426b494af739ac129e0e684acb84 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 29 May 2015 07:39:34 +1000 Subject: percpu_counter: batch size aware __percpu_counter_compare() XFS uses non-stanard batch sizes for avoiding frequent global counter updates on it's allocated inode counters, as they increment or decrement in batches of 64 inodes. Hence the standard percpu counter batch of 32 means that the counter is effectively a global counter. Currently Xfs uses a batch size of 128 so that it doesn't take the global lock on every single modification. However, Xfs also needs to compare accurately against zero, which means we need to use percpu_counter_compare(), and that has a hard-coded batch size of 32, and hence will spuriously fail to detect when it is supposed to use precise comparisons and hence the accounting goes wrong. Add __percpu_counter_compare() to take a custom batch size so we can use it sanely in XFS and factor percpu_counter_compare() to use it. Signed-off-by: Dave Chinner Acked-by: Tejun Heo Signed-off-by: Dave Chinner --- include/linux/percpu_counter.h | 13 ++++++++++++- lib/percpu_counter.c | 6 +++--- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 50e50095c8d1..84a109449610 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs); +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); + +static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); +} static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) return 0; } +static inline int +__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) +{ + return percpu_counter_compare(fbc, rhs); +} + static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 48144cdae819..f051d69f0910 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ - if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else @@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) else return 0; } -EXPORT_SYMBOL(percpu_counter_compare); +EXPORT_SYMBOL(__percpu_counter_compare); static int __init percpu_counter_startup(void) { -- cgit v1.2.3-71-gd317 From d6472302f242559d45dcf4ebace62508dc4d8aeb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 2 Jun 2015 19:01:38 +1000 Subject: x86/mm: Decouple from Nothing in uses anything from , so remove it from there and fix up the resulting build problems triggered on x86 {64|32}-bit {def|allmod|allno}configs. The breakages were triggering in places where x86 builds relied on vmalloc() facilities but did not include explicitly and relied on the implicit inclusion via . Also add: - to - to ... which were two other implicit header file dependencies. Suggested-by: David Miller Signed-off-by: Stephen Rothwell [ Tidied up the changelog. ] Acked-by: David Miller Acked-by: Takashi Iwai Acked-by: Viresh Kumar Acked-by: Vinod Koul Cc: Andrew Morton Cc: Anton Vorontsov Cc: Boris Ostrovsky Cc: Colin Cross Cc: David Vrabel Cc: H. Peter Anvin Cc: Haiyang Zhang Cc: James E.J. Bottomley Cc: Jaroslav Kysela Cc: K. Y. Srinivasan Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Kristen Carlson Accardi Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Suma Ramars Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 3 +-- arch/x86/kernel/crash.c | 1 + arch/x86/kernel/machine_kexec_64.c | 1 + arch/x86/mm/pageattr-test.c | 1 + arch/x86/mm/pageattr.c | 1 + arch/x86/xen/p2m.c | 1 + drivers/acpi/apei/erst.c | 1 + drivers/cpufreq/intel_pstate.c | 1 + drivers/dma/mic_x100_dma.c | 1 + drivers/net/hyperv/netvsc.c | 1 + drivers/net/hyperv/rndis_filter.c | 1 + drivers/scsi/fnic/fnic_debugfs.c | 1 + drivers/scsi/fnic/fnic_trace.c | 1 + include/linux/io.h | 1 + sound/pci/asihpi/hpioctl.c | 1 + 15 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index a2b97404922d..a94463063b46 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -40,6 +40,7 @@ #include #include #include +#include #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -198,8 +199,6 @@ extern void set_iounmap_nonlazy(void); #include -#include - /* * Convert a virtual cached pointer to an uncached pointer */ diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c76d3e37c6e1..e068d6683dba 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 415480d3ea84..11546b462fa6 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 6629f397b467..8ff686aa7e8c 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 70d221fe2eb4..fae3c5366ac0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b47124d4cd67..8b7f18e200aa 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index ed65e9c4b5b0..3670bbab57a3 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "apei-internal.h" diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6414661ac1c4..2ba53f4f6af2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c index 6de2e677be04..74d9db05a5ad 100644 --- a/drivers/dma/mic_x100_dma.c +++ b/drivers/dma/mic_x100_dma.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "mic_x100_dma.h" diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ea091bc5ff09..1e09243d5449 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "hyperv_net.h" diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 9118cea91882..35a482d526d9 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "hyperv_net.h" diff --git a/drivers/scsi/fnic/fnic_debugfs.c b/drivers/scsi/fnic/fnic_debugfs.c index 5980c10c734d..d6498fabe628 100644 --- a/drivers/scsi/fnic/fnic_debugfs.c +++ b/drivers/scsi/fnic/fnic_debugfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "fnic.h" static struct dentry *fnic_trace_debugfs_root; diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c index 65a9bde26974..4e15c4bf0795 100644 --- a/drivers/scsi/fnic/fnic_trace.c +++ b/drivers/scsi/fnic/fnic_trace.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fnic_io.h" #include "fnic.h" diff --git a/include/linux/io.h b/include/linux/io.h index 04cce4da3685..fb5a99800e77 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -19,6 +19,7 @@ #define _LINUX_IO_H #include +#include #include #include diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 6610bd096fc9..d17937b92331 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef MODULE_FIRMWARE MODULE_FIRMWARE("asihpi/dsp5000.bin"); -- cgit v1.2.3-71-gd317