From 9f99798ff49e73dded73a8c674044ea6fb6af651 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 31 Jul 2012 20:37:00 +0900 Subject: ptrace: mark __ptrace_may_access() static __ptrace_may_access() is used within only kernel/ptrace.c. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- kernel/ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index a232bb59d93f..1f5e55dda955 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -180,7 +180,8 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) return has_ns_capability(current, ns, CAP_SYS_PTRACE); } -int __ptrace_may_access(struct task_struct *task, unsigned int mode) +/* Returns 0 on success, -errno on denial. */ +static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; -- cgit v1.2.3-71-gd317 From 0a13c00e9d4502b8e3fd9260ce781758ff2c3970 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:44 -0700 Subject: workqueue: reorder queueing functions so that _on() variants are on top Currently, queue/schedule[_delayed]_work_on() are located below the counterpart without the _on postifx even though the latter is usually implemented using the former. Swap them. This is cleanup and doesn't cause any functional difference. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 13 ++--- kernel/workqueue.c | 124 +++++++++++++++++++++++----------------------- 2 files changed, 69 insertions(+), 68 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index af155450cabb..597034276611 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -365,23 +365,24 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, extern void destroy_workqueue(struct workqueue_struct *wq); -extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); extern int queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); -extern int queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *work, unsigned long delay); +extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); +extern int queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *work, unsigned long delay); extern void flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); -extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); +extern int schedule_work(struct work_struct *work); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, - unsigned long delay); + unsigned long delay); +extern int schedule_delayed_work(struct delayed_work *work, + unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); extern int keventd_up(void); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 692d97628a10..07d309e7e359 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1052,27 +1052,6 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, spin_unlock_irqrestore(&gcwq->lock, flags); } -/** - * queue_work - queue work on a workqueue - * @wq: workqueue to use - * @work: work to queue - * - * Returns 0 if @work was already on a queue, non-zero otherwise. - * - * We queue the work to the CPU on which it was submitted, but if the CPU dies - * it can be processed by another CPU. - */ -int queue_work(struct workqueue_struct *wq, struct work_struct *work) -{ - int ret; - - ret = queue_work_on(get_cpu(), wq, work); - put_cpu(); - - return ret; -} -EXPORT_SYMBOL_GPL(queue_work); - /** * queue_work_on - queue work on specific cpu * @cpu: CPU number to execute work on @@ -1097,31 +1076,34 @@ queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) } EXPORT_SYMBOL_GPL(queue_work_on); -static void delayed_work_timer_fn(unsigned long __data) -{ - struct delayed_work *dwork = (struct delayed_work *)__data; - struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); - - __queue_work(smp_processor_id(), cwq->wq, &dwork->work); -} - /** - * queue_delayed_work - queue work on a workqueue after delay + * queue_work - queue work on a workqueue * @wq: workqueue to use - * @dwork: delayable work to queue - * @delay: number of jiffies to wait before queueing + * @work: work to queue * * Returns 0 if @work was already on a queue, non-zero otherwise. + * + * We queue the work to the CPU on which it was submitted, but if the CPU dies + * it can be processed by another CPU. */ -int queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *dwork, unsigned long delay) +int queue_work(struct workqueue_struct *wq, struct work_struct *work) { - if (delay == 0) - return queue_work(wq, &dwork->work); + int ret; - return queue_delayed_work_on(-1, wq, dwork, delay); + ret = queue_work_on(get_cpu(), wq, work); + put_cpu(); + + return ret; +} +EXPORT_SYMBOL_GPL(queue_work); + +static void delayed_work_timer_fn(unsigned long __data) +{ + struct delayed_work *dwork = (struct delayed_work *)__data; + struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); + + __queue_work(smp_processor_id(), cwq->wq, &dwork->work); } -EXPORT_SYMBOL_GPL(queue_delayed_work); /** * queue_delayed_work_on - queue work on specific CPU after delay @@ -1178,6 +1160,24 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_delayed_work_on); +/** + * queue_delayed_work - queue work on a workqueue after delay + * @wq: workqueue to use + * @dwork: delayable work to queue + * @delay: number of jiffies to wait before queueing + * + * Returns 0 if @work was already on a queue, non-zero otherwise. + */ +int queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *dwork, unsigned long delay) +{ + if (delay == 0) + return queue_work(wq, &dwork->work); + + return queue_delayed_work_on(-1, wq, dwork, delay); +} +EXPORT_SYMBOL_GPL(queue_delayed_work); + /** * worker_enter_idle - enter idle state * @worker: worker which is entering idle state @@ -2877,6 +2877,19 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork) } EXPORT_SYMBOL(cancel_delayed_work_sync); +/* + * schedule_work_on - put work task on a specific cpu + * @cpu: cpu to put the work task on + * @work: job to be done + * + * This puts a job on a specific cpu + */ +int schedule_work_on(int cpu, struct work_struct *work) +{ + return queue_work_on(cpu, system_wq, work); +} +EXPORT_SYMBOL(schedule_work_on); + /** * schedule_work - put work task in global workqueue * @work: job to be done @@ -2894,18 +2907,21 @@ int schedule_work(struct work_struct *work) } EXPORT_SYMBOL(schedule_work); -/* - * schedule_work_on - put work task on a specific cpu - * @cpu: cpu to put the work task on - * @work: job to be done +/** + * schedule_delayed_work_on - queue work in global workqueue on CPU after delay + * @cpu: cpu to use + * @dwork: job to be done + * @delay: number of jiffies to wait * - * This puts a job on a specific cpu + * After waiting for a given time this puts a job in the kernel-global + * workqueue on the specified CPU. */ -int schedule_work_on(int cpu, struct work_struct *work) +int schedule_delayed_work_on(int cpu, + struct delayed_work *dwork, unsigned long delay) { - return queue_work_on(cpu, system_wq, work); + return queue_delayed_work_on(cpu, system_wq, dwork, delay); } -EXPORT_SYMBOL(schedule_work_on); +EXPORT_SYMBOL(schedule_delayed_work_on); /** * schedule_delayed_work - put work task in global workqueue after delay @@ -2922,22 +2938,6 @@ int schedule_delayed_work(struct delayed_work *dwork, } EXPORT_SYMBOL(schedule_delayed_work); -/** - * schedule_delayed_work_on - queue work in global workqueue on CPU after delay - * @cpu: cpu to use - * @dwork: job to be done - * @delay: number of jiffies to wait - * - * After waiting for a given time this puts a job in the kernel-global - * workqueue on the specified CPU. - */ -int schedule_delayed_work_on(int cpu, - struct delayed_work *dwork, unsigned long delay) -{ - return queue_delayed_work_on(cpu, system_wq, dwork, delay); -} -EXPORT_SYMBOL(schedule_delayed_work_on); - /** * schedule_on_each_cpu - execute a function synchronously on each online CPU * @func: the function to call -- cgit v1.2.3-71-gd317 From d4283e9378619c14dc3826a6b0527eb5d967ffde Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:44 -0700 Subject: workqueue: make queueing functions return bool All queueing functions return 1 on success, 0 if the work item was already pending. Update them to return bool instead. This signifies better that they don't return 0 / -errno. This is cleanup and doesn't cause any functional difference. While at it, fix comment opening for schedule_work_on(). Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 20 ++++++++++---------- kernel/workqueue.c | 47 +++++++++++++++++++++++------------------------ 2 files changed, 33 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 597034276611..278dc5ddb73f 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -365,24 +365,24 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, extern void destroy_workqueue(struct workqueue_struct *wq); -extern int queue_work_on(int cpu, struct workqueue_struct *wq, +extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); -extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); -extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, +extern bool queue_work(struct workqueue_struct *wq, struct work_struct *work); +extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); -extern int queue_delayed_work(struct workqueue_struct *wq, +extern bool queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern void flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern int schedule_work_on(int cpu, struct work_struct *work); -extern int schedule_work(struct work_struct *work); -extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, - unsigned long delay); -extern int schedule_delayed_work(struct delayed_work *work, - unsigned long delay); +extern bool schedule_work_on(int cpu, struct work_struct *work); +extern bool schedule_work(struct work_struct *work); +extern bool schedule_delayed_work_on(int cpu, struct delayed_work *work, + unsigned long delay); +extern bool schedule_delayed_work(struct delayed_work *work, + unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); extern int keventd_up(void); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 07d309e7e359..70f95ab28f3d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1058,19 +1058,19 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, * @wq: workqueue to use * @work: work to queue * - * Returns 0 if @work was already on a queue, non-zero otherwise. + * Returns %false if @work was already on a queue, %true otherwise. * * We queue the work to a specific CPU, the caller must ensure it * can't go away. */ -int -queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) +bool queue_work_on(int cpu, struct workqueue_struct *wq, + struct work_struct *work) { - int ret = 0; + bool ret = false; if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_work(cpu, wq, work); - ret = 1; + ret = true; } return ret; } @@ -1081,14 +1081,14 @@ EXPORT_SYMBOL_GPL(queue_work_on); * @wq: workqueue to use * @work: work to queue * - * Returns 0 if @work was already on a queue, non-zero otherwise. + * Returns %false if @work was already on a queue, %true otherwise. * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. */ -int queue_work(struct workqueue_struct *wq, struct work_struct *work) +bool queue_work(struct workqueue_struct *wq, struct work_struct *work) { - int ret; + bool ret; ret = queue_work_on(get_cpu(), wq, work); put_cpu(); @@ -1112,14 +1112,14 @@ static void delayed_work_timer_fn(unsigned long __data) * @dwork: work to queue * @delay: number of jiffies to wait before queueing * - * Returns 0 if @work was already on a queue, non-zero otherwise. + * Returns %false if @work was already on a queue, %true otherwise. */ -int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, - struct delayed_work *dwork, unsigned long delay) +bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, + struct delayed_work *dwork, unsigned long delay) { - int ret = 0; struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + bool ret = false; if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { unsigned int lcpu; @@ -1154,7 +1154,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, add_timer_on(timer, cpu); else add_timer(timer); - ret = 1; + ret = true; } return ret; } @@ -1166,9 +1166,9 @@ EXPORT_SYMBOL_GPL(queue_delayed_work_on); * @dwork: delayable work to queue * @delay: number of jiffies to wait before queueing * - * Returns 0 if @work was already on a queue, non-zero otherwise. + * Returns %false if @work was already on a queue, %true otherwise. */ -int queue_delayed_work(struct workqueue_struct *wq, +bool queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { if (delay == 0) @@ -2877,14 +2877,14 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork) } EXPORT_SYMBOL(cancel_delayed_work_sync); -/* +/** * schedule_work_on - put work task on a specific cpu * @cpu: cpu to put the work task on * @work: job to be done * * This puts a job on a specific cpu */ -int schedule_work_on(int cpu, struct work_struct *work) +bool schedule_work_on(int cpu, struct work_struct *work) { return queue_work_on(cpu, system_wq, work); } @@ -2894,14 +2894,14 @@ EXPORT_SYMBOL(schedule_work_on); * schedule_work - put work task in global workqueue * @work: job to be done * - * Returns zero if @work was already on the kernel-global workqueue and - * non-zero otherwise. + * Returns %false if @work was already on the kernel-global workqueue and + * %true otherwise. * * This puts a job in the kernel-global workqueue if it was not already * queued and leaves it in the same position on the kernel-global * workqueue otherwise. */ -int schedule_work(struct work_struct *work) +bool schedule_work(struct work_struct *work) { return queue_work(system_wq, work); } @@ -2916,8 +2916,8 @@ EXPORT_SYMBOL(schedule_work); * After waiting for a given time this puts a job in the kernel-global * workqueue on the specified CPU. */ -int schedule_delayed_work_on(int cpu, - struct delayed_work *dwork, unsigned long delay) +bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, + unsigned long delay) { return queue_delayed_work_on(cpu, system_wq, dwork, delay); } @@ -2931,8 +2931,7 @@ EXPORT_SYMBOL(schedule_delayed_work_on); * After waiting for a given time this puts a job in the kernel-global * workqueue. */ -int schedule_delayed_work(struct delayed_work *dwork, - unsigned long delay) +bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work(system_wq, dwork, delay); } -- cgit v1.2.3-71-gd317 From 959d1af8cffc8fd38ed53e8be1cf4ab8782f9c00 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:45 -0700 Subject: workqueue: add missing smp_wmb() in process_one_work() WORK_STRUCT_PENDING is used to claim ownership of a work item and process_one_work() releases it before starting execution. When someone else grabs PENDING, all pre-release updates to the work item should be visible and all updates made by the new owner should happen afterwards. Grabbing PENDING uses test_and_set_bit() and thus has a full barrier; however, clearing doesn't have a matching wmb. Given the preceding spin_unlock and use of clear_bit, I don't believe this can be a problem on an actual machine and there hasn't been any related report but it still is theretically possible for clear_pending to permeate upwards and happen before work->entry update. Add an explicit smp_wmb() before work_clear_pending(). Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: stable@vger.kernel.org --- kernel/workqueue.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 70f95ab28f3d..5c26d36146b7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1997,7 +1997,9 @@ __acquires(&gcwq->lock) spin_unlock_irq(&gcwq->lock); + smp_wmb(); /* paired with test_and_set_bit(PENDING) */ work_clear_pending(work); + lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); trace_workqueue_execute_start(work); -- cgit v1.2.3-71-gd317 From 8930caba3dbdd8b86dd6934a5920bf61b53a931e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:45 -0700 Subject: workqueue: disable irq while manipulating PENDING Queueing operations use WORK_STRUCT_PENDING_BIT to synchronize access to the target work item. They first try to claim the bit and proceed with queueing only after that succeeds and there's a window between PENDING being set and the actual queueing where the task can be interrupted or preempted. There's also a similar window in process_one_work() when clearing PENDING. A work item is dequeued, gcwq->lock is released and then PENDING is cleared and the worker might get interrupted or preempted between releasing gcwq->lock and clearing PENDING. cancel[_delayed]_work_sync() tries to claim or steal PENDING. The function assumes that a work item with PENDING is either queued or in the process of being [de]queued. In the latter case, it busy-loops until either the work item loses PENDING or is queued. If canceling coincides with the above described interrupts or preemptions, the canceling task will busy-loop while the queueing or executing task is preempted. This patch keeps irq disabled across claiming PENDING and actual queueing and moves PENDING clearing in process_one_work() inside gcwq->lock so that busy looping from PENDING && !queued doesn't wait for interrupted/preempted tasks. Note that, in process_one_work(), setting last CPU and clearing PENDING got merged into single operation. This removes possible long busy-loops and will allow using try_to_grab_pending() from bh and irq contexts. v2: __queue_work() was testing preempt_count() to ensure that the caller has disabled preemption. This triggers spuriously if !CONFIG_PREEMPT_COUNT. Use preemptible() instead. Reported by Fengguang Wu. v3: Disable irq instead of preemption. IRQ will be disabled while grabbing gcwq->lock later anyway and this allows using try_to_grab_pending() from bh and irq contexts. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: Fengguang Wu --- kernel/workqueue.c | 73 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5c26d36146b7..30474c4e107c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -537,9 +537,10 @@ static int work_next_color(int color) * work is on queue. Once execution starts, WORK_STRUCT_CWQ is * cleared and the work data contains the cpu number it was last on. * - * set_work_{cwq|cpu}() and clear_work_data() can be used to set the - * cwq, cpu or clear work->data. These functions should only be - * called while the work is owned - ie. while the PENDING bit is set. + * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data() + * can be used to set the cwq, cpu or clear work->data. These functions + * should only be called while the work is owned - ie. while the PENDING + * bit is set. * * get_work_[g]cwq() can be used to obtain the gcwq or cwq * corresponding to a work. gcwq is available once the work has been @@ -561,9 +562,10 @@ static void set_work_cwq(struct work_struct *work, WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); } -static void set_work_cpu(struct work_struct *work, unsigned int cpu) +static void set_work_cpu_and_clear_pending(struct work_struct *work, + unsigned int cpu) { - set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING); + set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, 0); } static void clear_work_data(struct work_struct *work) @@ -981,7 +983,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct cpu_workqueue_struct *cwq; struct list_head *worklist; unsigned int work_flags; - unsigned long flags; + + /* + * While a work item is PENDING && off queue, a task trying to + * steal the PENDING will busy-loop waiting for it to either get + * queued or lose PENDING. Grabbing PENDING and queueing should + * happen with IRQ disabled. + */ + WARN_ON_ONCE(!irqs_disabled()); debug_work_activate(work); @@ -1008,7 +1017,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) { struct worker *worker; - spin_lock_irqsave(&last_gcwq->lock, flags); + spin_lock(&last_gcwq->lock); worker = find_worker_executing_work(last_gcwq, work); @@ -1016,14 +1025,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, gcwq = last_gcwq; else { /* meh... not running there, queue here */ - spin_unlock_irqrestore(&last_gcwq->lock, flags); - spin_lock_irqsave(&gcwq->lock, flags); + spin_unlock(&last_gcwq->lock); + spin_lock(&gcwq->lock); } - } else - spin_lock_irqsave(&gcwq->lock, flags); + } else { + spin_lock(&gcwq->lock); + } } else { gcwq = get_gcwq(WORK_CPU_UNBOUND); - spin_lock_irqsave(&gcwq->lock, flags); + spin_lock(&gcwq->lock); } /* gcwq determined, get cwq and queue */ @@ -1031,7 +1041,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, trace_workqueue_queue_work(cpu, cwq, work); if (WARN_ON(!list_empty(&work->entry))) { - spin_unlock_irqrestore(&gcwq->lock, flags); + spin_unlock(&gcwq->lock); return; } @@ -1049,7 +1059,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, insert_work(cwq, work, worklist, work_flags); - spin_unlock_irqrestore(&gcwq->lock, flags); + spin_unlock(&gcwq->lock); } /** @@ -1067,11 +1077,16 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) { bool ret = false; + unsigned long flags; + + local_irq_save(flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_work(cpu, wq, work); ret = true; } + + local_irq_restore(flags); return ret; } EXPORT_SYMBOL_GPL(queue_work_on); @@ -1102,7 +1117,9 @@ static void delayed_work_timer_fn(unsigned long __data) struct delayed_work *dwork = (struct delayed_work *)__data; struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); + local_irq_disable(); __queue_work(smp_processor_id(), cwq->wq, &dwork->work); + local_irq_enable(); } /** @@ -1120,6 +1137,10 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; bool ret = false; + unsigned long flags; + + /* read the comment in __queue_work() */ + local_irq_save(flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { unsigned int lcpu; @@ -1156,6 +1177,8 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, add_timer(timer); ret = true; } + + local_irq_restore(flags); return ret; } EXPORT_SYMBOL_GPL(queue_delayed_work_on); @@ -1970,15 +1993,13 @@ __acquires(&gcwq->lock) return; } - /* claim and process */ + /* claim and dequeue */ debug_work_deactivate(work); hlist_add_head(&worker->hentry, bwh); worker->current_work = work; worker->current_cwq = cwq; work_color = get_work_color(work); - /* record the current cpu number in the work data and dequeue */ - set_work_cpu(work, gcwq->cpu); list_del_init(&work->entry); /* @@ -1995,10 +2016,18 @@ __acquires(&gcwq->lock) if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) wake_up_worker(pool); - spin_unlock_irq(&gcwq->lock); + /* + * Record the last CPU and clear PENDING. The following wmb is + * paired with the implied mb in test_and_set_bit(PENDING) and + * ensures all updates to @work made here are visible to and + * precede any updates by the next PENDING owner. Also, clear + * PENDING inside @gcwq->lock so that PENDING and queued state + * changes happen together while IRQ is disabled. + */ + smp_wmb(); + set_work_cpu_and_clear_pending(work, gcwq->cpu); - smp_wmb(); /* paired with test_and_set_bit(PENDING) */ - work_clear_pending(work); + spin_unlock_irq(&gcwq->lock); lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); @@ -2836,9 +2865,11 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); */ bool flush_delayed_work(struct delayed_work *dwork) { + local_irq_disable(); if (del_timer_sync(&dwork->timer)) __queue_work(raw_smp_processor_id(), get_work_cwq(&dwork->work)->wq, &dwork->work); + local_irq_enable(); return flush_work(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work); @@ -2857,9 +2888,11 @@ EXPORT_SYMBOL(flush_delayed_work); */ bool flush_delayed_work_sync(struct delayed_work *dwork) { + local_irq_disable(); if (del_timer_sync(&dwork->timer)) __queue_work(raw_smp_processor_id(), get_work_cwq(&dwork->work)->wq, &dwork->work); + local_irq_enable(); return flush_work_sync(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work_sync); -- cgit v1.2.3-71-gd317 From d8e794dfd51c368ed3f686b7f4172830b60ae47b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:45 -0700 Subject: workqueue: set delayed_work->timer function on initialization delayed_work->timer.function is currently initialized during queue_delayed_work_on(). Export delayed_work_timer_fn() and set delayed_work timer function during delayed_work initialization together with other fields. This ensures the timer function is always valid on an initialized delayed_work. This is to help mod_delayed_work() implementation. To detect delayed_work users which diddle with the internal timer, trigger WARN if timer function doesn't match on queue. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 13 +++++++++++-- kernel/workqueue.c | 7 ++++--- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 278dc5ddb73f..ab95fef38d56 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -16,6 +16,7 @@ struct workqueue_struct; struct work_struct; typedef void (*work_func_t)(struct work_struct *work); +void delayed_work_timer_fn(unsigned long __data); /* * The first word is the work queue pointer and the flags rolled into @@ -124,12 +125,14 @@ struct execute_work { #define __DELAYED_WORK_INITIALIZER(n, f) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ - .timer = TIMER_INITIALIZER(NULL, 0, 0), \ + .timer = TIMER_INITIALIZER(delayed_work_timer_fn, \ + 0, (unsigned long)&(n)), \ } #define __DEFERRED_WORK_INITIALIZER(n, f) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ - .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \ + .timer = TIMER_DEFERRED_INITIALIZER(delayed_work_timer_fn, \ + 0, (unsigned long)&(n)), \ } #define DECLARE_WORK(n, f) \ @@ -207,18 +210,24 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } do { \ INIT_WORK(&(_work)->work, (_func)); \ init_timer(&(_work)->timer); \ + (_work)->timer.function = delayed_work_timer_fn;\ + (_work)->timer.data = (unsigned long)(_work); \ } while (0) #define INIT_DELAYED_WORK_ONSTACK(_work, _func) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ init_timer_on_stack(&(_work)->timer); \ + (_work)->timer.function = delayed_work_timer_fn;\ + (_work)->timer.data = (unsigned long)(_work); \ } while (0) #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ init_timer_deferrable(&(_work)->timer); \ + (_work)->timer.function = delayed_work_timer_fn;\ + (_work)->timer.data = (unsigned long)(_work); \ } while (0) /** diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 30474c4e107c..55392385fe30 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1112,7 +1112,7 @@ bool queue_work(struct workqueue_struct *wq, struct work_struct *work) } EXPORT_SYMBOL_GPL(queue_work); -static void delayed_work_timer_fn(unsigned long __data) +void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); @@ -1121,6 +1121,7 @@ static void delayed_work_timer_fn(unsigned long __data) __queue_work(smp_processor_id(), cwq->wq, &dwork->work); local_irq_enable(); } +EXPORT_SYMBOL_GPL(delayed_work_timer_fn); /** * queue_delayed_work_on - queue work on specific CPU after delay @@ -1145,6 +1146,8 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { unsigned int lcpu; + WARN_ON_ONCE(timer->function != delayed_work_timer_fn || + timer->data != (unsigned long)dwork); BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); @@ -1168,8 +1171,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, set_work_cwq(work, get_cwq(lcpu, wq), 0); timer->expires = jiffies + delay; - timer->data = (unsigned long)dwork; - timer->function = delayed_work_timer_fn; if (unlikely(cpu >= 0)) add_timer_on(timer, cpu); -- cgit v1.2.3-71-gd317 From 57469821fd5c61f25f783827d7334063cff67d65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:45 -0700 Subject: workqueue: unify local CPU queueing handling Queueing functions have been using different methods to determine the local CPU. * queue_work() superflously uses get/put_cpu() to acquire and hold the local CPU across queue_work_on(). * delayed_work_timer_fn() uses smp_processor_id(). * queue_delayed_work() calls queue_delayed_work_on() with -1 @cpu which is interpreted as the local CPU. * flush_delayed_work[_sync]() were using raw_smp_processor_id(). * __queue_work() interprets %WORK_CPU_UNBOUND as local CPU if the target workqueue is bound one but nobody uses this. This patch converts all functions to uniformly use %WORK_CPU_UNBOUND to indicate local CPU and use the local binding feature of __queue_work(). unlikely() is dropped from %WORK_CPU_UNBOUND handling in __queue_work(). Signed-off-by: Tejun Heo --- kernel/workqueue.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 55392385fe30..ce60bb5d12fb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1003,7 +1003,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, if (!(wq->flags & WQ_UNBOUND)) { struct global_cwq *last_gcwq; - if (unlikely(cpu == WORK_CPU_UNBOUND)) + if (cpu == WORK_CPU_UNBOUND) cpu = raw_smp_processor_id(); /* @@ -1103,12 +1103,7 @@ EXPORT_SYMBOL_GPL(queue_work_on); */ bool queue_work(struct workqueue_struct *wq, struct work_struct *work) { - bool ret; - - ret = queue_work_on(get_cpu(), wq, work); - put_cpu(); - - return ret; + return queue_work_on(WORK_CPU_UNBOUND, wq, work); } EXPORT_SYMBOL_GPL(queue_work); @@ -1118,7 +1113,7 @@ void delayed_work_timer_fn(unsigned long __data) struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); local_irq_disable(); - __queue_work(smp_processor_id(), cwq->wq, &dwork->work); + __queue_work(WORK_CPU_UNBOUND, cwq->wq, &dwork->work); local_irq_enable(); } EXPORT_SYMBOL_GPL(delayed_work_timer_fn); @@ -1172,7 +1167,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, timer->expires = jiffies + delay; - if (unlikely(cpu >= 0)) + if (unlikely(cpu != WORK_CPU_UNBOUND)) add_timer_on(timer, cpu); else add_timer(timer); @@ -1198,7 +1193,7 @@ bool queue_delayed_work(struct workqueue_struct *wq, if (delay == 0) return queue_work(wq, &dwork->work); - return queue_delayed_work_on(-1, wq, dwork, delay); + return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); } EXPORT_SYMBOL_GPL(queue_delayed_work); @@ -2868,7 +2863,7 @@ bool flush_delayed_work(struct delayed_work *dwork) { local_irq_disable(); if (del_timer_sync(&dwork->timer)) - __queue_work(raw_smp_processor_id(), + __queue_work(WORK_CPU_UNBOUND, get_work_cwq(&dwork->work)->wq, &dwork->work); local_irq_enable(); return flush_work(&dwork->work); @@ -2891,7 +2886,7 @@ bool flush_delayed_work_sync(struct delayed_work *dwork) { local_irq_disable(); if (del_timer_sync(&dwork->timer)) - __queue_work(raw_smp_processor_id(), + __queue_work(WORK_CPU_UNBOUND, get_work_cwq(&dwork->work)->wq, &dwork->work); local_irq_enable(); return flush_work_sync(&dwork->work); -- cgit v1.2.3-71-gd317 From 715f1300802e6eaefa85f6cfc70ae99af3d5d497 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:46 -0700 Subject: workqueue: fix zero @delay handling of queue_delayed_work_on() If @delay is zero and the dealyed_work is idle, queue_delayed_work() queues it for immediate execution; however, queue_delayed_work_on() lacks this logic and always goes through timer regardless of @delay. This patch moves 0 @delay handling logic from queue_delayed_work() to queue_delayed_work_on() so that both functions behave the same. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ce60bb5d12fb..6cbdc22f8ec7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1125,7 +1125,9 @@ EXPORT_SYMBOL_GPL(delayed_work_timer_fn); * @dwork: work to queue * @delay: number of jiffies to wait before queueing * - * Returns %false if @work was already on a queue, %true otherwise. + * Returns %false if @work was already on a queue, %true otherwise. If + * @delay is zero and @dwork is idle, it will be scheduled for immediate + * execution. */ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) @@ -1135,6 +1137,9 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, bool ret = false; unsigned long flags; + if (!delay) + return queue_work_on(cpu, wq, &dwork->work); + /* read the comment in __queue_work() */ local_irq_save(flags); @@ -1185,14 +1190,11 @@ EXPORT_SYMBOL_GPL(queue_delayed_work_on); * @dwork: delayable work to queue * @delay: number of jiffies to wait before queueing * - * Returns %false if @work was already on a queue, %true otherwise. + * Equivalent to queue_delayed_work_on() but tries to use the local CPU. */ bool queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { - if (delay == 0) - return queue_work(wq, &dwork->work); - return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); } EXPORT_SYMBOL_GPL(queue_delayed_work); -- cgit v1.2.3-71-gd317 From bf4ede014ea886b71ef71368738da35b316cb7c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:46 -0700 Subject: workqueue: move try_to_grab_pending() upwards try_to_grab_pending() will be used by to-be-implemented mod_delayed_work[_on](). Move try_to_grab_pending() and related functions above queueing functions. This patch only moves functions around. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 286 ++++++++++++++++++++++++++--------------------------- 1 file changed, 143 insertions(+), 143 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6cbdc22f8ec7..0f50f4078e36 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -903,6 +903,149 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq, work); } +/** + * move_linked_works - move linked works to a list + * @work: start of series of works to be scheduled + * @head: target list to append @work to + * @nextp: out paramter for nested worklist walking + * + * Schedule linked works starting from @work to @head. Work series to + * be scheduled starts at @work and includes any consecutive work with + * WORK_STRUCT_LINKED set in its predecessor. + * + * If @nextp is not NULL, it's updated to point to the next work of + * the last scheduled work. This allows move_linked_works() to be + * nested inside outer list_for_each_entry_safe(). + * + * CONTEXT: + * spin_lock_irq(gcwq->lock). + */ +static void move_linked_works(struct work_struct *work, struct list_head *head, + struct work_struct **nextp) +{ + struct work_struct *n; + + /* + * Linked worklist will always end before the end of the list, + * use NULL for list head. + */ + list_for_each_entry_safe_from(work, n, NULL, entry) { + list_move_tail(&work->entry, head); + if (!(*work_data_bits(work) & WORK_STRUCT_LINKED)) + break; + } + + /* + * If we're already inside safe list traversal and have moved + * multiple works to the scheduled queue, the next position + * needs to be updated. + */ + if (nextp) + *nextp = n; +} + +static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +{ + struct work_struct *work = list_first_entry(&cwq->delayed_works, + struct work_struct, entry); + + trace_workqueue_activate_work(work); + move_linked_works(work, &cwq->pool->worklist, NULL); + __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); + cwq->nr_active++; +} + +/** + * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight + * @cwq: cwq of interest + * @color: color of work which left the queue + * @delayed: for a delayed work + * + * A work either has completed or is removed from pending queue, + * decrement nr_in_flight of its cwq and handle workqueue flushing. + * + * CONTEXT: + * spin_lock_irq(gcwq->lock). + */ +static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, + bool delayed) +{ + /* ignore uncolored works */ + if (color == WORK_NO_COLOR) + return; + + cwq->nr_in_flight[color]--; + + if (!delayed) { + cwq->nr_active--; + if (!list_empty(&cwq->delayed_works)) { + /* one down, submit a delayed one */ + if (cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); + } + } + + /* is flush in progress and are we at the flushing tip? */ + if (likely(cwq->flush_color != color)) + return; + + /* are there still in-flight works? */ + if (cwq->nr_in_flight[color]) + return; + + /* this cwq is done, clear flush_color */ + cwq->flush_color = -1; + + /* + * If this was the last cwq, wake up the first flusher. It + * will handle the rest. + */ + if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) + complete(&cwq->wq->first_flusher->done); +} + +/* + * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, + * so this work can't be re-armed in any way. + */ +static int try_to_grab_pending(struct work_struct *work) +{ + struct global_cwq *gcwq; + int ret = -1; + + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) + return 0; + + /* + * The queueing is in progress, or it is already queued. Try to + * steal it from ->worklist without clearing WORK_STRUCT_PENDING. + */ + gcwq = get_work_gcwq(work); + if (!gcwq) + return ret; + + spin_lock_irq(&gcwq->lock); + if (!list_empty(&work->entry)) { + /* + * This work is queued, but perhaps we locked the wrong gcwq. + * In that case we must see the new value after rmb(), see + * insert_work()->wmb(). + */ + smp_rmb(); + if (gcwq == get_work_gcwq(work)) { + debug_work_deactivate(work); + list_del_init(&work->entry); + cwq_dec_nr_in_flight(get_work_cwq(work), + get_work_color(work), + *work_data_bits(work) & WORK_STRUCT_DELAYED); + ret = 1; + } + } + spin_unlock_irq(&gcwq->lock); + + return ret; +} + /** * insert_work - insert a work into gcwq * @cwq: cwq @work belongs to @@ -1831,107 +1974,6 @@ static bool manage_workers(struct worker *worker) return ret; } -/** - * move_linked_works - move linked works to a list - * @work: start of series of works to be scheduled - * @head: target list to append @work to - * @nextp: out paramter for nested worklist walking - * - * Schedule linked works starting from @work to @head. Work series to - * be scheduled starts at @work and includes any consecutive work with - * WORK_STRUCT_LINKED set in its predecessor. - * - * If @nextp is not NULL, it's updated to point to the next work of - * the last scheduled work. This allows move_linked_works() to be - * nested inside outer list_for_each_entry_safe(). - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - */ -static void move_linked_works(struct work_struct *work, struct list_head *head, - struct work_struct **nextp) -{ - struct work_struct *n; - - /* - * Linked worklist will always end before the end of the list, - * use NULL for list head. - */ - list_for_each_entry_safe_from(work, n, NULL, entry) { - list_move_tail(&work->entry, head); - if (!(*work_data_bits(work) & WORK_STRUCT_LINKED)) - break; - } - - /* - * If we're already inside safe list traversal and have moved - * multiple works to the scheduled queue, the next position - * needs to be updated. - */ - if (nextp) - *nextp = n; -} - -static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) -{ - struct work_struct *work = list_first_entry(&cwq->delayed_works, - struct work_struct, entry); - - trace_workqueue_activate_work(work); - move_linked_works(work, &cwq->pool->worklist, NULL); - __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); - cwq->nr_active++; -} - -/** - * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight - * @cwq: cwq of interest - * @color: color of work which left the queue - * @delayed: for a delayed work - * - * A work either has completed or is removed from pending queue, - * decrement nr_in_flight of its cwq and handle workqueue flushing. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, - bool delayed) -{ - /* ignore uncolored works */ - if (color == WORK_NO_COLOR) - return; - - cwq->nr_in_flight[color]--; - - if (!delayed) { - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { - /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); - } - } - - /* is flush in progress and are we at the flushing tip? */ - if (likely(cwq->flush_color != color)) - return; - - /* are there still in-flight works? */ - if (cwq->nr_in_flight[color]) - return; - - /* this cwq is done, clear flush_color */ - cwq->flush_color = -1; - - /* - * If this was the last cwq, wake up the first flusher. It - * will handle the rest. - */ - if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) - complete(&cwq->wq->first_flusher->done); -} - /** * process_one_work - process single work * @worker: self @@ -2767,48 +2809,6 @@ bool flush_work_sync(struct work_struct *work) } EXPORT_SYMBOL_GPL(flush_work_sync); -/* - * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, - * so this work can't be re-armed in any way. - */ -static int try_to_grab_pending(struct work_struct *work) -{ - struct global_cwq *gcwq; - int ret = -1; - - if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) - return 0; - - /* - * The queueing is in progress, or it is already queued. Try to - * steal it from ->worklist without clearing WORK_STRUCT_PENDING. - */ - gcwq = get_work_gcwq(work); - if (!gcwq) - return ret; - - spin_lock_irq(&gcwq->lock); - if (!list_empty(&work->entry)) { - /* - * This work is queued, but perhaps we locked the wrong gcwq. - * In that case we must see the new value after rmb(), see - * insert_work()->wmb(). - */ - smp_rmb(); - if (gcwq == get_work_gcwq(work)) { - debug_work_deactivate(work); - list_del_init(&work->entry); - cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work), - *work_data_bits(work) & WORK_STRUCT_DELAYED); - ret = 1; - } - } - spin_unlock_irq(&gcwq->lock); - - return ret; -} - static bool __cancel_work_timer(struct work_struct *work, struct timer_list* timer) { -- cgit v1.2.3-71-gd317 From b5490077274482efde57a50b060b99bc839acd45 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:46 -0700 Subject: workqueue: introduce WORK_OFFQ_FLAG_* Low WORK_STRUCT_FLAG_BITS bits of work_struct->data contain WORK_STRUCT_FLAG_* and flush color. If the work item is queued, the rest point to the cpu_workqueue with WORK_STRUCT_CWQ set; otherwise, WORK_STRUCT_CWQ is clear and the bits contain the last CPU number - either a real CPU number or one of WORK_CPU_*. Scheduled addition of mod_delayed_work[_on]() requires an additional flag, which is used only while a work item is off queue. There are more than enough bits to represent off-queue CPU number on both 32 and 64bits. This patch introduces WORK_OFFQ_FLAG_* which occupy the lower part of the @work->data high bits while off queue. This patch doesn't define any actual OFFQ flag yet. Off-queue CPU number is now shifted by WORK_OFFQ_CPU_SHIFT, which adds the number of bits used by OFFQ flags to WORK_STRUCT_FLAG_SHIFT, to make room for OFFQ flags. To avoid shift width warning with large WORK_OFFQ_FLAG_BITS, ulong cast is added to WORK_STRUCT_NO_CPU and, just in case, BUILD_BUG_ON() to check that there are enough bits to accomodate off-queue CPU number is added. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 8 +++++++- kernel/workqueue.c | 14 +++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ab95fef38d56..f562674db404 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -68,9 +68,15 @@ enum { WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, + /* data contains off-queue information when !WORK_STRUCT_CWQ */ + WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_FLAG_BITS = 0, + WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, + + /* convenience constants */ WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_CPU = WORK_CPU_NONE << WORK_STRUCT_FLAG_BITS, + WORK_STRUCT_NO_CPU = (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0f50f4078e36..eeae77079483 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -533,9 +533,9 @@ static int work_next_color(int color) } /* - * A work's data points to the cwq with WORK_STRUCT_CWQ set while the - * work is on queue. Once execution starts, WORK_STRUCT_CWQ is - * cleared and the work data contains the cpu number it was last on. + * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data + * contain the pointer to the queued cwq. Once execution starts, the flag + * is cleared and the high bits contain OFFQ flags and CPU number. * * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data() * can be used to set the cwq, cpu or clear work->data. These functions @@ -565,7 +565,7 @@ static void set_work_cwq(struct work_struct *work, static void set_work_cpu_and_clear_pending(struct work_struct *work, unsigned int cpu) { - set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, 0); + set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); } static void clear_work_data(struct work_struct *work) @@ -592,7 +592,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) return ((struct cpu_workqueue_struct *) (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; - cpu = data >> WORK_STRUCT_FLAG_BITS; + cpu = data >> WORK_OFFQ_CPU_SHIFT; if (cpu == WORK_CPU_NONE) return NULL; @@ -3724,6 +3724,10 @@ static int __init init_workqueues(void) unsigned int cpu; int i; + /* make sure we have enough bits for OFFQ CPU number */ + BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) < + WORK_CPU_LAST); + cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); -- cgit v1.2.3-71-gd317 From 7beb2edf44b4dea820c733046ad7666d092bb4b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:46 -0700 Subject: workqueue: factor out __queue_delayed_work() from queue_delayed_work_on() This is to prepare for mod_delayed_work[_on]() and doesn't cause any functional difference. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 74 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index eeae77079483..d7f1b7e2bbaa 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1261,6 +1261,46 @@ void delayed_work_timer_fn(unsigned long __data) } EXPORT_SYMBOL_GPL(delayed_work_timer_fn); +static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, + struct delayed_work *dwork, unsigned long delay) +{ + struct timer_list *timer = &dwork->timer; + struct work_struct *work = &dwork->work; + unsigned int lcpu; + + WARN_ON_ONCE(timer->function != delayed_work_timer_fn || + timer->data != (unsigned long)dwork); + BUG_ON(timer_pending(timer)); + BUG_ON(!list_empty(&work->entry)); + + timer_stats_timer_set_start_info(&dwork->timer); + + /* + * This stores cwq for the moment, for the timer_fn. Note that the + * work's gcwq is preserved to allow reentrance detection for + * delayed works. + */ + if (!(wq->flags & WQ_UNBOUND)) { + struct global_cwq *gcwq = get_work_gcwq(work); + + if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND) + lcpu = gcwq->cpu; + else + lcpu = raw_smp_processor_id(); + } else { + lcpu = WORK_CPU_UNBOUND; + } + + set_work_cwq(work, get_cwq(lcpu, wq), 0); + + timer->expires = jiffies + delay; + + if (unlikely(cpu != WORK_CPU_UNBOUND)) + add_timer_on(timer, cpu); + else + add_timer(timer); +} + /** * queue_delayed_work_on - queue work on specific CPU after delay * @cpu: CPU number to execute work on @@ -1275,7 +1315,6 @@ EXPORT_SYMBOL_GPL(delayed_work_timer_fn); bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { - struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; bool ret = false; unsigned long flags; @@ -1287,38 +1326,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, local_irq_save(flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { - unsigned int lcpu; - - WARN_ON_ONCE(timer->function != delayed_work_timer_fn || - timer->data != (unsigned long)dwork); - BUG_ON(timer_pending(timer)); - BUG_ON(!list_empty(&work->entry)); - - timer_stats_timer_set_start_info(&dwork->timer); - - /* - * This stores cwq for the moment, for the timer_fn. - * Note that the work's gcwq is preserved to allow - * reentrance detection for delayed works. - */ - if (!(wq->flags & WQ_UNBOUND)) { - struct global_cwq *gcwq = get_work_gcwq(work); - - if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND) - lcpu = gcwq->cpu; - else - lcpu = raw_smp_processor_id(); - } else - lcpu = WORK_CPU_UNBOUND; - - set_work_cwq(work, get_cwq(lcpu, wq), 0); - - timer->expires = jiffies + delay; - - if (unlikely(cpu != WORK_CPU_UNBOUND)) - add_timer_on(timer, cpu); - else - add_timer(timer); + __queue_delayed_work(cpu, wq, dwork, delay); ret = true; } -- cgit v1.2.3-71-gd317 From 36e227d242f9ec7cb4a8e968561b3b26e3d8b5d1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:46 -0700 Subject: workqueue: reorganize try_to_grab_pending() and __cancel_timer_work() * Use bool @is_dwork instead of @timer and let try_to_grab_pending() use to_delayed_work() to determine the delayed_work address. * Move timer handling from __cancel_work_timer() to try_to_grab_pending(). * Make try_to_grab_pending() use -EAGAIN instead of -1 for busy-looping and drop the ret local variable. * Add proper function comment to try_to_grab_pending(). This makes the code a bit easier to understand and will ease further changes. This patch doesn't make any functional change. v2: Use @is_dwork instead of @timer. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d7f1b7e2bbaa..4b3663b1c677 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1004,15 +1004,33 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, complete(&cwq->wq->first_flusher->done); } -/* - * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, - * so this work can't be re-armed in any way. +/** + * try_to_grab_pending - steal work item from worklist + * @work: work item to steal + * @is_dwork: @work is a delayed_work + * + * Try to grab PENDING bit of @work. This function can handle @work in any + * stable state - idle, on timer or on worklist. Return values are + * + * 1 if @work was pending and we successfully stole PENDING + * 0 if @work was idle and we claimed PENDING + * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry + * + * On >= 0 return, the caller owns @work's PENDING bit. */ -static int try_to_grab_pending(struct work_struct *work) +static int try_to_grab_pending(struct work_struct *work, bool is_dwork) { struct global_cwq *gcwq; - int ret = -1; + /* try to steal the timer if it exists */ + if (is_dwork) { + struct delayed_work *dwork = to_delayed_work(work); + + if (likely(del_timer(&dwork->timer))) + return 1; + } + + /* try to claim PENDING the normal way */ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) return 0; @@ -1022,7 +1040,7 @@ static int try_to_grab_pending(struct work_struct *work) */ gcwq = get_work_gcwq(work); if (!gcwq) - return ret; + return -EAGAIN; spin_lock_irq(&gcwq->lock); if (!list_empty(&work->entry)) { @@ -1038,12 +1056,14 @@ static int try_to_grab_pending(struct work_struct *work) cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work), *work_data_bits(work) & WORK_STRUCT_DELAYED); - ret = 1; + + spin_unlock_irq(&gcwq->lock); + return 1; } } spin_unlock_irq(&gcwq->lock); - return ret; + return -EAGAIN; } /** @@ -2817,15 +2837,12 @@ bool flush_work_sync(struct work_struct *work) } EXPORT_SYMBOL_GPL(flush_work_sync); -static bool __cancel_work_timer(struct work_struct *work, - struct timer_list* timer) +static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) { int ret; do { - ret = (timer && likely(del_timer(timer))); - if (!ret) - ret = try_to_grab_pending(work); + ret = try_to_grab_pending(work, is_dwork); wait_on_work(work); } while (unlikely(ret < 0)); @@ -2853,7 +2870,7 @@ static bool __cancel_work_timer(struct work_struct *work, */ bool cancel_work_sync(struct work_struct *work) { - return __cancel_work_timer(work, NULL); + return __cancel_work_timer(work, false); } EXPORT_SYMBOL_GPL(cancel_work_sync); @@ -2914,7 +2931,7 @@ EXPORT_SYMBOL(flush_delayed_work_sync); */ bool cancel_delayed_work_sync(struct delayed_work *dwork) { - return __cancel_work_timer(&dwork->work, &dwork->timer); + return __cancel_work_timer(&dwork->work, true); } EXPORT_SYMBOL(cancel_delayed_work_sync); -- cgit v1.2.3-71-gd317 From bbb68dfaba73e8338fe0f1dc711cc1d261daec87 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:46 -0700 Subject: workqueue: mark a work item being canceled as such There can be two reasons try_to_grab_pending() can fail with -EAGAIN. One is when someone else is queueing or deqeueing the work item. With the previous patches, it is guaranteed that PENDING and queued state will soon agree making it safe to busy-retry in this case. The other is if multiple __cancel_work_timer() invocations are racing one another. __cancel_work_timer() grabs PENDING and then waits for running instances of the target work item on all CPUs while holding PENDING and !queued. try_to_grab_pending() invoked from another task will keep returning -EAGAIN while the current owner is waiting. Not distinguishing the two cases is okay because __cancel_work_timer() is the only user of try_to_grab_pending() and it invokes wait_on_work() whenever grabbing fails. For the first case, busy looping should be fine but wait_on_work() doesn't cause any critical problem. For the latter case, the new contender usually waits for the same condition as the current owner, so no unnecessarily extended busy-looping happens. Combined, these make __cancel_work_timer() technically correct even without irq protection while grabbing PENDING or distinguishing the two different cases. While the current code is technically correct, not distinguishing the two cases makes it difficult to use try_to_grab_pending() for other purposes than canceling because it's impossible to tell whether it's safe to busy-retry grabbing. This patch adds a mechanism to mark a work item being canceled. try_to_grab_pending() now disables irq on success and returns -EAGAIN to indicate that grabbing failed but PENDING and queued states are gonna agree soon and it's safe to busy-loop. It returns -ENOENT if the work item is being canceled and it may stay PENDING && !queued for arbitrary amount of time. __cancel_work_timer() is modified to mark the work canceling with WORK_OFFQ_CANCELING after grabbing PENDING, thus making try_to_grab_pending() fail with -ENOENT instead of -EAGAIN. Also, it invokes wait_on_work() iff grabbing failed with -ENOENT. This isn't necessary for correctness but makes it consistent with other future users of try_to_grab_pending(). v2: try_to_grab_pending() was testing preempt_count() to ensure that the caller has disabled preemption. This triggers spuriously if !CONFIG_PREEMPT_COUNT. Use preemptible() instead. Reported by Fengguang Wu. v3: Updated so that try_to_grab_pending() disables irq on success rather than requiring preemption disabled by the caller. This makes busy-looping easier and will allow try_to_grap_pending() to be used from bh/irq contexts. Signed-off-by: Tejun Heo Cc: Fengguang Wu --- include/linux/workqueue.h | 5 ++- kernel/workqueue.c | 90 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f562674db404..5f4aeaa9f3e6 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -70,7 +70,10 @@ enum { /* data contains off-queue information when !WORK_STRUCT_CWQ */ WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, - WORK_OFFQ_FLAG_BITS = 0, + + WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), + + WORK_OFFQ_FLAG_BITS = 1, WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, /* convenience constants */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4b3663b1c677..b4a4e05c89e1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -537,15 +537,20 @@ static int work_next_color(int color) * contain the pointer to the queued cwq. Once execution starts, the flag * is cleared and the high bits contain OFFQ flags and CPU number. * - * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data() - * can be used to set the cwq, cpu or clear work->data. These functions - * should only be called while the work is owned - ie. while the PENDING - * bit is set. + * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling() + * and clear_work_data() can be used to set the cwq, cpu or clear + * work->data. These functions should only be called while the work is + * owned - ie. while the PENDING bit is set. * - * get_work_[g]cwq() can be used to obtain the gcwq or cwq - * corresponding to a work. gcwq is available once the work has been - * queued anywhere after initialization. cwq is available only from - * queueing until execution starts. + * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to + * a work. gcwq is available once the work has been queued anywhere after + * initialization until it is sync canceled. cwq is available only while + * the work item is queued. + * + * %WORK_OFFQ_CANCELING is used to mark a work item which is being + * canceled. While being canceled, a work item may have its PENDING set + * but stay off timer and worklist for arbitrarily long and nobody should + * try to steal the PENDING bit. */ static inline void set_work_data(struct work_struct *work, unsigned long data, unsigned long flags) @@ -600,6 +605,22 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) return get_gcwq(cpu); } +static void mark_work_canceling(struct work_struct *work) +{ + struct global_cwq *gcwq = get_work_gcwq(work); + unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE; + + set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, + WORK_STRUCT_PENDING); +} + +static bool work_is_canceling(struct work_struct *work) +{ + unsigned long data = atomic_long_read(&work->data); + + return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); +} + /* * Policy functions. These define the policies on how the global worker * pools are managed. Unless noted otherwise, these functions assume that @@ -1005,9 +1026,10 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, } /** - * try_to_grab_pending - steal work item from worklist + * try_to_grab_pending - steal work item from worklist and disable irq * @work: work item to steal * @is_dwork: @work is a delayed_work + * @flags: place to store irq state * * Try to grab PENDING bit of @work. This function can handle @work in any * stable state - idle, on timer or on worklist. Return values are @@ -1015,13 +1037,30 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, * 1 if @work was pending and we successfully stole PENDING * 0 if @work was idle and we claimed PENDING * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry + * -ENOENT if someone else is canceling @work, this state may persist + * for arbitrarily long * - * On >= 0 return, the caller owns @work's PENDING bit. + * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting + * preempted while holding PENDING and @work off queue, preemption must be + * disabled on entry. This ensures that we don't return -EAGAIN while + * another task is preempted in this function. + * + * On successful return, >= 0, irq is disabled and the caller is + * responsible for releasing it using local_irq_restore(*@flags). + * + * This function is safe to call from any context other than IRQ handler. + * An IRQ handler may run on top of delayed_work_timer_fn() which can make + * this function return -EAGAIN perpetually. */ -static int try_to_grab_pending(struct work_struct *work, bool is_dwork) +static int try_to_grab_pending(struct work_struct *work, bool is_dwork, + unsigned long *flags) { struct global_cwq *gcwq; + WARN_ON_ONCE(in_irq()); + + local_irq_save(*flags); + /* try to steal the timer if it exists */ if (is_dwork) { struct delayed_work *dwork = to_delayed_work(work); @@ -1040,9 +1079,9 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork) */ gcwq = get_work_gcwq(work); if (!gcwq) - return -EAGAIN; + goto fail; - spin_lock_irq(&gcwq->lock); + spin_lock(&gcwq->lock); if (!list_empty(&work->entry)) { /* * This work is queued, but perhaps we locked the wrong gcwq. @@ -1057,12 +1096,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork) get_work_color(work), *work_data_bits(work) & WORK_STRUCT_DELAYED); - spin_unlock_irq(&gcwq->lock); + spin_unlock(&gcwq->lock); return 1; } } - spin_unlock_irq(&gcwq->lock); - + spin_unlock(&gcwq->lock); +fail: + local_irq_restore(*flags); + if (work_is_canceling(work)) + return -ENOENT; + cpu_relax(); return -EAGAIN; } @@ -2839,13 +2882,24 @@ EXPORT_SYMBOL_GPL(flush_work_sync); static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) { + unsigned long flags; int ret; do { - ret = try_to_grab_pending(work, is_dwork); - wait_on_work(work); + ret = try_to_grab_pending(work, is_dwork, &flags); + /* + * If someone else is canceling, wait for the same event it + * would be waiting for before retrying. + */ + if (unlikely(ret == -ENOENT)) + wait_on_work(work); } while (unlikely(ret < 0)); + /* tell other tasks trying to grab @work to back off */ + mark_work_canceling(work); + local_irq_restore(flags); + + wait_on_work(work); clear_work_data(work); return ret; } -- cgit v1.2.3-71-gd317 From 8376fe22c7e79c7e90857d39f82aeae6cad6c4b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:47 -0700 Subject: workqueue: implement mod_delayed_work[_on]() Workqueue was lacking a mechanism to modify the timeout of an already pending delayed_work. delayed_work users have been working around this using several methods - using an explicit timer + work item, messing directly with delayed_work->timer, and canceling before re-queueing, all of which are error-prone and/or ugly. This patch implements mod_delayed_work[_on]() which behaves similarly to mod_timer() - if the delayed_work is idle, it's queued with the given delay; otherwise, its timeout is modified to the new value. Zero @delay guarantees immediate execution. v2: Updated to reflect try_to_grab_pending() changes. Now safe to be called from bh context. Signed-off-by: Tejun Heo Cc: Linus Torvalds Cc: Andrew Morton Cc: Ingo Molnar --- include/linux/workqueue.h | 4 ++++ kernel/workqueue.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5f4aeaa9f3e6..20000305a8a6 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -390,6 +390,10 @@ extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern bool queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); +extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, + struct delayed_work *dwork, unsigned long delay); +extern bool mod_delayed_work(struct workqueue_struct *wq, + struct delayed_work *dwork, unsigned long delay); extern void flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b4a4e05c89e1..41ae2c0979fe 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1413,6 +1413,59 @@ bool queue_delayed_work(struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_delayed_work); +/** + * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU + * @cpu: CPU number to execute work on + * @wq: workqueue to use + * @dwork: work to queue + * @delay: number of jiffies to wait before queueing + * + * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise, + * modify @dwork's timer so that it expires after @delay. If @delay is + * zero, @work is guaranteed to be scheduled immediately regardless of its + * current state. + * + * Returns %false if @dwork was idle and queued, %true if @dwork was + * pending and its timer was modified. + * + * This function is safe to call from any context other than IRQ handler. + * See try_to_grab_pending() for details. + */ +bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, + struct delayed_work *dwork, unsigned long delay) +{ + unsigned long flags; + int ret; + + do { + ret = try_to_grab_pending(&dwork->work, true, &flags); + } while (unlikely(ret == -EAGAIN)); + + if (likely(ret >= 0)) { + __queue_delayed_work(cpu, wq, dwork, delay); + local_irq_restore(flags); + } + + /* -ENOENT from try_to_grab_pending() becomes %true */ + return ret; +} +EXPORT_SYMBOL_GPL(mod_delayed_work_on); + +/** + * mod_delayed_work - modify delay of or queue a delayed work + * @wq: workqueue to use + * @dwork: work to queue + * @delay: number of jiffies to wait before queueing + * + * mod_delayed_work_on() on local CPU. + */ +bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, + unsigned long delay) +{ + return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); +} +EXPORT_SYMBOL_GPL(mod_delayed_work); + /** * worker_enter_idle - enter idle state * @worker: worker which is entering idle state -- cgit v1.2.3-71-gd317 From a181dc14ed23f7a499542ff4c78532b5f24bb18f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Aug 2012 15:58:29 +0300 Subject: jump_label: Export jump_label_rate_limit() CC: Jason Baron CC: Ingo Molnar CC: Peter Zijlstra Signed-off-by: Gleb Natapov Acked-by: Jason Baron Signed-off-by: Avi Kivity --- kernel/jump_label.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 43049192b5ec..60f48fa0fd0d 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -118,6 +118,7 @@ void jump_label_rate_limit(struct static_key_deferred *key, key->timeout = rl; INIT_DELAYED_WORK(&key->work, jump_label_update_timeout); } +EXPORT_SYMBOL_GPL(jump_label_rate_limit); static int addr_conflict(struct jump_entry *entry, void *start, void *end) { -- cgit v1.2.3-71-gd317 From 1265057fa02c7bed3b6d9ddc8a2048065a370364 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 8 Aug 2012 09:38:42 -0700 Subject: workqueue: fix CPU binding of flush_delayed_work[_sync]() delayed_work encodes the workqueue to use and the last CPU in delayed_work->work.data while it's on timer. The target CPU is implicitly recorded as the CPU the timer is queued on and delayed_work_timer_fn() queues delayed_work->work to the CPU it is running on. Unfortunately, this leaves flush_delayed_work[_sync]() no way to find out which CPU the delayed_work was queued for when they try to re-queue after killing the timer. Currently, it chooses the local CPU flush is running on. This can unexpectedly move a delayed_work queued on a specific CPU to another CPU and lead to subtle errors. There isn't much point in trying to save several bytes in struct delayed_work, which is already close to a hundred bytes on 64bit with all debug options turned off. This patch adds delayed_work->cpu to remember the CPU it's queued for. Note that if the timer is migrated during CPU down, the work item could be queued to the downed global_cwq after this change. As a detached global_cwq behaves like an unbound one, this doesn't change much for the delayed_work. Signed-off-by: Tejun Heo Cc: Linus Torvalds Cc: Ingo Molnar Cc: Andrew Morton --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 20000305a8a6..b14d5d59af7c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -102,6 +102,7 @@ struct work_struct { struct delayed_work { struct work_struct work; struct timer_list timer; + int cpu; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 41ae2c0979fe..11723c5b2b20 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1319,7 +1319,7 @@ void delayed_work_timer_fn(unsigned long __data) struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); local_irq_disable(); - __queue_work(WORK_CPU_UNBOUND, cwq->wq, &dwork->work); + __queue_work(dwork->cpu, cwq->wq, &dwork->work); local_irq_enable(); } EXPORT_SYMBOL_GPL(delayed_work_timer_fn); @@ -1356,6 +1356,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, set_work_cwq(work, get_cwq(lcpu, wq), 0); + dwork->cpu = cpu; timer->expires = jiffies + delay; if (unlikely(cpu != WORK_CPU_UNBOUND)) @@ -2997,7 +2998,7 @@ bool flush_delayed_work(struct delayed_work *dwork) { local_irq_disable(); if (del_timer_sync(&dwork->timer)) - __queue_work(WORK_CPU_UNBOUND, + __queue_work(dwork->cpu, get_work_cwq(&dwork->work)->wq, &dwork->work); local_irq_enable(); return flush_work(&dwork->work); @@ -3020,7 +3021,7 @@ bool flush_delayed_work_sync(struct delayed_work *dwork) { local_irq_disable(); if (del_timer_sync(&dwork->timer)) - __queue_work(WORK_CPU_UNBOUND, + __queue_work(dwork->cpu, get_work_cwq(&dwork->work)->wq, &dwork->work); local_irq_enable(); return flush_work_sync(&dwork->work); -- cgit v1.2.3-71-gd317 From 23657bb192f14b789e4c478def8f11ecc95b4f6c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 13 Aug 2012 17:08:19 -0700 Subject: workqueue: add missing wmb() in clear_work_data() Any operation which clears PENDING should be preceded by a wmb to guarantee that the next PENDING owner sees all the changes made before PENDING release. There are only two places where PENDING is cleared - set_work_cpu_and_clear_pending() and clear_work_data(). The caller of the former already does smp_wmb() but the latter doesn't have any. Move the wmb above set_work_cpu_and_clear_pending() into it and add one to clear_work_data(). There hasn't been any report related to this issue, and, given how clear_work_data() is used, it is extremely unlikely to have caused any actual problems on any architecture. Signed-off-by: Tejun Heo Cc: Oleg Nesterov --- kernel/workqueue.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 11723c5b2b20..4fef9527a620 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -570,11 +570,19 @@ static void set_work_cwq(struct work_struct *work, static void set_work_cpu_and_clear_pending(struct work_struct *work, unsigned int cpu) { + /* + * The following wmb is paired with the implied mb in + * test_and_set_bit(PENDING) and ensures all updates to @work made + * here are visible to and precede any updates by the next PENDING + * owner. + */ + smp_wmb(); set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); } static void clear_work_data(struct work_struct *work) { + smp_wmb(); /* see set_work_cpu_and_clear_pending() */ set_work_data(work, WORK_STRUCT_NO_CPU, 0); } @@ -2182,14 +2190,11 @@ __acquires(&gcwq->lock) wake_up_worker(pool); /* - * Record the last CPU and clear PENDING. The following wmb is - * paired with the implied mb in test_and_set_bit(PENDING) and - * ensures all updates to @work made here are visible to and - * precede any updates by the next PENDING owner. Also, clear - * PENDING inside @gcwq->lock so that PENDING and queued state - * changes happen together while IRQ is disabled. + * Record the last CPU and clear PENDING which should be the last + * update to @work. Also, do this inside @gcwq->lock so that + * PENDING and queued state changes happen together while IRQ is + * disabled. */ - smp_wmb(); set_work_cpu_and_clear_pending(work, gcwq->cpu); spin_unlock_irq(&gcwq->lock); -- cgit v1.2.3-71-gd317 From 4f82f45730c68fdaf9b0472495a965188404866e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 10:37:59 -0600 Subject: net ip6 flowlabel: Make owner a union of struct pid * and kuid_t Correct a long standing omission and use struct pid in the owner field of struct ip6_flowlabel when the share type is IPV6_FL_S_PROCESS. This guarantees we don't have issues when pid wraparound occurs. Use a kuid_t in the owner field of struct ip6_flowlabel when the share type is IPV6_FL_S_USER to add user namespace support. In /proc/net/ip6_flowlabel capture the current pid namespace when opening the file and release the pid namespace when the file is closed ensuring we print the pid owner value that is meaning to the reader of the file. Similarly use from_kuid_munged to print uid values that are meaningful to the reader of the file. This requires exporting pid_nr_ns so that ipv6 can continue to built as a module. Yoiks what silliness Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/net/ipv6.h | 5 ++++- init/Kconfig | 1 - kernel/pid.c | 1 + net/ipv6/ip6_flowlabel.c | 50 +++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 48 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 01c34b363a34..c8a202436e01 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -222,7 +222,10 @@ struct ip6_flowlabel { struct ipv6_txoptions *opt; unsigned long linger; u8 share; - u32 owner; + union { + struct pid *pid; + kuid_t uid; + } owner; unsigned long lastuse; unsigned long expires; struct net *fl_net; diff --git a/init/Kconfig b/init/Kconfig index f857f97bcef3..64ff9ce59443 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -948,7 +948,6 @@ config UIDGID_CONVERTED depends on NETFILTER_XT_MATCH_RECENT = n depends on NETFILTER_XT_TARGET_LOG = n depends on NETFILTER_NETLINK_LOG = n - depends on IPV6 = n depends on AF_RXRPC = n depends on NET_KEY = n depends on INET_DIAG = n diff --git a/kernel/pid.c b/kernel/pid.c index e86b291ad834..aebd4f5aaf41 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -479,6 +479,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) } return nr; } +EXPORT_SYMBOL_GPL(pid_nr_ns); pid_t pid_vnr(struct pid *pid) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 9772fbd8a3f5..c836a6a20a34 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) static void fl_free(struct ip6_flowlabel *fl) { + switch (fl->share) { + case IPV6_FL_S_PROCESS: + put_pid(fl->owner.pid); + break; + } if (fl) { release_net(fl->fl_net); kfree(fl->opt); @@ -394,10 +400,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, case IPV6_FL_S_ANY: break; case IPV6_FL_S_PROCESS: - fl->owner = current->pid; + fl->owner.pid = get_task_pid(current, PIDTYPE_PID); break; case IPV6_FL_S_USER: - fl->owner = current_euid(); + fl->owner.uid = current_euid(); break; default: err = -EINVAL; @@ -561,7 +567,10 @@ recheck: err = -EPERM; if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || - fl1->owner != fl->owner) + ((fl1->share == IPV6_FL_S_PROCESS) && + (fl1->owner.pid == fl->owner.pid)) || + ((fl1->share == IPV6_FL_S_USER) && + uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -EINVAL; @@ -621,6 +630,7 @@ done: struct ip6fl_iter_state { struct seq_net_private p; + struct pid_namespace *pid_ns; int bucket; }; @@ -699,6 +709,7 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v) static int ip6fl_seq_show(struct seq_file *seq, void *v) { + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); if (v == SEQ_START_TOKEN) seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); @@ -708,7 +719,11 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", (unsigned int)ntohl(fl->label), fl->share, - (int)fl->owner, + ((fl->share == IPV6_FL_S_PROCESS) ? + pid_nr_ns(fl->owner.pid, state->pid_ns) : + ((fl->share == IPV6_FL_S_USER) ? + from_kuid_munged(seq_user_ns(seq), fl->owner.uid) : + 0)), atomic_read(&fl->users), fl->linger/HZ, (long)(fl->expires - jiffies)/HZ, @@ -727,8 +742,29 @@ static const struct seq_operations ip6fl_seq_ops = { static int ip6fl_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ip6fl_seq_ops, - sizeof(struct ip6fl_iter_state)); + struct seq_file *seq; + struct ip6fl_iter_state *state; + int err; + + err = seq_open_net(inode, file, &ip6fl_seq_ops, + sizeof(struct ip6fl_iter_state)); + + if (!err) { + seq = file->private_data; + state = ip6fl_seq_private(seq); + rcu_read_lock(); + state->pid_ns = get_pid_ns(task_active_pid_ns(current)); + rcu_read_unlock(); + } + return err; +} + +static int ip6fl_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); + put_pid_ns(state->pid_ns); + return seq_release_net(inode, file); } static const struct file_operations ip6fl_seq_fops = { @@ -736,7 +772,7 @@ static const struct file_operations ip6fl_seq_fops = { .open = ip6fl_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_net, + .release = ip6fl_seq_release, }; static int __net_init ip6_flowlabel_proc_init(struct net *net) -- cgit v1.2.3-71-gd317 From 523a6a945f3cf5f1d337e50634687a577a732a5f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Aug 2012 19:11:22 -0700 Subject: pidns: Export free_pid_ns There is a least one modular user so export free_pid_ns so modules can capture and use the pid namespace on the very rare occasion when it makes sense. Acked-by: David S. Miller Signed-off-by: "Eric W. Biederman" --- kernel/pid_namespace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index b3c7fd554250..baa528d7dfbd 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -16,6 +16,7 @@ #include #include #include +#include #define BITS_PER_PAGE (PAGE_SIZE*8) @@ -144,6 +145,7 @@ void free_pid_ns(struct kref *kref) if (parent != NULL) put_pid_ns(parent); } +EXPORT_SYMBOL_GPL(free_pid_ns); void zap_pid_ns_processes(struct pid_namespace *pid_ns) { -- cgit v1.2.3-71-gd317 From 330dad5b9c9555632578c00e94e85c122561c5c7 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 15 Aug 2012 23:25:36 +0900 Subject: workqueue: use enum value to set array size of pools in gcwq Commit 3270476a6c0ce322354df8679652f060d66526dc ('workqueue: reimplement WQ_HIGHPRI using a separate worker_pool') introduce separate worker_pool for HIGHPRI. Although there is NR_WORKER_POOLS enum value which represent size of pools, definition of worker_pool in gcwq doesn't use it. Using it makes code robust and prevent future mistakes. So change code to use this enum value. Signed-off-by: Joonsoo Kim Signed-off-by: Tejun Heo --- kernel/workqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4fef9527a620..49d8f4a0110d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -183,7 +183,8 @@ struct global_cwq { struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; /* L: hash of busy workers */ - struct worker_pool pools[2]; /* normal and highpri pools */ + struct worker_pool pools[NR_WORKER_POOLS]; + /* normal and highpri pools */ wait_queue_head_t rebind_hold; /* rebind hold wait */ } ____cacheline_aligned_in_smp; -- cgit v1.2.3-71-gd317 From b75cac9368fa91636e17d0f7950b35d837154e14 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 15 Aug 2012 23:25:37 +0900 Subject: workqueue: correct req_cpu in trace_workqueue_queue_work() When we do tracing workqueue_queue_work(), it records requested cpu. But, if !(@wq->flag & WQ_UNBOUND) and @cpu is WORK_CPU_UNBOUND, requested cpu is changed as local cpu. In case of @wq->flag & WQ_UNBOUND, above change is not occured, therefore it is reasonable to correct it. Use temporary local variable for storing requested cpu. Signed-off-by: Joonsoo Kim Signed-off-by: Tejun Heo --- kernel/workqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 49d8f4a0110d..c29f2dc0f4fc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1198,6 +1198,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct cpu_workqueue_struct *cwq; struct list_head *worklist; unsigned int work_flags; + unsigned int req_cpu = cpu; /* * While a work item is PENDING && off queue, a task trying to @@ -1253,7 +1254,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, /* gcwq determined, get cwq and queue */ cwq = get_cwq(gcwq->cpu, wq); - trace_workqueue_queue_work(cpu, cwq, work); + trace_workqueue_queue_work(req_cpu, cwq, work); if (WARN_ON(!list_empty(&work->entry))) { spin_unlock(&gcwq->lock); -- cgit v1.2.3-71-gd317 From e42986de481238204f6e0b0f4434da428895c20b Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 15 Aug 2012 23:25:38 +0900 Subject: workqueue: change value of lcpu in __queue_delayed_work_on() We assign cpu id into work struct's data field in __queue_delayed_work_on(). In current implementation, when work is come in first time, current running cpu id is assigned. If we do __queue_delayed_work_on() with CPU A on CPU B, __queue_work() invoked in delayed_work_timer_fn() go into the following sub-optimal path in case of WQ_NON_REENTRANT. gcwq = get_gcwq(cpu); if (wq->flags & WQ_NON_REENTRANT && (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) { Change lcpu to @cpu and rechange lcpu to local cpu if lcpu is WORK_CPU_UNBOUND. It is sufficient to prevent to go into sub-optimal path. tj: Slightly rephrased the comment. Signed-off-by: Joonsoo Kim Signed-off-by: Tejun Heo --- kernel/workqueue.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c29f2dc0f4fc..99ee9b939264 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1356,9 +1356,15 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, if (!(wq->flags & WQ_UNBOUND)) { struct global_cwq *gcwq = get_work_gcwq(work); - if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND) + /* + * If we cannot get the last gcwq from @work directly, + * select the last CPU such that it avoids unnecessarily + * triggering non-reentrancy check in __queue_work(). + */ + lcpu = cpu; + if (gcwq) lcpu = gcwq->cpu; - else + if (lcpu == WORK_CPU_UNBOUND) lcpu = raw_smp_processor_id(); } else { lcpu = WORK_CPU_UNBOUND; -- cgit v1.2.3-71-gd317 From 1aabe902ca3638d862bf0dad5a697d3a8e046b0a Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 15 Aug 2012 23:25:39 +0900 Subject: workqueue: introduce system_highpri_wq Commit 3270476a6c0ce322354df8679652f060d66526dc ('workqueue: reimplement WQ_HIGHPRI using a separate worker_pool') introduce separate worker pool for HIGHPRI. When we handle busyworkers for gcwq, it can be normal worker or highpri worker. But, we don't consider this difference in rebind_workers(), we use just system_wq for highpri worker. It makes mismatch between cwq->pool and worker->pool. It doesn't make error in current implementation, but possible in the future. Now, we introduce system_highpri_wq to use proper cwq for highpri workers in rebind_workers(). Following patch fix this issue properly. tj: Even apart from rebinding, having system_highpri_wq generally makes sense. Signed-off-by: Joonsoo Kim Signed-off-by: Tejun Heo --- kernel/workqueue.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 99ee9b939264..329c404b68c2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -269,12 +269,14 @@ struct workqueue_struct { }; struct workqueue_struct *system_wq __read_mostly; +struct workqueue_struct *system_highpri_wq __read_mostly; struct workqueue_struct *system_long_wq __read_mostly; struct workqueue_struct *system_nrt_wq __read_mostly; struct workqueue_struct *system_unbound_wq __read_mostly; struct workqueue_struct *system_freezable_wq __read_mostly; struct workqueue_struct *system_nrt_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); +EXPORT_SYMBOL_GPL(system_highpri_wq); EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); EXPORT_SYMBOL_GPL(system_unbound_wq); @@ -3928,6 +3930,7 @@ static int __init init_workqueues(void) } system_wq = alloc_workqueue("events", 0, 0); + system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0); system_long_wq = alloc_workqueue("events_long", 0, 0); system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, @@ -3936,9 +3939,9 @@ static int __init init_workqueues(void) WQ_FREEZABLE, 0); system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable", WQ_NON_REENTRANT | WQ_FREEZABLE, 0); - BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || - !system_unbound_wq || !system_freezable_wq || - !system_nrt_freezable_wq); + BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq || + !system_nrt_wq || !system_unbound_wq || !system_freezable_wq || + !system_nrt_freezable_wq); return 0; } early_initcall(init_workqueues); -- cgit v1.2.3-71-gd317 From e2b6a6d570f070aa90ac00d2d10b1488512f8520 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 15 Aug 2012 23:25:40 +0900 Subject: workqueue: use system_highpri_wq for highpri workers in rebind_workers() In rebind_workers(), we do inserting a work to rebind to cpu for busy workers. Currently, in this case, we use only system_wq. This makes a possible error situation as there is mismatch between cwq->pool and worker->pool. To prevent this, we should use system_highpri_wq for highpri worker to match theses. This implements it. tj: Rephrased comment a bit. Signed-off-by: Joonsoo Kim Signed-off-by: Tejun Heo --- kernel/workqueue.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 329c404b68c2..8936761b814a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1741,6 +1741,7 @@ retry: /* rebind busy workers */ for_each_busy_worker(worker, i, pos, gcwq) { struct work_struct *rebind_work = &worker->rebind_work; + struct workqueue_struct *wq; /* morph UNBOUND to REBIND */ worker->flags &= ~WORKER_UNBOUND; @@ -1750,11 +1751,20 @@ retry: work_data_bits(rebind_work))) continue; - /* wq doesn't matter, use the default one */ debug_work_activate(rebind_work); - insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work, - worker->scheduled.next, - work_color_to_flags(WORK_NO_COLOR)); + + /* + * wq doesn't really matter but let's keep @worker->pool + * and @cwq->pool consistent for sanity. + */ + if (worker_pool_pri(worker->pool)) + wq = system_highpri_wq; + else + wq = system_wq; + + insert_work(get_cwq(gcwq->cpu, wq), rebind_work, + worker->scheduled.next, + work_color_to_flags(WORK_NO_COLOR)); } } -- cgit v1.2.3-71-gd317 From 7635d2fd7f0fa63b6ec03050614c314d7139f14a Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 15 Aug 2012 23:25:41 +0900 Subject: workqueue: use system_highpri_wq for unbind_work To speed cpu down processing up, use system_highpri_wq. As scheduling priority of workers on it is higher than system_wq and it is not contended by other normal works on this cpu, work on it is processed faster than system_wq. tj: CPU up/downs care quite a bit about latency these days. This shouldn't hurt anything and makes sense. Signed-off-by: Joonsoo Kim Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8936761b814a..7da24711038f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3680,7 +3680,7 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, case CPU_DOWN_PREPARE: /* unbinding should happen on the local CPU */ INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn); - schedule_work_on(cpu, &unbind_work); + queue_work_on(cpu, system_highpri_wq, &unbind_work); flush_work(&unbind_work); break; } -- cgit v1.2.3-71-gd317 From 044c782ce3a901fbd17cbe701c592f582381174d Mon Sep 17 00:00:00 2001 From: Valentin Ilie Date: Sun, 19 Aug 2012 00:52:42 +0300 Subject: workqueue: fix checkpatch issues Fixed some checkpatch warnings. tj: adapted to wq/for-3.7 and massaged pr_xxx() format strings a bit. Signed-off-by: Valentin Ilie Signed-off-by: Tejun Heo LKML-Reference: <1345326762-21747-1-git-send-email-valentin.ilie@gmail.com> --- kernel/workqueue.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7da24711038f..de429ba000ee 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -269,18 +269,18 @@ struct workqueue_struct { }; struct workqueue_struct *system_wq __read_mostly; -struct workqueue_struct *system_highpri_wq __read_mostly; -struct workqueue_struct *system_long_wq __read_mostly; -struct workqueue_struct *system_nrt_wq __read_mostly; -struct workqueue_struct *system_unbound_wq __read_mostly; -struct workqueue_struct *system_freezable_wq __read_mostly; -struct workqueue_struct *system_nrt_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); +struct workqueue_struct *system_highpri_wq __read_mostly; EXPORT_SYMBOL_GPL(system_highpri_wq); +struct workqueue_struct *system_long_wq __read_mostly; EXPORT_SYMBOL_GPL(system_long_wq); +struct workqueue_struct *system_nrt_wq __read_mostly; EXPORT_SYMBOL_GPL(system_nrt_wq); +struct workqueue_struct *system_unbound_wq __read_mostly; EXPORT_SYMBOL_GPL(system_unbound_wq); +struct workqueue_struct *system_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_wq); +struct workqueue_struct *system_nrt_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_nrt_freezable_wq); #define CREATE_TRACE_POINTS @@ -2232,11 +2232,9 @@ __acquires(&gcwq->lock) lock_map_release(&cwq->wq->lockdep_map); if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { - printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " - "%s/0x%08x/%d\n", - current->comm, preempt_count(), task_pid_nr(current)); - printk(KERN_ERR " last function: "); - print_symbol("%s\n", (unsigned long)f); + pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" + " last function: %pf\n", + current->comm, preempt_count(), task_pid_nr(current), f); debug_show_held_locks(current); dump_stack(); } @@ -2790,8 +2788,8 @@ reflush: if (++flush_cnt == 10 || (flush_cnt % 100 == 0 && flush_cnt <= 1000)) - pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n", - wq->name, flush_cnt); + pr_warn("workqueue %s: flush on destruction isn't complete after %u tries\n", + wq->name, flush_cnt); goto reflush; } @@ -3275,9 +3273,8 @@ static int wq_clamp_max_active(int max_active, unsigned int flags, int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE; if (max_active < 1 || max_active > lim) - printk(KERN_WARNING "workqueue: max_active %d requested for %s " - "is out of range, clamping between %d and %d\n", - max_active, name, 1, lim); + pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n", + max_active, name, 1, lim); return clamp_val(max_active, 1, lim); } -- cgit v1.2.3-71-gd317 From dbf2576e37da0fcc7aacbfbb9fd5d3de7888a3c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Aug 2012 14:51:23 -0700 Subject: workqueue: make all workqueues non-reentrant By default, each per-cpu part of a bound workqueue operates separately and a work item may be executing concurrently on different CPUs. The behavior avoids some cross-cpu traffic but leads to subtle weirdities and not-so-subtle contortions in the API. * There's no sane usefulness in allowing a single work item to be executed concurrently on multiple CPUs. People just get the behavior unintentionally and get surprised after learning about it. Most either explicitly synchronize or use non-reentrant/ordered workqueue but this is error-prone. * flush_work() can't wait for multiple instances of the same work item on different CPUs. If a work item is executing on cpu0 and then queued on cpu1, flush_work() can only wait for the one on cpu1. Unfortunately, work items can easily cross CPU boundaries unintentionally when the queueing thread gets migrated. This means that if multiple queuers compete, flush_work() can't even guarantee that the instance queued right before it is finished before returning. * flush_work_sync() was added to work around some of the deficiencies of flush_work(). In addition to the usual flushing, it ensures that all currently executing instances are finished before returning. This operation is expensive as it has to walk all CPUs and at the same time fails to address competing queuer case. Incorrectly using flush_work() when flush_work_sync() is necessary is an easy error to make and can lead to bugs which are difficult to reproduce. * Similar problems exist for flush_delayed_work[_sync](). Other than the cross-cpu access concern, there's no benefit in allowing parallel execution and it's plain silly to have this level of contortion for workqueue which is widely used from core code to extremely obscure drivers. This patch makes all workqueues non-reentrant. If a work item is executing on a different CPU when queueing is requested, it is always queued to that CPU. This guarantees that any given work item can be executing on one CPU at maximum and if a work item is queued and executing, both are on the same CPU. The only behavior change which may affect workqueue users negatively is that non-reentrancy overrides the affinity specified by queue_work_on(). On a reentrant workqueue, the affinity specified by queue_work_on() is always followed. Now, if the work item is executing on one of the CPUs, the work item will be queued there regardless of the requested affinity. I've reviewed all workqueue users which request explicit affinity, and, fortunately, none seems to be crazy enough to exploit parallel execution of the same work item. This adds an additional busy_hash lookup if the work item was previously queued on a different CPU. This shouldn't be noticeable under any sane workload. Work item queueing isn't a very high-frequency operation and they don't jump across CPUs all the time. In a micro benchmark to exaggerate this difference - measuring the time it takes for two work items to repeatedly jump between two CPUs a number (10M) of times with busy_hash table densely populated, the difference was around 3%. While the overhead is measureable, it is only visible in pathological cases and the difference isn't huge. This change brings much needed sanity to workqueue and makes its behavior consistent with timer. I think this is the right tradeoff to make. This enables significant simplification of workqueue API. Simplification patches will follow. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index de429ba000ee..c4feef9798ea 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1225,14 +1225,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, cpu = raw_smp_processor_id(); /* - * It's multi cpu. If @wq is non-reentrant and @work - * was previously on a different cpu, it might still - * be running there, in which case the work needs to - * be queued on that cpu to guarantee non-reentrance. + * It's multi cpu. If @work was previously on a different + * cpu, it might still be running there, in which case the + * work needs to be queued on that cpu to guarantee + * non-reentrancy. */ gcwq = get_gcwq(cpu); - if (wq->flags & WQ_NON_REENTRANT && - (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) { + last_gcwq = get_work_gcwq(work); + + if (last_gcwq && last_gcwq != gcwq) { struct worker *worker; spin_lock(&last_gcwq->lock); -- cgit v1.2.3-71-gd317 From 606a5020b9bdceb20b4f43e11db0054afa349028 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Aug 2012 14:51:23 -0700 Subject: workqueue: gut flush[_delayed]_work_sync() Now that all workqueues are non-reentrant, flush[_delayed]_work_sync() are equivalent to flush[_delayed]_work(). Drop the separate implementation and make them thin wrappers around flush[_delayed]_work(). * start_flush_work() no longer takes @wait_executing as the only left user - flush_work() - always sets it to %true. * __cancel_work_timer() uses flush_work() instead of wait_on_work(). Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 14 +++++- kernel/workqueue.c | 122 ++++------------------------------------------ 2 files changed, 22 insertions(+), 114 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b14d5d59af7c..4f9d3bc161a2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -412,11 +412,9 @@ extern int keventd_up(void); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); -extern bool flush_work_sync(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); -extern bool flush_delayed_work_sync(struct delayed_work *work); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, @@ -456,6 +454,18 @@ static inline bool __cancel_delayed_work(struct delayed_work *work) return ret; } +/* used to be different but now identical to flush_work(), deprecated */ +static inline bool flush_work_sync(struct work_struct *work) +{ + return flush_work(work); +} + +/* used to be different but now identical to flush_delayed_work(), deprecated */ +static inline bool flush_delayed_work_sync(struct delayed_work *dwork) +{ + return flush_delayed_work(dwork); +} + #ifndef CONFIG_SMP static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c4feef9798ea..5f13a9a2c792 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2801,8 +2801,7 @@ reflush: } EXPORT_SYMBOL_GPL(drain_workqueue); -static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, - bool wait_executing) +static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) { struct worker *worker = NULL; struct global_cwq *gcwq; @@ -2824,13 +2823,12 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, cwq = get_work_cwq(work); if (unlikely(!cwq || gcwq != cwq->pool->gcwq)) goto already_gone; - } else if (wait_executing) { + } else { worker = find_worker_executing_work(gcwq, work); if (!worker) goto already_gone; cwq = worker->current_cwq; - } else - goto already_gone; + } insert_wq_barrier(cwq, barr, work, worker); spin_unlock_irq(&gcwq->lock); @@ -2857,15 +2855,8 @@ already_gone: * flush_work - wait for a work to finish executing the last queueing instance * @work: the work to flush * - * Wait until @work has finished execution. This function considers - * only the last queueing instance of @work. If @work has been - * enqueued across different CPUs on a non-reentrant workqueue or on - * multiple workqueues, @work might still be executing on return on - * some of the CPUs from earlier queueing. - * - * If @work was queued only on a non-reentrant, ordered or unbound - * workqueue, @work is guaranteed to be idle on return if it hasn't - * been requeued since flush started. + * Wait until @work has finished execution. @work is guaranteed to be idle + * on return if it hasn't been requeued since flush started. * * RETURNS: * %true if flush_work() waited for the work to finish execution, @@ -2878,85 +2869,15 @@ bool flush_work(struct work_struct *work) lock_map_acquire(&work->lockdep_map); lock_map_release(&work->lockdep_map); - if (start_flush_work(work, &barr, true)) { + if (start_flush_work(work, &barr)) { wait_for_completion(&barr.done); destroy_work_on_stack(&barr.work); return true; - } else - return false; -} -EXPORT_SYMBOL_GPL(flush_work); - -static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) -{ - struct wq_barrier barr; - struct worker *worker; - - spin_lock_irq(&gcwq->lock); - - worker = find_worker_executing_work(gcwq, work); - if (unlikely(worker)) - insert_wq_barrier(worker->current_cwq, &barr, work, worker); - - spin_unlock_irq(&gcwq->lock); - - if (unlikely(worker)) { - wait_for_completion(&barr.done); - destroy_work_on_stack(&barr.work); - return true; - } else + } else { return false; -} - -static bool wait_on_work(struct work_struct *work) -{ - bool ret = false; - int cpu; - - might_sleep(); - - lock_map_acquire(&work->lockdep_map); - lock_map_release(&work->lockdep_map); - - for_each_gcwq_cpu(cpu) - ret |= wait_on_cpu_work(get_gcwq(cpu), work); - return ret; -} - -/** - * flush_work_sync - wait until a work has finished execution - * @work: the work to flush - * - * Wait until @work has finished execution. On return, it's - * guaranteed that all queueing instances of @work which happened - * before this function is called are finished. In other words, if - * @work hasn't been requeued since this function was called, @work is - * guaranteed to be idle on return. - * - * RETURNS: - * %true if flush_work_sync() waited for the work to finish execution, - * %false if it was already idle. - */ -bool flush_work_sync(struct work_struct *work) -{ - struct wq_barrier barr; - bool pending, waited; - - /* we'll wait for executions separately, queue barr only if pending */ - pending = start_flush_work(work, &barr, false); - - /* wait for executions to finish */ - waited = wait_on_work(work); - - /* wait for the pending one */ - if (pending) { - wait_for_completion(&barr.done); - destroy_work_on_stack(&barr.work); } - - return pending || waited; } -EXPORT_SYMBOL_GPL(flush_work_sync); +EXPORT_SYMBOL_GPL(flush_work); static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) { @@ -2970,14 +2891,14 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) * would be waiting for before retrying. */ if (unlikely(ret == -ENOENT)) - wait_on_work(work); + flush_work(work); } while (unlikely(ret < 0)); /* tell other tasks trying to grab @work to back off */ mark_work_canceling(work); local_irq_restore(flags); - wait_on_work(work); + flush_work(work); clear_work_data(work); return ret; } @@ -3029,29 +2950,6 @@ bool flush_delayed_work(struct delayed_work *dwork) } EXPORT_SYMBOL(flush_delayed_work); -/** - * flush_delayed_work_sync - wait for a dwork to finish - * @dwork: the delayed work to flush - * - * Delayed timer is cancelled and the pending work is queued for - * execution immediately. Other than timer handling, its behavior - * is identical to flush_work_sync(). - * - * RETURNS: - * %true if flush_work_sync() waited for the work to finish execution, - * %false if it was already idle. - */ -bool flush_delayed_work_sync(struct delayed_work *dwork) -{ - local_irq_disable(); - if (del_timer_sync(&dwork->timer)) - __queue_work(dwork->cpu, - get_work_cwq(&dwork->work)->wq, &dwork->work); - local_irq_enable(); - return flush_work_sync(&dwork->work); -} -EXPORT_SYMBOL(flush_delayed_work_sync); - /** * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish * @dwork: the delayed work cancel -- cgit v1.2.3-71-gd317 From ae930e0f4e66fd540c6fbad9f1e2a7743d8b9afe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Aug 2012 14:51:23 -0700 Subject: workqueue: gut system_nrt[_freezable]_wq() Now that all workqueues are non-reentrant, system[_freezable]_wq() are equivalent to system_nrt[_freezable]_wq(). Replace the latter with wrappers around system[_freezable]_wq(). The wrapping goes through inline functions so that __deprecated can be added easily. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 23 ++++++++++++++--------- kernel/workqueue.c | 10 +--------- 2 files changed, 15 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4f9d3bc161a2..855fcdaa2d72 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -297,10 +297,6 @@ enum { * system_long_wq is similar to system_wq but may host long running * works. Queue flushing might take relatively long. * - * system_nrt_wq is non-reentrant and guarantees that any given work - * item is never executed in parallel by multiple CPUs. Queue - * flushing might take relatively long. - * * system_unbound_wq is unbound workqueue. Workers are not bound to * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and @@ -308,16 +304,25 @@ enum { * * system_freezable_wq is equivalent to system_wq except that it's * freezable. - * - * system_nrt_freezable_wq is equivalent to system_nrt_wq except that - * it's freezable. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; -extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; -extern struct workqueue_struct *system_nrt_freezable_wq; + +static inline struct workqueue_struct *__system_nrt_wq(void) +{ + return system_wq; +} + +static inline struct workqueue_struct *__system_nrt_freezable_wq(void) +{ + return system_freezable_wq; +} + +/* equivlalent to system_wq and system_freezable_wq, deprecated */ +#define system_nrt_wq __system_nrt_wq() +#define system_nrt_freezable_wq __system_nrt_freezable_wq() extern struct workqueue_struct * __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5f13a9a2c792..85bd3409b9f5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -274,14 +274,10 @@ struct workqueue_struct *system_highpri_wq __read_mostly; EXPORT_SYMBOL_GPL(system_highpri_wq); struct workqueue_struct *system_long_wq __read_mostly; EXPORT_SYMBOL_GPL(system_long_wq); -struct workqueue_struct *system_nrt_wq __read_mostly; -EXPORT_SYMBOL_GPL(system_nrt_wq); struct workqueue_struct *system_unbound_wq __read_mostly; EXPORT_SYMBOL_GPL(system_unbound_wq); struct workqueue_struct *system_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_wq); -struct workqueue_struct *system_nrt_freezable_wq __read_mostly; -EXPORT_SYMBOL_GPL(system_nrt_freezable_wq); #define CREATE_TRACE_POINTS #include @@ -3838,16 +3834,12 @@ static int __init init_workqueues(void) system_wq = alloc_workqueue("events", 0, 0); system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0); system_long_wq = alloc_workqueue("events_long", 0, 0); - system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); system_freezable_wq = alloc_workqueue("events_freezable", WQ_FREEZABLE, 0); - system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable", - WQ_NON_REENTRANT | WQ_FREEZABLE, 0); BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq || - !system_nrt_wq || !system_unbound_wq || !system_freezable_wq || - !system_nrt_freezable_wq); + !system_unbound_wq || !system_freezable_wq); return 0; } early_initcall(init_workqueues); -- cgit v1.2.3-71-gd317 From 3b07e9ca26866697616097044f25fbe53dbab693 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Aug 2012 14:51:24 -0700 Subject: workqueue: deprecate system_nrt[_freezable]_wq system_nrt[_freezable]_wq are now spurious. Mark them deprecated and convert all users to system[_freezable]_wq. If you're cc'd and wondering what's going on: Now all workqueues are non-reentrant, so there's no reason to use system_nrt[_freezable]_wq. Please use system[_freezable]_wq instead. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Acked-By: Lai Jiangshan Cc: Jens Axboe Cc: David Airlie Cc: Jiri Kosina Cc: "David S. Miller" Cc: Rusty Russell Cc: "Paul E. McKenney" Cc: David Howells --- block/blk-throttle.c | 7 +++---- block/genhd.c | 10 +++++----- drivers/gpu/drm/drm_crtc_helper.c | 6 +++--- drivers/hid/hid-wiimote-ext.c | 2 +- drivers/mmc/core/host.c | 4 ++-- drivers/net/virtio_net.c | 12 ++++++------ include/linux/workqueue.h | 4 ++-- kernel/srcu.c | 4 ++-- security/keys/gc.c | 8 ++++---- security/keys/key.c | 2 +- 10 files changed, 29 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e287c19908c8..5a58e779912b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -180,7 +180,7 @@ static inline unsigned int total_nr_queued(struct throtl_data *td) /* * Worker for allocating per cpu stat for tgs. This is scheduled on the - * system_nrt_wq once there are some groups on the alloc_list waiting for + * system_wq once there are some groups on the alloc_list waiting for * allocation. */ static void tg_stats_alloc_fn(struct work_struct *work) @@ -194,8 +194,7 @@ alloc_stats: stats_cpu = alloc_percpu(struct tg_stats_cpu); if (!stats_cpu) { /* allocation failed, try again after some time */ - queue_delayed_work(system_nrt_wq, dwork, - msecs_to_jiffies(10)); + schedule_delayed_work(dwork, msecs_to_jiffies(10)); return; } } @@ -238,7 +237,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg) */ spin_lock_irqsave(&tg_stats_alloc_lock, flags); list_add(&tg->stats_alloc_node, &tg_stats_alloc_list); - queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0); + schedule_delayed_work(&tg_stats_alloc_work, 0); spin_unlock_irqrestore(&tg_stats_alloc_lock, flags); } diff --git a/block/genhd.c b/block/genhd.c index 5d8b44a6442b..a2f3d6a5f55c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1490,9 +1490,9 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) intv = disk_events_poll_jiffies(disk); set_timer_slack(&ev->dwork.timer, intv / 4); if (check_now) - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); + queue_delayed_work(system_freezable_wq, &ev->dwork, 0); else if (intv) - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); + queue_delayed_work(system_freezable_wq, &ev->dwork, intv); out_unlock: spin_unlock_irqrestore(&ev->lock, flags); } @@ -1535,7 +1535,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) spin_lock_irq(&ev->lock); ev->clearing |= mask; if (!ev->block) - mod_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); + mod_delayed_work(system_freezable_wq, &ev->dwork, 0); spin_unlock_irq(&ev->lock); } @@ -1571,7 +1571,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) /* uncondtionally schedule event check and wait for it to finish */ disk_block_events(disk); - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); + queue_delayed_work(system_freezable_wq, &ev->dwork, 0); flush_delayed_work(&ev->dwork); __disk_unblock_events(disk, false); @@ -1608,7 +1608,7 @@ static void disk_events_workfn(struct work_struct *work) intv = disk_events_poll_jiffies(disk); if (!ev->block && intv) - queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); + queue_delayed_work(system_freezable_wq, &ev->dwork, intv); spin_unlock_irq(&ev->lock); diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 3252e7067d8b..8fa9d52820d9 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -968,7 +968,7 @@ static void output_poll_execute(struct work_struct *work) } if (repoll) - queue_delayed_work(system_nrt_wq, delayed_work, DRM_OUTPUT_POLL_PERIOD); + schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD); } void drm_kms_helper_poll_disable(struct drm_device *dev) @@ -993,7 +993,7 @@ void drm_kms_helper_poll_enable(struct drm_device *dev) } if (poll) - queue_delayed_work(system_nrt_wq, &dev->mode_config.output_poll_work, DRM_OUTPUT_POLL_PERIOD); + schedule_delayed_work(&dev->mode_config.output_poll_work, DRM_OUTPUT_POLL_PERIOD); } EXPORT_SYMBOL(drm_kms_helper_poll_enable); @@ -1020,6 +1020,6 @@ void drm_helper_hpd_irq_event(struct drm_device *dev) /* kill timer and schedule immediate execution, this doesn't block */ cancel_delayed_work(&dev->mode_config.output_poll_work); if (drm_kms_helper_poll) - queue_delayed_work(system_nrt_wq, &dev->mode_config.output_poll_work, 0); + schedule_delayed_work(&dev->mode_config.output_poll_work, 0); } EXPORT_SYMBOL(drm_helper_hpd_irq_event); diff --git a/drivers/hid/hid-wiimote-ext.c b/drivers/hid/hid-wiimote-ext.c index 0a1805c9b0e5..d37cd092ffc7 100644 --- a/drivers/hid/hid-wiimote-ext.c +++ b/drivers/hid/hid-wiimote-ext.c @@ -204,7 +204,7 @@ static void wiiext_worker(struct work_struct *work) /* schedule work only once, otherwise mark for reschedule */ static void wiiext_schedule(struct wiimote_ext *ext) { - queue_work(system_nrt_wq, &ext->worker); + schedule_work(&ext->worker); } /* diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 597f189b4427..ee2e16b17017 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -204,8 +204,8 @@ void mmc_host_clk_release(struct mmc_host *host) host->clk_requests--; if (mmc_host_may_gate_card(host->card) && !host->clk_requests) - queue_delayed_work(system_nrt_wq, &host->clk_gate_work, - msecs_to_jiffies(host->clkgate_delay)); + schedule_delayed_work(&host->clk_gate_work, + msecs_to_jiffies(host->clkgate_delay)); spin_unlock_irqrestore(&host->clk_lock, flags); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 83d2b0c34c5e..9650c413e11f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -521,7 +521,7 @@ static void refill_work(struct work_struct *work) /* In theory, this can happen: if we don't get any buffers in * we will *never* try to fill again. */ if (still_empty) - queue_delayed_work(system_nrt_wq, &vi->refill, HZ/2); + schedule_delayed_work(&vi->refill, HZ/2); } static int virtnet_poll(struct napi_struct *napi, int budget) @@ -540,7 +540,7 @@ again: if (vi->num < vi->max / 2) { if (!try_fill_recv(vi, GFP_ATOMIC)) - queue_delayed_work(system_nrt_wq, &vi->refill, 0); + schedule_delayed_work(&vi->refill, 0); } /* Out of packets? */ @@ -745,7 +745,7 @@ static int virtnet_open(struct net_device *dev) /* Make sure we have some buffers: if oom use wq. */ if (!try_fill_recv(vi, GFP_KERNEL)) - queue_delayed_work(system_nrt_wq, &vi->refill, 0); + schedule_delayed_work(&vi->refill, 0); virtnet_napi_enable(vi); return 0; @@ -1020,7 +1020,7 @@ static void virtnet_config_changed(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - queue_work(system_nrt_wq, &vi->config_work); + schedule_work(&vi->config_work); } static int init_vqs(struct virtnet_info *vi) @@ -1152,7 +1152,7 @@ static int virtnet_probe(struct virtio_device *vdev) otherwise get link status from config. */ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { netif_carrier_off(dev); - queue_work(system_nrt_wq, &vi->config_work); + schedule_work(&vi->config_work); } else { vi->status = VIRTIO_NET_S_LINK_UP; netif_carrier_on(dev); @@ -1264,7 +1264,7 @@ static int virtnet_restore(struct virtio_device *vdev) netif_device_attach(vi->dev); if (!try_fill_recv(vi, GFP_KERNEL)) - queue_delayed_work(system_nrt_wq, &vi->refill, 0); + schedule_delayed_work(&vi->refill, 0); mutex_lock(&vi->config_lock); vi->config_enable = true; diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a351be7c3e91..1ce3fb08308d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -310,12 +310,12 @@ extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; -static inline struct workqueue_struct *__system_nrt_wq(void) +static inline struct workqueue_struct * __deprecated __system_nrt_wq(void) { return system_wq; } -static inline struct workqueue_struct *__system_nrt_freezable_wq(void) +static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void) { return system_freezable_wq; } diff --git a/kernel/srcu.c b/kernel/srcu.c index 2095be3318d5..97c465ebd844 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -379,7 +379,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, rcu_batch_queue(&sp->batch_queue, head); if (!sp->running) { sp->running = true; - queue_delayed_work(system_nrt_wq, &sp->work, 0); + schedule_delayed_work(&sp->work, 0); } spin_unlock_irqrestore(&sp->queue_lock, flags); } @@ -631,7 +631,7 @@ static void srcu_reschedule(struct srcu_struct *sp) } if (pending) - queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL); + schedule_delayed_work(&sp->work, SRCU_INTERVAL); } /* diff --git a/security/keys/gc.c b/security/keys/gc.c index 61ab7c82ebb1..d67c97bb1025 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -62,7 +62,7 @@ void key_schedule_gc(time_t gc_at) if (gc_at <= now || test_bit(KEY_GC_REAP_KEYTYPE, &key_gc_flags)) { kdebug("IMMEDIATE"); - queue_work(system_nrt_wq, &key_gc_work); + schedule_work(&key_gc_work); } else if (gc_at < key_gc_next_run) { kdebug("DEFERRED"); key_gc_next_run = gc_at; @@ -77,7 +77,7 @@ void key_schedule_gc(time_t gc_at) void key_schedule_gc_links(void) { set_bit(KEY_GC_KEY_EXPIRED, &key_gc_flags); - queue_work(system_nrt_wq, &key_gc_work); + schedule_work(&key_gc_work); } /* @@ -120,7 +120,7 @@ void key_gc_keytype(struct key_type *ktype) set_bit(KEY_GC_REAP_KEYTYPE, &key_gc_flags); kdebug("schedule"); - queue_work(system_nrt_wq, &key_gc_work); + schedule_work(&key_gc_work); kdebug("sleep"); wait_on_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE, key_gc_wait_bit, @@ -369,7 +369,7 @@ maybe_resched: } if (gc_state & KEY_GC_REAP_AGAIN) - queue_work(system_nrt_wq, &key_gc_work); + schedule_work(&key_gc_work); kleave(" [end %x]", gc_state); return; diff --git a/security/keys/key.c b/security/keys/key.c index 50d96d4e06f2..3cbe3529c418 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -598,7 +598,7 @@ void key_put(struct key *key) key_check(key); if (atomic_dec_and_test(&key->usage)) - queue_work(system_nrt_wq, &key_gc_work); + schedule_work(&key_gc_work); } } EXPORT_SYMBOL(key_put); -- cgit v1.2.3-71-gd317 From e0aecdd874d78b7129a64b056c20e529e2c916df Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Aug 2012 13:18:24 -0700 Subject: workqueue: use irqsafe timer for delayed_work Up to now, for delayed_works, try_to_grab_pending() couldn't be used from IRQ handlers because IRQs may happen while delayed_work_timer_fn() is in progress leading to indefinite -EAGAIN. This patch makes delayed_work use the new TIMER_IRQSAFE flag for delayed_work->timer. This makes try_to_grab_pending() and thus mod_delayed_work_on() safe to call from IRQ handlers. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 8 +++++--- kernel/workqueue.c | 20 +++++++++++--------- 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e84ebb69607d..d86b320319e0 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -136,7 +136,8 @@ struct execute_work { #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ .timer = __TIMER_INITIALIZER(delayed_work_timer_fn, \ - 0, (unsigned long)&(n), (tflags)), \ + 0, (unsigned long)&(n), \ + (tflags) | TIMER_IRQSAFE), \ } #define DECLARE_WORK(n, f) \ @@ -214,7 +215,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } do { \ INIT_WORK(&(_work)->work, (_func)); \ __setup_timer(&(_work)->timer, delayed_work_timer_fn, \ - (unsigned long)(_work), (_tflags)); \ + (unsigned long)(_work), \ + (_tflags) | TIMER_IRQSAFE); \ } while (0) #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ @@ -223,7 +225,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } __setup_timer_on_stack(&(_work)->timer, \ delayed_work_timer_fn, \ (unsigned long)(_work), \ - (_tflags)); \ + (_tflags) | TIMER_IRQSAFE); \ } while (0) #define INIT_DELAYED_WORK(_work, _func) \ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 85bd3409b9f5..b394df8beaee 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1048,16 +1048,14 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, * for arbitrarily long * * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting - * preempted while holding PENDING and @work off queue, preemption must be - * disabled on entry. This ensures that we don't return -EAGAIN while - * another task is preempted in this function. + * interrupted while holding PENDING and @work off queue, irq must be + * disabled on entry. This, combined with delayed_work->timer being + * irqsafe, ensures that we return -EAGAIN for finite short period of time. * * On successful return, >= 0, irq is disabled and the caller is * responsible for releasing it using local_irq_restore(*@flags). * - * This function is safe to call from any context other than IRQ handler. - * An IRQ handler may run on top of delayed_work_timer_fn() which can make - * this function return -EAGAIN perpetually. + * This function is safe to call from any context including IRQ handler. */ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, unsigned long *flags) @@ -1072,6 +1070,11 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, if (is_dwork) { struct delayed_work *dwork = to_delayed_work(work); + /* + * dwork->timer is irqsafe. If del_timer() fails, it's + * guaranteed that the timer is not queued anywhere and not + * running on the local CPU. + */ if (likely(del_timer(&dwork->timer))) return 1; } @@ -1327,9 +1330,8 @@ void delayed_work_timer_fn(unsigned long __data) struct delayed_work *dwork = (struct delayed_work *)__data; struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); - local_irq_disable(); + /* should have been called from irqsafe timer with irq already off */ __queue_work(dwork->cpu, cwq->wq, &dwork->work); - local_irq_enable(); } EXPORT_SYMBOL_GPL(delayed_work_timer_fn); @@ -1444,7 +1446,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work); * Returns %false if @dwork was idle and queued, %true if @dwork was * pending and its timer was modified. * - * This function is safe to call from any context other than IRQ handler. + * This function is safe to call from any context including IRQ handler. * See try_to_grab_pending() for details. */ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, -- cgit v1.2.3-71-gd317 From 57b30ae77bf00d2318df711ef9a4d2a9be0a3a2a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Aug 2012 13:18:24 -0700 Subject: workqueue: reimplement cancel_delayed_work() using try_to_grab_pending() cancel_delayed_work() can't be called from IRQ handlers due to its use of del_timer_sync() and can't cancel work items which are already transferred from timer to worklist. Also, unlike other flush and cancel functions, a canceled delayed_work would still point to the last associated cpu_workqueue. If the workqueue is destroyed afterwards and the work item is re-used on a different workqueue, the queueing code can oops trying to dereference already freed cpu_workqueue. This patch reimplements cancel_delayed_work() using try_to_grab_pending() and set_work_cpu_and_clear_pending(). This allows the function to be called from IRQ handlers and makes its behavior consistent with other flush / cancel functions. Signed-off-by: Tejun Heo Cc: Linus Torvalds Cc: Ingo Molnar Cc: Andrew Morton --- include/linux/workqueue.h | 17 +---------------- kernel/workqueue.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d86b320319e0..4898289564ab 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -420,6 +420,7 @@ extern bool flush_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); +extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, @@ -428,22 +429,6 @@ extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq); extern unsigned int work_cpu(struct work_struct *work); extern unsigned int work_busy(struct work_struct *work); -/* - * Kill off a pending schedule_delayed_work(). Note that the work callback - * function may still be running on return from cancel_delayed_work(), unless - * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or - * cancel_work_sync() to wait on it. - */ -static inline bool cancel_delayed_work(struct delayed_work *work) -{ - bool ret; - - ret = del_timer_sync(&work->timer); - if (ret) - work_clear_pending(&work->work); - return ret; -} - /* * Like above, but uses del_timer() instead of del_timer_sync(). This means, * if it returns 0 the timer function may be running and the queueing is in diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b394df8beaee..039d0fae171a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2948,6 +2948,36 @@ bool flush_delayed_work(struct delayed_work *dwork) } EXPORT_SYMBOL(flush_delayed_work); +/** + * cancel_delayed_work - cancel a delayed work + * @dwork: delayed_work to cancel + * + * Kill off a pending delayed_work. Returns %true if @dwork was pending + * and canceled; %false if wasn't pending. Note that the work callback + * function may still be running on return, unless it returns %true and the + * work doesn't re-arm itself. Explicitly flush or use + * cancel_delayed_work_sync() to wait on it. + * + * This function is safe to call from any context including IRQ handler. + */ +bool cancel_delayed_work(struct delayed_work *dwork) +{ + unsigned long flags; + int ret; + + do { + ret = try_to_grab_pending(&dwork->work, true, &flags); + } while (unlikely(ret == -EAGAIN)); + + if (unlikely(ret < 0)) + return false; + + set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work)); + local_irq_restore(flags); + return true; +} +EXPORT_SYMBOL(cancel_delayed_work); + /** * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish * @dwork: the delayed work cancel -- cgit v1.2.3-71-gd317 From 5834ec3aea8a84b70efeb52ee91a8f8b1042cd2a Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Thu, 23 Aug 2012 02:47:13 +0200 Subject: PM / Freezer: Fix small typo "regrigerator" Noticed when digging into a suspend issue in linux-next (next-20120821). For more details see . Signed-off-by: Sedat Dilek Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/process.c b/kernel/power/process.c index 19db29f67558..87da817f9e13 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -79,7 +79,7 @@ static int try_to_freeze_tasks(bool user_only) /* * We need to retry, but first give the freezing tasks some - * time to enter the regrigerator. + * time to enter the refrigerator. */ msleep(10); } -- cgit v1.2.3-71-gd317 From c9235f4872e810d43bf1b19b92cdbe0ec282bada Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 Apr 2012 17:06:34 -0700 Subject: userns: Make credential debugging user namespace safe. Cc: David Howells Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- init/Kconfig | 1 - kernel/cred.c | 10 ++++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/init/Kconfig b/init/Kconfig index 448b701b1722..fdabc5160cdf 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -936,7 +936,6 @@ config UIDGID_CONVERTED depends on FS_POSIX_ACL = n depends on QUOTA = n depends on QUOTACTL = n - depends on DEBUG_CREDENTIALS = n depends on BSD_PROCESS_ACCT = n depends on DRM = n depends on PROC_EVENTS = n diff --git a/kernel/cred.c b/kernel/cred.c index de728ac50d82..48cea3da6d05 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -799,9 +799,15 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, atomic_read(&cred->usage), read_cred_subscribers(cred)); printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n", - cred->uid, cred->euid, cred->suid, cred->fsuid); + from_kuid_munged(&init_user_ns, cred->uid), + from_kuid_munged(&init_user_ns, cred->euid), + from_kuid_munged(&init_user_ns, cred->suid), + from_kuid_munged(&init_user_ns, cred->fsuid)); printk(KERN_ERR "CRED: ->*gid = { %d,%d,%d,%d }\n", - cred->gid, cred->egid, cred->sgid, cred->fsgid); + from_kgid_munged(&init_user_ns, cred->gid), + from_kgid_munged(&init_user_ns, cred->egid), + from_kgid_munged(&init_user_ns, cred->sgid), + from_kgid_munged(&init_user_ns, cred->fsgid)); #ifdef CONFIG_SECURITY printk(KERN_ERR "CRED: ->security is %p\n", cred->security); if ((unsigned long) cred->security >= PAGE_SIZE && -- cgit v1.2.3-71-gd317 From 13af07df9b7e49f1987cf36aa048dc6c49d0f93d Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 23 Aug 2012 16:53:29 -0400 Subject: cgroup: revise how we re-populate root directory When remounting cgroupfs with some subsystems added to it and some removed, cgroup will remove all the files in root directory and then re-popluate it. What I'm doing here is, only remove files which belong to subsystems that are to be unbinded, and only create files for newly-added subsystems. The purpose is to have all other files untouched. This is a preparation for cgroup xattr support. v7: - checkpatch warnings fixed v6: - no changes v5: - no changes v4: - refactored cgroup_clear_directory() to not use cgroup_rm_file() - instead of going thru the list of files, get the file list using the subsystems - use 'subsys_mask' instead of {added,removed}_bits and made cgroup_populate_dir() to match the parameters with cgroup_clear_directory() v3: - refresh patches after recent refactoring Original-patch-by: Li Zefan Cc: Li Zefan Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski Signed-off-by: Tejun Heo --- kernel/cgroup.c | 61 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 79818507e444..875a7130647c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -824,7 +824,8 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); -static int cgroup_populate_dir(struct cgroup *cgrp); +static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, + unsigned long subsys_mask); static const struct inode_operations cgroup_dir_inode_operations; static const struct file_operations proc_cgroupstats_operations; @@ -963,12 +964,29 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) return -ENOENT; } -static void cgroup_clear_directory(struct dentry *dir) +/** + * cgroup_clear_directory - selective removal of base and subsystem files + * @dir: directory containing the files + * @base_files: true if the base files should be removed + * @subsys_mask: mask of the subsystem ids whose files should be removed + */ +static void cgroup_clear_directory(struct dentry *dir, bool base_files, + unsigned long subsys_mask) { struct cgroup *cgrp = __d_cgrp(dir); + struct cgroup_subsys *ss; - while (!list_empty(&cgrp->files)) - cgroup_rm_file(cgrp, NULL); + for_each_subsys(cgrp->root, ss) { + struct cftype_set *set; + if (!test_bit(ss->subsys_id, &subsys_mask)) + continue; + list_for_each_entry(set, &ss->cftsets, node) + cgroup_rm_file(cgrp, set->cfts); + } + if (base_files) { + while (!list_empty(&cgrp->files)) + cgroup_rm_file(cgrp, NULL); + } } /* @@ -977,8 +995,9 @@ static void cgroup_clear_directory(struct dentry *dir) static void cgroup_d_remove_dir(struct dentry *dentry) { struct dentry *parent; + struct cgroupfs_root *root = dentry->d_sb->s_fs_info; - cgroup_clear_directory(dentry); + cgroup_clear_directory(dentry, true, root->subsys_bits); parent = dentry->d_parent; spin_lock(&parent->d_lock); @@ -1339,6 +1358,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) struct cgroupfs_root *root = sb->s_fs_info; struct cgroup *cgrp = &root->top_cgroup; struct cgroup_sb_opts opts; + unsigned long added_bits, removed_bits; mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); @@ -1354,6 +1374,9 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); + added_bits = opts.subsys_bits & ~root->subsys_bits; + removed_bits = root->subsys_bits & ~opts.subsys_bits; + /* Don't allow flags or name to change at remount */ if (opts.flags != root->flags || (opts.name && strcmp(opts.name, root->name))) { @@ -1369,8 +1392,9 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) } /* clear out any existing files and repopulate subsystem files */ - cgroup_clear_directory(cgrp->dentry); - cgroup_populate_dir(cgrp); + cgroup_clear_directory(cgrp->dentry, false, removed_bits); + /* re-populate subsystem files */ + cgroup_populate_dir(cgrp, false, added_bits); if (opts.release_agent) strcpy(root->release_agent_path, opts.release_agent); @@ -1669,7 +1693,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, BUG_ON(root->number_of_cgroups != 1); cred = override_creds(&init_cred); - cgroup_populate_dir(root_cgrp); + cgroup_populate_dir(root_cgrp, true, root->subsys_bits); revert_creds(cred); mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); @@ -3843,18 +3867,29 @@ static struct cftype files[] = { { } /* terminate */ }; -static int cgroup_populate_dir(struct cgroup *cgrp) +/** + * cgroup_populate_dir - selectively creation of files in a directory + * @cgrp: target cgroup + * @base_files: true if the base files should be added + * @subsys_mask: mask of the subsystem ids whose files should be added + */ +static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, + unsigned long subsys_mask) { int err; struct cgroup_subsys *ss; - err = cgroup_addrm_files(cgrp, NULL, files, true); - if (err < 0) - return err; + if (base_files) { + err = cgroup_addrm_files(cgrp, NULL, files, true); + if (err < 0) + return err; + } /* process cftsets of each subsystem */ for_each_subsys(cgrp->root, ss) { struct cftype_set *set; + if (!test_bit(ss->subsys_id, &subsys_mask)) + continue; list_for_each_entry(set, &ss->cftsets, node) cgroup_addrm_files(cgrp, ss, set->cfts, true); @@ -3988,7 +4023,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, list_add_tail(&cgrp->allcg_node, &root->allcg_list); - err = cgroup_populate_dir(cgrp); + err = cgroup_populate_dir(cgrp, true, root->subsys_bits); /* If err < 0, we have a half-filled directory - oh well ;) */ mutex_unlock(&cgroup_mutex); -- cgit v1.2.3-71-gd317 From 03b1cde6b22f625ae832b939bc7379ec1466aec5 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 23 Aug 2012 16:53:30 -0400 Subject: cgroup: add xattr support This is one of the items in the plumber's wish list. For use cases: >> What would the use case be for this? > > Attaching meta information to services, in an easily discoverable > way. For example, in systemd we create one cgroup for each service, and > could then store data like the main pid of the specific service as an > xattr on the cgroup itself. That way we'd have almost all service state > in the cgroupfs, which would make it possible to terminate systemd and > later restart it without losing any state information. But there's more: > for example, some very peculiar services cannot be terminated on > shutdown (i.e. fakeraid DM stuff) and it would be really nice if the > services in question could just mark that on their cgroup, by setting an > xattr. On the more desktopy side of things there are other > possibilities: for example there are plans defining what an application > is along the lines of a cgroup (i.e. an app being a collection of > processes). With xattrs one could then attach an icon or human readable > program name on the cgroup. > > The key idea is that this would allow attaching runtime meta information > to cgroups and everything they model (services, apps, vms), that doesn't > need any complex userspace infrastructure, has good access control > (i.e. because the file system enforces that anyway, and there's the > "trusted." xattr namespace), notifications (inotify), and can easily be > shared among applications. > > Lennart v7: - no changes v6: - remove user xattr namespace, only allow trusted and security v5: - check for capabilities before setting/removing xattrs v4: - no changes v3: - instead of config option, use mount option to enable xattr support Original-patch-by: Li Zefan Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 13 +++++-- kernel/cgroup.c | 100 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c90eaa803440..145901f5ef99 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -216,6 +217,9 @@ struct cgroup { /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; + + /* directory xattrs */ + struct simple_xattrs xattrs; }; /* @@ -309,6 +313,9 @@ struct cftype { /* CFTYPE_* flags */ unsigned int flags; + /* file xattrs */ + struct simple_xattrs xattrs; + int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -394,7 +401,7 @@ struct cftype { */ struct cftype_set { struct list_head node; /* chained at subsys->cftsets */ - const struct cftype *cfts; + struct cftype *cfts; }; struct cgroup_scanner { @@ -406,8 +413,8 @@ struct cgroup_scanner { void *data; }; -int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); -int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_is_removed(const struct cgroup *cgrp); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 875a7130647c..508b4a97ab19 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -276,7 +276,8 @@ inline int cgroup_is_removed(const struct cgroup *cgrp) /* bits in struct cgroupfs_root flags field */ enum { - ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_XATTR, /* supports extended attributes */ }; static int cgroup_is_releasable(const struct cgroup *cgrp) @@ -913,15 +914,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) */ BUG_ON(!list_empty(&cgrp->pidlists)); + simple_xattrs_free(&cgrp->xattrs); + kfree_rcu(cgrp, rcu_head); } else { struct cfent *cfe = __d_cfe(dentry); struct cgroup *cgrp = dentry->d_parent->d_fsdata; + struct cftype *cft = cfe->type; WARN_ONCE(!list_empty(&cfe->node) && cgrp != &cgrp->root->top_cgroup, "cfe still linked for %s\n", cfe->type->name); kfree(cfe); + simple_xattrs_free(&cft->xattrs); } iput(inode); } @@ -1140,6 +1145,8 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) seq_printf(seq, ",%s", ss->name); if (test_bit(ROOT_NOPREFIX, &root->flags)) seq_puts(seq, ",noprefix"); + if (test_bit(ROOT_XATTR, &root->flags)) + seq_puts(seq, ",xattr"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); if (clone_children(&root->top_cgroup)) @@ -1208,6 +1215,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) opts->clone_children = true; continue; } + if (!strcmp(token, "xattr")) { + set_bit(ROOT_XATTR, &opts->flags); + continue; + } if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) @@ -1425,6 +1436,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) mutex_init(&cgrp->pidlist_mutex); INIT_LIST_HEAD(&cgrp->event_list); spin_lock_init(&cgrp->event_list_lock); + simple_xattrs_init(&cgrp->xattrs); } static void init_cgroup_root(struct cgroupfs_root *root) @@ -1769,6 +1781,8 @@ static void cgroup_kill_sb(struct super_block *sb) { mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); + simple_xattrs_free(&cgrp->xattrs); + kill_litter_super(sb); cgroup_drop_root(root); } @@ -2575,6 +2589,64 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, return simple_rename(old_dir, old_dentry, new_dir, new_dentry); } +static struct simple_xattrs *__d_xattrs(struct dentry *dentry) +{ + if (S_ISDIR(dentry->d_inode->i_mode)) + return &__d_cgrp(dentry)->xattrs; + else + return &__d_cft(dentry)->xattrs; +} + +static inline int xattr_enabled(struct dentry *dentry) +{ + struct cgroupfs_root *root = dentry->d_sb->s_fs_info; + return test_bit(ROOT_XATTR, &root->flags); +} + +static bool is_valid_xattr(const char *name) +{ + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || + !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) + return true; + return false; +} + +static int cgroup_setxattr(struct dentry *dentry, const char *name, + const void *val, size_t size, int flags) +{ + if (!xattr_enabled(dentry)) + return -EOPNOTSUPP; + if (!is_valid_xattr(name)) + return -EINVAL; + return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags); +} + +static int cgroup_removexattr(struct dentry *dentry, const char *name) +{ + if (!xattr_enabled(dentry)) + return -EOPNOTSUPP; + if (!is_valid_xattr(name)) + return -EINVAL; + return simple_xattr_remove(__d_xattrs(dentry), name); +} + +static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name, + void *buf, size_t size) +{ + if (!xattr_enabled(dentry)) + return -EOPNOTSUPP; + if (!is_valid_xattr(name)) + return -EINVAL; + return simple_xattr_get(__d_xattrs(dentry), name, buf, size); +} + +static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size) +{ + if (!xattr_enabled(dentry)) + return -EOPNOTSUPP; + return simple_xattr_list(__d_xattrs(dentry), buf, size); +} + static const struct file_operations cgroup_file_operations = { .read = cgroup_file_read, .write = cgroup_file_write, @@ -2583,11 +2655,22 @@ static const struct file_operations cgroup_file_operations = { .release = cgroup_file_release, }; +static const struct inode_operations cgroup_file_inode_operations = { + .setxattr = cgroup_setxattr, + .getxattr = cgroup_getxattr, + .listxattr = cgroup_listxattr, + .removexattr = cgroup_removexattr, +}; + static const struct inode_operations cgroup_dir_inode_operations = { .lookup = cgroup_lookup, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .rename = cgroup_rename, + .setxattr = cgroup_setxattr, + .getxattr = cgroup_getxattr, + .listxattr = cgroup_listxattr, + .removexattr = cgroup_removexattr, }; static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -2635,6 +2718,7 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode, } else if (S_ISREG(mode)) { inode->i_size = 0; inode->i_fop = &cgroup_file_operations; + inode->i_op = &cgroup_file_inode_operations; } d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ @@ -2695,7 +2779,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) } static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, - const struct cftype *cft) + struct cftype *cft) { struct dentry *dir = cgrp->dentry; struct cgroup *parent = __d_cgrp(dir); @@ -2705,6 +2789,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, umode_t mode; char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; + simple_xattrs_init(&cft->xattrs); + /* does @cft->flags tell us to skip creation on @cgrp? */ if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) return 0; @@ -2745,9 +2831,9 @@ out: } static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, - const struct cftype cfts[], bool is_add) + struct cftype cfts[], bool is_add) { - const struct cftype *cft; + struct cftype *cft; int err, ret = 0; for (cft = cfts; cft->name[0] != '\0'; cft++) { @@ -2781,7 +2867,7 @@ static void cgroup_cfts_prepare(void) } static void cgroup_cfts_commit(struct cgroup_subsys *ss, - const struct cftype *cfts, bool is_add) + struct cftype *cfts, bool is_add) __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) { LIST_HEAD(pending); @@ -2832,7 +2918,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, * function currently returns 0 as long as @cfts registration is successful * even if some file creation attempts on existing cgroups fail. */ -int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { struct cftype_set *set; @@ -2862,7 +2948,7 @@ EXPORT_SYMBOL_GPL(cgroup_add_cftypes); * Returns 0 on successful unregistration, -ENOENT if @cfts is not * registered with @ss. */ -int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) +int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { struct cftype_set *set; -- cgit v1.2.3-71-gd317 From a1a71b45a66fd3c3c453b55fbd180f8fccdd1daa Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 23 Aug 2012 16:53:31 -0400 Subject: cgroup: rename subsys_bits to subsys_mask In a previous discussion, Tejun Heo suggested to rename references to subsys_bits (added_bits, removed_bits, etc) by something more meaningful. Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Aristeu Rozanski Signed-off-by: Tejun Heo --- kernel/cgroup.c | 84 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 508b4a97ab19..ced292d720b9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -111,13 +111,13 @@ struct cgroupfs_root { * The bitmask of subsystems intended to be attached to this * hierarchy */ - unsigned long subsys_bits; + unsigned long subsys_mask; /* Unique id for this hierarchy. */ int hierarchy_id; /* The bitmask of subsystems currently attached to this hierarchy */ - unsigned long actual_subsys_bits; + unsigned long actual_subsys_mask; /* A list running through the attached subsystems */ struct list_head subsys_list; @@ -557,7 +557,7 @@ static struct css_set *find_existing_css_set( * won't change, so no need for locking. */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - if (root->subsys_bits & (1UL << i)) { + if (root->subsys_mask & (1UL << i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -1002,7 +1002,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) struct dentry *parent; struct cgroupfs_root *root = dentry->d_sb->s_fs_info; - cgroup_clear_directory(dentry, true, root->subsys_bits); + cgroup_clear_directory(dentry, true, root->subsys_mask); parent = dentry->d_parent; spin_lock(&parent->d_lock); @@ -1046,22 +1046,22 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) * returns an error, no reference counts are touched. */ static int rebind_subsystems(struct cgroupfs_root *root, - unsigned long final_bits) + unsigned long final_subsys_mask) { - unsigned long added_bits, removed_bits; + unsigned long added_mask, removed_mask; struct cgroup *cgrp = &root->top_cgroup; int i; BUG_ON(!mutex_is_locked(&cgroup_mutex)); BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); - removed_bits = root->actual_subsys_bits & ~final_bits; - added_bits = final_bits & ~root->actual_subsys_bits; + removed_mask = root->actual_subsys_mask & ~final_subsys_mask; + added_mask = final_subsys_mask & ~root->actual_subsys_mask; /* Check that any added subsystems are currently free */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL << i; struct cgroup_subsys *ss = subsys[i]; - if (!(bit & added_bits)) + if (!(bit & added_mask)) continue; /* * Nobody should tell us to do a subsys that doesn't exist: @@ -1086,7 +1086,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; unsigned long bit = 1UL << i; - if (bit & added_bits) { + if (bit & added_mask) { /* We're binding this subsystem to this hierarchy */ BUG_ON(ss == NULL); BUG_ON(cgrp->subsys[i]); @@ -1099,7 +1099,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, if (ss->bind) ss->bind(cgrp); /* refcount was already taken, and we're keeping it */ - } else if (bit & removed_bits) { + } else if (bit & removed_mask) { /* We're removing this subsystem */ BUG_ON(ss == NULL); BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); @@ -1112,7 +1112,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, list_move(&ss->sibling, &rootnode.subsys_list); /* subsystem is now free - drop reference on module */ module_put(ss->module); - } else if (bit & final_bits) { + } else if (bit & final_subsys_mask) { /* Subsystem state should already exist */ BUG_ON(ss == NULL); BUG_ON(!cgrp->subsys[i]); @@ -1129,7 +1129,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, BUG_ON(cgrp->subsys[i]); } } - root->subsys_bits = root->actual_subsys_bits = final_bits; + root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; synchronize_rcu(); return 0; @@ -1158,7 +1158,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) } struct cgroup_sb_opts { - unsigned long subsys_bits; + unsigned long subsys_mask; unsigned long flags; char *release_agent; bool clone_children; @@ -1267,7 +1267,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) /* Mutually exclusive option 'all' + subsystem name */ if (all_ss) return -EINVAL; - set_bit(i, &opts->subsys_bits); + set_bit(i, &opts->subsys_mask); one_ss = true; break; @@ -1288,7 +1288,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) continue; if (ss->disabled) continue; - set_bit(i, &opts->subsys_bits); + set_bit(i, &opts->subsys_mask); } } @@ -1300,19 +1300,19 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) * the cpuset subsystem. */ if (test_bit(ROOT_NOPREFIX, &opts->flags) && - (opts->subsys_bits & mask)) + (opts->subsys_mask & mask)) return -EINVAL; /* Can't specify "none" and some subsystems */ - if (opts->subsys_bits && opts->none) + if (opts->subsys_mask && opts->none) return -EINVAL; /* * We either have to specify by name or by subsystems. (So all * empty hierarchies must have a name). */ - if (!opts->subsys_bits && !opts->name) + if (!opts->subsys_mask && !opts->name) return -EINVAL; /* @@ -1324,7 +1324,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL << i; - if (!(bit & opts->subsys_bits)) + if (!(bit & opts->subsys_mask)) continue; if (!try_module_get(subsys[i]->module)) { module_pin_failed = true; @@ -1341,7 +1341,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) /* drop refcounts only on the ones we took */ unsigned long bit = 1UL << i; - if (!(bit & opts->subsys_bits)) + if (!(bit & opts->subsys_mask)) continue; module_put(subsys[i]->module); } @@ -1351,13 +1351,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) return 0; } -static void drop_parsed_module_refcounts(unsigned long subsys_bits) +static void drop_parsed_module_refcounts(unsigned long subsys_mask) { int i; for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL << i; - if (!(bit & subsys_bits)) + if (!(bit & subsys_mask)) continue; module_put(subsys[i]->module); } @@ -1369,7 +1369,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) struct cgroupfs_root *root = sb->s_fs_info; struct cgroup *cgrp = &root->top_cgroup; struct cgroup_sb_opts opts; - unsigned long added_bits, removed_bits; + unsigned long added_mask, removed_mask; mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); @@ -1381,31 +1381,31 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) goto out_unlock; /* See feature-removal-schedule.txt */ - if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent) + if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent) pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); - added_bits = opts.subsys_bits & ~root->subsys_bits; - removed_bits = root->subsys_bits & ~opts.subsys_bits; + added_mask = opts.subsys_mask & ~root->subsys_mask; + removed_mask = root->subsys_mask & ~opts.subsys_mask; /* Don't allow flags or name to change at remount */ if (opts.flags != root->flags || (opts.name && strcmp(opts.name, root->name))) { ret = -EINVAL; - drop_parsed_module_refcounts(opts.subsys_bits); + drop_parsed_module_refcounts(opts.subsys_mask); goto out_unlock; } - ret = rebind_subsystems(root, opts.subsys_bits); + ret = rebind_subsystems(root, opts.subsys_mask); if (ret) { - drop_parsed_module_refcounts(opts.subsys_bits); + drop_parsed_module_refcounts(opts.subsys_mask); goto out_unlock; } /* clear out any existing files and repopulate subsystem files */ - cgroup_clear_directory(cgrp->dentry, false, removed_bits); + cgroup_clear_directory(cgrp->dentry, false, removed_mask); /* re-populate subsystem files */ - cgroup_populate_dir(cgrp, false, added_bits); + cgroup_populate_dir(cgrp, false, added_mask); if (opts.release_agent) strcpy(root->release_agent_path, opts.release_agent); @@ -1491,8 +1491,8 @@ static int cgroup_test_super(struct super_block *sb, void *data) * If we asked for subsystems (or explicitly for no * subsystems) then they must match */ - if ((opts->subsys_bits || opts->none) - && (opts->subsys_bits != root->subsys_bits)) + if ((opts->subsys_mask || opts->none) + && (opts->subsys_mask != root->subsys_mask)) return 0; return 1; @@ -1502,7 +1502,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) { struct cgroupfs_root *root; - if (!opts->subsys_bits && !opts->none) + if (!opts->subsys_mask && !opts->none) return NULL; root = kzalloc(sizeof(*root), GFP_KERNEL); @@ -1515,7 +1515,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) } init_cgroup_root(root); - root->subsys_bits = opts->subsys_bits; + root->subsys_mask = opts->subsys_mask; root->flags = opts->flags; if (opts->release_agent) strcpy(root->release_agent_path, opts->release_agent); @@ -1547,7 +1547,7 @@ static int cgroup_set_super(struct super_block *sb, void *data) if (!opts->new_root) return -EINVAL; - BUG_ON(!opts->subsys_bits && !opts->none); + BUG_ON(!opts->subsys_mask && !opts->none); ret = set_anon_super(sb, NULL); if (ret) @@ -1665,7 +1665,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (ret) goto unlock_drop; - ret = rebind_subsystems(root, root->subsys_bits); + ret = rebind_subsystems(root, root->subsys_mask); if (ret == -EBUSY) { free_cg_links(&tmp_cg_links); goto unlock_drop; @@ -1705,7 +1705,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, BUG_ON(root->number_of_cgroups != 1); cred = override_creds(&init_cred); - cgroup_populate_dir(root_cgrp, true, root->subsys_bits); + cgroup_populate_dir(root_cgrp, true, root->subsys_mask); revert_creds(cred); mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); @@ -1717,7 +1717,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, */ cgroup_drop_root(opts.new_root); /* no subsys rebinding, so refcounts don't change */ - drop_parsed_module_refcounts(opts.subsys_bits); + drop_parsed_module_refcounts(opts.subsys_mask); } kfree(opts.release_agent); @@ -1731,7 +1731,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, drop_new_super: deactivate_locked_super(sb); drop_modules: - drop_parsed_module_refcounts(opts.subsys_bits); + drop_parsed_module_refcounts(opts.subsys_mask); out_err: kfree(opts.release_agent); kfree(opts.name); @@ -4109,7 +4109,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, list_add_tail(&cgrp->allcg_node, &root->allcg_list); - err = cgroup_populate_dir(cgrp, true, root->subsys_bits); + err = cgroup_populate_dir(cgrp, true, root->subsys_mask); /* If err < 0, we have a half-filled directory - oh well ;) */ mutex_unlock(&cgroup_mutex); -- cgit v1.2.3-71-gd317 From 77f827de07432a74821cf0f831d699544b2d474f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Aug 2012 01:39:57 +0200 Subject: PM / Domains: Add power off/on function for system core suspend stage Introduce function pm_genpd_syscore_switch() and two wrappers around it, pm_genpd_syscore_poweroff() and pm_genpd_syscore_poweron(), allowing the callers to let the generic PM domains framework know that the given device is not necessary any more and its PM domain can be turned off (the former) or that the given device will be required immediately, so its PM domain has to be turned on (the latter) during the system core (syscore) stage of system suspend (or hibernation) and resume. These functions will be used for handling devices registered as clock sources and clock event devices that belong to PM domains. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 57 ++++++++++++++++++++++++++++++++++++++++----- include/linux/pm_domain.h | 16 +++++++++++++ kernel/power/Kconfig | 4 ++++ 3 files changed, 71 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 55c39f5b7a59..515c8ecf01ce 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -697,6 +697,24 @@ static inline void genpd_power_off_work_fn(struct work_struct *work) {} #ifdef CONFIG_PM_SLEEP +/** + * pm_genpd_present - Check if the given PM domain has been initialized. + * @genpd: PM domain to check. + */ +static bool pm_genpd_present(struct generic_pm_domain *genpd) +{ + struct generic_pm_domain *gpd; + + if (IS_ERR_OR_NULL(genpd)) + return false; + + list_for_each_entry(gpd, &gpd_list, gpd_list_node) + if (gpd == genpd) + return true; + + return false; +} + static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, struct device *dev) { @@ -750,9 +768,10 @@ static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev) * Check if the given PM domain can be powered off (during system suspend or * hibernation) and do that if so. Also, in that case propagate to its masters. * - * This function is only called in "noirq" stages of system power transitions, - * so it need not acquire locks (all of the "noirq" callbacks are executed - * sequentially, so it is guaranteed that it will never run twice in parallel). + * This function is only called in "noirq" and "syscore" stages of system power + * transitions, so it need not acquire locks (all of the "noirq" callbacks are + * executed sequentially, so it is guaranteed that it will never run twice in + * parallel). */ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd) { @@ -780,9 +799,10 @@ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd) * pm_genpd_sync_poweron - Synchronously power on a PM domain and its masters. * @genpd: PM domain to power on. * - * This function is only called in "noirq" stage of system power transitions, so - * it need not acquire locks (all of the "noirq" callbacks are executed - * sequentially, so it is guaranteed that it will never run twice in parallel). + * This function is only called in "noirq" and "syscore" stages of system power + * transitions, so it need not acquire locks (all of the "noirq" callbacks are + * executed sequentially, so it is guaranteed that it will never run twice in + * parallel). */ static void pm_genpd_sync_poweron(struct generic_pm_domain *genpd) { @@ -1272,6 +1292,31 @@ static void pm_genpd_complete(struct device *dev) } } +/** + * pm_genpd_syscore_switch - Switch power during system core suspend or resume. + * @dev: Device that normally is marked as "always on" to switch power for. + * + * This routine may only be called during the system core (syscore) suspend or + * resume phase for devices whose "always on" flags are set. + */ +void pm_genpd_syscore_switch(struct device *dev, bool suspend) +{ + struct generic_pm_domain *genpd; + + genpd = dev_to_genpd(dev); + if (!pm_genpd_present(genpd)) + return; + + if (suspend) { + genpd->suspended_count++; + pm_genpd_sync_poweroff(genpd); + } else { + pm_genpd_sync_poweron(genpd); + genpd->suspended_count--; + } +} +EXPORT_SYMBOL_GPL(pm_genpd_syscore_switch); + #else #define pm_genpd_prepare NULL diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a7d6172922d4..ab83cf3dfaac 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -258,4 +258,20 @@ static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} static inline void pm_genpd_poweroff_unused(void) {} #endif +#ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP +extern void pm_genpd_syscore_switch(struct device *dev, bool suspend); +#else +static inline void pm_genpd_syscore_switch(struct device *dev, bool suspend) {} +#endif + +static inline void pm_genpd_syscore_poweroff(struct device *dev) +{ + pm_genpd_syscore_switch(dev, true); +} + +static inline void pm_genpd_syscore_poweron(struct device *dev) +{ + pm_genpd_syscore_switch(dev, false); +} + #endif /* _LINUX_PM_DOMAIN_H */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index a70518c9d82f..5dfdc9ea180b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -263,6 +263,10 @@ config PM_GENERIC_DOMAINS bool depends on PM +config PM_GENERIC_DOMAINS_SLEEP + def_bool y + depends on PM_SLEEP && PM_GENERIC_DOMAINS + config PM_GENERIC_DOMAINS_RUNTIME def_bool y depends on PM_RUNTIME && PM_GENERIC_DOMAINS -- cgit v1.2.3-71-gd317 From adc78e6b9946a4b22e22403d961f3b03c469e5d3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Aug 2012 01:40:41 +0200 Subject: timekeeping: Add suspend and resume of clock event devices Some clock event devices, for example such that belong to PM domains, need to be handled in a spcial way during the timekeeping suspend and resume (which takes place in the system core, or "syscore", stages of system power transitions) in analogy with clock sources. Introduce .suspend() and .resume() callbacks for clock event devices that will be executed by timekeeping_suspend/_resume(), respectively, next the the clock sources' .suspend() and .resume() callbacks. Signed-off-by: Rafael J. Wysocki --- include/linux/clockchips.h | 8 ++++++++ kernel/time/clockevents.c | 24 ++++++++++++++++++++++++ kernel/time/timekeeping.c | 2 ++ 3 files changed, 34 insertions(+) (limited to 'kernel') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index acba894374a1..8a7096fcb01e 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -97,6 +97,8 @@ struct clock_event_device { void (*broadcast)(const struct cpumask *mask); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); + void (*suspend)(struct clock_event_device *); + void (*resume)(struct clock_event_device *); unsigned long min_delta_ticks; unsigned long max_delta_ticks; @@ -156,6 +158,9 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) freq, minsec); } +extern void clockevents_suspend(void); +extern void clockevents_resume(void); + #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void clockevents_notify(unsigned long reason, void *arg); #else @@ -164,6 +169,9 @@ extern void clockevents_notify(unsigned long reason, void *arg); #else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ +static inline void clockevents_suspend(void) {} +static inline void clockevents_resume(void) {} + #define clockevents_notify(reason, arg) do { } while (0) #endif diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 7e1ce012a851..30b6de0d977c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -397,6 +397,30 @@ void clockevents_exchange_device(struct clock_event_device *old, local_irq_restore(flags); } +/** + * clockevents_suspend - suspend clock devices + */ +void clockevents_suspend(void) +{ + struct clock_event_device *dev; + + list_for_each_entry_reverse(dev, &clockevent_devices, list) + if (dev->suspend) + dev->suspend(dev); +} + +/** + * clockevents_resume - resume clock devices + */ +void clockevents_resume(void) +{ + struct clock_event_device *dev; + + list_for_each_entry(dev, &clockevent_devices, list) + if (dev->resume) + dev->resume(dev); +} + #ifdef CONFIG_GENERIC_CLOCKEVENTS /** * clockevents_notify - notification about relevant events diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 34e5eac81424..312a675cb240 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -773,6 +773,7 @@ static void timekeeping_resume(void) read_persistent_clock(&ts); + clockevents_resume(); clocksource_resume(); write_seqlock_irqsave(&tk->lock, flags); @@ -832,6 +833,7 @@ static int timekeeping_suspend(void) clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); clocksource_suspend(); + clockevents_suspend(); return 0; } -- cgit v1.2.3-71-gd317 From 65f8c95e46a1827ae8bbc52a817ea308dd7d65ae Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jul 2012 14:26:15 -0700 Subject: pstore/ftrace: Convert to its own enable/disable debugfs knob With this patch we no longer reuse function tracer infrastructure, now we register our own tracer back-end via a debugfs knob. It's a bit more code, but that is the only downside. On the bright side we have: - Ability to make persistent_ram module removable (when needed, we can move ftrace_ops struct into a module). Note that persistent_ram is still not removable for other reasons, but with this patch it's just one thing less to worry about; - Pstore part is more isolated from the generic function tracer. We tried it already by registering our own tracer in available_tracers, but that way we're loosing ability to see the traces while we record them to pstore. This solution is somewhere in the middle: we only register "internal ftracer" back-end, but not the "front-end"; - When there is only pstore tracing enabled, the kernel will only write to the pstore buffer, omitting function tracer buffer (which, of course, still can be enabled via 'echo function > current_tracer'). Suggested-by: Steven Rostedt Signed-off-by: Anton Vorontsov --- Documentation/ramoops.txt | 4 +- fs/pstore/Kconfig | 1 + fs/pstore/ftrace.c | 96 +++++++++++++++++++++++++++++++++++++++++- fs/pstore/internal.h | 6 +++ fs/pstore/platform.c | 1 + include/linux/pstore.h | 8 ---- kernel/trace/trace_functions.c | 15 +------ 7 files changed, 105 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt index 197ad59ab9bf..69b3cac4749d 100644 --- a/Documentation/ramoops.txt +++ b/Documentation/ramoops.txt @@ -102,9 +102,7 @@ related hangs. The functions call chain log is stored in a "ftrace-ramoops" file. Here is an example of usage: # mount -t debugfs debugfs /sys/kernel/debug/ - # cd /sys/kernel/debug/tracing - # echo function > current_tracer - # echo 1 > options/func_pstore + # echo 1 > /sys/kernel/debug/pstore/record_ftrace # reboot -f [...] # mount -t pstore pstore /mnt/ diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index d39bb5cce883..ca71db69da07 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -23,6 +23,7 @@ config PSTORE_FTRACE bool "Persistent function tracer" depends on PSTORE depends on FUNCTION_TRACER + depends on DEBUG_FS help With this option kernel traces function calls into a persistent ram buffer that can be decoded and dumped after reboot through diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index a130d484b7d3..2d57e1ac0115 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -17,19 +17,113 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include "internal.h" -void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip) +static void notrace pstore_ftrace_call(unsigned long ip, + unsigned long parent_ip) { + unsigned long flags; struct pstore_ftrace_record rec = {}; if (unlikely(oops_in_progress)) return; + local_irq_save(flags); + rec.ip = ip; rec.parent_ip = parent_ip; pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, sizeof(rec), psinfo); + + local_irq_restore(flags); +} + +static struct ftrace_ops pstore_ftrace_ops __read_mostly = { + .func = pstore_ftrace_call, +}; + +static DEFINE_MUTEX(pstore_ftrace_lock); +static bool pstore_ftrace_enabled; + +static ssize_t pstore_ftrace_knob_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + u8 on; + ssize_t ret; + + ret = kstrtou8_from_user(buf, count, 2, &on); + if (ret) + return ret; + + mutex_lock(&pstore_ftrace_lock); + + if (!on ^ pstore_ftrace_enabled) + goto out; + + if (on) + ret = register_ftrace_function(&pstore_ftrace_ops); + else + ret = unregister_ftrace_function(&pstore_ftrace_ops); + if (ret) { + pr_err("%s: unable to %sregister ftrace ops: %zd\n", + __func__, on ? "" : "un", ret); + goto err; + } + + pstore_ftrace_enabled = on; +out: + ret = count; +err: + mutex_unlock(&pstore_ftrace_lock); + + return ret; +} + +static ssize_t pstore_ftrace_knob_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + char val[] = { '0' + pstore_ftrace_enabled, '\n' }; + + return simple_read_from_buffer(buf, count, ppos, val, sizeof(val)); +} + +static const struct file_operations pstore_knob_fops = { + .open = simple_open, + .read = pstore_ftrace_knob_read, + .write = pstore_ftrace_knob_write, +}; + +void pstore_register_ftrace(void) +{ + struct dentry *dir; + struct dentry *file; + + if (!psinfo->write_buf) + return; + + dir = debugfs_create_dir("pstore", NULL); + if (!dir) { + pr_err("%s: unable to create pstore directory\n", __func__); + return; + } + + file = debugfs_create_file("record_ftrace", 0600, dir, NULL, + &pstore_knob_fops); + if (!file) { + pr_err("%s: unable to create record_ftrace file\n", __func__); + goto err_file; + } + + return; +err_file: + debugfs_remove(dir); } diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 0d0d3b7d5f12..4847f588b7d5 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -39,6 +39,12 @@ pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec) #endif } +#ifdef CONFIG_PSTORE_FTRACE +extern void pstore_register_ftrace(void); +#else +static inline void pstore_register_ftrace(void) {} +#endif + extern struct pstore_info *psinfo; extern void pstore_set_kmsg_bytes(int); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 29996e8793a7..6c23eab7f76c 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -236,6 +236,7 @@ int pstore_register(struct pstore_info *psi) kmsg_dump_register(&pstore_dumper); pstore_register_console(); + pstore_register_ftrace(); if (pstore_update_ms >= 0) { pstore_timer.expires = jiffies + diff --git a/include/linux/pstore.h b/include/linux/pstore.h index c892587d9b81..ee3034a40884 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -64,14 +64,6 @@ struct pstore_info { void *data; }; - -#ifdef CONFIG_PSTORE_FTRACE -extern void pstore_ftrace_call(unsigned long ip, unsigned long parent_ip); -#else -static inline void pstore_ftrace_call(unsigned long ip, unsigned long parent_ip) -{ } -#endif - #ifdef CONFIG_PSTORE extern int pstore_register(struct pstore_info *); #else diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index a426f410c060..0ad83e3929d1 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include "trace.h" @@ -75,10 +74,9 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) preempt_enable_notrace(); } -/* Our two options */ +/* Our option */ enum { TRACE_FUNC_OPT_STACK = 0x1, - TRACE_FUNC_OPT_PSTORE = 0x2, }; static struct tracer_flags func_flags; @@ -106,12 +104,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - /* - * So far tracing doesn't support multiple buffers, so - * we make an explicit call for now. - */ - if (unlikely(func_flags.val & TRACE_FUNC_OPT_PSTORE)) - pstore_ftrace_call(ip, parent_ip); pc = preempt_count(); trace_function(tr, ip, parent_ip, flags, pc); } @@ -176,9 +168,6 @@ static struct ftrace_ops trace_stack_ops __read_mostly = static struct tracer_opt func_opts[] = { #ifdef CONFIG_STACKTRACE { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, -#endif -#ifdef CONFIG_PSTORE_FTRACE - { TRACER_OPT(func_pstore, TRACE_FUNC_OPT_PSTORE) }, #endif { } /* Always set a last empty entry */ }; @@ -231,8 +220,6 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) register_ftrace_function(&trace_ops); } - break; - case TRACE_FUNC_OPT_PSTORE: break; default: return -EINVAL; -- cgit v1.2.3-71-gd317 From c6a57bfffea5b673e5b4f9aeff85a00607e59077 Mon Sep 17 00:00:00 2001 From: Luis Gonzalez Fernandez Date: Fri, 7 Sep 2012 21:35:21 +0200 Subject: PM / QoS: Add return code to pm_qos_get_value function. pm_qos_get_value don't return a return code in all cases. It's sure that anything interesting happend after BUG() but this prevent any compilation warning. [rjw: Chaneged the new return value to PM_QOS_DEFAULT_VALUE.] Signed-off-by: Luis Gonzalez Fernandez Signed-off-by: Rafael J. Wysocki --- kernel/power/qos.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 6a031e684026..846bd42c7ed1 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -139,6 +139,7 @@ static inline int pm_qos_get_value(struct pm_qos_constraints *c) default: /* runtime check for not using enum */ BUG(); + return PM_QOS_DEFAULT_VALUE; } } -- cgit v1.2.3-71-gd317 From 9f00d9776bc5beb92e8bfc884a7e96ddc5589e2e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 8 Sep 2012 02:53:54 +0000 Subject: netlink: hide struct module parameter in netlink_kernel_create This patch defines netlink_kernel_create as a wrapper function of __netlink_kernel_create to hide the struct module *me parameter (which seems to be THIS_MODULE in all existing netlink subsystems). Suggested by David S. Miller. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- crypto/crypto_user.c | 3 +-- drivers/connector/connector.c | 3 +-- drivers/infiniband/core/netlink.c | 2 +- drivers/scsi/scsi_netlink.c | 2 +- drivers/scsi/scsi_transport_iscsi.c | 3 +-- drivers/staging/gdm72xx/netlink_k.c | 2 +- include/linux/netlink.h | 13 ++++++++++--- kernel/audit.c | 3 +-- lib/kobject_uevent.c | 3 +-- net/bridge/netfilter/ebt_ulog.c | 3 +-- net/core/rtnetlink.c | 2 +- net/core/sock_diag.c | 3 +-- net/decnet/netfilter/dn_rtmsg.c | 3 +-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 3 +-- net/netfilter/nfnetlink.c | 2 +- net/netlink/af_netlink.c | 8 +++----- net/netlink/genetlink.c | 3 +-- net/xfrm/xfrm_user.c | 2 +- security/selinux/netlink.c | 3 +-- 20 files changed, 31 insertions(+), 37 deletions(-) (limited to 'kernel') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index ba2c611154af..165914e63ef2 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -500,8 +500,7 @@ static int __init crypto_user_init(void) .input = crypto_netlink_rcv, }; - crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO, - THIS_MODULE, &cfg); + crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO, &cfg); if (!crypto_nlsk) return -ENOMEM; diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 82fa4f0f91d6..965b7811e04f 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -264,8 +264,7 @@ static int __devinit cn_init(void) .input = dev->input, }; - dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, - THIS_MODULE, &cfg); + dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg); if (!dev->nls) return -EIO; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 3ae2bfd31015..fe10a949aef9 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -177,7 +177,7 @@ int __init ibnl_init(void) .input = ibnl_rcv, }; - nls = netlink_kernel_create(&init_net, NETLINK_RDMA, THIS_MODULE, &cfg); + nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg); if (!nls) { pr_warn("Failed to create netlink socket\n"); return -ENOMEM; diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 8818dd681c19..3252bc9625ee 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -501,7 +501,7 @@ scsi_netlink_init(void) } scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT, - THIS_MODULE, &cfg); + &cfg); if (!scsi_nl_sock) { printk(KERN_ERR "%s: register of receive handler failed\n", __func__); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index fa1dfaa83e32..519bd5303f3f 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2969,8 +2969,7 @@ static __init int iscsi_transport_init(void) if (err) goto unregister_conn_class; - nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, - THIS_MODULE, &cfg); + nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, &cfg); if (!nls) { err = -ENOBUFS; goto unregister_session_class; diff --git a/drivers/staging/gdm72xx/netlink_k.c b/drivers/staging/gdm72xx/netlink_k.c index 3abb31df8f28..2109cab0a14c 100644 --- a/drivers/staging/gdm72xx/netlink_k.c +++ b/drivers/staging/gdm72xx/netlink_k.c @@ -95,7 +95,7 @@ struct sock *netlink_init(int unit, void (*cb)(struct net_device *dev, u16 type, init_MUTEX(&netlink_mutex); #endif - sock = netlink_kernel_create(&init_net, unit, THIS_MODULE, &cfg); + sock = netlink_kernel_create(&init_net, unit, &cfg); if (sock) rcv_cb = cb; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 8719a4e235a5..cd17dda5a987 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -153,6 +153,7 @@ struct nlattr { #include #include +#include #include struct net; @@ -188,9 +189,15 @@ struct netlink_kernel_cfg { unsigned int flags; }; -extern struct sock *netlink_kernel_create(struct net *net, int unit, - struct module *module, - struct netlink_kernel_cfg *cfg); +extern struct sock *__netlink_kernel_create(struct net *net, int unit, + struct module *module, + struct netlink_kernel_cfg *cfg); +static inline struct sock * +netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) +{ + return __netlink_kernel_create(net, unit, THIS_MODULE, cfg); +} + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); diff --git a/kernel/audit.c b/kernel/audit.c index ea3b7b6191c7..a24aafa850ae 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -971,8 +971,7 @@ static int __init audit_init(void) printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, - THIS_MODULE, &cfg); + audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, &cfg); if (!audit_sock) audit_panic("cannot initialize netlink socket"); else diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index c2e97787d01e..52e5abbc41db 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -382,8 +382,7 @@ static int uevent_net_init(struct net *net) if (!ue_sk) return -ENOMEM; - ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, - THIS_MODULE, &cfg); + ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, &cfg); if (!ue_sk->sk) { printk(KERN_ERR "kobject_uevent: unable to create netlink socket!\n"); diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 19063473c71f..3476ec469740 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -298,8 +298,7 @@ static int __init ebt_ulog_init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, - THIS_MODULE, &cfg); + ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); if (!ebtulognl) ret = -ENOMEM; else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a71806eb9cc6..508c5df4a09c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2384,7 +2384,7 @@ static int __net_init rtnetlink_net_init(struct net *net) .flags = NL_CFG_F_NONROOT_RECV, }; - sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg); + sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg); if (!sk) return -ENOMEM; net->rtnl = sk; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 9d8755e4a7a5..602cd637182e 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -172,8 +172,7 @@ static int __net_init diag_net_init(struct net *net) .input = sock_diag_rcv, }; - net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, - THIS_MODULE, &cfg); + net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg); return net->diag_nlsk == NULL ? -ENOMEM : 0; } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 11db0ecf342f..dfe42012a044 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -130,8 +130,7 @@ static int __init dn_rtmsg_init(void) .input = dnrmg_receive_user_skb, }; - dnrmg = netlink_kernel_create(&init_net, - NETLINK_DNRTMSG, THIS_MODULE, &cfg); + dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8a7cd795a96e..dc1f10ed1872 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -986,7 +986,7 @@ static int __net_init nl_fib_lookup_init(struct net *net) .input = nl_fib_input, }; - sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, THIS_MODULE, &cfg); + sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg); if (sk == NULL) return -EAFNOSUPPORT; net->ipv4.fibnl = sk; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 1109f7f6c254..b5ef3cba2250 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -396,8 +396,7 @@ static int __init ulog_tg_init(void) for (i = 0; i < ULOG_MAXNLGROUPS; i++) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, - THIS_MODULE, &cfg); + nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); if (!nflognl) return -ENOMEM; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a26503342e71..ffb92c03a358 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -241,7 +241,7 @@ static int __net_init nfnetlink_net_init(struct net *net) #endif }; - nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, THIS_MODULE, &cfg); + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); if (!nfnl) return -ENOMEM; net->nfnl_stash = nfnl; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b74540ce3c14..4d348e97e131 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1526,9 +1526,8 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(struct net *net, int unit, - struct module *module, - struct netlink_kernel_cfg *cfg) +__netlink_kernel_create(struct net *net, int unit, struct module *module, + struct netlink_kernel_cfg *cfg) { struct socket *sock; struct sock *sk; @@ -1603,8 +1602,7 @@ out_sock_release_nosk: sock_release(sock); return NULL; } -EXPORT_SYMBOL(netlink_kernel_create); - +EXPORT_SYMBOL(__netlink_kernel_create); void netlink_kernel_release(struct sock *sk) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index c1b71aef9f71..19288b7d6135 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -922,8 +922,7 @@ static int __net_init genl_pernet_init(struct net *net) }; /* we'll bump the group number right afterwards */ - net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, - THIS_MODULE, &cfg); + net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, &cfg); if (!net->genl_sock && net_eq(net, &init_net)) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ab58034c42d6..354070adb5ef 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2963,7 +2963,7 @@ static int __net_init xfrm_user_net_init(struct net *net) .input = xfrm_netlink_rcv, }; - nlsk = netlink_kernel_create(net, NETLINK_XFRM, THIS_MODULE, &cfg); + nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg); if (nlsk == NULL) return -ENOMEM; net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 0d2cd11f3c22..14d810ead420 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -116,8 +116,7 @@ static int __init selnl_init(void) .flags = NL_CFG_F_NONROOT_RECV, }; - selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX, - THIS_MODULE, &cfg); + selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX, &cfg); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); return 0; -- cgit v1.2.3-71-gd317 From bbdc18a3fb6740619f0d037241c85dc6cd4517aa Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 10 Sep 2012 12:05:18 +0000 Subject: properly __init-annotate pm_sysrq_init() This is used only as argument to subsys_initcall(). Signed-off-by: Jan Beulich Signed-off-by: Rafael J. Wysocki --- kernel/power/poweroff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index d52359374e85..68197a4e8fc9 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -37,7 +37,7 @@ static struct sysrq_key_op sysrq_poweroff_op = { .enable_mask = SYSRQ_ENABLE_BOOT, }; -static int pm_sysrq_init(void) +static int __init pm_sysrq_init(void) { register_sysrq_key('o', &sysrq_poweroff_op); return 0; -- cgit v1.2.3-71-gd317 From 15e473046cb6e5d18a4d0057e61d76315230382b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Sep 2012 20:12:54 +0000 Subject: netlink: Rename pid to portid to avoid confusion It is a frequent mistake to confuse the netlink port identifier with a process identifier. Try to reduce this confusion by renaming fields that hold port identifiers portid instead of pid. I have carefully avoided changing the structures exported to userspace to avoid changing the userspace API. I have successfully built an allyesconfig kernel with this change. Signed-off-by: "Eric W. Biederman" Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- crypto/crypto_user.c | 4 +- drivers/net/team/team.c | 38 +++--- drivers/net/wireless/mac80211_hwsim.c | 46 ++++---- drivers/scsi/scsi_transport_iscsi.c | 4 +- drivers/staging/gdm72xx/netlink_k.c | 2 +- fs/dlm/netlink.c | 8 +- include/linux/netlink.h | 14 +-- include/net/cfg80211.h | 2 +- include/net/genetlink.h | 34 +++--- include/net/netfilter/nf_conntrack_ecache.h | 32 +++--- include/net/netlink.h | 26 ++--- include/net/nfc/nfc.h | 2 +- include/net/xfrm.h | 6 +- kernel/audit.c | 20 ++-- kernel/taskstats.c | 4 +- net/bridge/br_fdb.c | 6 +- net/bridge/br_netlink.c | 2 +- net/can/gw.c | 2 +- net/core/fib_rules.c | 6 +- net/core/neighbour.c | 8 +- net/core/rtnetlink.c | 12 +- net/dcb/dcbnl.c | 18 +-- net/decnet/dn_dev.c | 6 +- net/decnet/dn_route.c | 10 +- net/decnet/dn_table.c | 12 +- net/ieee802154/nl-mac.c | 6 +- net/ieee802154/nl-phy.c | 6 +- net/ipv4/devinet.c | 28 ++--- net/ipv4/fib_frontend.c | 10 +- net/ipv4/fib_semantics.c | 8 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/inet_diag.c | 32 +++--- net/ipv4/ipmr.c | 10 +- net/ipv4/route.c | 8 +- net/ipv4/tcp_metrics.c | 2 +- net/ipv4/udp_diag.c | 6 +- net/ipv6/addrconf.c | 32 +++--- net/ipv6/addrlabel.c | 10 +- net/ipv6/ip6mr.c | 10 +- net/ipv6/route.c | 20 ++-- net/irda/irnetlink.c | 2 +- net/key/af_key.c | 32 +++--- net/l2tp/l2tp_netlink.c | 24 ++-- net/netfilter/ipset/ip_set_core.c | 24 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 6 +- net/netfilter/nf_conntrack_ecache.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 78 ++++++------- net/netfilter/nfnetlink_acct.c | 12 +- net/netfilter/nfnetlink_cthelper.c | 12 +- net/netfilter/nfnetlink_cttimeout.c | 12 +- net/netfilter/nfnetlink_log.c | 18 +-- net/netfilter/nfnetlink_queue_core.c | 28 ++--- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_mgmt.c | 4 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlink/af_netlink.c | 172 ++++++++++++++-------------- net/netlink/genetlink.c | 42 +++---- net/nfc/netlink.c | 26 ++--- net/openvswitch/actions.c | 4 +- net/openvswitch/datapath.c | 84 +++++++------- net/openvswitch/datapath.h | 2 +- net/openvswitch/vport.c | 2 +- net/openvswitch/vport.h | 6 +- net/packet/diag.c | 6 +- net/phonet/pn_netlink.c | 14 +-- net/sched/act_api.c | 52 ++++----- net/sched/cls_api.c | 14 +-- net/sched/sch_api.c | 44 +++---- net/tipc/netlink.c | 2 +- net/unix/diag.c | 14 +-- net/wireless/core.h | 2 +- net/wireless/mlme.c | 16 +-- net/wireless/nl80211.c | 110 +++++++++--------- net/xfrm/xfrm_state.c | 10 +- net/xfrm/xfrm_user.c | 62 +++++----- 75 files changed, 728 insertions(+), 728 deletions(-) (limited to 'kernel') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 165914e63ef2..6bba414d0c61 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -166,7 +166,7 @@ static int crypto_report_alg(struct crypto_alg *alg, struct crypto_user_alg *ualg; int err = 0; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, info->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq, CRYPTO_MSG_GETALG, sizeof(*ualg), info->nlmsg_flags); if (!nlh) { err = -EMSGSIZE; @@ -216,7 +216,7 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, if (err) return err; - return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).pid); + return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid); } static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b4f67b55ef79..266af7b38ebc 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1886,7 +1886,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &team_nl_family, 0, TEAM_CMD_NOOP); if (IS_ERR(hdr)) { err = PTR_ERR(hdr); @@ -1895,7 +1895,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, hdr); - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_msg_put: nlmsg_free(msg); @@ -1952,7 +1952,7 @@ static int team_nl_send_generic(struct genl_info *info, struct team *team, if (err < 0) goto err_fill; - err = genlmsg_unicast(genl_info_net(info), skb, info->snd_pid); + err = genlmsg_unicast(genl_info_net(info), skb, info->snd_portid); return err; err_fill: @@ -1961,11 +1961,11 @@ err_fill: } typedef int team_nl_send_func_t(struct sk_buff *skb, - struct team *team, u32 pid); + struct team *team, u32 portid); -static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 pid) +static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 portid) { - return genlmsg_unicast(dev_net(team->dev), skb, pid); + return genlmsg_unicast(dev_net(team->dev), skb, portid); } static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team, @@ -2050,13 +2050,13 @@ nest_cancel: } static int __send_and_alloc_skb(struct sk_buff **pskb, - struct team *team, u32 pid, + struct team *team, u32 portid, team_nl_send_func_t *send_func) { int err; if (*pskb) { - err = send_func(*pskb, team, pid); + err = send_func(*pskb, team, portid); if (err) return err; } @@ -2066,7 +2066,7 @@ static int __send_and_alloc_skb(struct sk_buff **pskb, return 0; } -static int team_nl_send_options_get(struct team *team, u32 pid, u32 seq, +static int team_nl_send_options_get(struct team *team, u32 portid, u32 seq, int flags, team_nl_send_func_t *send_func, struct list_head *sel_opt_inst_list) { @@ -2083,11 +2083,11 @@ static int team_nl_send_options_get(struct team *team, u32 pid, u32 seq, struct team_option_inst, tmp_list); start_again: - err = __send_and_alloc_skb(&skb, team, pid, send_func); + err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) return err; - hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags | NLM_F_MULTI, + hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2120,15 +2120,15 @@ start_again: goto start_again; send_done: - nlh = nlmsg_put(skb, pid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); if (!nlh) { - err = __send_and_alloc_skb(&skb, team, pid, send_func); + err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) goto errout; goto send_done; } - return send_func(skb, team, pid); + return send_func(skb, team, portid); nla_put_failure: err = -EMSGSIZE; @@ -2151,7 +2151,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info) list_for_each_entry(opt_inst, &team->option_inst_list, list) list_add_tail(&opt_inst->tmp_list, &sel_opt_inst_list); - err = team_nl_send_options_get(team, info->snd_pid, info->snd_seq, + err = team_nl_send_options_get(team, info->snd_portid, info->snd_seq, NLM_F_ACK, team_nl_send_unicast, &sel_opt_inst_list); @@ -2305,7 +2305,7 @@ team_put: } static int team_nl_fill_port_list_get(struct sk_buff *skb, - u32 pid, u32 seq, int flags, + u32 portid, u32 seq, int flags, struct team *team, bool fillall) { @@ -2313,7 +2313,7 @@ static int team_nl_fill_port_list_get(struct sk_buff *skb, void *hdr; struct team_port *port; - hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags, + hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags, TEAM_CMD_PORT_LIST_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2362,7 +2362,7 @@ static int team_nl_fill_port_list_get_all(struct sk_buff *skb, struct genl_info *info, int flags, struct team *team) { - return team_nl_fill_port_list_get(skb, info->snd_pid, + return team_nl_fill_port_list_get(skb, info->snd_portid, info->snd_seq, NLM_F_ACK, team, true); } @@ -2415,7 +2415,7 @@ static struct genl_multicast_group team_change_event_mcgrp = { }; static int team_nl_send_multicast(struct sk_buff *skb, - struct team *team, u32 pid) + struct team *team, u32 portid) { return genlmsg_multicast_netns(dev_net(team->dev), skb, 0, team_change_event_mcgrp.id, GFP_KERNEL); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 72b0456e41bf..9d45b3bb974c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -38,7 +38,7 @@ MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Software simulator of 802.11 radio(s) for mac80211"); MODULE_LICENSE("GPL"); -static u32 wmediumd_pid; +static u32 wmediumd_portid; static int radios = 2; module_param(radios, int, 0444); @@ -545,7 +545,7 @@ static bool mac80211_hwsim_addr_match(struct mac80211_hwsim_data *data, static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, struct sk_buff *my_skb, - int dst_pid) + int dst_portid) { struct sk_buff *skb; struct mac80211_hwsim_data *data = hw->priv; @@ -619,7 +619,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure; genlmsg_end(skb, msg_head); - genlmsg_unicast(&init_net, skb, dst_pid); + genlmsg_unicast(&init_net, skb, dst_portid); /* Enqueue the packet */ skb_queue_tail(&data->pending, my_skb); @@ -715,7 +715,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, { bool ack; struct ieee80211_tx_info *txi; - u32 _pid; + u32 _portid; mac80211_hwsim_monitor_rx(hw, skb); @@ -726,10 +726,10 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, } /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); /* NO wmediumd detected, perfect medium simulation */ ack = mac80211_hwsim_tx_frame_no_nl(hw, skb); @@ -814,7 +814,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, struct ieee80211_hw *hw = arg; struct sk_buff *skb; struct ieee80211_tx_info *info; - u32 _pid; + u32 _portid; hwsim_check_magic(vif); @@ -831,10 +831,10 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, mac80211_hwsim_monitor_rx(hw, skb); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); mac80211_hwsim_tx_frame_no_nl(hw, skb); dev_kfree_skb(skb); @@ -1315,7 +1315,7 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) struct hwsim_vif_priv *vp = (void *)vif->drv_priv; struct sk_buff *skb; struct ieee80211_pspoll *pspoll; - u32 _pid; + u32 _portid; if (!vp->assoc) return; @@ -1336,10 +1336,10 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) memcpy(pspoll->ta, mac, ETH_ALEN); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(data->hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(data->hw, skb, _portid); if (!mac80211_hwsim_tx_frame_no_nl(data->hw, skb)) printk(KERN_DEBUG "%s: PS-poll frame not ack'ed\n", __func__); @@ -1353,7 +1353,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, struct hwsim_vif_priv *vp = (void *)vif->drv_priv; struct sk_buff *skb; struct ieee80211_hdr *hdr; - u32 _pid; + u32 _portid; if (!vp->assoc) return; @@ -1375,10 +1375,10 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, memcpy(hdr->addr3, vp->bssid, ETH_ALEN); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(data->hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(data->hw, skb, _portid); if (!mac80211_hwsim_tx_frame_no_nl(data->hw, skb)) printk(KERN_DEBUG "%s: nullfunc frame not ack'ed\n", __func__); @@ -1632,10 +1632,10 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2, if (info == NULL) goto out; - wmediumd_pid = info->snd_pid; + wmediumd_portid = info->snd_portid; printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, " - "switching to wmediumd mode with pid %d\n", info->snd_pid); + "switching to wmediumd mode with pid %d\n", info->snd_portid); return 0; out: @@ -1672,10 +1672,10 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, if (state != NETLINK_URELEASE) return NOTIFY_DONE; - if (notify->pid == wmediumd_pid) { + if (notify->portid == wmediumd_portid) { printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink" " socket, switching to perfect channel medium\n"); - wmediumd_pid = 0; + wmediumd_portid = 0; } return NOTIFY_DONE; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 519bd5303f3f..31969f2e13ce 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2119,7 +2119,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) switch (nlh->nlmsg_type) { case ISCSI_UEVENT_CREATE_SESSION: err = iscsi_if_create_session(priv, ep, ev, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, ev->u.c_session.initial_cmdsn, ev->u.c_session.cmds_max, ev->u.c_session.queue_depth); @@ -2132,7 +2132,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) } err = iscsi_if_create_session(priv, ep, ev, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, ev->u.c_bound_session.initial_cmdsn, ev->u.c_bound_session.cmds_max, ev->u.c_bound_session.queue_depth); diff --git a/drivers/staging/gdm72xx/netlink_k.c b/drivers/staging/gdm72xx/netlink_k.c index 2109cab0a14c..20d0aec52e72 100644 --- a/drivers/staging/gdm72xx/netlink_k.c +++ b/drivers/staging/gdm72xx/netlink_k.c @@ -135,7 +135,7 @@ int netlink_send(struct sock *sock, int group, u16 type, void *msg, int len) } memcpy(nlmsg_data(nlh), msg, len); - NETLINK_CB(skb).pid = 0; + NETLINK_CB(skb).portid = 0; NETLINK_CB(skb).dst_group = 0; ret = netlink_broadcast(sock, skb, 0, group+1, GFP_ATOMIC); diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index ef17e0169da1..60a327863b11 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -14,7 +14,7 @@ #include "dlm_internal.h" static uint32_t dlm_nl_seqnum; -static uint32_t listener_nlpid; +static uint32_t listener_nlportid; static struct genl_family family = { .id = GENL_ID_GENERATE, @@ -64,13 +64,13 @@ static int send_data(struct sk_buff *skb) return rv; } - return genlmsg_unicast(&init_net, skb, listener_nlpid); + return genlmsg_unicast(&init_net, skb, listener_nlportid); } static int user_cmd(struct sk_buff *skb, struct genl_info *info) { - listener_nlpid = info->snd_pid; - printk("user_cmd nlpid %u\n", listener_nlpid); + listener_nlportid = info->snd_portid; + printk("user_cmd nlpid %u\n", listener_nlportid); return 0; } diff --git a/include/linux/netlink.h b/include/linux/netlink.h index cd17dda5a987..73ade5fbc856 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -165,7 +165,7 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) struct netlink_skb_parms { struct scm_creds creds; /* Skb credentials */ - __u32 pid; + __u32 portid; __u32 dst_group; struct sock *ssk; }; @@ -205,14 +205,14 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group) extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); -extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); -extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, +extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); +extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, - __u32 pid, __u32 group, gfp_t allocation, + __u32 portid, __u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data); -extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); +extern int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); @@ -253,12 +253,12 @@ struct netlink_callback { struct netlink_notify { struct net *net; - int pid; + int portid; int protocol; }; struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags); +__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { int (*dump)(struct sk_buff *skb, struct netlink_callback *); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ba2e6160fad1..7b3b0568d289 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2458,7 +2458,7 @@ struct wireless_dev { int beacon_interval; - u32 ap_unexpected_nlpid; + u32 ap_unexpected_nlportid; #ifdef CONFIG_CFG80211_WEXT /* wext data */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 48905cd3884c..bdfbe68c1c3b 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -65,7 +65,7 @@ struct genl_family { /** * struct genl_info - receiving information * @snd_seq: sending sequence number - * @snd_pid: netlink pid of sender + * @snd_portid: netlink portid of sender * @nlhdr: netlink message header * @genlhdr: generic netlink message header * @userhdr: user specific header @@ -75,7 +75,7 @@ struct genl_family { */ struct genl_info { u32 snd_seq; - u32 snd_pid; + u32 snd_portid; struct nlmsghdr * nlhdr; struct genlmsghdr * genlhdr; void * userhdr; @@ -130,10 +130,10 @@ extern int genl_register_mc_group(struct genl_family *family, struct genl_multicast_group *grp); extern void genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp); -extern void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, +extern void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags); -void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd); /** @@ -183,7 +183,7 @@ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_family *family, int flags, u8 cmd) { - return genlmsg_put(skb, info->snd_pid, info->snd_seq, family, + return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, flags, cmd); } @@ -212,49 +212,49 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) * genlmsg_multicast_netns - multicast a netlink message to a specific netns * @net: the net namespace * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ static inline int genlmsg_multicast_netns(struct net *net, struct sk_buff *skb, - u32 pid, unsigned int group, gfp_t flags) + u32 portid, unsigned int group, gfp_t flags) { - return nlmsg_multicast(net->genl_sock, skb, pid, group, flags); + return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); } /** * genlmsg_multicast - multicast a netlink message to the default netns * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ -static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, +static inline int genlmsg_multicast(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - return genlmsg_multicast_netns(&init_net, skb, pid, group, flags); + return genlmsg_multicast_netns(&init_net, skb, portid, group, flags); } /** * genlmsg_multicast_allns - multicast a netlink message to all net namespaces * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags * * This function must hold the RTNL or rcu_read_lock(). */ -int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, +int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags); /** * genlmsg_unicast - unicast a netlink message * @skb: netlink message as socket buffer - * @pid: netlink pid of the destination socket + * @portid: netlink portid of the destination socket */ -static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid) +static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 portid) { - return nlmsg_unicast(net->genl_sock, skb, pid); + return nlmsg_unicast(net->genl_sock, skb, portid); } /** @@ -264,7 +264,7 @@ static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid) */ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) { - return genlmsg_unicast(genl_info_net(info), skb, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), skb, info->snd_portid); } /** diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 4a045cda9c60..5654d292efd4 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -17,7 +17,7 @@ struct nf_conntrack_ecache { unsigned long missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ - u32 pid; /* netlink pid of destroyer */ + u32 portid; /* netlink portid of destroyer */ struct timer_list timeout; }; @@ -60,7 +60,7 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) /* This structure is passed to event handler */ struct nf_ct_event { struct nf_conn *ct; - u32 pid; + u32 portid; int report; }; @@ -92,7 +92,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) static inline int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { int ret = 0; @@ -112,11 +112,11 @@ nf_conntrack_eventmask_report(unsigned int eventmask, if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { struct nf_ct_event item = { .ct = ct, - .pid = e->pid ? e->pid : pid, + .portid = e->portid ? e->portid : portid, .report = report }; /* This is a resent of a destroy event? If so, skip missed */ - unsigned long missed = e->pid ? 0 : e->missed; + unsigned long missed = e->portid ? 0 : e->missed; if (!((eventmask | missed) & e->ctmask)) goto out_unlock; @@ -126,11 +126,11 @@ nf_conntrack_eventmask_report(unsigned int eventmask, spin_lock_bh(&ct->lock); if (ret < 0) { /* This is a destroy event that has been - * triggered by a process, we store the PID + * triggered by a process, we store the PORTID * to include it in the retransmission. */ if (eventmask & (1 << IPCT_DESTROY) && - e->pid == 0 && pid != 0) - e->pid = pid; + e->portid == 0 && portid != 0) + e->portid = portid; else e->missed |= eventmask; } else @@ -145,9 +145,9 @@ out_unlock: static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, - u32 pid, int report) + u32 portid, int report) { - return nf_conntrack_eventmask_report(1 << event, ct, pid, report); + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); } static inline int @@ -158,7 +158,7 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) struct nf_exp_event { struct nf_conntrack_expect *exp; - u32 pid; + u32 portid; int report; }; @@ -172,7 +172,7 @@ extern void nf_ct_expect_unregister_notifier(struct net *net, struct nf_exp_even static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, - u32 pid, + u32 portid, int report) { struct net *net = nf_ct_exp_net(exp); @@ -191,7 +191,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, - .pid = pid, + .portid = portid, .report = report }; notify->fcn(1 << event, &item); @@ -216,20 +216,20 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) {} static inline int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { return 0; } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { return 0; } static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { return 0; } static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) {} static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, struct nf_conntrack_expect *exp, - u32 pid, + u32 portid, int report) {} static inline int nf_conntrack_ecache_init(struct net *net) diff --git a/include/net/netlink.h b/include/net/netlink.h index 09175d5d1fbf..9690b0f6698a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -217,19 +217,19 @@ struct nla_policy { /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request - * @pid: Netlink PID of requesting application + * @portid: Netlink PORTID of requesting application */ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; - u32 pid; + u32 portid; }; extern int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *)); extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group, int report, + u32 portid, unsigned int group, int report, gfp_t flags); extern int nla_validate(const struct nlattr *head, @@ -444,7 +444,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) /** * nlmsg_put - Add a new netlink message to an skb * @skb: socket buffer to store message in - * @pid: netlink process id + * @portid: netlink process id * @seq: sequence number of message * @type: message type * @payload: length of message payload @@ -453,13 +453,13 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) * Returns NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ -static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int payload, int flags) { if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload))) return NULL; - return __nlmsg_put(skb, pid, seq, type, payload, flags); + return __nlmsg_put(skb, portid, seq, type, payload, flags); } /** @@ -478,7 +478,7 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, int type, int payload, int flags) { - return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + return nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, type, payload, flags); } @@ -563,18 +563,18 @@ static inline void nlmsg_free(struct sk_buff *skb) * nlmsg_multicast - multicast a netlink message * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group, gfp_t flags) + u32 portid, unsigned int group, gfp_t flags) { int err; NETLINK_CB(skb).dst_group = group; - err = netlink_broadcast(sk, skb, pid, group, flags); + err = netlink_broadcast(sk, skb, portid, group, flags); if (err > 0) err = 0; @@ -585,13 +585,13 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, * nlmsg_unicast - unicast a netlink message * @sk: netlink socket to spread message to * @skb: netlink message as socket buffer - * @pid: netlink pid of the destination socket + * @portid: netlink portid of the destination socket */ -static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid) +static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 portid) { int err; - err = netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + err = netlink_unicast(sk, skb, portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 6431f5e39022..6735909f826d 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -89,7 +89,7 @@ struct nfc_target { }; struct nfc_genl_data { - u32 poll_req_pid; + u32 poll_req_portid; struct mutex genl_data_mutex; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36ad56ba648b..ee8613f01860 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -263,7 +263,7 @@ struct km_event { } data; u32 seq; - u32 pid; + u32 portid; u32 event; struct net *net; }; @@ -310,7 +310,7 @@ extern void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; extern int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -extern void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +extern void km_state_expired(struct xfrm_state *x, int hard, u32 portid); extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { @@ -1554,7 +1554,7 @@ extern int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, #endif extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); -extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); +extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid); extern int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); diff --git a/kernel/audit.c b/kernel/audit.c index a24aafa850ae..e0cf64a0ae2d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -87,11 +87,11 @@ static int audit_failure = AUDIT_FAIL_PRINTK; /* * If audit records are to be written to the netlink socket, audit_pid - * contains the pid of the auditd process and audit_nlk_pid contains - * the pid to use to send netlink messages to that process. + * contains the pid of the auditd process and audit_nlk_portid contains + * the portid to use to send netlink messages to that process. */ int audit_pid; -static int audit_nlk_pid; +static int audit_nlk_portid; /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in @@ -401,7 +401,7 @@ static void kauditd_send_skb(struct sk_buff *skb) int err; /* take a reference in case we can't send it and we want to hold it */ skb_get(skb); - err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0); + err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); if (err < 0) { BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); @@ -692,7 +692,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) status_set.backlog_limit = audit_backlog_limit; status_set.lost = atomic_read(&audit_lost); status_set.backlog = skb_queue_len(&audit_skb_queue); - audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, &status_set, sizeof(status_set)); break; case AUDIT_SET: @@ -720,7 +720,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sessionid, sid, 1); audit_pid = new_pid; - audit_nlk_pid = NETLINK_CB(skb).pid; + audit_nlk_portid = NETLINK_CB(skb).portid; } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(status_get->rate_limit, @@ -782,7 +782,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST: - err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; @@ -801,7 +801,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST_RULES: - err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; @@ -872,7 +872,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memcpy(sig_data->ctx, ctx, len); security_release_secctx(ctx, len); } - audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, sizeof(*sig_data) + len); kfree(sig_data); break; @@ -891,7 +891,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) rcu_read_unlock(); if (!err) - audit_send_reply(NETLINK_CB(skb).pid, seq, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d0a32796550f..123793cd06f9 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -467,7 +467,7 @@ static int cmd_attr_register_cpumask(struct genl_info *info) rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) goto out; - rc = add_del_listener(info->snd_pid, mask, REGISTER); + rc = add_del_listener(info->snd_portid, mask, REGISTER); out: free_cpumask_var(mask); return rc; @@ -483,7 +483,7 @@ static int cmd_attr_deregister_cpumask(struct genl_info *info) rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) goto out; - rc = add_del_listener(info->snd_pid, mask, DEREGISTER); + rc = add_del_listener(info->snd_portid, mask, DEREGISTER); out: free_cpumask_var(mask); return rc; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9ce430b4657c..ddf93efc133c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -467,14 +467,14 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, - u32 pid, u32 seq, int type, unsigned int flags) + u32 portid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -555,7 +555,7 @@ int br_fdb_dump(struct sk_buff *skb, goto skip; if (fdb_fill_info(skb, br, f, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) < 0) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe41260fbf38..093f527276a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -127,7 +127,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) goto skip; if (br_fill_ifinfo(skb, port, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) break; diff --git a/net/can/gw.c b/net/can/gw.c index b54d5e695b03..127879c55fb6 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -549,7 +549,7 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).pid, + if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..58a4ba27dfe3 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (unresolved) ops->unresolved_rules++; - notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); return 0; @@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); if (ops->delete) ops->delete(rule); fib_rule_put(rule); @@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI, ops) < 0) break; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 117afaf51268..c160adb38e5a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (tidx < tbl_skip || (family && tbl->family != family)) continue; - if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) break; @@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (neightbl_fill_param_info(skb, tbl, p, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) @@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) <= 0) { @@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, tbl) <= 0) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 508c5df4a09c..92575370d9f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1081,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI, ext_filter_mask) <= 0) @@ -1899,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (nskb == NULL) return -ENOBUFS; - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, + err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else - err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); return err; } @@ -2180,9 +2180,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, { struct netdev_hw_addr *ha; int err; - u32 pid, seq; + u32 portid, seq; - pid = NETLINK_CB(cb->skb).pid; + portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { @@ -2190,7 +2190,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, - pid, seq, 0, NTF_SELF); + portid, seq, 0, NTF_SELF); if (err < 0) return err; skip: diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 81f2bb62dea3..70989e672304 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1319,7 +1319,7 @@ nla_put_failure: } static int dcbnl_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid, int dcbx_ver) + u32 seq, u32 portid, int dcbx_ver) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -1330,7 +1330,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, if (!ops) return -EOPNOTSUPP; - skb = dcbnl_newmsg(event, cmd, pid, seq, 0, &nlh); + skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh); if (!skb) return -ENOBUFS; @@ -1353,16 +1353,16 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, } int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_IEEE); } EXPORT_SYMBOL(dcbnl_ieee_notify); int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_CEE); } EXPORT_SYMBOL(dcbnl_cee_notify); @@ -1656,7 +1656,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *netdev; struct dcbmsg *dcb = nlmsg_data(nlh); struct nlattr *tb[DCB_ATTR_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = -EINVAL; struct sk_buff *reply_skb; struct nlmsghdr *reply_nlh = NULL; @@ -1690,7 +1690,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out; } - reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, pid, nlh->nlmsg_seq, + reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq, nlh->nlmsg_flags, &reply_nlh); if (!reply_skb) { ret = -ENOBUFS; @@ -1705,7 +1705,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) nlmsg_end(reply_skb, reply_nlh); - ret = rtnl_unicast(reply_skb, &init_net, pid); + ret = rtnl_unicast(reply_skb, &init_net, portid); out: dev_put(netdev); return ret; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index f3924ab1e019..7b7e561412d3 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -667,12 +667,12 @@ static inline size_t dn_ifaddr_nlmsg_size(void) } static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -753,7 +753,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (dn_idx < skip_naddr) continue; - if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) < 0) goto done; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index c855e8d0738f..b57419cc41a4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1543,7 +1543,7 @@ static int dn_route_input(struct sk_buff *skb) return dn_route_input_slow(skb); } -static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct dn_route *rt = (struct dn_route *)skb_dst(skb); @@ -1551,7 +1551,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, struct nlmsghdr *nlh; long expires; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; @@ -1685,7 +1685,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); + err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err == 0) goto out_free; @@ -1694,7 +1694,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void goto out_free; } - return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid); out_free: kfree_skb(skb); @@ -1737,7 +1737,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) continue; skb_dst_set(skb, dst_clone(&rt->dst)); - if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { skb_dst_drop(skb); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 16c986ab1228..f968c1b58f47 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -291,14 +291,14 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) return payload; } -static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -374,14 +374,14 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; - u32 pid = req ? req->pid : 0; + u32 portid = req ? req->portid : 0; int err = -ENOBUFS; skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL); if (skb == NULL) goto errout; - err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, + err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id, f->fn_type, f->fn_scope, &f->fn_key, z, DN_FIB_INFO(f), 0); if (err < 0) { @@ -390,7 +390,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, kfree_skb(skb); goto errout; } - rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); return; errout: if (err < 0) @@ -411,7 +411,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, continue; if (f->fn_state & DN_S_ZOMBIE) continue; - if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->n, diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 1e9917124e75..96bb08abece2 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -246,7 +246,7 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_start_confirm); -static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev) { void *hdr; @@ -534,7 +534,7 @@ static int ieee802154_list_iface(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq, 0, dev); if (rc < 0) goto out_free; @@ -565,7 +565,7 @@ static int ieee802154_dump_iface(struct sk_buff *skb, if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) goto cont; - if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, + if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) break; cont: diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index d54be34cca94..22b1a7058fd3 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -35,7 +35,7 @@ #include "ieee802154.h" -static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct wpan_phy *phy) { void *hdr; @@ -105,7 +105,7 @@ static int ieee802154_list_phy(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq, 0, phy); if (rc < 0) goto out_free; @@ -138,7 +138,7 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) return 0; rc = ieee802154_nl_fill_phy(data->skb, - NETLINK_CB(data->cb->skb).pid, + NETLINK_CB(data->cb->skb).portid, data->cb->nlh->nlmsg_seq, NLM_F_MULTI, phy); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f14eff506743..7b00556e184b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -311,7 +311,7 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) } static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy, struct nlmsghdr *nlh, u32 pid) + int destroy, struct nlmsghdr *nlh, u32 portid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -345,7 +345,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -382,7 +382,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ - rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -395,7 +395,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { @@ -417,7 +417,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -464,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; @@ -563,7 +563,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) continue; - __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -649,7 +649,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); } /* @@ -1246,12 +1246,12 @@ static size_t inet_nlmsg_size(void) } static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -1313,7 +1313,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) { rcu_read_unlock(); @@ -1335,7 +1335,7 @@ done: } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct sk_buff *skb; u32 seq = nlh ? nlh->nlmsg_seq : 0; @@ -1347,14 +1347,14 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, if (skb == NULL) goto errout; - err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); + err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); return; errout: if (err < 0) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index dc1f10ed1872..68c93d1bb03a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -557,7 +557,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_flags = rtm->rtm_flags; cfg->fc_nlflags = nlh->nlmsg_flags; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = net; @@ -955,7 +955,7 @@ static void nl_fib_input(struct sk_buff *skb) struct fib_result_nl *frn; struct nlmsghdr *nlh; struct fib_table *tb; - u32 pid; + u32 portid; net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); @@ -973,10 +973,10 @@ static void nl_fib_input(struct sk_buff *skb) nl_fib_lookup(frn, tb); - pid = NETLINK_CB(skb).pid; /* pid of sending process */ - NETLINK_CB(skb).pid = 0; /* from kernel */ + portid = NETLINK_CB(skb).portid; /* pid of sending process */ + NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); + netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); } static int __net_init nl_fib_lookup_init(struct net *net) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da80dc14cc76..3509065e409a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -391,7 +391,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, if (skb == NULL) goto errout; - err = fib_dump_info(skb, info->pid, seq, event, tb_id, + err = fib_dump_info(skb, info->portid, seq, event, tb_id, fa->fa_type, key, dst_len, fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { @@ -400,7 +400,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, kfree_skb(skb); goto errout; } - rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, + rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); return; errout: @@ -989,14 +989,14 @@ failure: return ERR_PTR(err); } -int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8d766106b540..31d771ca9a70 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1873,7 +1873,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8bc005b1435f..535584c00f91 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -70,7 +70,7 @@ static inline void inet_diag_unlock_handler( int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); @@ -84,7 +84,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -201,23 +201,23 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, user_ns, pid, seq, nlmsg_flags, unlh); + skb, req, user_ns, portid, seq, nlmsg_flags, unlh); } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, struct sk_buff *skb, struct inet_diag_req_v2 *req, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; struct inet_diag_msg *r; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -260,14 +260,14 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *r, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, pid, seq, nlmsg_flags, + skb, r, portid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, r, user_ns, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -316,14 +316,14 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).ssk), - NETLINK_CB(in_skb).pid, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -557,7 +557,7 @@ static int inet_csk_diag_dump(struct sock *sk, return inet_csk_diag_fill(sk, skb, r, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -592,14 +592,14 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, } return inet_twsk_diag_fill(tw, skb, r, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct request_sock *req, struct user_namespace *user_ns, - u32 pid, u32 seq, + u32 portid, u32 seq, const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -608,7 +608,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; @@ -711,7 +711,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, err = inet_diag_fill_req(skb, sk, req, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { cb->args[3] = j + 1; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8aa7a4cf9139..1daa95c2a0ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -626,7 +626,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { kfree_skb(skb); } @@ -870,7 +870,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { ip_mr_forward(net, mrt, skb, c, 0); } @@ -2117,12 +2117,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc_cache *c) + u32 portid, u32 seq, struct mfc_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2176,7 +2176,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d39edf16d607..940f4f4cb201 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2136,7 +2136,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, EXPORT_SYMBOL_GPL(ip_route_output_flow); static int rt_fill_info(struct net *net, __be32 dst, __be32 src, - struct flowi4 *fl4, struct sk_buff *skb, u32 pid, + struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct rtable *rt = skb_rtable(skb); @@ -2146,7 +2146,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2306,12 +2306,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void rt->rt_flags |= RTCF_NOTIFY; err = rt_fill_info(net, dst, src, &fl4, skb, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) goto errout_free; - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 988edb63ee73..4c752a6e0bcd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -803,7 +803,7 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tcp_metrics_nl_family, NLM_F_MULTI, TCP_METRICS_CMD_GET); if (!hdr) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d2f336ea82ca..505b30ad9182 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -26,7 +26,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return inet_sk_diag_fill(sk, NULL, skb, req, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -72,14 +72,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, err = inet_sk_diag_fill(sk, NULL, rep, req, sk_user_ns(NETLINK_CB(in_skb).ssk), - NETLINK_CB(in_skb).pid, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 572cb660837b..1237d5d037d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3534,12 +3534,12 @@ static inline int inet6_ifaddr_msgsize(void) } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u32 preferred, valid; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3577,7 +3577,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3586,7 +3586,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3602,7 +3602,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3611,7 +3611,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3652,7 +3652,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (++ip_idx < s_ip_idx) continue; err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); @@ -3668,7 +3668,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); @@ -3683,7 +3683,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); @@ -3805,7 +3805,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_ifa; } - err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDR, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ @@ -3813,7 +3813,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout_ifa: in6_ifa_put(ifa); errout: @@ -4015,14 +4015,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) } static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; struct ifinfomsg *hdr; struct nlmsghdr *nlh; void *protoinfo; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) return -EMSGSIZE; @@ -4080,7 +4080,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (!idev) goto cont; if (inet6_fill_ifinfo(skb, idev, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) <= 0) goto out; @@ -4128,14 +4128,14 @@ static inline size_t inet6_prefix_nlmsg_size(void) } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, - struct prefix_info *pinfo, u32 pid, u32 seq, + struct prefix_info *pinfo, u32 portid, u32 seq, int event, unsigned int flags) { struct prefixmsg *pmsg; struct nlmsghdr *nlh; struct prefix_cacheinfo ci; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index eb6a63632d3c..0b0171570d17 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -470,10 +470,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, static int ip6addrlbl_fill(struct sk_buff *skb, struct ip6addrlbl_entry *p, u32 lseq, - u32 pid, u32 seq, int event, + u32 portid, u32 seq, int event, unsigned int flags) { - struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, + struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrlblmsg), flags); if (!nlh) return -EMSGSIZE; @@ -503,7 +503,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, ip6addrlbl_table.seq, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI)) <= 0) @@ -574,7 +574,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, } err = ip6addrlbl_fill(skb, p, lseq, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDRLABEL, 0); ip6addrlbl_put(p); @@ -585,7 +585,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto out; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); out: return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4532973f0dd4..08ea3f0b6e55 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); } @@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else ip6_mr_forward(net, mrt, skb, c); } @@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc6_cache *c) + u32 portid, u32 seq, struct mfc6_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 339d921cf3b6..a81c6790a648 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1874,7 +1874,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, }; @@ -1924,7 +1924,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = dev_net(dev), }; @@ -2285,7 +2285,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2376,7 +2376,7 @@ static inline size_t rt6_nlmsg_size(void) static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, - int iif, int type, u32 pid, u32 seq, + int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; @@ -2392,7 +2392,7 @@ static int rt6_fill_node(struct net *net, } } - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -2537,7 +2537,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) return rt6_fill_node(arg->net, arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, + NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, prefix, 0, NLM_F_MULTI); } @@ -2617,14 +2617,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).pid, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; } @@ -2644,14 +2644,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->pid, seq, 0, 0, 0); + event, info->portid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); return; errout: diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 6c7c4b92e4f8..c32971269280 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -100,7 +100,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) goto err_out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); if (hdr == NULL) { ret = -EMSGSIZE; diff --git a/net/key/af_key.c b/net/key/af_key.c index 334f93b8cfcb..2ca7d7f6861c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -54,7 +54,7 @@ struct pfkey_sock { struct { uint8_t msg_version; - uint32_t msg_pid; + uint32_t msg_portid; int (*dump)(struct pfkey_sock *sk); void (*done)(struct pfkey_sock *sk); union { @@ -1447,7 +1447,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); @@ -1486,7 +1486,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); @@ -1523,7 +1523,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: @@ -1701,7 +1701,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -1736,7 +1736,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m c.data.proto = proto; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); @@ -1764,7 +1764,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -1798,7 +1798,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms return -EINVAL; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto); @@ -2157,7 +2157,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; - out_hdr->sadb_msg_pid = c->pid; + out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; @@ -2272,7 +2272,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); @@ -2351,7 +2351,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); @@ -2597,7 +2597,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (err) goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); @@ -2634,7 +2634,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -2663,7 +2663,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb return -EBUSY; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); @@ -2682,7 +2682,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -2711,7 +2711,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; km_policy_notify(NULL, 0, &c); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a47..6ec3f67ad3f1 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -78,7 +78,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); if (IS_ERR(hdr)) { ret = PTR_ERR(hdr); @@ -87,7 +87,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, hdr); - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -235,7 +235,7 @@ out: return ret; } -static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_tunnel *tunnel) { void *hdr; @@ -248,7 +248,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, struct l2tp_stats stats; unsigned int start; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -359,12 +359,12 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel); if (ret < 0) goto err_out; - return genlmsg_unicast(net, msg, info->snd_pid); + return genlmsg_unicast(net, msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -384,7 +384,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (tunnel == NULL) goto out; - if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, tunnel) <= 0) goto out; @@ -604,7 +604,7 @@ out: return ret; } -static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_session *session) { void *hdr; @@ -616,7 +616,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -705,12 +705,12 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session); if (ret < 0) goto err_out; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -742,7 +742,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback continue; } - if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, session) <= 0) break; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9730882697aa..ad39ef406851 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -563,13 +563,13 @@ flag_exist(const struct nlmsghdr *nlh) } static struct nlmsghdr * -start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, +start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); if (nlh == NULL) return NULL; @@ -1045,7 +1045,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; - unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; @@ -1093,7 +1093,7 @@ dump_last: pr_debug("reference set\n"); __ip_set_get(index); } - nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, + nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, IPSET_CMD_LIST); if (!nlh) { @@ -1226,7 +1226,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, skb2 = nlmsg_new(payload, GFP_KERNEL); if (skb2 == NULL) return -ENOMEM; - rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, + rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = ret; @@ -1241,7 +1241,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1416,7 +1416,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; @@ -1428,7 +1428,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1476,7 +1476,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; @@ -1489,7 +1489,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, nlmsg_end(skb2, nlh2); pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1525,7 +1525,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_PROTOCOL); if (!nlh2) goto nlmsg_failure; @@ -1533,7 +1533,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 767cc12da0fe..0f924bf19c2b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2939,7 +2939,7 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_SERVICE); if (!hdr) @@ -3128,7 +3128,7 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DEST); if (!hdr) @@ -3257,7 +3257,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, struct netlink_callback *cb) { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DAEMON); if (!hdr) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index e7be79e640de..de9781b6464f 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -61,7 +61,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) goto out_unlock; item.ct = ct; - item.pid = 0; + item.portid = 0; item.report = 0; ret = notify->fcn(events | missed, &item); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a205bd6ce294..59770039b825 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -418,16 +418,16 @@ nla_put_failure: } static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -604,7 +604,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -680,7 +680,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, net, item->pid, group, item->report, + err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -757,7 +757,7 @@ restart: #endif rcu_read_lock(); res = - ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct); @@ -961,7 +961,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else { /* Flush the whole table */ nf_conntrack_flush_report(net, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); return 0; } @@ -985,7 +985,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (del_timer(&ct->timeout)) { if (nf_conntrack_event_report(IPCT_DESTROY, ct, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)) < 0) { nf_ct_delete_from_lists(ct); /* we failed to report the event, try later */ @@ -1069,14 +1069,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -1616,7 +1616,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK) | events, - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); } @@ -1638,7 +1638,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK), - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } } @@ -1648,15 +1648,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1708,7 +1708,7 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) st = per_cpu_ptr(net->ct.stat, cpu); if (ctnetlink_ct_stat_cpu_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; @@ -1734,16 +1734,16 @@ ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1776,14 +1776,14 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), sock_net(skb->sk)); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2073,15 +2073,15 @@ nla_put_failure: } static int -ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2132,7 +2132,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2147,7 +2147,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC); + nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); return 0; nla_put_failure: @@ -2190,7 +2190,7 @@ restart: cb->args[1] = 0; } if (ctnetlink_exp_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { @@ -2283,14 +2283,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2344,7 +2344,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* after list removal, usage count == 1 */ spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid, + nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2366,7 +2366,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (!strcmp(m_help->helper->name, name) && del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2382,7 +2382,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2447,7 +2447,7 @@ static int ctnetlink_create_expect(struct net *net, u16 zone, const struct nlattr * const cda[], u_int8_t u3, - u32 pid, int report) + u32 portid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -2560,7 +2560,7 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (err < 0) goto err_out; } - err = nf_ct_expect_related_report(exp, pid, report); + err = nf_ct_expect_related_report(exp, portid, report); err_out: nf_ct_expect_put(exp); out: @@ -2603,7 +2603,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_CREATE) { err = ctnetlink_create_expect(net, zone, cda, u3, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); } return err; @@ -2618,15 +2618,15 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu, +ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2665,7 +2665,7 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; st = per_cpu_ptr(net->ct.stat, cpu); - if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index d7ec92879071..589d686f0b4c 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -91,16 +91,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, } static int -nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_acct *acct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; event |= NFNL_SUBSYS_ACCT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -150,7 +150,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur) < 0) { @@ -195,7 +195,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur); @@ -203,7 +203,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 32a1ba3f3e27..3678073360a3 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -395,16 +395,16 @@ nla_put_failure: } static int -nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event |= NFNL_SUBSYS_CTHELPER << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -468,7 +468,7 @@ restart: cb->args[1] = 0; } if (nfnl_cthelper_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur) < 0) { @@ -538,7 +538,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur); @@ -547,7 +547,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index cdecbc8fe965..8847b4d8be06 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -155,16 +155,16 @@ err_proto_put: } static int -ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -222,7 +222,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { @@ -268,7 +268,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, break; } - ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid, + ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur); @@ -276,7 +276,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index be194b144297..8cb67c4dbd62 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -56,7 +56,7 @@ struct nfulnl_instance { struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ - int peer_pid; /* PID of the peer process */ + int peer_portid; /* PORTID of the peer process */ /* configurable parameters */ unsigned int flushtimeout; /* timeout until queue flush */ @@ -133,7 +133,7 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) +instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; int err; @@ -164,7 +164,7 @@ instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); inst->peer_user_ns = user_ns; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->group_num = group_num; inst->qthreshold = NFULNL_QTHRESH_DEFAULT; @@ -336,7 +336,7 @@ __nfulnl_send(struct nfulnl_instance *inst) if (!nlh) goto out; } - status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, MSG_DONTWAIT); inst->qlen = 0; @@ -703,7 +703,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -712,7 +712,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((net_eq(n->net, &init_net)) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -774,7 +774,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_lookup_get(group_num); - if (inst && inst->peer_pid != NETLINK_CB(skb).pid) { + if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; } @@ -788,7 +788,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_create(group_num, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, sk_user_ns(NETLINK_CB(skb).ssk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); @@ -947,7 +947,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", inst->group_num, - inst->peer_pid, inst->qlen, + inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, inst->flushtimeout, atomic_read(&inst->use)); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index c0496a55ad0c..b8d9995b76a8 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -44,7 +44,7 @@ struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; - int peer_pid; + int peer_portid; unsigned int queue_maxlen; unsigned int copy_range; unsigned int queue_dropped; @@ -92,7 +92,7 @@ instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int pid) +instance_create(u_int16_t queue_num, int portid) { struct nfqnl_instance *inst; unsigned int h; @@ -111,7 +111,7 @@ instance_create(u_int16_t queue_num, int pid) } inst->queue_num = queue_num; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; @@ -440,7 +440,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } spin_lock_bh(&queue->lock); - if (!queue->peer_pid) { + if (!queue->peer_portid) { err = -EINVAL; goto err_out_free_nskb; } @@ -459,7 +459,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; @@ -616,7 +616,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -625,7 +625,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((n->net == &init_net) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -650,7 +650,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) +static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) { struct nfqnl_instance *queue; @@ -658,7 +658,7 @@ static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) if (!queue) return ERR_PTR(-ENODEV); - if (queue->peer_pid != nlpid) + if (queue->peer_portid != nlportid) return ERR_PTR(-EPERM); return queue; @@ -698,7 +698,7 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -749,7 +749,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, queue = instance_lookup(queue_num); if (!queue) - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -832,7 +832,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, rcu_read_lock(); queue = instance_lookup(queue_num); - if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto err_out_unlock; } @@ -844,7 +844,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EBUSY; goto err_out_unlock; } - queue = instance_create(queue_num, NETLINK_CB(skb).pid); + queue = instance_create(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) { ret = PTR_ERR(queue); goto err_out_unlock; @@ -1016,7 +1016,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", inst->queue_num, - inst->peer_pid, inst->queue_total, + inst->peer_portid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, inst->id_sequence, 1); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 6bf878335d94..c15042f987bd 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -627,7 +627,7 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_cipsov4_gnl_family, NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL); if (data == NULL) diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 4809e2e48b02..c5384ffc6146 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -448,7 +448,7 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) struct netlbl_domhsh_walk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_LISTALL); if (data == NULL) @@ -613,7 +613,7 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, int ret_val = -ENOMEM; void *data; - data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + data = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_PROTOCOLS); if (data == NULL) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e7ff694f1049..b7944413b404 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1096,7 +1096,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, char *secctx; u32 secctx_len; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, NLM_F_MULTI, cmd); if (data == NULL) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4d348e97e131..0f2e3ad69c47 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -67,8 +67,8 @@ struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; - u32 pid; - u32 dst_pid; + u32 portid; + u32 dst_portid; u32 dst_group; u32 flags; u32 subscriptions; @@ -104,7 +104,7 @@ static inline int netlink_is_kernel(struct sock *sk) return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct nl_pid_hash { +struct nl_portid_hash { struct hlist_head *table; unsigned long rehash_time; @@ -118,7 +118,7 @@ struct nl_pid_hash { }; struct netlink_table { - struct nl_pid_hash hash; + struct nl_portid_hash hash; struct hlist_head mc_list; struct listeners __rcu *listeners; unsigned int flags; @@ -145,9 +145,9 @@ static inline u32 netlink_group_mask(u32 group) return group ? 1 << (group - 1) : 0; } -static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) +static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid) { - return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; + return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } static void netlink_destroy_callback(struct netlink_callback *cb) @@ -239,17 +239,17 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) +static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_pid_hash *hash = &nl_table[protocol].hash; + struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; struct hlist_node *node; read_lock(&nl_table_lock); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(sk, node, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) { + if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -260,7 +260,7 @@ found: return sk; } -static struct hlist_head *nl_pid_hash_zalloc(size_t size) +static struct hlist_head *nl_portid_hash_zalloc(size_t size) { if (size <= PAGE_SIZE) return kzalloc(size, GFP_ATOMIC); @@ -270,7 +270,7 @@ static struct hlist_head *nl_pid_hash_zalloc(size_t size) get_order(size)); } -static void nl_pid_hash_free(struct hlist_head *table, size_t size) +static void nl_portid_hash_free(struct hlist_head *table, size_t size) { if (size <= PAGE_SIZE) kfree(table); @@ -278,7 +278,7 @@ static void nl_pid_hash_free(struct hlist_head *table, size_t size) free_pages((unsigned long)table, get_order(size)); } -static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) +static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) { unsigned int omask, mask, shift; size_t osize, size; @@ -296,7 +296,7 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) size *= 2; } - table = nl_pid_hash_zalloc(size); + table = nl_portid_hash_zalloc(size); if (!table) return 0; @@ -311,23 +311,23 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) struct hlist_node *node, *tmp; sk_for_each_safe(sk, node, tmp, &otable[i]) - __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid)); + __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } - nl_pid_hash_free(otable, osize); + nl_portid_hash_free(otable, osize); hash->rehash_time = jiffies + 10 * 60 * HZ; return 1; } -static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) +static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) { int avg = hash->entries >> hash->shift; - if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1)) + if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) return 1; if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { - nl_pid_hash_rehash(hash, 0); + nl_portid_hash_rehash(hash, 0); return 1; } @@ -356,9 +356,9 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -366,10 +366,10 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) int len; netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, node, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid)) + if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } @@ -377,17 +377,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) goto err; err = -EBUSY; - if (nlk_sk(sk)->pid) + if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) goto err; - if (len && nl_pid_hash_dilute(hash, len)) - head = nl_pid_hashfn(hash, pid); + if (len && nl_portid_hash_dilute(hash, len)) + head = nl_portid_hashfn(hash, portid); hash->entries++; - nlk_sk(sk)->pid = pid; + nlk_sk(sk)->portid = portid; sk_add_node(sk, head); err = 0; @@ -518,11 +518,11 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->pid) { + if (nlk->portid) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, - .pid = nlk->pid, + .portid = nlk->portid, }; atomic_notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); @@ -559,24 +559,24 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; struct hlist_node *node; - s32 pid = task_tgid_vnr(current); + s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; retry: cond_resched(); netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(osk, node, head) { if (!net_eq(sock_net(osk), net)) continue; - if (nlk_sk(osk)->pid == pid) { - /* Bind collision, search negative pid values. */ - pid = rover--; + if (nlk_sk(osk)->portid == portid) { + /* Bind collision, search negative portid values. */ + portid = rover--; if (rover > -4097) rover = -4097; netlink_table_ungrab(); @@ -585,7 +585,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, net, pid); + err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) goto retry; @@ -668,8 +668,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->pid) { - if (nladdr->nl_pid != nlk->pid) + if (nlk->portid) { + if (nladdr->nl_pid != nlk->portid) return -EINVAL; } else { err = nladdr->nl_pid ? @@ -715,7 +715,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_pid = 0; + nlk->dst_portid = 0; nlk->dst_group = 0; return 0; } @@ -726,12 +726,12 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; - if (!nlk->pid) + if (!nlk->portid) err = netlink_autobind(sock); if (err == 0) { sk->sk_state = NETLINK_CONNECTED; - nlk->dst_pid = nladdr->nl_pid; + nlk->dst_portid = nladdr->nl_pid; nlk->dst_group = ffs(nladdr->nl_groups); } @@ -750,10 +750,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*nladdr); if (peer) { - nladdr->nl_pid = nlk->dst_pid; + nladdr->nl_pid = nlk->dst_portid; nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { - nladdr->nl_pid = nlk->pid; + nladdr->nl_pid = nlk->portid; nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; } return 0; @@ -772,19 +772,19 @@ static void netlink_overrun(struct sock *sk) atomic_inc(&sk->sk_drops); } -static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) +static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); + sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid) { + nlk->dst_portid != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -935,7 +935,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, } int netlink_unicast(struct sock *ssk, struct sk_buff *skb, - u32 pid, int nonblock) + u32 portid, int nonblock) { struct sock *sk; int err; @@ -945,7 +945,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb, timeo = sock_sndtimeo(ssk, nonblock); retry: - sk = netlink_getsockbypid(ssk, pid); + sk = netlink_getsockbyportid(ssk, portid); if (IS_ERR(sk)) { kfree_skb(skb); return PTR_ERR(sk); @@ -1005,7 +1005,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_broadcast_data { struct sock *exclude_sk; struct net *net; - u32 pid; + u32 portid; u32 group; int failure; int delivery_failure; @@ -1026,7 +1026,7 @@ static int do_one_broadcast(struct sock *sk, if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1078,7 +1078,7 @@ out: return 0; } -int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data) @@ -1092,7 +1092,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, info.exclude_sk = ssk; info.net = net; - info.pid = pid; + info.portid = portid; info.group = group; info.failure = 0; info.delivery_failure = 0; @@ -1130,17 +1130,17 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, } EXPORT_SYMBOL(netlink_broadcast_filtered); -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation) { - return netlink_broadcast_filtered(ssk, skb, pid, group, allocation, + return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, NULL, NULL); } EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { struct sock *exclude_sk; - u32 pid; + u32 portid; u32 group; int code; }; @@ -1156,7 +1156,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1174,14 +1174,14 @@ out: /** * netlink_set_err - report error to broadcast listeners * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() - * @pid: the PID of a process that we want to skip (if any) + * @portid: the PORTID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; @@ -1189,7 +1189,7 @@ int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) int ret = 0; info.exclude_sk = ssk; - info.pid = pid; + info.portid = portid; info.group = group; /* sk->sk_err wants a positive error value */ info.code = -code; @@ -1354,7 +1354,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *addr = msg->msg_name; - u32 dst_pid; + u32 dst_portid; u32 dst_group; struct sk_buff *skb; int err; @@ -1374,18 +1374,18 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = -EINVAL; if (addr->nl_family != AF_NETLINK) goto out; - dst_pid = addr->nl_pid; + dst_portid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if ((dst_group || dst_pid) && + if ((dst_group || dst_portid) && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) goto out; } else { - dst_pid = nlk->dst_pid; + dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; } - if (!nlk->pid) { + if (!nlk->portid) { err = netlink_autobind(sock); if (err) goto out; @@ -1399,7 +1399,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - NETLINK_CB(skb).pid = nlk->pid; + NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = siocb->scm->creds; @@ -1417,9 +1417,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (dst_group) { atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); + netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); } - err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); + err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: scm_destroy(siocb->scm); @@ -1482,7 +1482,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; addr->nl_family = AF_NETLINK; addr->nl_pad = 0; - addr->nl_pid = NETLINK_CB(skb).pid; + addr->nl_pid = NETLINK_CB(skb).portid; addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } @@ -1683,7 +1683,7 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) } struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) +__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; int size = NLMSG_LENGTH(len); @@ -1692,7 +1692,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; - nlh->nlmsg_pid = pid; + nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); @@ -1788,7 +1788,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1836,7 +1836,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) sk = netlink_lookup(sock_net(in_skb->sk), in_skb->sk->sk_protocol, - NETLINK_CB(in_skb).pid); + NETLINK_CB(in_skb).portid); if (sk) { sk->sk_err = ENOBUFS; sk->sk_error_report(sk); @@ -1845,12 +1845,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) return; } - rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); - netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); } EXPORT_SYMBOL(netlink_ack); @@ -1900,33 +1900,33 @@ EXPORT_SYMBOL(netlink_rcv_skb); * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use * @skb: notification message - * @pid: destination netlink pid for reports or 0 + * @portid: destination netlink portid for reports or 0 * @group: destination multicast group or 0 * @report: 1 to report back, 0 to disable * @flags: allocation flags */ -int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags) { int err = 0; if (group) { - int exclude_pid = 0; + int exclude_portid = 0; if (report) { atomic_inc(&skb->users); - exclude_pid = pid; + exclude_portid = portid; } /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ - err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); + err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); } if (report) { int err2; - err2 = nlmsg_unicast(sk, skb, pid); + err2 = nlmsg_unicast(sk, skb, portid); if (!err || err == -ESRCH) err = err2; } @@ -1951,7 +1951,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { @@ -1999,7 +1999,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) j = iter->hash_idx + 1; do { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); @@ -2038,7 +2038,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", s, s->sk_protocol, - nlk->pid, + nlk->portid, nlk->groups ? (u32)nlk->groups[0] : 0, sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), @@ -2183,12 +2183,12 @@ static int __init netlink_proto_init(void) order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; - hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table)); + hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) - nl_pid_hash_free(nl_table[i].hash.table, + nl_portid_hash_free(nl_table[i].hash.table, 1 * sizeof(*hash->table)); kfree(nl_table); goto panic; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 19288b7d6135..f2aabb6f4105 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -501,7 +501,7 @@ EXPORT_SYMBOL(genl_unregister_family); /** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message - * @pid: netlink pid the message is addressed to + * @portid: netlink portid the message is addressed to * @seq: sequence number (usually the one of the sender) * @family: generic netlink family * @flags: netlink message flags @@ -509,13 +509,13 @@ EXPORT_SYMBOL(genl_unregister_family); * * Returns pointer to user specific header */ -void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; - nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN + + nlh = nlmsg_put(skb, portid, seq, family->id, GENL_HDRLEN + family->hdrsize, flags); if (nlh == NULL) return NULL; @@ -585,7 +585,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } info.snd_seq = nlh->nlmsg_seq; - info.snd_pid = NETLINK_CB(skb).pid; + info.snd_portid = NETLINK_CB(skb).portid; info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; @@ -626,12 +626,12 @@ static struct genl_family genl_ctrl = { .netnsok = true, }; -static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, +static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -701,7 +701,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, +static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { @@ -709,7 +709,7 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, struct nlattr *nla_grps; struct nlattr *nest; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -756,7 +756,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++n < fams_to_skip) continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, CTRL_CMD_NEWFAMILY) < 0) goto errout; @@ -773,7 +773,7 @@ errout: } static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -782,7 +782,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_info(family, pid, seq, 0, skb, cmd); + err = ctrl_fill_info(family, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -792,7 +792,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, } static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -801,7 +801,7 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd); + err = ctrl_fill_mcgrp_info(grp, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -853,7 +853,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return -ENOENT; } - msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, + msg = ctrl_build_family_msg(res, info->snd_portid, info->snd_seq, CTRL_CMD_NEWFAMILY); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -971,7 +971,7 @@ problem: subsys_initcall(genl_init); -static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, +static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { struct sk_buff *tmp; @@ -986,7 +986,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, goto error; } err = nlmsg_multicast(prev->genl_sock, tmp, - pid, group, flags); + portid, group, flags); if (err) goto error; } @@ -994,20 +994,20 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, prev = net; } - return nlmsg_multicast(prev->genl_sock, skb, pid, group, flags); + return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); error: kfree_skb(skb); return err; } -int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group, +int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - return genlmsg_mcast(skb, pid, group, flags); + return genlmsg_mcast(skb, portid, group, flags); } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, +void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *sk = net->genl_sock; @@ -1016,6 +1016,6 @@ void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, if (nlh) report = nlmsg_report(nlh); - nlmsg_notify(sk, skb, pid, group, report, flags); + nlmsg_notify(sk, skb, portid, group, report, flags); } EXPORT_SYMBOL(genl_notify); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 4c51714ee741..4bbb70e32d1e 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -58,7 +58,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, { void *hdr; - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfc_genl_family, flags, NFC_CMD_GET_TARGET); if (!hdr) return -EMSGSIZE; @@ -165,7 +165,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) @@ -347,13 +347,13 @@ free_msg: } static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, - u32 pid, u32 seq, + u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; - hdr = genlmsg_put(msg, pid, seq, &nfc_genl_family, flags, + hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_DEVICE); if (!hdr) return -EMSGSIZE; @@ -401,7 +401,7 @@ static int nfc_genl_dump_devices(struct sk_buff *skb, while (dev) { int rc; - rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).pid, + rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; @@ -520,7 +520,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) goto out_putdev; } - rc = nfc_genl_send_device(msg, dev, info->snd_pid, info->snd_seq, + rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq, NULL, 0); if (rc < 0) goto out_free; @@ -611,7 +611,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) - dev->genl_data.poll_req_pid = info->snd_pid; + dev->genl_data.poll_req_portid = info->snd_portid; mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -645,13 +645,13 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) mutex_lock(&dev->genl_data.genl_data_mutex); - if (dev->genl_data.poll_req_pid != info->snd_pid) { + if (dev->genl_data.poll_req_portid != info->snd_portid) { rc = -EBUSY; goto out; } rc = nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; out: mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -771,15 +771,15 @@ static int nfc_genl_rcv_nl_event(struct notifier_block *this, if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) goto out; - pr_debug("NETLINK_URELEASE event from id %d\n", n->pid); + pr_debug("NETLINK_URELEASE event from id %d\n", n->portid); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { - if (dev->genl_data.poll_req_pid == n->pid) { + if (dev->genl_data.poll_req_portid == n->portid) { nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; } dev = nfc_device_iter_next(&iter); } @@ -792,7 +792,7 @@ out: void nfc_genl_data_init(struct nfc_genl_data *genl_data) { - genl_data->poll_req_pid = 0; + genl_data->poll_req_portid = 0; mutex_init(&genl_data->genl_data_mutex); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 0da687769f56..c7425f32e7db 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -286,7 +286,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.key = &OVS_CB(skb)->flow->key; upcall.userdata = NULL; - upcall.pid = 0; + upcall.portid = 0; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -296,7 +296,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - upcall.pid = nla_get_u32(a); + upcall.portid = nla_get_u32(a); break; } } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 105a0b5adc51..56327e877ed9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -225,7 +225,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; - upcall.pid = p->upcall_pid; + upcall.portid = p->upcall_portid; ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; @@ -261,7 +261,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, int dp_ifindex; int err; - if (upcall_info->pid == 0) { + if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; } @@ -395,7 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, skb_copy_and_csum_dev(skb, nla_data(nla)); - err = genlmsg_unicast(net, user_skb, upcall_info->pid); + err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: kfree_skb(nskb); @@ -780,7 +780,7 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { /* Called with genl_lock. */ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, - struct sk_buff *skb, u32 pid, + struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { const int skb_orig_len = skb->len; @@ -795,7 +795,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, sf_acts = rcu_dereference_protected(flow->sf_acts, lockdep_genl_is_held()); - ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd); + ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -879,7 +879,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, - u32 pid, u32 seq, u8 cmd) + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -888,7 +888,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; } @@ -970,7 +970,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->hash = ovs_flow_hash(&key, key_len); ovs_flow_tbl_insert(table, flow); - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); } else { @@ -1008,7 +1008,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_flow_deferred_free_acts(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); /* Clear stats. */ @@ -1020,7 +1020,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } if (!IS_ERR(reply)) - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); else @@ -1061,7 +1061,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!flow) return -ENOENT; - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1103,13 +1103,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_flow_tbl_remove(table, flow); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid, + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); ovs_flow_deferred_free(flow); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; } @@ -1137,7 +1137,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; if (ovs_flow_cmd_fill_info(flow, dp, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) break; @@ -1191,13 +1191,13 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = { }; static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, flags, cmd); if (!ovs_header) goto error; @@ -1222,7 +1222,7 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1232,7 +1232,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd); + retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1311,7 +1311,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; - parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); vport = new_vport(&parms); if (IS_ERR(vport)) { @@ -1322,7 +1322,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1332,7 +1332,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) list_add_tail(&dp->list_node, &ovs_net->dps); rtnl_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; @@ -1394,7 +1394,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return err; - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1402,7 +1402,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) __dp_destroy(dp); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1419,7 +1419,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); @@ -1428,7 +1428,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) return 0; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1444,7 +1444,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1461,7 +1461,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && - ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, + ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_DP_CMD_NEW) < 0) break; @@ -1521,13 +1521,13 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = { /* Called with RTNL lock or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -1537,7 +1537,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid)) + nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); @@ -1559,7 +1559,7 @@ error: } /* Called with RTNL lock or RCU read lock. */ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1569,7 +1569,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1661,21 +1661,21 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; - parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); ovs_dp_detach_port(vport); goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1707,9 +1707,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (err) goto exit_unlock; if (a[OVS_VPORT_ATTR_UPCALL_PID]) - vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { netlink_set_err(sock_net(skb->sk)->genl_sock, 0, @@ -1717,7 +1717,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1743,7 +1743,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1751,7 +1751,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_dp_detach_port(vport); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1773,7 +1773,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1808,7 +1808,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_VPORT_CMD_NEW) < 0) diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 129ec5480758..031dfbf37c93 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -129,7 +129,7 @@ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; const struct nlattr *userdata; - u32 pid; + u32 portid; }; static inline struct net *ovs_dp_get_net(struct datapath *dp) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 1abd9609ba78..9055dd698c70 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -125,7 +125,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->dp = parms->dp; vport->port_no = parms->port_no; - vport->upcall_pid = parms->upcall_pid; + vport->upcall_portid = parms->upcall_portid; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index c56e4836e93b..3f7961ea3c56 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -70,7 +70,7 @@ struct vport_err_stats { * @rcu: RCU callback head for deferred destruction. * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. - * @upcall_pid: The Netlink port to use for packets received on this port that + * @upcall_portid: The Netlink port to use for packets received on this port that * miss the flow table. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. @@ -83,7 +83,7 @@ struct vport { struct rcu_head rcu; u16 port_no; struct datapath *dp; - u32 upcall_pid; + u32 upcall_portid; struct hlist_node hash_node; struct hlist_node dp_hash_node; @@ -113,7 +113,7 @@ struct vport_parms { /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; - u32 upcall_pid; + u32 upcall_portid; }; /** diff --git a/net/packet/diag.c b/net/packet/diag.c index 39bce0d50df9..8db6e21c46bd 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -126,13 +126,13 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct packet_diag_msg *rp; struct packet_sock *po = pkt_sk(sk); - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); if (!nlh) return -EMSGSIZE; @@ -184,7 +184,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (num < s_num) goto next; - if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid, + if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, sock_i_ino(sk)) < 0) goto done; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 7dd762a464e5..83a8389619aa 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -33,7 +33,7 @@ /* Device address handling */ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event); + u32 portid, u32 seq, int event); void phonet_address_notify(int event, struct net_device *dev, u8 addr) { @@ -101,12 +101,12 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) } static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -148,7 +148,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; if (fill_addr(skb, pnd->netdev, addr << 2, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0) goto out; } @@ -165,12 +165,12 @@ out: /* Routes handling */ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -276,7 +276,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (addr_idx++ < addr_start_idx) continue; - if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid, + if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE)) goto out; } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e3d2c78cb52c..102761d294cb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -644,7 +644,7 @@ errout: } static int -tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, +tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, u16 flags, int event, int bind, int ref) { struct tcamsg *t; @@ -652,7 +652,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_nlmsg_trim; t = nlmsg_data(nlh); @@ -678,7 +678,7 @@ out_nlmsg_trim: } static int -act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, +act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; @@ -686,16 +686,16 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnl_unicast(skb, net, pid); + return rtnl_unicast(skb, net, portid); } static struct tc_action * -tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) +tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) { struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; @@ -762,7 +762,7 @@ static struct tc_action *create_a(int i) } static int tca_action_flush(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 pid) + struct nlmsghdr *n, u32 portid) { struct sk_buff *skb; unsigned char *b; @@ -799,7 +799,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (a->ops == NULL) goto err_out; - nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); + nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); if (!nlh) goto out_module_put; t = nlmsg_data(nlh); @@ -823,7 +823,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err > 0) return 0; @@ -841,7 +841,7 @@ noflush_out: static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int event) + u32 portid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -853,13 +853,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { if (tb[1] != NULL) - return tca_action_flush(net, tb[1], n, pid); + return tca_action_flush(net, tb[1], n, portid); else return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_get_1(tb[i], n, pid); + act = tcf_action_get_1(tb[i], n, portid); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; @@ -874,7 +874,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, } if (event == RTM_GETACTION) - ret = act_get_notify(net, pid, n, head, event); + ret = act_get_notify(net, portid, n, head, event); else { /* delete */ struct sk_buff *skb; @@ -884,7 +884,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } - if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event, + if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event, 0, 1) <= 0) { kfree_skb(skb); ret = -EINVAL; @@ -893,7 +893,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (ret > 0) return 0; @@ -905,7 +905,7 @@ err: } static int tcf_add_notify(struct net *net, struct tc_action *a, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -920,7 +920,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_kfree_skb; t = nlmsg_data(nlh); @@ -940,7 +940,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -953,7 +953,7 @@ out_kfree_skb: static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int ovr) + u32 portid, int ovr) { int ret = 0; struct tc_action *act; @@ -971,7 +971,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* dump then free all the actions after update; inserted policy * stays intact */ - ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; kfree(a); @@ -984,7 +984,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); @@ -1008,17 +1008,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_DELACTION); + portid, RTM_DELACTION); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_GETACTION); + portid, RTM_GETACTION); break; default: BUG(); @@ -1085,7 +1085,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) goto out_module_put; } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; @@ -1109,7 +1109,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_cancel(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; - if (NETLINK_CB(cb->skb).pid && ret) + if (NETLINK_CB(cb->skb).portid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); return skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc3ef5aef355..7ae02892437c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -343,13 +343,13 @@ errout: } static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, - unsigned long fh, u32 pid, u32 seq, u16 flags, int event) + unsigned long fh, u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -381,18 +381,18 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, unsigned long fh, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -407,7 +407,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, { struct tcf_dump_args *a = (void *)arg; - return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid, + return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } @@ -465,7 +465,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, + if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) break; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a08b4ab3e421..a18d975db59c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1185,7 +1185,7 @@ graft: } static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1193,7 +1193,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct gnet_dump d; struct qdisc_size_table *stab; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1248,25 +1248,25 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (old && !tc_qdisc_dump_ignore(old)) { - if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } if (new && !tc_qdisc_dump_ignore(new)) { - if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } if (skb->len) - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: @@ -1289,7 +1289,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; } else { if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1300,7 +1300,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, continue; } if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1375,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; - u32 pid = tcm->tcm_parent; + u32 portid = tcm->tcm_parent; u32 clid = tcm->tcm_handle; u32 qid = TC_H_MAJ(clid); int err; @@ -1403,8 +1403,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - if (pid != TC_H_ROOT) { - u32 qid1 = TC_H_MAJ(pid); + if (portid != TC_H_ROOT) { + u32 qid1 = TC_H_MAJ(portid); if (qid && qid1) { /* If both majors are known, they must be identical. */ @@ -1418,10 +1418,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now qid is genuine qdisc handle consistent * both with parent and child. * - * TC_H_MAJ(pid) still may be unspecified, complete it now. + * TC_H_MAJ(portid) still may be unspecified, complete it now. */ - if (pid) - pid = TC_H_MAKE(qid, pid); + if (portid) + portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) qid = dev->qdisc->handle; @@ -1439,7 +1439,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now try to get class */ if (clid == 0) { - if (pid == TC_H_ROOT) + if (portid == TC_H_ROOT) clid = qid; } else clid = TC_H_MAKE(qid, clid); @@ -1478,7 +1478,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) new_cl = cl; err = -EOPNOTSUPP; if (cops->change) - err = cops->change(q, clid, pid, tca, &new_cl); + err = cops->change(q, clid, portid, tca, &new_cl); if (err == 0) tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); @@ -1492,7 +1492,7 @@ out: static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, unsigned long cl, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1500,7 +1500,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1540,18 +1540,18 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, unsigned long cl, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) { + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -1565,7 +1565,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; - return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid, + return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 47a839df27dc..6675914dc592 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -62,7 +62,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).pid); + genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); } return 0; diff --git a/net/unix/diag.c b/net/unix/diag.c index 750b13408449..06748f108a57 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -110,12 +110,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), flags); if (!nlh) return -EMSGSIZE; @@ -159,7 +159,7 @@ out_nlmsg_trim: } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -170,7 +170,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -200,7 +200,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(req->udiag_states & (1 << sk->sk_state))) goto next; if (sk_diag_dump(sk, skb, req, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) goto done; @@ -267,7 +267,7 @@ again: if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); @@ -277,7 +277,7 @@ again: goto again; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index bc7430b54771..a343be4a52bd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -55,7 +55,7 @@ struct cfg80211_registered_device { int opencount; /* also protected by devlist_mtx */ wait_queue_head_t dev_wait; - u32 ap_beacons_nlpid; + u32 ap_beacons_nlportid; /* protected by RTNL only */ int num_running_ifaces; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 8fd0242ee169..ec7fcee5bad6 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -615,7 +615,7 @@ EXPORT_SYMBOL(cfg80211_del_sta); struct cfg80211_mgmt_registration { struct list_head list; - u32 nlpid; + u32 nlportid; int match_len; @@ -624,7 +624,7 @@ struct cfg80211_mgmt_registration { u8 match[]; }; -int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, u16 frame_type, const u8 *match_data, int match_len) { @@ -672,7 +672,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; - nreg->nlpid = snd_pid; + nreg->nlportid = snd_portid; nreg->frame_type = cpu_to_le16(frame_type); list_add(&nreg->list, &wdev->mgmt_registrations); @@ -685,7 +685,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, return err; } -void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); @@ -694,7 +694,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_lock_bh(&wdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { - if (reg->nlpid != nlpid) + if (reg->nlportid != nlportid) continue; if (rdev->ops->mgmt_frame_register) { @@ -710,8 +710,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_unlock_bh(&wdev->mgmt_registrations_lock); - if (nlpid == wdev->ap_unexpected_nlpid) - wdev->ap_unexpected_nlpid = 0; + if (nlportid == wdev->ap_unexpected_nlportid) + wdev->ap_unexpected_nlportid = 0; } void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) @@ -872,7 +872,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_mgmt(rdev, wdev, reg->nlpid, + if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, freq, sig_mbm, buf, len, gfp)) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 787aeaa902fe..34eb5c07a567 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -496,11 +496,11 @@ static bool is_valid_ie_attr(const struct nlattr *attr) } /* message building helper */ -static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq, +static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ - return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd); + return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd); } static int nl80211_msg_put_channel(struct sk_buff *msg, @@ -851,7 +851,7 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { void *hdr; @@ -866,7 +866,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -1; @@ -1267,7 +1267,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++idx <= start) continue; - if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) { idx--; @@ -1290,7 +1290,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) { + if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -1736,14 +1736,14 @@ static inline u64 wdev_id(struct wireless_dev *wdev) ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); } -static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) return -1; @@ -1807,7 +1807,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; continue; } - if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { mutex_unlock(&rdev->devlist_mtx); @@ -1838,7 +1838,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, dev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2056,7 +2056,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) break; } - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2191,7 +2191,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2769,7 +2769,7 @@ nla_put_failure: return false; } -static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -2778,7 +2778,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -2931,7 +2931,7 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; if (nl80211_send_station(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev, netdev, mac_addr, &sinfo) < 0) @@ -2977,7 +2977,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3303,7 +3303,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr); } -static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) @@ -3311,7 +3311,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *pinfoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -3389,7 +3389,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, dst, next_hop, &pinfo) < 0) @@ -3438,7 +3438,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0, dev, dst, next_hop, &pinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3679,7 +3679,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto out; @@ -3998,7 +3998,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); if (!hdr) goto put_failure; @@ -4616,7 +4616,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, ASSERT_WDEV_LOCK(wdev); - hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).pid, seq, flags, + hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); if (!hdr) return -1; @@ -4735,14 +4735,14 @@ static int nl80211_dump_scan(struct sk_buff *skb, return skb->len; } -static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, struct survey_info *survey) { void *hdr; struct nlattr *infoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) return -ENOMEM; @@ -4836,7 +4836,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, } if (nl80211_send_survey(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, &survey) < 0) @@ -5451,7 +5451,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, } while (1) { - void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).pid, + void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, NL80211_CMD_TESTMODE); struct nlattr *tmdata; @@ -5491,7 +5491,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, static struct sk_buff * __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 pid, u32 seq, gfp_t gfp) + int approxlen, u32 portid, u32 seq, gfp_t gfp) { struct sk_buff *skb; void *hdr; @@ -5501,7 +5501,7 @@ __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, if (!skb) return NULL; - hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE); + hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE); if (!hdr) { kfree_skb(skb); return NULL; @@ -5531,7 +5531,7 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, return NULL; return __cfg80211_testmode_alloc_skb(rdev, approxlen, - rdev->testmode_info->snd_pid, + rdev->testmode_info->snd_portid, rdev->testmode_info->snd_seq, GFP_KERNEL); } @@ -5867,7 +5867,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_REMAIN_ON_CHANNEL); if (IS_ERR(hdr)) { @@ -6086,7 +6086,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type, + return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); } @@ -6167,7 +6167,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_FRAME); if (IS_ERR(hdr)) { @@ -6284,7 +6284,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_POWER_SAVE); if (!hdr) { err = -ENOBUFS; @@ -6486,7 +6486,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_WOWLAN); if (!hdr) goto nla_put_failure; @@ -6760,10 +6760,10 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb, wdev->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; - if (wdev->ap_unexpected_nlpid) + if (wdev->ap_unexpected_nlportid) return -EBUSY; - wdev->ap_unexpected_nlpid = info->snd_pid; + wdev->ap_unexpected_nlportid = info->snd_portid; return 0; } @@ -6793,7 +6793,7 @@ static int nl80211_probe_client(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_PROBE_CLIENT); if (IS_ERR(hdr)) { @@ -6828,10 +6828,10 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS)) return -EOPNOTSUPP; - if (rdev->ap_beacons_nlpid) + if (rdev->ap_beacons_nlportid) return -EBUSY; - rdev->ap_beacons_nlpid = info->snd_pid; + rdev->ap_beacons_nlportid = info->snd_portid; return 0; } @@ -7628,12 +7628,12 @@ static int nl80211_add_scan_req(struct sk_buff *msg, static int nl80211_send_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u32 pid, u32 seq, int flags, + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -7657,11 +7657,11 @@ static int nl80211_send_sched_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct net_device *netdev, - u32 pid, u32 seq, int flags, u32 cmd) + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -8370,9 +8370,9 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct sk_buff *msg; void *hdr; int err; - u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid); + u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); - if (!nlpid) + if (!nlportid) return false; msg = nlmsg_new(100, gfp); @@ -8396,7 +8396,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, return true; } - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return true; nla_put_failure: @@ -8420,7 +8420,7 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev, } int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlportid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp) { @@ -8449,7 +8449,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, genlmsg_end(msg, hdr); - return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); nla_put_failure: genlmsg_cancel(msg, hdr); @@ -8804,9 +8804,9 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; - u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid); + u32 nlportid = ACCESS_ONCE(rdev->ap_beacons_nlportid); - if (!nlpid) + if (!nlportid) return; msg = nlmsg_new(len + 100, gfp); @@ -8829,7 +8829,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, genlmsg_end(msg, hdr); - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return; nla_put_failure: @@ -8853,9 +8853,9 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) - cfg80211_mlme_unregister_socket(wdev, notify->pid); - if (rdev->ap_beacons_nlpid == notify->pid) - rdev->ap_beacons_nlpid = 0; + cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (rdev->ap_beacons_nlportid == notify->portid) + rdev->ap_beacons_nlportid = 0; } rcu_read_unlock(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7856c33898fa..30edad44e7fc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -166,7 +166,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) { @@ -1674,13 +1674,13 @@ void km_state_notify(struct xfrm_state *x, const struct km_event *c) EXPORT_SYMBOL(km_policy_notify); EXPORT_SYMBOL(km_state_notify); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid) +void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); @@ -1726,13 +1726,13 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) } EXPORT_SYMBOL(km_new_mapping); -void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 354070adb5ef..b313d932d678 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -603,7 +603,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, } c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -676,7 +676,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -826,7 +826,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -904,7 +904,7 @@ static inline size_t xfrm_spdinfo_msgsize(void) } static int build_spdinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -913,7 +913,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -946,17 +946,17 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -967,7 +967,7 @@ static inline size_t xfrm_sadinfo_msgsize(void) } static int build_sadinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -975,7 +975,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -1003,17 +1003,17 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1033,7 +1033,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1114,7 +1114,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1401,7 +1401,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); xfrm_pol_put(xp); @@ -1486,7 +1486,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -1621,7 +1621,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, err = PTR_ERR(resp_skb); } else { err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); } } else { uid_t loginuid = audit_get_loginuid(current); @@ -1638,7 +1638,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); } @@ -1668,7 +1668,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_state_notify(NULL, &c); @@ -1695,7 +1695,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) return -EMSGSIZE; @@ -1777,11 +1777,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, spin_lock_bh(&x->lock); c.data.aevent = p->flags; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1827,7 +1827,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.data.aevent = XFRM_AE_CU; km_state_notify(x, &c); err = 0; @@ -1862,7 +1862,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_policy_notify(NULL, 0, &c); return 0; @@ -2370,7 +2370,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2429,7 +2429,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); if (nlh == NULL) { kfree_skb(skb); return -EMSGSIZE; @@ -2497,7 +2497,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2696,7 +2696,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2756,7 +2756,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2810,7 +2810,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; -- cgit v1.2.3-71-gd317 From e23eb920b0f3978687c497de2ac3eb9e281dab32 Mon Sep 17 00:00:00 2001 From: Peter Moody Date: Thu, 14 Jun 2012 10:04:35 -0700 Subject: audit: export audit_log_task_info At the suggestion of eparis@redhat.com, move this chunk of task logging from audit_log_exit to audit_log_task_info and export this function so it's usuable elsewhere in the kernel. This patch is against git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity#next-ima-appraisal Changelog v2: - add empty audit_log_task_info if CONFIG_AUDITSYSCALL isn't set. Changelog v1: - Initial post. Signed-off-by: Peter Moody Signed-off-by: Mimi Zohar --- include/linux/audit.h | 2 ++ kernel/auditsc.c | 74 +++++++++++++++++++++++---------------------------- 2 files changed, 36 insertions(+), 40 deletions(-) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 36abf2aa7e68..2a5073cf548a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -529,6 +529,7 @@ extern int audit_set_loginuid(uid_t loginuid); #define audit_get_loginuid(t) ((t)->loginuid) #define audit_get_sessionid(t) ((t)->sessionid) extern void audit_log_task_context(struct audit_buffer *ab); +extern void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk); extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern int __audit_bprm(struct linux_binprm *bprm); @@ -640,6 +641,7 @@ extern int audit_signals; #define audit_get_loginuid(t) (-1) #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) +#define audit_log_task_info(b, t) do { ; } while (0) #define audit_ipc_obj(i) ((void)0) #define audit_ipc_set_perm(q,u,g,m) ((void)0) #define audit_bprm(p) ({ 0; }) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 4b96415527b8..37f52f27828d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1154,13 +1154,38 @@ error_path: EXPORT_SYMBOL(audit_log_task_context); -static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) +void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { + const struct cred *cred; char name[sizeof(tsk->comm)]; struct mm_struct *mm = tsk->mm; struct vm_area_struct *vma; + char *tty; + + if (!ab) + return; /* tsk == current */ + cred = current_cred(); + + spin_lock_irq(&tsk->sighand->siglock); + if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) + tty = tsk->signal->tty->name; + else + tty = "(none)"; + spin_unlock_irq(&tsk->sighand->siglock); + + + audit_log_format(ab, + " ppid=%ld pid=%d auid=%u uid=%u gid=%u" + " euid=%u suid=%u fsuid=%u" + " egid=%u sgid=%u fsgid=%u ses=%u tty=%s", + sys_getppid(), + tsk->pid, + tsk->loginuid, cred->uid, cred->gid, + cred->euid, cred->suid, cred->fsuid, + cred->egid, cred->sgid, cred->fsgid, + tsk->sessionid, tty); get_task_comm(name, tsk); audit_log_format(ab, " comm="); @@ -1183,6 +1208,8 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk audit_log_task_context(ab); } +EXPORT_SYMBOL(audit_log_task_info); + static int audit_log_pid_context(struct audit_context *context, pid_t pid, uid_t auid, uid_t uid, unsigned int sessionid, u32 sid, char *comm) @@ -1585,26 +1612,12 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { - const struct cred *cred; int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; - const char *tty; struct audit_names *n; /* tsk == current */ - context->pid = tsk->pid; - if (!context->ppid) - context->ppid = sys_getppid(); - cred = current_cred(); - context->uid = cred->uid; - context->gid = cred->gid; - context->euid = cred->euid; - context->suid = cred->suid; - context->fsuid = cred->fsuid; - context->egid = cred->egid; - context->sgid = cred->sgid; - context->fsgid = cred->fsgid; context->personality = tsk->personality; ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); @@ -1619,32 +1632,13 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", context->return_code); - spin_lock_irq(&tsk->sighand->siglock); - if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) - tty = tsk->signal->tty->name; - else - tty = "(none)"; - spin_unlock_irq(&tsk->sighand->siglock); - audit_log_format(ab, - " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" - " ppid=%d pid=%d auid=%u uid=%u gid=%u" - " euid=%u suid=%u fsuid=%u" - " egid=%u sgid=%u fsgid=%u tty=%s ses=%u", - context->argv[0], - context->argv[1], - context->argv[2], - context->argv[3], - context->name_count, - context->ppid, - context->pid, - tsk->loginuid, - context->uid, - context->gid, - context->euid, context->suid, context->fsuid, - context->egid, context->sgid, context->fsgid, tty, - tsk->sessionid); - + " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", + context->argv[0], + context->argv[1], + context->argv[2], + context->argv[3], + context->name_count); audit_log_task_info(ab, tsk); audit_log_key(ab, context->filterkey); -- cgit v1.2.3-71-gd317 From be45c900fdc2c66baad5a7703fb8136991d88aeb Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 13 Sep 2012 09:50:55 +0200 Subject: cgroup: Remove CGROUP_BUILTIN_SUBSYS_COUNT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CGROUP_BUILTIN_SUBSYS_COUNT is used as start index or stop index when looping over the subsys array looking either at the builtin or the module subsystems. Since all the builtin subsystems have an id which is lower then CGROUP_BUILTIN_SUBSYS_COUNT we know that any module will have an id larger than CGROUP_BUILTIN_SUBSYS_COUNT. In short the ids are sorted. We are about to change id assignment to happen only at compile time later in this series. That means we can't rely on the above trick since all ids will always be defined at compile time. Furthermore, ordering the builtin subsystems and the module subsystems is not really necessary. So we need a different way to know which subsystem is a builtin or a module one. We can use the subsys[]->module pointer for this. Any place where we need to know if a subsys is module we just check for the pointer. If it is NULL then the subsystem is a builtin one. With this we are able to drop the CGROUP_BUILTIN_SUBSYS_COUNT enum. Though we need to introduce a temporary placeholder so that we don't get a compilation error when only CONFIG_CGROUP is selected and no single controller. An empty enum definition is not valid. Later in this series we are able to remove the placeholder again. And with this change we get a fix for this: kernel/cgroup.c: In function ‘cgroup_load_subsys’: kernel/cgroup.c:4326:38: warning: array subscript is below array bounds [-Warray-bounds] when CONFIG_CGROUP=y and no built in controller was enabled. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: Gao feng Cc: Jamal Hadi Salim Cc: John Fastabend Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 68 +++++++++++++++++++++++++++++--------------------- 2 files changed, 41 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 145901f5ef99..1916cdb071dc 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -48,7 +48,7 @@ extern const struct file_operations proc_cgroup_operations; #define SUBSYS(_x) _x ## _subsys_id, enum cgroup_subsys_id { #include - CGROUP_BUILTIN_SUBSYS_COUNT + __CGROUP_TEMPORARY_PLACEHOLDER }; #undef SUBSYS /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ced292d720b9..1b18090269ad 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -88,7 +88,7 @@ static DEFINE_MUTEX(cgroup_root_mutex); /* * Generate an array of cgroup subsystem pointers. At boot time, this is - * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are + * populated with the built in subsystems, and modular subsystems are * registered after that. The mutable section of this array is protected by * cgroup_mutex. */ @@ -1321,7 +1321,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) * take duplicate reference counts on a subsystem that's already used, * but rebind_subsystems handles this case. */ - for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL << i; if (!(bit & opts->subsys_mask)) @@ -1337,7 +1337,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) * raced with a module_delete call, and to the user this is * essentially a "subsystem doesn't exist" case. */ - for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) { + for (i--; i >= 0; i--) { /* drop refcounts only on the ones we took */ unsigned long bit = 1UL << i; @@ -1354,7 +1354,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) static void drop_parsed_module_refcounts(unsigned long subsys_mask) { int i; - for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL << i; if (!(bit & subsys_mask)) @@ -4442,8 +4442,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) * since cgroup_init_subsys will have already taken care of it. */ if (ss->module == NULL) { - /* a few sanity checks */ - BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT); + /* a sanity check */ BUG_ON(subsys[ss->subsys_id] != ss); return 0; } @@ -4457,7 +4456,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) */ mutex_lock(&cgroup_mutex); /* find the first empty slot in the array */ - for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { if (subsys[i] == NULL) break; } @@ -4560,7 +4559,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) mutex_lock(&cgroup_mutex); /* deassign the subsys_id */ - BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT); subsys[ss->subsys_id] = NULL; /* remove subsystem from rootnode's list of subsystems */ @@ -4623,10 +4621,13 @@ int __init cgroup_init_early(void) for (i = 0; i < CSS_SET_TABLE_SIZE; i++) INIT_HLIST_HEAD(&css_set_table[i]); - /* at bootup time, we don't worry about modular subsystems */ - for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; + /* at bootup time, we don't worry about modular subsystems */ + if (!ss || ss->module) + continue; + BUG_ON(!ss->name); BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN); BUG_ON(!ss->create); @@ -4659,9 +4660,12 @@ int __init cgroup_init(void) if (err) return err; - /* at bootup time, we don't worry about modular subsystems */ - for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; + + /* at bootup time, we don't worry about modular subsystems */ + if (!ss || ss->module) + continue; if (!ss->early_init) cgroup_init_subsys(ss); if (ss->use_id) @@ -4856,13 +4860,16 @@ void cgroup_fork_callbacks(struct task_struct *child) { if (need_forkexit_callback) { int i; - /* - * forkexit callbacks are only supported for builtin - * subsystems, and the builtin section of the subsys array is - * immutable, so we don't need to lock the subsys array here. - */ - for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; + + /* + * forkexit callbacks are only supported for + * builtin subsystems. + */ + if (!ss || ss->module) + continue; + if (ss->fork) ss->fork(child); } @@ -4967,12 +4974,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) tsk->cgroups = &init_css_set; if (run_callbacks && need_forkexit_callback) { - /* - * modular subsystems can't use callbacks, so no need to lock - * the subsys array - */ - for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; + + /* modular subsystems can't use callbacks */ + if (!ss || ss->module) + continue; + if (ss->exit) { struct cgroup *old_cgrp = rcu_dereference_raw(cg->subsys[i])->cgroup; @@ -5158,13 +5166,17 @@ static int __init cgroup_disable(char *str) while ((token = strsep(&str, ",")) != NULL) { if (!*token) continue; - /* - * cgroup_disable, being at boot time, can't know about module - * subsystems, so we don't worry about them. - */ - for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; + /* + * cgroup_disable, being at boot time, can't + * know about module subsystems, so we don't + * worry about them. + */ + if (!ss || ss->module) + continue; + if (!strcmp(token, ss->name)) { ss->disabled = 1; printk(KERN_INFO "Disabling %s control group" -- cgit v1.2.3-71-gd317 From 5fc0b02544b3b9bd3db5a8156b5f3e7350f8e797 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:05 +0200 Subject: cgroup: Wrap subsystem selection macro Before we are able to define all subsystem ids at compile time we need a more fine grained control what gets defined when we include cgroup_subsys.h. For example we define the enums for the subsystems or to declare for struct cgroup_subsys (builtin subsystem) by including cgroup_subsys.h and defining SUBSYS accordingly. Currently, the decision if a subsys is used is defined inside the header by testing if CONFIG_*=y is true. By moving this test outside of cgroup_subsys.h we are able to control it on the include level. This is done by introducing IS_SUBSYS_ENABLED which then is defined according the task, e.g. is CONFIG_*=y or CONFIG_*=m. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: Gao feng Cc: Jamal Hadi Salim Cc: John Fastabend Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- include/linux/cgroup.h | 4 ++++ include/linux/cgroup_subsys.h | 24 ++++++++++++------------ kernel/cgroup.c | 1 + 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1916cdb071dc..a5ab5651441b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -46,10 +46,12 @@ extern const struct file_operations proc_cgroup_operations; /* Define the enumeration of all builtin cgroup subsystems */ #define SUBSYS(_x) _x ## _subsys_id, +#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) enum cgroup_subsys_id { #include __CGROUP_TEMPORARY_PLACEHOLDER }; +#undef IS_SUBSYS_ENABLED #undef SUBSYS /* * This define indicates the maximum number of subsystems that can be loaded @@ -528,7 +530,9 @@ struct cgroup_subsys { }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; +#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) #include +#undef IS_SUBSYS_ENABLED #undef SUBSYS static inline struct cgroup_subsys_state *cgroup_subsys_state( diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index dfae957398c3..f204a7a9cf38 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -7,73 +7,73 @@ /* */ -#ifdef CONFIG_CPUSETS +#if IS_SUBSYS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif /* */ -#ifdef CONFIG_CGROUP_DEBUG +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG) SUBSYS(debug) #endif /* */ -#ifdef CONFIG_CGROUP_SCHED +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED) SUBSYS(cpu_cgroup) #endif /* */ -#ifdef CONFIG_CGROUP_CPUACCT +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT) SUBSYS(cpuacct) #endif /* */ -#ifdef CONFIG_MEMCG +#if IS_SUBSYS_ENABLED(CONFIG_MEMCG) SUBSYS(mem_cgroup) #endif /* */ -#ifdef CONFIG_CGROUP_DEVICE +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE) SUBSYS(devices) #endif /* */ -#ifdef CONFIG_CGROUP_FREEZER +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER) SUBSYS(freezer) #endif /* */ -#ifdef CONFIG_NET_CLS_CGROUP +#if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP) SUBSYS(net_cls) #endif /* */ -#ifdef CONFIG_BLK_CGROUP +#if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP) SUBSYS(blkio) #endif /* */ -#ifdef CONFIG_CGROUP_PERF +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF) SUBSYS(perf) #endif /* */ -#ifdef CONFIG_NETPRIO_CGROUP +#if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP) SUBSYS(net_prio) #endif /* */ -#ifdef CONFIG_CGROUP_HUGETLB +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB) SUBSYS(hugetlb) #endif diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1b18090269ad..95e9f3fdb729 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -93,6 +93,7 @@ static DEFINE_MUTEX(cgroup_root_mutex); * cgroup_mutex. */ #define SUBSYS(_x) &_x ## _subsys, +#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = { #include }; -- cgit v1.2.3-71-gd317 From 80f4c87774721e864d5a5a1f7aca3e95fd90e194 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:06 +0200 Subject: cgroup: Do not depend on a given order when populating the subsys array The *_subsys_id will be used as index to access the subsys. Therefore we need to care we populate the subsystem at the correct position by using designated initialization. With this change we are able to interleave builtin and modules in the subsys array. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: Gao feng Cc: Jamal Hadi Salim Cc: John Fastabend Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 95e9f3fdb729..2bfc78f531b6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -92,7 +92,7 @@ static DEFINE_MUTEX(cgroup_root_mutex); * registered after that. The mutable section of this array is protected by * cgroup_mutex. */ -#define SUBSYS(_x) &_x ## _subsys, +#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys, #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = { #include -- cgit v1.2.3-71-gd317 From 8a8e04df4747661daaee77e98e102d99c9e09b98 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 Sep 2012 16:12:07 +0200 Subject: cgroup: Assign subsystem IDs during compile time WARNING: With this change it is impossible to load external built controllers anymore. In case where CONFIG_NETPRIO_CGROUP=m and CONFIG_NET_CLS_CGROUP=m is set, corresponding subsys_id should also be a constant. Up to now, net_prio_subsys_id and net_cls_subsys_id would be of the type int and the value would be assigned during runtime. By switching the macro definition IS_SUBSYS_ENABLED from IS_BUILTIN to IS_ENABLED, all *_subsys_id will have constant value. That means we need to remove all the code which assumes a value can be assigned to net_prio_subsys_id and net_cls_subsys_id. A close look is necessary on the RCU part which was introduces by following patch: commit f845172531fb7410c7fb7780b1a6e51ee6df7d52 Author: Herbert Xu Mon May 24 09:12:34 2010 Committer: David S. Miller Mon May 24 09:12:34 2010 cls_cgroup: Store classid in struct sock Tis code was added to init_cgroup_cls() /* We can't use rcu_assign_pointer because this is an int. */ smp_wmb(); net_cls_subsys_id = net_cls_subsys.subsys_id; respectively to exit_cgroup_cls() net_cls_subsys_id = -1; synchronize_rcu(); and in module version of task_cls_classid() rcu_read_lock(); id = rcu_dereference(net_cls_subsys_id); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; rcu_read_unlock(); Without an explicit explaination why the RCU part is needed. (The rcu_deference was fixed by exchanging it to rcu_derefence_index_check() in a later commit, but that is a minor detail.) So here is my pondering why it was introduced and why it safe to remove it now. Note that this code was copied over to net_prio the reasoning holds for that subsystem too. The idea behind the RCU use for net_cls_subsys_id is to make sure we get a valid pointer back from task_subsys_state(). task_subsys_state() is just blindly accessing the subsys array and returning the pointer. Obviously, passing in -1 as id into task_subsys_state() returns an invalid value (out of lower bound). So this code makes sure that only after module is loaded and the subsystem registered, the id is assigned. Before unregistering the module all old readers must have left the critical section. This is done by assigning -1 to the id and issuing a synchronized_rcu(). Any new readers wont call task_subsys_state() anymore and therefore it is safe to unregister the subsystem. The new code relies on the same trick, but it looks at the subsys pointer return by task_subsys_state() (remember the id is constant and therefore we allways have a valid index into the subsys array). No precautions need to be taken during module loading module. Eventually, all CPUs will get a valid pointer back from task_subsys_state() because rebind_subsystem() which is called after the module init() function will assigned subsys[net_cls_subsys_id] the newly loaded module subsystem pointer. When the subsystem is about to be removed, rebind_subsystem() will called before the module exit() function. In this case, rebind_subsys() will assign subsys[net_cls_subsys_id] a NULL pointer and then it calls synchronize_rcu(). All old readers have left by then the critical section. Any new reader wont access the subsystem anymore. At this point we are safe to unregister the subsystem. No synchronize_rcu() call is needed. Signed-off-by: Daniel Wagner Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Neil Horman Cc: "David S. Miller" Cc: "Paul E. McKenney" Cc: Andrew Morton Cc: Eric Dumazet Cc: Gao feng Cc: Glauber Costa Cc: Herbert Xu Cc: Jamal Hadi Salim Cc: John Fastabend Cc: Kamezawa Hiroyuki Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org --- include/linux/cgroup.h | 2 +- include/net/cls_cgroup.h | 12 ++++-------- include/net/netprio_cgroup.h | 18 +++++------------- kernel/cgroup.c | 22 +++------------------- net/core/netprio_cgroup.c | 11 ----------- net/core/sock.c | 11 ----------- net/sched/cls_cgroup.c | 13 ------------- 7 files changed, 13 insertions(+), 76 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a5ab5651441b..018f819405c8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -46,7 +46,7 @@ extern const struct file_operations proc_cgroup_operations; /* Define the enumeration of all builtin cgroup subsystems */ #define SUBSYS(_x) _x ## _subsys_id, -#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) +#define IS_SUBSYS_ENABLED(option) IS_ENABLED(option) enum cgroup_subsys_id { #include __CGROUP_TEMPORARY_PLACEHOLDER diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 9bd5db9e10ba..b6a6eeb3905f 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -42,22 +42,18 @@ static inline u32 task_cls_classid(struct task_struct *p) return classid; } #elif IS_MODULE(CONFIG_NET_CLS_CGROUP) - -extern int net_cls_subsys_id; - static inline u32 task_cls_classid(struct task_struct *p) { - int id; + struct cgroup_subsys_state *css; u32 classid = 0; if (in_interrupt()) return 0; rcu_read_lock(); - id = rcu_dereference_index_check(net_cls_subsys_id, - rcu_read_lock_held()); - if (id >= 0) - classid = container_of(task_subsys_state(p, id), + css = task_subsys_state(p, net_cls_subsys_id); + if (css) + classid = container_of(css, struct cgroup_cls_state, css)->classid; rcu_read_unlock(); diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index b202de882489..2760f4f4ae9b 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -30,10 +30,6 @@ struct cgroup_netprio_state { u32 prioidx; }; -#ifndef CONFIG_NETPRIO_CGROUP -extern int net_prio_subsys_id; -#endif - extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) @@ -55,18 +51,14 @@ static inline u32 task_netprioidx(struct task_struct *p) static inline u32 task_netprioidx(struct task_struct *p) { - struct cgroup_netprio_state *state; - int subsys_id; + struct cgroup_subsys_state *css; u32 idx = 0; rcu_read_lock(); - subsys_id = rcu_dereference_index_check(net_prio_subsys_id, - rcu_read_lock_held()); - if (subsys_id >= 0) { - state = container_of(task_subsys_state(p, subsys_id), - struct cgroup_netprio_state, css); - idx = state->prioidx; - } + css = task_subsys_state(p, net_prio_subsys_id); + if (css) + idx = container_of(css, + struct cgroup_netprio_state, css)->prioidx; rcu_read_unlock(); return idx; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2bfc78f531b6..485cc1487ea2 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4451,24 +4451,8 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) /* init base cftset */ cgroup_init_cftsets(ss); - /* - * need to register a subsys id before anything else - for example, - * init_cgroup_css needs it. - */ mutex_lock(&cgroup_mutex); - /* find the first empty slot in the array */ - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - if (subsys[i] == NULL) - break; - } - if (i == CGROUP_SUBSYS_COUNT) { - /* maximum number of subsystems already registered! */ - mutex_unlock(&cgroup_mutex); - return -EBUSY; - } - /* assign ourselves the subsys_id */ - ss->subsys_id = i; - subsys[i] = ss; + subsys[ss->subsys_id] = ss; /* * no ss->create seems to need anything important in the ss struct, so @@ -4477,7 +4461,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) css = ss->create(dummytop); if (IS_ERR(css)) { /* failure case - need to deassign the subsys[] slot. */ - subsys[i] = NULL; + subsys[ss->subsys_id] = NULL; mutex_unlock(&cgroup_mutex); return PTR_ERR(css); } @@ -4493,7 +4477,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) if (ret) { dummytop->subsys[ss->subsys_id] = NULL; ss->destroy(dummytop); - subsys[i] = NULL; + subsys[ss->subsys_id] = NULL; mutex_unlock(&cgroup_mutex); return ret; } diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..6bc460c38e4f 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -326,9 +326,7 @@ struct cgroup_subsys net_prio_subsys = { .create = cgrp_create, .destroy = cgrp_destroy, .attach = net_prio_attach, -#ifdef CONFIG_NETPRIO_CGROUP .subsys_id = net_prio_subsys_id, -#endif .base_cftypes = ss_files, .module = THIS_MODULE }; @@ -366,10 +364,6 @@ static int __init init_cgroup_netprio(void) ret = cgroup_load_subsys(&net_prio_subsys); if (ret) goto out; -#ifndef CONFIG_NETPRIO_CGROUP - smp_wmb(); - net_prio_subsys_id = net_prio_subsys.subsys_id; -#endif register_netdevice_notifier(&netprio_device_notifier); @@ -386,11 +380,6 @@ static void __exit exit_cgroup_netprio(void) cgroup_unload_subsys(&net_prio_subsys); -#ifndef CONFIG_NETPRIO_CGROUP - net_prio_subsys_id = -1; - synchronize_rcu(); -#endif - rtnl_lock(); for_each_netdev(&init_net, dev) { old = rtnl_dereference(dev->priomap); diff --git a/net/core/sock.c b/net/core/sock.c index ca3eaee66056..47b4ac048e88 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -326,17 +326,6 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(__sk_backlog_rcv); -#if defined(CONFIG_CGROUPS) -#if !defined(CONFIG_NET_CLS_CGROUP) -int net_cls_subsys_id = -1; -EXPORT_SYMBOL_GPL(net_cls_subsys_id); -#endif -#if !defined(CONFIG_NETPRIO_CGROUP) -int net_prio_subsys_id = -1; -EXPORT_SYMBOL_GPL(net_prio_subsys_id); -#endif -#endif - static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { struct timeval tv; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7743ea8d1d38..67cf90d962f4 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -77,9 +77,7 @@ struct cgroup_subsys net_cls_subsys = { .name = "net_cls", .create = cgrp_create, .destroy = cgrp_destroy, -#ifdef CONFIG_NET_CLS_CGROUP .subsys_id = net_cls_subsys_id, -#endif .base_cftypes = ss_files, .module = THIS_MODULE, }; @@ -283,12 +281,6 @@ static int __init init_cgroup_cls(void) if (ret) goto out; -#ifndef CONFIG_NET_CLS_CGROUP - /* We can't use rcu_assign_pointer because this is an int. */ - smp_wmb(); - net_cls_subsys_id = net_cls_subsys.subsys_id; -#endif - ret = register_tcf_proto_ops(&cls_cgroup_ops); if (ret) cgroup_unload_subsys(&net_cls_subsys); @@ -301,11 +293,6 @@ static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); -#ifndef CONFIG_NET_CLS_CGROUP - net_cls_subsys_id = -1; - synchronize_rcu(); -#endif - cgroup_unload_subsys(&net_cls_subsys); } -- cgit v1.2.3-71-gd317 From 8c7f6edbda01f1b1a2e60ad61f14fe38023e433b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Sep 2012 12:20:58 -0700 Subject: cgroup: mark subsystems with broken hierarchy support and whine if cgroups are nested for them Currently, cgroup hierarchy support is a mess. cpu related subsystems behave correctly - configuration, accounting and control on a parent properly cover its children. blkio and freezer completely ignore hierarchy and treat all cgroups as if they're directly under the root cgroup. Others show yet different behaviors. These differing interpretations of cgroup hierarchy make using cgroup confusing and it impossible to co-mount controllers into the same hierarchy and obtain sane behavior. Eventually, we want full hierarchy support from all subsystems and probably a unified hierarchy. Users using separate hierarchies expecting completely different behaviors depending on the mounted subsystem is deterimental to making any progress on this front. This patch adds cgroup_subsys.broken_hierarchy and sets it to %true for controllers which are lacking in hierarchy support. The goal of this patch is two-fold. * Move users away from using hierarchy on currently non-hierarchical subsystems, so that implementing proper hierarchy support on those doesn't surprise them. * Keep track of which controllers are broken how and nudge the subsystems to implement proper hierarchy support. For now, start with a single warning message. We can whine louder later on. v2: Fixed a typo spotted by Michal. Warning message updated. v3: Updated memcg part so that it doesn't generate warning in the cases where .use_hierarchy=false doesn't make the behavior different from root.use_hierarchy=true. Fixed a typo spotted by Glauber. v4: Check ->broken_hierarchy after cgroup creation is complete so that ->create() can affect the result per Michal. Dropped unnecessary memcg root handling per Michal. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Acked-by: Li Zefan Acked-by: Serge E. Hallyn Cc: Glauber Costa Cc: Peter Zijlstra Cc: Paul Turner Cc: Johannes Weiner Cc: Thomas Graf Cc: Vivek Goyal Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Neil Horman Cc: Aneesh Kumar K.V --- block/blk-cgroup.c | 8 ++++++++ include/linux/cgroup.h | 15 +++++++++++++++ kernel/cgroup.c | 12 +++++++++++- kernel/cgroup_freezer.c | 8 ++++++++ kernel/events/core.c | 7 +++++++ mm/memcontrol.c | 7 +++++++ net/core/netprio_cgroup.c | 12 +++++++++++- net/sched/cls_cgroup.c | 9 +++++++++ security/device_cgroup.c | 9 +++++++++ 9 files changed, 85 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f3b44a65fc7a..cafcd7431189 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -737,6 +737,14 @@ struct cgroup_subsys blkio_subsys = { .subsys_id = blkio_subsys_id, .base_cftypes = blkcg_files, .module = THIS_MODULE, + + /* + * blkio subsystem is utterly broken in terms of hierarchy support. + * It treats all cgroups equally regardless of where they're + * located in the hierarchy - all cgroups are treated as if they're + * right below the root. Fix it and remove the following. + */ + .broken_hierarchy = true, }; EXPORT_SYMBOL_GPL(blkio_subsys); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c90eaa803440..68e8df70487e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -496,6 +496,21 @@ struct cgroup_subsys { */ bool __DEPRECATED_clear_css_refs; + /* + * If %false, this subsystem is properly hierarchical - + * configuration, resource accounting and restriction on a parent + * cgroup cover those of its children. If %true, hierarchy support + * is broken in some ways - some subsystems ignore hierarchy + * completely while others are only implemented half-way. + * + * It's now disallowed to create nested cgroups if the subsystem is + * broken and cgroup core will emit a warning message on such + * cases. Eventually, all subsystems will be made properly + * hierarchical and this will go away. + */ + bool broken_hierarchy; + bool warned_broken_hierarchy; + #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 79818507e444..b7d9606b17d7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3954,8 +3954,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); for_each_subsys(root, ss) { - struct cgroup_subsys_state *css = ss->create(cgrp); + struct cgroup_subsys_state *css; + css = ss->create(cgrp); if (IS_ERR(css)) { err = PTR_ERR(css); goto err_destroy; @@ -3969,6 +3970,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, /* At error, ->destroy() callback has to free assigned ID. */ if (clone_children(parent) && ss->post_clone) ss->post_clone(cgrp); + + if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && + parent->parent) { + pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", + current->comm, current->pid, ss->name); + if (!strcmp(ss->name, "memory")) + pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n"); + ss->warned_broken_hierarchy = true; + } } list_add(&cgrp->sibling, &cgrp->parent->children); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 3649fc6b3eaa..b1724ce98981 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -373,4 +373,12 @@ struct cgroup_subsys freezer_subsys = { .can_attach = freezer_can_attach, .fork = freezer_fork, .base_cftypes = files, + + /* + * freezer subsys doesn't handle hierarchy at all. Frozen state + * should be inherited through the hierarchy - if a parent is + * frozen, all its children should be frozen. Fix it and remove + * the following. + */ + .broken_hierarchy = true, }; diff --git a/kernel/events/core.c b/kernel/events/core.c index b7935fcec7d9..f18a0a56e5aa 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7285,5 +7285,12 @@ struct cgroup_subsys perf_subsys = { .destroy = perf_cgroup_destroy, .exit = perf_cgroup_exit, .attach = perf_cgroup_attach, + + /* + * perf_event cgroup doesn't handle nesting correctly. + * ctx->nr_cgroups adjustments should be propagated through the + * cgroup hierarchy. Fix it and remove the following. + */ + .broken_hierarchy = true, }; #endif /* CONFIG_CGROUP_PERF */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 795e525afaba..a72f2ffdc3d0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4973,6 +4973,13 @@ mem_cgroup_create(struct cgroup *cont) } else { res_counter_init(&memcg->res, NULL); res_counter_init(&memcg->memsw, NULL); + /* + * Deeper hierachy with use_hierarchy == false doesn't make + * much sense so let cgroup subsystem know about this + * unfortunate state in our controller. + */ + if (parent && parent != root_mem_cgroup) + mem_cgroup_subsys.broken_hierarchy = true; } memcg->last_scanned_node = MAX_NUMNODES; INIT_LIST_HEAD(&memcg->oom_notify); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..34f3615b30ca 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -330,7 +330,17 @@ struct cgroup_subsys net_prio_subsys = { .subsys_id = net_prio_subsys_id, #endif .base_cftypes = ss_files, - .module = THIS_MODULE + .module = THIS_MODULE, + + /* + * net_prio has artificial limit on the number of cgroups and + * disallows nesting making it impossible to co-mount it with other + * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ + * limit and properly nest configuration such that children follow + * their parents' configurations by default and are allowed to + * override and remove the following. + */ + .broken_hierarchy = true, }; static int netprio_device_event(struct notifier_block *unused, diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7743ea8d1d38..907daf99ab2e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -82,6 +82,15 @@ struct cgroup_subsys net_cls_subsys = { #endif .base_cftypes = ss_files, .module = THIS_MODULE, + + /* + * While net_cls cgroup has the rudimentary hierarchy support of + * inheriting the parent's classid on cgroup creation, it doesn't + * properly propagates config changes in ancestors to their + * descendents. A child should follow the parent's configuration + * but be allowed to override it. Fix it and remove the following. + */ + .broken_hierarchy = true, }; struct cls_cgroup_head { diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 442204cc22d9..4b877a92a7ea 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -457,6 +457,15 @@ struct cgroup_subsys devices_subsys = { .destroy = devcgroup_destroy, .subsys_id = devices_subsys_id, .base_cftypes = dev_cgroup_files, + + /* + * While devices cgroup has the rudimentary hierarchy support which + * checks the parent's restriction, it doesn't properly propagates + * config changes in ancestors to their descendents. A child + * should only be allowed to add more restrictions to the parent's + * configuration. Fix it and remove the following. + */ + .broken_hierarchy = true, }; int __devcgroup_inode_permission(struct inode *inode, int mask) -- cgit v1.2.3-71-gd317 From 34e36d8ecbd958bc15f8e63deade1227de337eb1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 23:20:20 -0700 Subject: audit: Limit audit requests to processes in the initial pid and user namespaces. This allows the code to safely make the assumption that all of the uids gids and pids that need to be send in audit messages are in the initial namespaces. If someone cares we may lift this restriction someday but start with limiting access so at least the code is always correct. Cc: Al Viro Cc: Eric Paris Signed-off-by: "Eric W. Biederman" --- kernel/audit.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index ea3b7b6191c7..7b7268e3073b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -61,6 +61,7 @@ #include #include #include +#include #include "audit.h" @@ -588,6 +589,11 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) { int err = 0; + /* Only support the initial namespaces for now. */ + if ((current_user_ns() != &init_user_ns) || + (task_active_pid_ns(current) != &init_pid_ns)) + return -EPERM; + switch (msg_type) { case AUDIT_GET: case AUDIT_LIST: -- cgit v1.2.3-71-gd317 From 02276bda4a2bf094fcde89fb5db4d9e86347ebf4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 23:10:16 -0700 Subject: audit: Use current instead of NETLINK_CREDS() in audit_filter Get caller process uid and gid and pid values from the current task instead of the NETLINK_CB. This is simpler than passing NETLINK_CREDS from from audit_receive_msg to audit_filter_user_rules and avoid the chance of being hit by the occassional bugs in netlink uid/gid credential passing. This is a safe changes because all netlink requests are processed in the task of the sending process. Cc: Al Viro Cc: Eric Paris Signed-off-by: "Eric W. Biederman" --- include/linux/audit.h | 2 +- kernel/audit.c | 2 +- kernel/auditfilter.c | 13 ++++++------- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 36abf2aa7e68..9c9af0e95f93 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -700,7 +700,7 @@ extern void audit_log_secctx(struct audit_buffer *ab, u32 secid); extern int audit_update_lsm_rules(void); /* Private API (for audit.c only) */ -extern int audit_filter_user(struct netlink_skb_parms *cb); +extern int audit_filter_user(void); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int uid, int seq, void *data, size_t datasz, uid_t loginuid, diff --git a/kernel/audit.c b/kernel/audit.c index 7b7268e3073b..fecb1507b485 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -744,7 +744,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!audit_enabled && msg_type != AUDIT_USER_AVC) return 0; - err = audit_filter_user(&NETLINK_CB(skb)); + err = audit_filter_user(); if (err == 1) { err = 0; if (msg_type == AUDIT_USER_TTY) { diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a6c3f1abd206..b754f43bc56c 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1236,8 +1236,7 @@ int audit_compare_dname_path(const char *dname, const char *path, return strncmp(p, dname, dlen); } -static int audit_filter_user_rules(struct netlink_skb_parms *cb, - struct audit_krule *rule, +static int audit_filter_user_rules(struct audit_krule *rule, enum audit_state *state) { int i; @@ -1249,13 +1248,13 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, switch (f->type) { case AUDIT_PID: - result = audit_comparator(cb->creds.pid, f->op, f->val); + result = audit_comparator(task_pid_vnr(current), f->op, f->val); break; case AUDIT_UID: - result = audit_comparator(cb->creds.uid, f->op, f->val); + result = audit_comparator(current_uid(), f->op, f->val); break; case AUDIT_GID: - result = audit_comparator(cb->creds.gid, f->op, f->val); + result = audit_comparator(current_gid(), f->op, f->val); break; case AUDIT_LOGINUID: result = audit_comparator(audit_get_loginuid(current), @@ -1287,7 +1286,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, return 1; } -int audit_filter_user(struct netlink_skb_parms *cb) +int audit_filter_user(void) { enum audit_state state = AUDIT_DISABLED; struct audit_entry *e; @@ -1295,7 +1294,7 @@ int audit_filter_user(struct netlink_skb_parms *cb) rcu_read_lock(); list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_USER], list) { - if (audit_filter_user_rules(cb, &e->rule, &state)) { + if (audit_filter_user_rules(&e->rule, &state)) { if (state == AUDIT_DISABLED) ret = 0; break; -- cgit v1.2.3-71-gd317 From f95732e2e0a649c148be0242b72e3c7473092687 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 23:31:17 -0700 Subject: audit: kill audit_prepare_user_tty Now that netlink messages are processed in the context of the sender tty_audit_push_task can be called directly and audit_prepare_user_tty which only added looking up the task of the tty by process id is not needed. Cc: Al Viro Cc: Eric Paris Signed-off-by: "Eric W. Biederman" --- kernel/audit.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index fecb1507b485..58f704b432e4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -468,24 +468,6 @@ static int kauditd_thread(void *dummy) return 0; } -static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid) -{ - struct task_struct *tsk; - int err; - - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (!tsk) { - rcu_read_unlock(); - return -ESRCH; - } - get_task_struct(tsk); - rcu_read_unlock(); - err = tty_audit_push_task(tsk, loginuid, sessionid); - put_task_struct(tsk); - return err; -} - int audit_send_list(void *_dest) { struct audit_netlink_list *dest = _dest; @@ -748,7 +730,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err == 1) { err = 0; if (msg_type == AUDIT_USER_TTY) { - err = audit_prepare_user_tty(pid, loginuid, + err = tty_audit_push_task(current, loginuid, sessionid); if (err) break; -- cgit v1.2.3-71-gd317 From 8aa14b64981ee4b95959e1ed331b672d053aab62 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 23:43:14 -0700 Subject: audit: Simply AUDIT_TTY_SET and AUDIT_TTY_GET Use current instead of looking up the current up the current task by process identifier. Netlink requests are processed in trhe context of the sending task so this is safe. Cc: Al Viro Cc: Eric Paris Signed-off-by: "Eric W. Biederman" --- kernel/audit.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 58f704b432e4..2a8728fdefc4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -866,41 +866,29 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; case AUDIT_TTY_GET: { struct audit_tty_status s; - struct task_struct *tsk; - unsigned long flags; - - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk && lock_task_sighand(tsk, &flags)) { - s.enabled = tsk->signal->audit_tty != 0; - unlock_task_sighand(tsk, &flags); - } else - err = -ESRCH; - rcu_read_unlock(); - - if (!err) - audit_send_reply(NETLINK_CB(skb).pid, seq, - AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); + struct task_struct *tsk = current; + + spin_lock_irq(&tsk->sighand->siglock); + s.enabled = tsk->signal->audit_tty != 0; + spin_unlock_irq(&tsk->sighand->siglock); + + audit_send_reply(NETLINK_CB(skb).pid, seq, + AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_TTY_SET: { struct audit_tty_status *s; - struct task_struct *tsk; - unsigned long flags; + struct task_struct *tsk = current; if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) return -EINVAL; s = data; if (s->enabled != 0 && s->enabled != 1) return -EINVAL; - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk && lock_task_sighand(tsk, &flags)) { - tsk->signal->audit_tty = s->enabled != 0; - unlock_task_sighand(tsk, &flags); - } else - err = -ESRCH; - rcu_read_unlock(); + + spin_lock_irq(&tsk->sighand->siglock); + tsk->signal->audit_tty = s->enabled != 0; + spin_unlock_irq(&tsk->sighand->siglock); break; } default: -- cgit v1.2.3-71-gd317 From 35ce9888ad2a60c95849551e7345bd547714bbff Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Sep 2012 00:12:29 -0700 Subject: audit: Properly set the origin port id of audit messages. For user generated audit messages set the portid field in the netlink header to the netlink port where the user generated audit message came from. Reporting the process id in a port id field was just nonsense. Cc: Al Viro Cc: Eric Paris Signed-off-by: "Eric W. Biederman" --- kernel/audit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 2a8728fdefc4..9dd4d0936969 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -751,7 +751,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) size--; audit_log_n_untrustedstring(ab, data, size); } - audit_set_pid(ab, pid); + audit_set_pid(ab, NETLINK_CB(skb).pid); audit_log_end(ab); } break; -- cgit v1.2.3-71-gd317 From 017143fecb3364e5fed8107d206799899f5dd684 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Sep 2012 00:19:06 -0700 Subject: audit: Remove the unused uid parameter from audit_receive_filter Cc: Al Viro Cc: Eric Paris Signed-off-by: "Eric W. Biederman" --- include/linux/audit.h | 2 +- kernel/audit.c | 4 ++-- kernel/auditfilter.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 9c9af0e95f93..b9c5b22e34a5 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -702,7 +702,7 @@ extern int audit_update_lsm_rules(void); /* Private API (for audit.c only) */ extern int audit_filter_user(void); extern int audit_filter_type(int type); -extern int audit_receive_filter(int type, int pid, int uid, int seq, +extern int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz, uid_t loginuid, u32 sessionid, u32 sid); extern int audit_enabled; diff --git a/kernel/audit.c b/kernel/audit.c index 9dd4d0936969..a31e31bba2d3 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -771,7 +771,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* fallthrough */ case AUDIT_LIST: err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, - uid, seq, data, nlmsg_len(nlh), + seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; case AUDIT_ADD_RULE: @@ -790,7 +790,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* fallthrough */ case AUDIT_LIST_RULES: err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, - uid, seq, data, nlmsg_len(nlh), + seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; case AUDIT_TRIM: diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b754f43bc56c..e242dd9aa2d0 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1098,7 +1098,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, * @sessionid: sessionid for netlink audit message * @sid: SE Linux Security ID of sender */ -int audit_receive_filter(int type, int pid, int uid, int seq, void *data, +int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz, uid_t loginuid, u32 sessionid, u32 sid) { struct task_struct *tsk; -- cgit v1.2.3-71-gd317 From 860c0aaff75e714c21d325f32d36a37572b4fffb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Sep 2012 00:24:49 -0700 Subject: audit: Don't pass pid or uid to audit_log_common_recv_msg The only place we use the uid and the pid that we calculate in audit_receive_msg is in audit_log_common_recv_msg so move the calculation of these values into the audit_log_common_recv_msg. Simplify the calcuation of the current pid and uid by reading them from current instead of reading them from NETLINK_CREDS. Cc: Al Viro Cc: Eric Paris Signed-off-by: "Eric W. Biederman" --- kernel/audit.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index a31e31bba2d3..2e0dd5edf69b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -607,8 +607,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) } static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, - u32 pid, u32 uid, uid_t auid, u32 ses, - u32 sid) + uid_t auid, u32 ses, u32 sid) { int rc = 0; char *ctx = NULL; @@ -621,7 +620,9 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, *ab = audit_log_start(NULL, GFP_KERNEL, msg_type); audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u", - pid, uid, auid, ses); + task_tgid_vnr(current), + from_kuid(&init_user_ns, current_uid()), + auid, ses); if (sid) { rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) @@ -637,7 +638,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - u32 uid, pid, seq, sid; + u32 seq, sid; void *data; struct audit_status *status_get, status_set; int err; @@ -663,8 +664,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } - pid = NETLINK_CREDS(skb)->pid; - uid = NETLINK_CREDS(skb)->uid; loginuid = audit_get_loginuid(current); sessionid = audit_get_sessionid(current); security_task_getsecid(current, &sid); @@ -735,7 +734,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err) break; } - audit_log_common_recv_msg(&ab, msg_type, pid, uid, + audit_log_common_recv_msg(&ab, msg_type, loginuid, sessionid, sid); if (msg_type != AUDIT_USER_TTY) @@ -760,8 +759,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlmsg_len(nlh) < sizeof(struct audit_rule)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { - audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid, - uid, loginuid, sessionid, sid); + audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, + loginuid, sessionid, sid); audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled); @@ -779,8 +778,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlmsg_len(nlh) < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { - audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid, - uid, loginuid, sessionid, sid); + audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, + loginuid, sessionid, sid); audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled); @@ -796,8 +795,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_TRIM: audit_trim_trees(); - audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid, - uid, loginuid, sessionid, sid); + audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, + loginuid, sessionid, sid); audit_log_format(ab, " op=trim res=1"); audit_log_end(ab); @@ -828,8 +827,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* OK, here comes... */ err = audit_tag_tree(old, new); - audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid, - uid, loginuid, sessionid, sid); + audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, + loginuid, sessionid, sid); audit_log_format(ab, " op=make_equiv old="); audit_log_untrustedstring(ab, old); -- cgit v1.2.3-71-gd317 From ca57ec0f00c3f139c41bf6b0a5b9bcc95bbb2ad7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 11 Sep 2012 02:18:08 -0700 Subject: audit: Add typespecific uid and gid comparators The audit filter code guarantees that uid are always compared with uids and gids are always compared with gids, as the comparason operations are type specific. Take advantage of this proper to define audit_uid_comparator and audit_gid_comparator which use the type safe comparasons from uidgid.h. Build on audit_uid_comparator and audit_gid_comparator and replace audit_compare_id with audit_compare_uid and audit_compare_gid. This is one of those odd cases where being type safe and duplicating code leads to simpler shorter and more concise code. Don't allow bitmask operations in uid and gid comparisons in audit_data_to_entry. Bitmask operations are already denined in audit_rule_to_entry. Convert constants in audit_rule_to_entry and audit_data_to_entry into kuids and kgids when appropriate. Convert the uid and gid field in struct audit_names to be of type kuid_t and kgid_t respectively, so that the new uid and gid comparators can be applied in a type safe manner. Cc: Al Viro Cc: Eric Paris Signed-off-by: "Eric W. Biederman" --- include/linux/audit.h | 2 + kernel/audit.h | 2 + kernel/auditfilter.c | 119 +++++++++++++++++++++++++++++++++++---- kernel/auditsc.c | 150 ++++++++++++++++++++++++-------------------------- 4 files changed, 184 insertions(+), 89 deletions(-) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index b9c5b22e34a5..ca019bb74da3 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -442,6 +442,8 @@ struct audit_krule { struct audit_field { u32 type; u32 val; + kuid_t uid; + kgid_t gid; u32 op; char *lsm_str; void *lsm_rule; diff --git a/kernel/audit.h b/kernel/audit.h index 816766803371..4b428bb41ea3 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -76,6 +76,8 @@ static inline int audit_hash_ino(u32 ino) extern int audit_match_class(int class, unsigned syscall); extern int audit_comparator(const u32 left, const u32 op, const u32 right); +extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right); +extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right); extern int audit_compare_dname_path(const char *dname, const char *path, int *dirlen); extern struct sk_buff * audit_make_reply(int pid, int seq, int type, diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index e242dd9aa2d0..b30320cea26f 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -342,6 +342,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS); f->val = rule->values[i]; + f->uid = INVALID_UID; + f->gid = INVALID_GID; err = -EINVAL; if (f->op == Audit_bad) @@ -350,16 +352,32 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) switch(f->type) { default: goto exit_free; - case AUDIT_PID: case AUDIT_UID: case AUDIT_EUID: case AUDIT_SUID: case AUDIT_FSUID: + case AUDIT_LOGINUID: + /* bit ops not implemented for uid comparisons */ + if (f->op == Audit_bitmask || f->op == Audit_bittest) + goto exit_free; + + f->uid = make_kuid(current_user_ns(), f->val); + if (!uid_valid(f->uid)) + goto exit_free; + break; case AUDIT_GID: case AUDIT_EGID: case AUDIT_SGID: case AUDIT_FSGID: - case AUDIT_LOGINUID: + /* bit ops not implemented for gid comparisons */ + if (f->op == Audit_bitmask || f->op == Audit_bittest) + goto exit_free; + + f->gid = make_kgid(current_user_ns(), f->val); + if (!gid_valid(f->gid)) + goto exit_free; + break; + case AUDIT_PID: case AUDIT_PERS: case AUDIT_MSGTYPE: case AUDIT_PPID: @@ -437,19 +455,39 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->type = data->fields[i]; f->val = data->values[i]; + f->uid = INVALID_UID; + f->gid = INVALID_GID; f->lsm_str = NULL; f->lsm_rule = NULL; switch(f->type) { - case AUDIT_PID: case AUDIT_UID: case AUDIT_EUID: case AUDIT_SUID: case AUDIT_FSUID: + case AUDIT_LOGINUID: + case AUDIT_OBJ_UID: + /* bit ops not implemented for uid comparisons */ + if (f->op == Audit_bitmask || f->op == Audit_bittest) + goto exit_free; + + f->uid = make_kuid(current_user_ns(), f->val); + if (!uid_valid(f->uid)) + goto exit_free; + break; case AUDIT_GID: case AUDIT_EGID: case AUDIT_SGID: case AUDIT_FSGID: - case AUDIT_LOGINUID: + case AUDIT_OBJ_GID: + /* bit ops not implemented for gid comparisons */ + if (f->op == Audit_bitmask || f->op == Audit_bittest) + goto exit_free; + + f->gid = make_kgid(current_user_ns(), f->val); + if (!gid_valid(f->gid)) + goto exit_free; + break; + case AUDIT_PID: case AUDIT_PERS: case AUDIT_MSGTYPE: case AUDIT_PPID: @@ -461,8 +499,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_ARG1: case AUDIT_ARG2: case AUDIT_ARG3: - case AUDIT_OBJ_UID: - case AUDIT_OBJ_GID: break; case AUDIT_ARCH: entry->rule.arch_f = f; @@ -707,6 +743,23 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) if (strcmp(a->filterkey, b->filterkey)) return 1; break; + case AUDIT_UID: + case AUDIT_EUID: + case AUDIT_SUID: + case AUDIT_FSUID: + case AUDIT_LOGINUID: + case AUDIT_OBJ_UID: + if (!uid_eq(a->fields[i].uid, b->fields[i].uid)) + return 1; + break; + case AUDIT_GID: + case AUDIT_EGID: + case AUDIT_SGID: + case AUDIT_FSGID: + case AUDIT_OBJ_GID: + if (!gid_eq(a->fields[i].gid, b->fields[i].gid)) + return 1; + break; default: if (a->fields[i].val != b->fields[i].val) return 1; @@ -1198,6 +1251,52 @@ int audit_comparator(u32 left, u32 op, u32 right) } } +int audit_uid_comparator(kuid_t left, u32 op, kuid_t right) +{ + switch (op) { + case Audit_equal: + return uid_eq(left, right); + case Audit_not_equal: + return !uid_eq(left, right); + case Audit_lt: + return uid_lt(left, right); + case Audit_le: + return uid_lte(left, right); + case Audit_gt: + return uid_gt(left, right); + case Audit_ge: + return uid_gte(left, right); + case Audit_bitmask: + case Audit_bittest: + default: + BUG(); + return 0; + } +} + +int audit_gid_comparator(kgid_t left, u32 op, kgid_t right) +{ + switch (op) { + case Audit_equal: + return gid_eq(left, right); + case Audit_not_equal: + return !gid_eq(left, right); + case Audit_lt: + return gid_lt(left, right); + case Audit_le: + return gid_lte(left, right); + case Audit_gt: + return gid_gt(left, right); + case Audit_ge: + return gid_gte(left, right); + case Audit_bitmask: + case Audit_bittest: + default: + BUG(); + return 0; + } +} + /* Compare given dentry name with last component in given path, * return of 0 indicates a match. */ int audit_compare_dname_path(const char *dname, const char *path, @@ -1251,14 +1350,14 @@ static int audit_filter_user_rules(struct audit_krule *rule, result = audit_comparator(task_pid_vnr(current), f->op, f->val); break; case AUDIT_UID: - result = audit_comparator(current_uid(), f->op, f->val); + result = audit_uid_comparator(current_uid(), f->op, f->uid); break; case AUDIT_GID: - result = audit_comparator(current_gid(), f->op, f->val); + result = audit_gid_comparator(current_gid(), f->op, f->gid); break; case AUDIT_LOGINUID: - result = audit_comparator(audit_get_loginuid(current), - f->op, f->val); + result = audit_uid_comparator(audit_get_loginuid(current), + f->op, f->uid); break; case AUDIT_SUBJ_USER: case AUDIT_SUBJ_ROLE: diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 4b96415527b8..0b5b8a232b55 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -113,8 +113,8 @@ struct audit_names { unsigned long ino; dev_t dev; umode_t mode; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; dev_t rdev; u32 osid; struct audit_cap_data fcap; @@ -464,37 +464,47 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree) return 0; } -static int audit_compare_id(uid_t uid1, - struct audit_names *name, - unsigned long name_offset, - struct audit_field *f, - struct audit_context *ctx) +static int audit_compare_uid(kuid_t uid, + struct audit_names *name, + struct audit_field *f, + struct audit_context *ctx) { struct audit_names *n; - unsigned long addr; - uid_t uid2; int rc; - - BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t)); - + if (name) { - addr = (unsigned long)name; - addr += name_offset; - - uid2 = *(uid_t *)addr; - rc = audit_comparator(uid1, f->op, uid2); + rc = audit_uid_comparator(uid, f->op, name->uid); if (rc) return rc; } - + if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { - addr = (unsigned long)n; - addr += name_offset; - - uid2 = *(uid_t *)addr; + rc = audit_uid_comparator(uid, f->op, n->uid); + if (rc) + return rc; + } + } + return 0; +} - rc = audit_comparator(uid1, f->op, uid2); +static int audit_compare_gid(kgid_t gid, + struct audit_names *name, + struct audit_field *f, + struct audit_context *ctx) +{ + struct audit_names *n; + int rc; + + if (name) { + rc = audit_gid_comparator(gid, f->op, name->gid); + if (rc) + return rc; + } + + if (ctx) { + list_for_each_entry(n, &ctx->names_list, list) { + rc = audit_gid_comparator(gid, f->op, n->gid); if (rc) return rc; } @@ -511,80 +521,62 @@ static int audit_field_compare(struct task_struct *tsk, switch (f->val) { /* process to file object comparisons */ case AUDIT_COMPARE_UID_TO_OBJ_UID: - return audit_compare_id(cred->uid, - name, offsetof(struct audit_names, uid), - f, ctx); + return audit_compare_uid(cred->uid, name, f, ctx); case AUDIT_COMPARE_GID_TO_OBJ_GID: - return audit_compare_id(cred->gid, - name, offsetof(struct audit_names, gid), - f, ctx); + return audit_compare_gid(cred->gid, name, f, ctx); case AUDIT_COMPARE_EUID_TO_OBJ_UID: - return audit_compare_id(cred->euid, - name, offsetof(struct audit_names, uid), - f, ctx); + return audit_compare_uid(cred->euid, name, f, ctx); case AUDIT_COMPARE_EGID_TO_OBJ_GID: - return audit_compare_id(cred->egid, - name, offsetof(struct audit_names, gid), - f, ctx); + return audit_compare_gid(cred->egid, name, f, ctx); case AUDIT_COMPARE_AUID_TO_OBJ_UID: - return audit_compare_id(tsk->loginuid, - name, offsetof(struct audit_names, uid), - f, ctx); + return audit_compare_uid(tsk->loginuid, name, f, ctx); case AUDIT_COMPARE_SUID_TO_OBJ_UID: - return audit_compare_id(cred->suid, - name, offsetof(struct audit_names, uid), - f, ctx); + return audit_compare_uid(cred->suid, name, f, ctx); case AUDIT_COMPARE_SGID_TO_OBJ_GID: - return audit_compare_id(cred->sgid, - name, offsetof(struct audit_names, gid), - f, ctx); + return audit_compare_gid(cred->sgid, name, f, ctx); case AUDIT_COMPARE_FSUID_TO_OBJ_UID: - return audit_compare_id(cred->fsuid, - name, offsetof(struct audit_names, uid), - f, ctx); + return audit_compare_uid(cred->fsuid, name, f, ctx); case AUDIT_COMPARE_FSGID_TO_OBJ_GID: - return audit_compare_id(cred->fsgid, - name, offsetof(struct audit_names, gid), - f, ctx); + return audit_compare_gid(cred->fsgid, name, f, ctx); /* uid comparisons */ case AUDIT_COMPARE_UID_TO_AUID: - return audit_comparator(cred->uid, f->op, tsk->loginuid); + return audit_uid_comparator(cred->uid, f->op, tsk->loginuid); case AUDIT_COMPARE_UID_TO_EUID: - return audit_comparator(cred->uid, f->op, cred->euid); + return audit_uid_comparator(cred->uid, f->op, cred->euid); case AUDIT_COMPARE_UID_TO_SUID: - return audit_comparator(cred->uid, f->op, cred->suid); + return audit_uid_comparator(cred->uid, f->op, cred->suid); case AUDIT_COMPARE_UID_TO_FSUID: - return audit_comparator(cred->uid, f->op, cred->fsuid); + return audit_uid_comparator(cred->uid, f->op, cred->fsuid); /* auid comparisons */ case AUDIT_COMPARE_AUID_TO_EUID: - return audit_comparator(tsk->loginuid, f->op, cred->euid); + return audit_uid_comparator(tsk->loginuid, f->op, cred->euid); case AUDIT_COMPARE_AUID_TO_SUID: - return audit_comparator(tsk->loginuid, f->op, cred->suid); + return audit_uid_comparator(tsk->loginuid, f->op, cred->suid); case AUDIT_COMPARE_AUID_TO_FSUID: - return audit_comparator(tsk->loginuid, f->op, cred->fsuid); + return audit_uid_comparator(tsk->loginuid, f->op, cred->fsuid); /* euid comparisons */ case AUDIT_COMPARE_EUID_TO_SUID: - return audit_comparator(cred->euid, f->op, cred->suid); + return audit_uid_comparator(cred->euid, f->op, cred->suid); case AUDIT_COMPARE_EUID_TO_FSUID: - return audit_comparator(cred->euid, f->op, cred->fsuid); + return audit_uid_comparator(cred->euid, f->op, cred->fsuid); /* suid comparisons */ case AUDIT_COMPARE_SUID_TO_FSUID: - return audit_comparator(cred->suid, f->op, cred->fsuid); + return audit_uid_comparator(cred->suid, f->op, cred->fsuid); /* gid comparisons */ case AUDIT_COMPARE_GID_TO_EGID: - return audit_comparator(cred->gid, f->op, cred->egid); + return audit_gid_comparator(cred->gid, f->op, cred->egid); case AUDIT_COMPARE_GID_TO_SGID: - return audit_comparator(cred->gid, f->op, cred->sgid); + return audit_gid_comparator(cred->gid, f->op, cred->sgid); case AUDIT_COMPARE_GID_TO_FSGID: - return audit_comparator(cred->gid, f->op, cred->fsgid); + return audit_gid_comparator(cred->gid, f->op, cred->fsgid); /* egid comparisons */ case AUDIT_COMPARE_EGID_TO_SGID: - return audit_comparator(cred->egid, f->op, cred->sgid); + return audit_gid_comparator(cred->egid, f->op, cred->sgid); case AUDIT_COMPARE_EGID_TO_FSGID: - return audit_comparator(cred->egid, f->op, cred->fsgid); + return audit_gid_comparator(cred->egid, f->op, cred->fsgid); /* sgid comparison */ case AUDIT_COMPARE_SGID_TO_FSGID: - return audit_comparator(cred->sgid, f->op, cred->fsgid); + return audit_gid_comparator(cred->sgid, f->op, cred->fsgid); default: WARN(1, "Missing AUDIT_COMPARE define. Report as a bug\n"); return 0; @@ -630,28 +622,28 @@ static int audit_filter_rules(struct task_struct *tsk, } break; case AUDIT_UID: - result = audit_comparator(cred->uid, f->op, f->val); + result = audit_uid_comparator(cred->uid, f->op, f->uid); break; case AUDIT_EUID: - result = audit_comparator(cred->euid, f->op, f->val); + result = audit_uid_comparator(cred->euid, f->op, f->uid); break; case AUDIT_SUID: - result = audit_comparator(cred->suid, f->op, f->val); + result = audit_uid_comparator(cred->suid, f->op, f->uid); break; case AUDIT_FSUID: - result = audit_comparator(cred->fsuid, f->op, f->val); + result = audit_uid_comparator(cred->fsuid, f->op, f->uid); break; case AUDIT_GID: - result = audit_comparator(cred->gid, f->op, f->val); + result = audit_gid_comparator(cred->gid, f->op, f->gid); break; case AUDIT_EGID: - result = audit_comparator(cred->egid, f->op, f->val); + result = audit_gid_comparator(cred->egid, f->op, f->gid); break; case AUDIT_SGID: - result = audit_comparator(cred->sgid, f->op, f->val); + result = audit_gid_comparator(cred->sgid, f->op, f->gid); break; case AUDIT_FSGID: - result = audit_comparator(cred->fsgid, f->op, f->val); + result = audit_gid_comparator(cred->fsgid, f->op, f->gid); break; case AUDIT_PERS: result = audit_comparator(tsk->personality, f->op, f->val); @@ -717,10 +709,10 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_OBJ_UID: if (name) { - result = audit_comparator(name->uid, f->op, f->val); + result = audit_uid_comparator(name->uid, f->op, f->uid); } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { - if (audit_comparator(n->uid, f->op, f->val)) { + if (audit_uid_comparator(n->uid, f->op, f->uid)) { ++result; break; } @@ -729,10 +721,10 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_OBJ_GID: if (name) { - result = audit_comparator(name->gid, f->op, f->val); + result = audit_gid_comparator(name->gid, f->op, f->gid); } else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { - if (audit_comparator(n->gid, f->op, f->val)) { + if (audit_gid_comparator(n->gid, f->op, f->gid)) { ++result; break; } @@ -750,7 +742,7 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_LOGINUID: result = 0; if (ctx) - result = audit_comparator(tsk->loginuid, f->op, f->val); + result = audit_uid_comparator(tsk->loginuid, f->op, f->uid); break; case AUDIT_SUBJ_USER: case AUDIT_SUBJ_ROLE: -- cgit v1.2.3-71-gd317 From e1760bd5ffae8cb98cffb030ee8e631eba28f3d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 22:39:43 -0700 Subject: userns: Convert the audit loginuid to be a kuid Always store audit loginuids in type kuid_t. Print loginuids by converting them into uids in the appropriate user namespace, and then printing the resulting uid. Modify audit_get_loginuid to return a kuid_t. Modify audit_set_loginuid to take a kuid_t. Modify /proc//loginuid on read to convert the loginuid into the user namespace of the opener of the file. Modify /proc//loginud on write to convert the loginuid rom the user namespace of the opener of the file. Cc: Al Viro Cc: Eric Paris Cc: Paul Moore ? Cc: David Miller Signed-off-by: Eric W. Biederman --- drivers/tty/tty_audit.c | 14 ++++++++------ fs/proc/base.c | 12 ++++++++++-- include/linux/audit.h | 6 +++--- include/linux/init_task.h | 2 +- include/linux/sched.h | 2 +- include/linux/tty.h | 4 ++-- include/net/netlabel.h | 2 +- include/net/xfrm.h | 23 ++++++++++++----------- kernel/audit.c | 20 ++++++++++---------- kernel/audit_watch.c | 2 +- kernel/auditfilter.c | 7 ++++--- kernel/auditsc.c | 20 +++++++++++--------- net/core/dev.c | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlabel/netlabel_user.c | 2 +- net/xfrm/xfrm_policy.c | 8 ++++---- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 12 ++++++------ 18 files changed, 80 insertions(+), 66 deletions(-) (limited to 'kernel') diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 7c5866920622..5b59bd7f4227 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -61,7 +61,7 @@ static void tty_audit_buf_put(struct tty_audit_buf *buf) } static void tty_audit_log(const char *description, struct task_struct *tsk, - uid_t loginuid, unsigned sessionid, int major, + kuid_t loginuid, unsigned sessionid, int major, int minor, unsigned char *data, size_t size) { struct audit_buffer *ab; @@ -73,7 +73,9 @@ static void tty_audit_log(const char *description, struct task_struct *tsk, audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u " "major=%d minor=%d comm=", description, - tsk->pid, uid, loginuid, sessionid, + tsk->pid, uid, + from_kuid(&init_user_ns, loginuid), + sessionid, major, minor); get_task_comm(name, tsk); audit_log_untrustedstring(ab, name); @@ -89,7 +91,7 @@ static void tty_audit_log(const char *description, struct task_struct *tsk, * Generate an audit message from the contents of @buf, which is owned by * @tsk with @loginuid. @buf->mutex must be locked. */ -static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, +static void tty_audit_buf_push(struct task_struct *tsk, kuid_t loginuid, unsigned int sessionid, struct tty_audit_buf *buf) { @@ -112,7 +114,7 @@ static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, */ static void tty_audit_buf_push_current(struct tty_audit_buf *buf) { - uid_t auid = audit_get_loginuid(current); + kuid_t auid = audit_get_loginuid(current); unsigned int sessionid = audit_get_sessionid(current); tty_audit_buf_push(current, auid, sessionid, buf); } @@ -179,7 +181,7 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) } if (should_audit && audit_enabled) { - uid_t auid; + kuid_t auid; unsigned int sessionid; auid = audit_get_loginuid(current); @@ -199,7 +201,7 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) * reference to the tty audit buffer if available. * Flush the buffer or return an appropriate error code. */ -int tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) +int tty_audit_push_task(struct task_struct *tsk, kuid_t loginuid, u32 sessionid) { struct tty_audit_buf *buf = ERR_PTR(-EPERM); unsigned long flags; diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b6c84cbdb73..138cff4b05dd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1089,7 +1089,8 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", - audit_get_loginuid(task)); + from_kuid(file->f_cred->user_ns, + audit_get_loginuid(task))); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -1101,6 +1102,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, char *page, *tmp; ssize_t length; uid_t loginuid; + kuid_t kloginuid; rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { @@ -1130,7 +1132,13 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - length = audit_set_loginuid(loginuid); + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; + } + + length = audit_set_loginuid(kloginuid); if (likely(length == 0)) length = count; diff --git a/include/linux/audit.h b/include/linux/audit.h index ca019bb74da3..12367cbadfe1 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -527,7 +527,7 @@ static inline void audit_ptrace(struct task_struct *t) extern unsigned int audit_serial(void); extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); -extern int audit_set_loginuid(uid_t loginuid); +extern int audit_set_loginuid(kuid_t loginuid); #define audit_get_loginuid(t) ((t)->loginuid) #define audit_get_sessionid(t) ((t)->sessionid) extern void audit_log_task_context(struct audit_buffer *ab); @@ -639,7 +639,7 @@ extern int audit_signals; #define audit_core_dumps(i) do { ; } while (0) #define audit_seccomp(i,s,c) do { ; } while (0) #define auditsc_get_stamp(c,t,s) (0) -#define audit_get_loginuid(t) (-1) +#define audit_get_loginuid(t) (INVALID_UID) #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) #define audit_ipc_obj(i) ((void)0) @@ -705,7 +705,7 @@ extern int audit_update_lsm_rules(void); extern int audit_filter_user(void); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int seq, - void *data, size_t datasz, uid_t loginuid, + void *data, size_t datasz, kuid_t loginuid, u32 sessionid, u32 sid); extern int audit_enabled; #else diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 89f1cb1056f0..6d087c5f57f7 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -92,7 +92,7 @@ extern struct group_info init_groups; #ifdef CONFIG_AUDITSYSCALL #define INIT_IDS \ - .loginuid = -1, \ + .loginuid = INVALID_UID, \ .sessionid = -1, #else #define INIT_IDS diff --git a/include/linux/sched.h b/include/linux/sched.h index c147e7024f11..f64d092f2bed 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1426,7 +1426,7 @@ struct task_struct { struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL - uid_t loginuid; + kuid_t loginuid; unsigned int sessionid; #endif struct seccomp seccomp; diff --git a/include/linux/tty.h b/include/linux/tty.h index 9f47ab540f65..7298385815e6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -553,7 +553,7 @@ extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_task(struct task_struct *tsk, - uid_t loginuid, u32 sessionid); + kuid_t loginuid, u32 sessionid); #else static inline void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size) @@ -572,7 +572,7 @@ static inline void tty_audit_push(struct tty_struct *tty) { } static inline int tty_audit_push_task(struct task_struct *tsk, - uid_t loginuid, u32 sessionid) + kuid_t loginuid, u32 sessionid) { return 0; } diff --git a/include/net/netlabel.h b/include/net/netlabel.h index f67440970d7e..2c95d55f7914 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -110,7 +110,7 @@ struct cipso_v4_doi; /* NetLabel audit information */ struct netlbl_audit { u32 secid; - uid_t loginuid; + kuid_t loginuid; u32 sessionid; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9509eb29b80..1f217e2c5d82 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -662,7 +662,7 @@ struct xfrm_spi_skb_cb { /* Audit Information */ struct xfrm_audit { u32 secid; - uid_t loginuid; + kuid_t loginuid; u32 sessionid; }; @@ -681,13 +681,14 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op) return audit_buf; } -static inline void xfrm_audit_helper_usrinfo(uid_t auid, u32 ses, u32 secid, +static inline void xfrm_audit_helper_usrinfo(kuid_t auid, u32 ses, u32 secid, struct audit_buffer *audit_buf) { char *secctx; u32 secctx_len; - audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses); + audit_log_format(audit_buf, " auid=%u ses=%u", + from_kuid(&init_user_ns, auid), ses); if (secid != 0 && security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " subj=%s", secctx); @@ -697,13 +698,13 @@ static inline void xfrm_audit_helper_usrinfo(uid_t auid, u32 ses, u32 secid, } extern void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - u32 auid, u32 ses, u32 secid); + kuid_t auid, u32 ses, u32 secid); extern void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - u32 auid, u32 ses, u32 secid); + kuid_t auid, u32 ses, u32 secid); extern void xfrm_audit_state_add(struct xfrm_state *x, int result, - u32 auid, u32 ses, u32 secid); + kuid_t auid, u32 ses, u32 secid); extern void xfrm_audit_state_delete(struct xfrm_state *x, int result, - u32 auid, u32 ses, u32 secid); + kuid_t auid, u32 ses, u32 secid); extern void xfrm_audit_state_replay_overflow(struct xfrm_state *x, struct sk_buff *skb); extern void xfrm_audit_state_replay(struct xfrm_state *x, @@ -716,22 +717,22 @@ extern void xfrm_audit_state_icvfail(struct xfrm_state *x, #else static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - u32 auid, u32 ses, u32 secid) + kuid_t auid, u32 ses, u32 secid) { } static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - u32 auid, u32 ses, u32 secid) + kuid_t auid, u32 ses, u32 secid) { } static inline void xfrm_audit_state_add(struct xfrm_state *x, int result, - u32 auid, u32 ses, u32 secid) + kuid_t auid, u32 ses, u32 secid) { } static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result, - u32 auid, u32 ses, u32 secid) + kuid_t auid, u32 ses, u32 secid) { } diff --git a/kernel/audit.c b/kernel/audit.c index 2e0dd5edf69b..44a4b13c9f00 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -265,7 +265,7 @@ void audit_log_lost(const char *message) } static int audit_log_config_change(char *function_name, int new, int old, - uid_t loginuid, u32 sessionid, u32 sid, + kuid_t loginuid, u32 sessionid, u32 sid, int allow_changes) { struct audit_buffer *ab; @@ -273,7 +273,7 @@ static int audit_log_config_change(char *function_name, int new, int old, ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new, - old, loginuid, sessionid); + old, from_kuid(&init_user_ns, loginuid), sessionid); if (sid) { char *ctx = NULL; u32 len; @@ -293,7 +293,7 @@ static int audit_log_config_change(char *function_name, int new, int old, } static int audit_do_config_change(char *function_name, int *to_change, - int new, uid_t loginuid, u32 sessionid, + int new, kuid_t loginuid, u32 sessionid, u32 sid) { int allow_changes, rc = 0, old = *to_change; @@ -320,21 +320,21 @@ static int audit_do_config_change(char *function_name, int *to_change, return rc; } -static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sessionid, +static int audit_set_rate_limit(int limit, kuid_t loginuid, u32 sessionid, u32 sid) { return audit_do_config_change("audit_rate_limit", &audit_rate_limit, limit, loginuid, sessionid, sid); } -static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sessionid, +static int audit_set_backlog_limit(int limit, kuid_t loginuid, u32 sessionid, u32 sid) { return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit, limit, loginuid, sessionid, sid); } -static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid) +static int audit_set_enabled(int state, kuid_t loginuid, u32 sessionid, u32 sid) { int rc; if (state < AUDIT_OFF || state > AUDIT_LOCKED) @@ -349,7 +349,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid) return rc; } -static int audit_set_failure(int state, uid_t loginuid, u32 sessionid, u32 sid) +static int audit_set_failure(int state, kuid_t loginuid, u32 sessionid, u32 sid) { if (state != AUDIT_FAIL_SILENT && state != AUDIT_FAIL_PRINTK @@ -607,7 +607,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) } static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, - uid_t auid, u32 ses, u32 sid) + kuid_t auid, u32 ses, u32 sid) { int rc = 0; char *ctx = NULL; @@ -622,7 +622,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u", task_tgid_vnr(current), from_kuid(&init_user_ns, current_uid()), - auid, ses); + from_kuid(&init_user_ns, auid), ses); if (sid) { rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) @@ -644,7 +644,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; - uid_t loginuid; /* loginuid of sender */ + kuid_t loginuid; /* loginuid of sender */ u32 sessionid; struct audit_sig_info *sig_data; char *ctx = NULL; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 3823281401b5..1c22ec3d87bc 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -241,7 +241,7 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc struct audit_buffer *ab; ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); audit_log_format(ab, "auid=%u ses=%u op=", - audit_get_loginuid(current), + from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); audit_log_string(ab, op); audit_log_format(ab, " path="); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b30320cea26f..c4bcdbaf4d4d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1109,7 +1109,7 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q) } /* Log rule additions and removals */ -static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, +static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid, char *action, struct audit_krule *rule, int res) { @@ -1121,7 +1121,8 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (!ab) return; - audit_log_format(ab, "auid=%u ses=%u", loginuid, sessionid); + audit_log_format(ab, "auid=%u ses=%u", + from_kuid(&init_user_ns, loginuid), sessionid); if (sid) { char *ctx = NULL; u32 len; @@ -1152,7 +1153,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int seq, void *data, - size_t datasz, uid_t loginuid, u32 sessionid, u32 sid) + size_t datasz, kuid_t loginuid, u32 sessionid, u32 sid) { struct task_struct *tsk; struct audit_netlink_list *dest; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 0b5b8a232b55..26fdfc092e35 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -149,7 +149,7 @@ struct audit_aux_data_execve { struct audit_aux_data_pids { struct audit_aux_data d; pid_t target_pid[AUDIT_AUX_PIDS]; - uid_t target_auid[AUDIT_AUX_PIDS]; + kuid_t target_auid[AUDIT_AUX_PIDS]; uid_t target_uid[AUDIT_AUX_PIDS]; unsigned int target_sessionid[AUDIT_AUX_PIDS]; u32 target_sid[AUDIT_AUX_PIDS]; @@ -214,7 +214,7 @@ struct audit_context { int arch; pid_t target_pid; - uid_t target_auid; + kuid_t target_auid; uid_t target_uid; unsigned int target_sessionid; u32 target_sid; @@ -1176,7 +1176,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk } static int audit_log_pid_context(struct audit_context *context, pid_t pid, - uid_t auid, uid_t uid, unsigned int sessionid, + kuid_t auid, uid_t uid, unsigned int sessionid, u32 sid, char *comm) { struct audit_buffer *ab; @@ -1188,7 +1188,8 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, if (!ab) return rc; - audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, auid, + audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, + from_kuid(&init_user_ns, auid), uid, sessionid); if (security_secid_to_secctx(sid, &ctx, &len)) { audit_log_format(ab, " obj=(none)"); @@ -1630,7 +1631,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->name_count, context->ppid, context->pid, - tsk->loginuid, + from_kuid(&init_user_ns, tsk->loginuid), context->uid, context->gid, context->euid, context->suid, context->fsuid, @@ -2291,14 +2292,14 @@ static atomic_t session_id = ATOMIC_INIT(0); * * Called (set) from fs/proc/base.c::proc_loginuid_write(). */ -int audit_set_loginuid(uid_t loginuid) +int audit_set_loginuid(kuid_t loginuid) { struct task_struct *task = current; struct audit_context *context = task->audit_context; unsigned int sessionid; #ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE - if (task->loginuid != -1) + if (uid_valid(task->loginuid)) return -EPERM; #else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ if (!capable(CAP_AUDIT_CONTROL)) @@ -2315,7 +2316,8 @@ int audit_set_loginuid(uid_t loginuid) "old auid=%u new auid=%u" " old ses=%u new ses=%u", task->pid, task_uid(task), - task->loginuid, loginuid, + from_kuid(&init_user_ns, task->loginuid), + from_kuid(&init_user_ns, loginuid), task->sessionid, sessionid); audit_log_end(ab); } @@ -2543,7 +2545,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (audit_pid && t->tgid == audit_pid) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { audit_sig_pid = tsk->pid; - if (tsk->loginuid != -1) + if (uid_valid(tsk->loginuid)) audit_sig_uid = tsk->loginuid; else audit_sig_uid = uid; diff --git a/net/core/dev.c b/net/core/dev.c index 026bb4a37665..1c0d0823a5a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4524,7 +4524,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), - audit_get_loginuid(current), + from_kuid(&init_user_ns, audit_get_loginuid(current)), from_kuid(&init_user_ns, uid), from_kgid(&init_user_ns, gid), audit_get_sessionid(current)); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e7ff694f1049..729a345c75a4 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1541,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void) * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. */ security_task_getsecid(current, &audit_info.secid); - audit_info.loginuid = 0; + audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 9fae63f10298..9650c4ad5f88 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -109,7 +109,7 @@ struct audit_buffer *netlbl_audit_start_common(int type, return NULL; audit_log_format(audit_buf, "netlabel: auid=%u ses=%u", - audit_info->loginuid, + from_kuid(&init_user_ns, audit_info->loginuid), audit_info->sessionid); if (audit_info->secid != 0 && diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..2f475151cea1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2630,12 +2630,12 @@ static void xfrm_policy_fini(struct net *net) flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); @@ -2742,7 +2742,7 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2757,7 +2757,7 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5b228f97d4b3..fce6a49bc7c6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2045,7 +2045,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - audit_info.loginuid = -1; + audit_info.loginuid = INVALID_UID; audit_info.sessionid = -1; audit_info.secid = 0; xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); @@ -2112,7 +2112,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } void xfrm_audit_state_add(struct xfrm_state *x, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2127,7 +2127,7 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result, EXPORT_SYMBOL_GPL(xfrm_audit_state_add); void xfrm_audit_state_delete(struct xfrm_state *x, int result, - uid_t auid, u32 sessionid, u32 secid) + kuid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e47f35c..9ea55db737b4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -575,7 +575,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -654,7 +654,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1369,7 +1369,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1624,7 +1624,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).pid); } } else { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1918,7 +1918,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; @@ -1961,7 +1961,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_expired(x, ue->hard, current->pid); if (ue->hard) { - uid_t loginuid = audit_get_loginuid(current); + kuid_t loginuid = audit_get_loginuid(current); u32 sessionid = audit_get_sessionid(current); u32 sid; -- cgit v1.2.3-71-gd317 From cca080d9b622094831672a136e5ee4f702d116b1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:53:48 -0800 Subject: userns: Convert audit to work with user namespaces enabled - Explicitly format uids gids in audit messges in the initial user namespace. This is safe because auditd is restrected to be in the initial user namespace. - Convert audit_sig_uid into a kuid_t. - Enable building the audit code and user namespaces at the same time. The net result is that the audit subsystem now uses kuid_t and kgid_t whenever possible making it almost impossible to confuse a raw uid_t with a kuid_t preventing bugs. Cc: Al Viro Cc: Eric Paris Cc: Greg Kroah-Hartman Signed-off-by: Eric W. Biederman --- drivers/tty/tty_audit.c | 5 +++-- init/Kconfig | 2 -- kernel/audit.c | 4 ++-- kernel/audit.h | 2 +- kernel/auditsc.c | 51 ++++++++++++++++++++++++++++++------------------- 5 files changed, 37 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 5b59bd7f4227..b0b39b823ccf 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -69,11 +69,12 @@ static void tty_audit_log(const char *description, struct task_struct *tsk, ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_TTY); if (ab) { char name[sizeof(tsk->comm)]; - uid_t uid = task_uid(tsk); + kuid_t uid = task_uid(tsk); audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u " "major=%d minor=%d comm=", description, - tsk->pid, uid, + tsk->pid, + from_kuid(&init_user_ns, uid), from_kuid(&init_user_ns, loginuid), sessionid, major, minor); diff --git a/init/Kconfig b/init/Kconfig index fd8696b1a81e..b5ecb4e75518 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -927,8 +927,6 @@ config UIDGID_CONVERTED # Features depends on IMA = n depends on EVM = n - depends on AUDIT = n - depends on AUDITSYSCALL = n depends on TASKSTATS = n depends on TRACING = n depends on FS_POSIX_ACL = n diff --git a/kernel/audit.c b/kernel/audit.c index 44a4b13c9f00..511488a7bc71 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -105,7 +105,7 @@ static int audit_backlog_wait_time = 60 * HZ; static int audit_backlog_wait_overflow = 0; /* The identity of the user shutting down the audit system. */ -uid_t audit_sig_uid = -1; +kuid_t audit_sig_uid = INVALID_UID; pid_t audit_sig_pid = -1; u32 audit_sig_sid = 0; @@ -853,7 +853,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) security_release_secctx(ctx, len); return -ENOMEM; } - sig_data->uid = audit_sig_uid; + sig_data->uid = from_kuid(&init_user_ns, audit_sig_uid); sig_data->pid = audit_sig_pid; if (audit_sig_sid) { memcpy(sig_data->ctx, ctx, len); diff --git a/kernel/audit.h b/kernel/audit.h index 4b428bb41ea3..9eb3d79482b6 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -146,7 +146,7 @@ extern void audit_kill_trees(struct list_head *); extern char *audit_unpack_string(void **, size_t *, size_t); extern pid_t audit_sig_pid; -extern uid_t audit_sig_uid; +extern kuid_t audit_sig_uid; extern u32 audit_sig_sid; #ifdef CONFIG_AUDITSYSCALL diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 26fdfc092e35..ff4798fcb488 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -150,7 +150,7 @@ struct audit_aux_data_pids { struct audit_aux_data d; pid_t target_pid[AUDIT_AUX_PIDS]; kuid_t target_auid[AUDIT_AUX_PIDS]; - uid_t target_uid[AUDIT_AUX_PIDS]; + kuid_t target_uid[AUDIT_AUX_PIDS]; unsigned int target_sessionid[AUDIT_AUX_PIDS]; u32 target_sid[AUDIT_AUX_PIDS]; char target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN]; @@ -208,14 +208,14 @@ struct audit_context { size_t sockaddr_len; /* Save things to print about task_struct */ pid_t pid, ppid; - uid_t uid, euid, suid, fsuid; - gid_t gid, egid, sgid, fsgid; + kuid_t uid, euid, suid, fsuid; + kgid_t gid, egid, sgid, fsgid; unsigned long personality; int arch; pid_t target_pid; kuid_t target_auid; - uid_t target_uid; + kuid_t target_uid; unsigned int target_sessionid; u32 target_sid; char target_comm[TASK_COMM_LEN]; @@ -231,8 +231,8 @@ struct audit_context { long args[6]; } socketcall; struct { - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; umode_t mode; u32 osid; int has_perm; @@ -1176,7 +1176,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk } static int audit_log_pid_context(struct audit_context *context, pid_t pid, - kuid_t auid, uid_t uid, unsigned int sessionid, + kuid_t auid, kuid_t uid, unsigned int sessionid, u32 sid, char *comm) { struct audit_buffer *ab; @@ -1190,7 +1190,7 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, from_kuid(&init_user_ns, auid), - uid, sessionid); + from_kuid(&init_user_ns, uid), sessionid); if (security_secid_to_secctx(sid, &ctx, &len)) { audit_log_format(ab, " obj=(none)"); rc = 1; @@ -1440,7 +1440,9 @@ static void show_special(struct audit_context *context, int *call_panic) u32 osid = context->ipc.osid; audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho", - context->ipc.uid, context->ipc.gid, context->ipc.mode); + from_kuid(&init_user_ns, context->ipc.uid), + from_kgid(&init_user_ns, context->ipc.gid), + context->ipc.mode); if (osid) { char *ctx = NULL; u32 len; @@ -1553,8 +1555,8 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, MAJOR(n->dev), MINOR(n->dev), n->mode, - n->uid, - n->gid, + from_kuid(&init_user_ns, n->uid), + from_kgid(&init_user_ns, n->gid), MAJOR(n->rdev), MINOR(n->rdev)); } @@ -1632,10 +1634,15 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->ppid, context->pid, from_kuid(&init_user_ns, tsk->loginuid), - context->uid, - context->gid, - context->euid, context->suid, context->fsuid, - context->egid, context->sgid, context->fsgid, tty, + from_kuid(&init_user_ns, context->uid), + from_kgid(&init_user_ns, context->gid), + from_kuid(&init_user_ns, context->euid), + from_kuid(&init_user_ns, context->suid), + from_kuid(&init_user_ns, context->fsuid), + from_kgid(&init_user_ns, context->egid), + from_kgid(&init_user_ns, context->sgid), + from_kgid(&init_user_ns, context->fsgid), + tty, tsk->sessionid); @@ -2315,7 +2322,8 @@ int audit_set_loginuid(kuid_t loginuid) audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u" " old ses=%u new ses=%u", - task->pid, task_uid(task), + task->pid, + from_kuid(&init_user_ns, task_uid(task)), from_kuid(&init_user_ns, task->loginuid), from_kuid(&init_user_ns, loginuid), task->sessionid, sessionid); @@ -2540,7 +2548,7 @@ int __audit_signal_info(int sig, struct task_struct *t) struct audit_aux_data_pids *axp; struct task_struct *tsk = current; struct audit_context *ctx = tsk->audit_context; - uid_t uid = current_uid(), t_uid = task_uid(t); + kuid_t uid = current_uid(), t_uid = task_uid(t); if (audit_pid && t->tgid == audit_pid) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { @@ -2666,8 +2674,8 @@ void __audit_mmap_fd(int fd, int flags) static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) { - uid_t auid, uid; - gid_t gid; + kuid_t auid, uid; + kgid_t gid; unsigned int sessionid; auid = audit_get_loginuid(current); @@ -2675,7 +2683,10 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) current_uid_gid(&uid, &gid); audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", - auid, uid, gid, sessionid); + from_kuid(&init_user_ns, auid), + from_kuid(&init_user_ns, uid), + from_kgid(&init_user_ns, gid), + sessionid); audit_log_task_context(ab); audit_log_format(ab, " pid=%d comm=", current->pid); audit_log_untrustedstring(ab, current->comm); -- cgit v1.2.3-71-gd317 From 4bd6e32acec66c55c6c1af4672f3216b2ac88e35 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 17:56:49 -0800 Subject: userns: Convert taskstats to handle the user and pid namespaces. - Explicitly limit exit task stat broadcast to the initial user and pid namespaces, as it is already limited to the initial network namespace. - For broadcast task stats explicitly generate all of the idenitiers in terms of the initial user namespace and the initial pid namespace. - For request stats report them in terms of the current user namespace and the current pid namespace. Netlink messages are delivered syncrhonously to the kernel allowing us to get the user namespace and the pid namespace from the current task. - Pass the namespaces for representing pids and uids and gids into bacct_add_task. Cc: Balbir Singh Signed-off-by: Eric W. Biederman --- include/linux/tsacct_kern.h | 8 ++++++-- init/Kconfig | 1 - kernel/taskstats.c | 23 +++++++++++++++++------ kernel/tsacct.c | 12 +++++++----- 4 files changed, 30 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 7e50ac795b0b..44893e5ec8f7 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -10,9 +10,13 @@ #include #ifdef CONFIG_TASKSTATS -extern void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk); +extern void bacct_add_tsk(struct user_namespace *user_ns, + struct pid_namespace *pid_ns, + struct taskstats *stats, struct task_struct *tsk); #else -static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) +static inline void bacct_add_tsk(struct user_namespace *user_ns, + struct pid_namespace *pid_ns, + struct taskstats *stats, struct task_struct *tsk) {} #endif /* CONFIG_TASKSTATS */ diff --git a/init/Kconfig b/init/Kconfig index b5ecb4e75518..f0f636cf0ce7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -927,7 +927,6 @@ config UIDGID_CONVERTED # Features depends on IMA = n depends on EVM = n - depends on TASKSTATS = n depends on TRACING = n depends on FS_POSIX_ACL = n depends on QUOTA = n diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d0a32796550f..3880df2acf05 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -174,7 +175,9 @@ static void send_cpu_listeners(struct sk_buff *skb, up_write(&listeners->sem); } -static void fill_stats(struct task_struct *tsk, struct taskstats *stats) +static void fill_stats(struct user_namespace *user_ns, + struct pid_namespace *pid_ns, + struct task_struct *tsk, struct taskstats *stats) { memset(stats, 0, sizeof(*stats)); /* @@ -190,7 +193,7 @@ static void fill_stats(struct task_struct *tsk, struct taskstats *stats) stats->version = TASKSTATS_VERSION; stats->nvcsw = tsk->nvcsw; stats->nivcsw = tsk->nivcsw; - bacct_add_tsk(stats, tsk); + bacct_add_tsk(user_ns, pid_ns, stats, tsk); /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); @@ -207,7 +210,7 @@ static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) rcu_read_unlock(); if (!tsk) return -ESRCH; - fill_stats(tsk, stats); + fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats); put_task_struct(tsk); return 0; } @@ -291,6 +294,12 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; + if (current_user_ns() != &init_user_ns) + return -EINVAL; + + if (task_active_pid_ns(current) != &init_pid_ns) + return -EINVAL; + if (isadd == REGISTER) { for_each_cpu(cpu, mask) { s = kmalloc_node(sizeof(struct listener), @@ -631,11 +640,12 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) if (rc < 0) return; - stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, + task_pid_nr_ns(tsk, &init_pid_ns)); if (!stats) goto err; - fill_stats(tsk, stats); + fill_stats(&init_user_ns, &init_pid_ns, tsk, stats); /* * Doesn't matter if tsk is the leader or the last group member leaving @@ -643,7 +653,8 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) if (!is_thread_group || !group_dead) goto send; - stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, + task_tgid_nr_ns(tsk, &init_pid_ns)); if (!stats) goto err; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 23b4d784ebdd..625df0b44690 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -26,7 +26,9 @@ /* * fill in basic accounting fields */ -void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) +void bacct_add_tsk(struct user_namespace *user_ns, + struct pid_namespace *pid_ns, + struct taskstats *stats, struct task_struct *tsk) { const struct cred *tcred; struct timespec uptime, ts; @@ -55,13 +57,13 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; - stats->ac_pid = tsk->pid; + stats->ac_pid = task_pid_nr_ns(tsk, pid_ns); rcu_read_lock(); tcred = __task_cred(tsk); - stats->ac_uid = tcred->uid; - stats->ac_gid = tcred->gid; + stats->ac_uid = from_kuid_munged(user_ns, tcred->uid); + stats->ac_gid = from_kgid_munged(user_ns, tcred->gid); stats->ac_ppid = pid_alive(tsk) ? - rcu_dereference(tsk->real_parent)->tgid : 0; + task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0; rcu_read_unlock(); stats->ac_utime = cputime_to_usecs(tsk->utime); stats->ac_stime = cputime_to_usecs(tsk->stime); -- cgit v1.2.3-71-gd317 From f8f3d4de2d04e1a5b4293b67faee8ebabc64e9fa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:54:50 -0800 Subject: userns: Convert bsd process accounting to use kuid and kgid where appropriate BSD process accounting conveniently passes the file the accounting records will be written into to do_acct_process. The file credentials captured the user namespace of the opener of the file. Use the file credentials to format the uid and the gid of the current process into the user namespace of the user that started the bsd process accounting. Cc: Pavel Emelyanov Reviewed-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- init/Kconfig | 1 - kernel/acct.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/init/Kconfig b/init/Kconfig index f0f636cf0ce7..6de46ef12e3f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -931,7 +931,6 @@ config UIDGID_CONVERTED depends on FS_POSIX_ACL = n depends on QUOTA = n depends on QUOTACTL = n - depends on BSD_PROCESS_ACCT = n # Networking depends on NET_9P = n diff --git a/kernel/acct.c b/kernel/acct.c index 02e6167a53b0..6cd7529c9e6a 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -507,8 +507,8 @@ static void do_acct_process(struct bsd_acct_struct *acct, do_div(elapsed, AHZ); ac.ac_btime = get_seconds() - elapsed; /* we really need to bite the bullet and change layout */ - ac.ac_uid = orig_cred->uid; - ac.ac_gid = orig_cred->gid; + ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); + ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); #if ACCT_VERSION==2 ac.ac_ahz = AHZ; #endif -- cgit v1.2.3-71-gd317 From d20b92ab668cc44fc84bba0001839c5a8013a5cd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 13 Mar 2012 16:02:19 -0700 Subject: userns: Teach trace to use from_kuid - When tracing capture the kuid. - When displaying the data to user space convert the kuid into the user namespace of the process that opened the report file. Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Eric W. Biederman --- init/Kconfig | 1 - kernel/trace/trace.c | 3 ++- kernel/trace/trace.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/init/Kconfig b/init/Kconfig index 6de46ef12e3f..2a388e569a28 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -927,7 +927,6 @@ config UIDGID_CONVERTED # Features depends on IMA = n depends on EVM = n - depends on TRACING = n depends on FS_POSIX_ACL = n depends on QUOTA = n depends on QUOTACTL = n diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c38c81496ce..c9ace838d509 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2060,7 +2060,8 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "# -----------------\n"); seq_printf(m, "# | task: %.16s-%d " "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", - data->comm, data->pid, data->uid, data->nice, + data->comm, data->pid, + from_kuid_munged(seq_user_ns(m), data->uid), data->nice, data->policy, data->rt_priority); seq_puts(m, "# -----------------\n"); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 55e1f7f0db12..40a6f30c985f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -147,7 +147,7 @@ struct trace_array_cpu { unsigned long skipped_entries; cycle_t preempt_timestamp; pid_t pid; - uid_t uid; + kuid_t uid; char comm[TASK_COMM_LEN]; }; -- cgit v1.2.3-71-gd317 From f76d207a66c3a53defea67e7d36c3eb1b7d6d61d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 30 Aug 2012 01:24:05 -0700 Subject: userns: Add kprojid_t and associated infrastructure in projid.h Implement kprojid_t a cousin of the kuid_t and kgid_t. The per user namespace mapping of project id values can be set with /proc//projid_map. A full compliment of helpers is provided: make_kprojid, from_kprojid, from_kprojid_munged, kporjid_has_mapping, projid_valid, projid_eq, projid_eq, projid_lt. Project identifiers are part of the generic disk quota interface, although it appears only xfs implements project identifiers currently. The xfs code allows anyone who has permission to set the project identifier on a file to use any project identifier so when setting up the user namespace project identifier mappings I do not require a capability. Cc: Dave Chinner Cc: Jan Kara Signed-off-by: "Eric W. Biederman" --- fs/proc/base.c | 15 +++++ include/linux/projid.h | 104 +++++++++++++++++++++++++++++++++ include/linux/user_namespace.h | 3 + kernel/user.c | 8 +++ kernel/user_namespace.c | 128 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 include/linux/projid.h (limited to 'kernel') diff --git a/fs/proc/base.c b/fs/proc/base.c index 138cff4b05dd..acd1960c28a2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2991,6 +2991,11 @@ static int proc_gid_map_open(struct inode *inode, struct file *file) return proc_id_map_open(inode, file, &proc_gid_seq_operations); } +static int proc_projid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_projid_seq_operations); +} + static const struct file_operations proc_uid_map_operations = { .open = proc_uid_map_open, .write = proc_uid_map_write, @@ -3006,6 +3011,14 @@ static const struct file_operations proc_gid_map_operations = { .llseek = seq_lseek, .release = proc_id_map_release, }; + +static const struct file_operations proc_projid_map_operations = { + .open = proc_projid_map_open, + .write = proc_projid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, @@ -3113,6 +3126,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), + REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), #endif }; @@ -3476,6 +3490,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), + REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), #endif }; diff --git a/include/linux/projid.h b/include/linux/projid.h new file mode 100644 index 000000000000..36517b95be5c --- /dev/null +++ b/include/linux/projid.h @@ -0,0 +1,104 @@ +#ifndef _LINUX_PROJID_H +#define _LINUX_PROJID_H + +/* + * A set of types for the internal kernel types representing project ids. + * + * The types defined in this header allow distinguishing which project ids in + * the kernel are values used by userspace and which project id values are + * the internal kernel values. With the addition of user namespaces the values + * can be different. Using the type system makes it possible for the compiler + * to detect when we overlook these differences. + * + */ +#include + +struct user_namespace; +extern struct user_namespace init_user_ns; + +typedef __kernel_uid32_t projid_t; + +#ifdef CONFIG_UIDGID_STRICT_TYPE_CHECKS + +typedef struct { + projid_t val; +} kprojid_t; + +static inline projid_t __kprojid_val(kprojid_t projid) +{ + return projid.val; +} + +#define KPROJIDT_INIT(value) (kprojid_t){ value } + +#else + +typedef projid_t kprojid_t; + +static inline projid_t __kprojid_val(kprojid_t projid) +{ + return projid; +} + +#define KPROJIDT_INIT(value) ((kprojid_t) value ) + +#endif + +#define INVALID_PROJID KPROJIDT_INIT(-1) +#define OVERFLOW_PROJID 65534 + +static inline bool projid_eq(kprojid_t left, kprojid_t right) +{ + return __kprojid_val(left) == __kprojid_val(right); +} + +static inline bool projid_lt(kprojid_t left, kprojid_t right) +{ + return __kprojid_val(left) < __kprojid_val(right); +} + +static inline bool projid_valid(kprojid_t projid) +{ + return !projid_eq(projid, INVALID_PROJID); +} + +#ifdef CONFIG_USER_NS + +extern kprojid_t make_kprojid(struct user_namespace *from, projid_t projid); + +extern projid_t from_kprojid(struct user_namespace *to, kprojid_t projid); +extern projid_t from_kprojid_munged(struct user_namespace *to, kprojid_t projid); + +static inline bool kprojid_has_mapping(struct user_namespace *ns, kprojid_t projid) +{ + return from_kprojid(ns, projid) != (projid_t)-1; +} + +#else + +static inline kprojid_t make_kprojid(struct user_namespace *from, projid_t projid) +{ + return KPROJIDT_INIT(projid); +} + +static inline projid_t from_kprojid(struct user_namespace *to, kprojid_t kprojid) +{ + return __kprojid_val(kprojid); +} + +static inline projid_t from_kprojid_munged(struct user_namespace *to, kprojid_t kprojid) +{ + projid_t projid = from_kprojid(to, kprojid); + if (projid == (projid_t)-1) + projid = OVERFLOW_PROJID; + return projid; +} + +static inline bool kprojid_has_mapping(struct user_namespace *ns, kprojid_t projid) +{ + return true; +} + +#endif /* CONFIG_USER_NS */ + +#endif /* _LINUX_PROJID_H */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4e72922e5a75..95142cae446a 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -20,6 +20,7 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; + struct uid_gid_map projid_map; struct kref kref; struct user_namespace *parent; kuid_t owner; @@ -49,8 +50,10 @@ static inline void put_user_ns(struct user_namespace *ns) struct seq_operations; extern struct seq_operations proc_uid_seq_operations; extern struct seq_operations proc_gid_seq_operations; +extern struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) diff --git a/kernel/user.c b/kernel/user.c index b815fefbe76f..750acffbe9ec 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -38,6 +38,14 @@ struct user_namespace init_user_ns = { .count = 4294967295U, }, }, + .projid_map = { + .nr_extents = 1, + .extent[0] = { + .first = 0, + .lower_first = 0, + .count = 4294967295U, + }, + }, .kref = { .refcount = ATOMIC_INIT(3), }, diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86602316422d..456a6b9fba34 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -19,6 +19,7 @@ #include #include #include +#include static struct kmem_cache *user_ns_cachep __read_mostly; @@ -295,6 +296,75 @@ gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) } EXPORT_SYMBOL(from_kgid_munged); +/** + * make_kprojid - Map a user-namespace projid pair into a kprojid. + * @ns: User namespace that the projid is in + * @projid: Project identifier + * + * Maps a user-namespace uid pair into a kernel internal kuid, + * and returns that kuid. + * + * When there is no mapping defined for the user-namespace projid + * pair INVALID_PROJID is returned. Callers are expected to test + * for and handle handle INVALID_PROJID being returned. INVALID_PROJID + * may be tested for using projid_valid(). + */ +kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid) +{ + /* Map the uid to a global kernel uid */ + return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid)); +} +EXPORT_SYMBOL(make_kprojid); + +/** + * from_kprojid - Create a projid from a kprojid user-namespace pair. + * @targ: The user namespace we want a projid in. + * @kprojid: The kernel internal project identifier to start with. + * + * Map @kprojid into the user-namespace specified by @targ and + * return the resulting projid. + * + * There is always a mapping into the initial user_namespace. + * + * If @kprojid has no mapping in @targ (projid_t)-1 is returned. + */ +projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid) +{ + /* Map the uid from a global kernel uid */ + return map_id_up(&targ->projid_map, __kprojid_val(kprojid)); +} +EXPORT_SYMBOL(from_kprojid); + +/** + * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair. + * @targ: The user namespace we want a projid in. + * @kprojid: The kernel internal projid to start with. + * + * Map @kprojid into the user-namespace specified by @targ and + * return the resulting projid. + * + * There is always a mapping into the initial user_namespace. + * + * Unlike from_kprojid from_kprojid_munged never fails and always + * returns a valid projid. This makes from_kprojid_munged + * appropriate for use in syscalls like stat and where + * failing the system call and failing to provide a valid projid are + * not an options. + * + * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned. + */ +projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid) +{ + projid_t projid; + projid = from_kprojid(targ, kprojid); + + if (projid == (projid_t) -1) + projid = OVERFLOW_PROJID; + return projid; +} +EXPORT_SYMBOL(from_kprojid_munged); + + static int uid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; @@ -337,6 +407,27 @@ static int gid_m_show(struct seq_file *seq, void *v) return 0; } +static int projid_m_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + struct uid_gid_extent *extent = v; + struct user_namespace *lower_ns; + projid_t lower; + + lower_ns = seq_user_ns(seq); + if ((lower_ns == ns) && lower_ns->parent) + lower_ns = lower_ns->parent; + + lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first)); + + seq_printf(seq, "%10u %10u %10u\n", + extent->first, + lower, + extent->count); + + return 0; +} + static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) { struct uid_gid_extent *extent = NULL; @@ -362,6 +453,13 @@ static void *gid_m_start(struct seq_file *seq, loff_t *ppos) return m_start(seq, ppos, &ns->gid_map); } +static void *projid_m_start(struct seq_file *seq, loff_t *ppos) +{ + struct user_namespace *ns = seq->private; + + return m_start(seq, ppos, &ns->projid_map); +} + static void *m_next(struct seq_file *seq, void *v, loff_t *pos) { (*pos)++; @@ -387,6 +485,13 @@ struct seq_operations proc_gid_seq_operations = { .show = gid_m_show, }; +struct seq_operations proc_projid_seq_operations = { + .start = projid_m_start, + .stop = m_stop, + .next = m_next, + .show = projid_m_show, +}; + static DEFINE_MUTEX(id_map_mutex); static ssize_t map_write(struct file *file, const char __user *buf, @@ -434,7 +539,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, /* Require the appropriate privilege CAP_SETUID or CAP_SETGID * over the user namespace in order to set the id mapping. */ - if (!ns_capable(ns, cap_setid)) + if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid)) goto out; /* Get a buffer */ @@ -584,9 +689,30 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz &ns->gid_map, &ns->parent->gid_map); } +ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + struct user_namespace *seq_ns = seq_user_ns(seq); + + if (!ns->parent) + return -EPERM; + + if ((seq_ns != ns) && (seq_ns != ns->parent)) + return -EPERM; + + /* Anyone can set any valid project id no capability needed */ + return map_write(file, buf, size, ppos, -1, + &ns->projid_map, &ns->parent->projid_map); +} + static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { + /* Allow anyone to set a mapping that doesn't require privilege */ + if (!cap_valid(cap_setid)) + return true; + /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. */ -- cgit v1.2.3-71-gd317 From ea1abd6197d5805655da1bb589929762f4b4aa08 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 09:59:22 -0700 Subject: workqueue: reimplement idle worker rebinding Currently rebind_workers() uses rebinds idle workers synchronously before proceeding to requesting busy workers to rebind. This is necessary because all workers on @worker_pool->idle_list must be bound before concurrency management local wake-ups from the busy workers take place. Unfortunately, the synchronous idle rebinding is quite complicated. This patch reimplements idle rebinding to simplify the code path. Rather than trying to make all idle workers bound before rebinding busy workers, we simply remove all to-be-bound idle workers from the idle list and let them add themselves back after completing rebinding (successful or not). As only workers which finished rebinding can on on the idle worker list, the idle worker list is guaranteed to have only bound workers unless CPU went down again and local wake-ups are safe. After the change, @worker_pool->nr_idle may deviate than the actual number of idle workers on @worker_pool->idle_list. More specifically, nr_idle may be non-zero while ->idle_list is empty. All users of ->nr_idle and ->idle_list are audited. The only affected one is too_many_workers() which is updated to check %false if ->idle_list is empty regardless of ->nr_idle. After this patch, rebind_workers() no longer performs the nasty idle-rebind retries which require temporary release of gcwq->lock, and both unbinding and rebinding are atomic w.r.t. global_cwq->lock. worker->idle_rebind and global_cwq->rebind_hold are now unnecessary and removed along with the definition of struct idle_rebind. Changed from V1: 1) remove unlikely from too_many_workers(), ->idle_list can be empty anytime, even before this patch, no reason to use unlikely. 2) fix a small rebasing mistake. (which is from rebasing the orignal fixing patch to for-next) 3) add a lot of comments. 4) clear WORKER_REBIND unconditionaly in idle_worker_rebind() tj: Updated comments and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 141 ++++++++++++++++------------------------------------- 1 file changed, 42 insertions(+), 99 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 31d8a4586d4c..770c1a8128bf 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -126,7 +126,6 @@ enum { struct global_cwq; struct worker_pool; -struct idle_rebind; /* * The poor guys doing the actual heavy lifting. All on-duty workers @@ -150,7 +149,6 @@ struct worker { int id; /* I: worker id */ /* for rebinding worker to CPU */ - struct idle_rebind *idle_rebind; /* L: for idle worker */ struct work_struct rebind_work; /* L: for busy worker */ }; @@ -160,6 +158,8 @@ struct worker_pool { struct list_head worklist; /* L: list of pending works */ int nr_workers; /* L: total number of workers */ + + /* nr_idle includes the ones off idle_list for rebinding */ int nr_idle; /* L: currently idle ones */ struct list_head idle_list; /* X: list of idle workers */ @@ -186,8 +186,6 @@ struct global_cwq { struct worker_pool pools[NR_WORKER_POOLS]; /* normal and highpri pools */ - - wait_queue_head_t rebind_hold; /* rebind hold wait */ } ____cacheline_aligned_in_smp; /* @@ -687,6 +685,13 @@ static bool too_many_workers(struct worker_pool *pool) int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_busy = pool->nr_workers - nr_idle; + /* + * nr_idle and idle_list may disagree if idle rebinding is in + * progress. Never return %true if idle_list is empty. + */ + if (list_empty(&pool->idle_list)) + return false; + return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; } @@ -1611,37 +1616,26 @@ __acquires(&gcwq->lock) } } -struct idle_rebind { - int cnt; /* # workers to be rebound */ - struct completion done; /* all workers rebound */ -}; - /* - * Rebind an idle @worker to its CPU. During CPU onlining, this has to - * happen synchronously for idle workers. worker_thread() will test + * Rebind an idle @worker to its CPU. worker_thread() will test * %WORKER_REBIND before leaving idle and call this function. */ static void idle_worker_rebind(struct worker *worker) { struct global_cwq *gcwq = worker->pool->gcwq; - /* CPU must be online at this point */ - WARN_ON(!worker_maybe_bind_and_lock(worker)); - if (!--worker->idle_rebind->cnt) - complete(&worker->idle_rebind->done); - spin_unlock_irq(&worker->pool->gcwq->lock); - - /* we did our part, wait for rebind_workers() to finish up */ - wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); - /* - * rebind_workers() shouldn't finish until all workers passed the - * above WORKER_REBIND wait. Tell it when done. + * CPU may go down again inbetween. If rebinding fails, reinstate + * UNBOUND. We're off idle_list and nobody else can do it for us. */ - spin_lock_irq(&worker->pool->gcwq->lock); - if (!--worker->idle_rebind->cnt) - complete(&worker->idle_rebind->done); - spin_unlock_irq(&worker->pool->gcwq->lock); + if (!worker_maybe_bind_and_lock(worker)) + worker->flags |= WORKER_UNBOUND; + + worker_clr_flags(worker, WORKER_REBIND); + + /* rebind complete, become available again */ + list_add(&worker->entry, &worker->pool->idle_list); + spin_unlock_irq(&gcwq->lock); } /* @@ -1676,29 +1670,25 @@ static void busy_worker_rebind_fn(struct work_struct *work) * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding * is different for idle and busy ones. * - * The idle ones should be rebound synchronously and idle rebinding should - * be complete before any worker starts executing work items with - * concurrency management enabled; otherwise, scheduler may oops trying to - * wake up non-local idle worker from wq_worker_sleeping(). - * - * This is achieved by repeatedly requesting rebinding until all idle - * workers are known to have been rebound under @gcwq->lock and holding all - * idle workers from becoming busy until idle rebinding is complete. + * Idle ones will be removed from the idle_list and woken up. They will + * add themselves back after completing rebind. This ensures that the + * idle_list doesn't contain any unbound workers when re-bound busy workers + * try to perform local wake-ups for concurrency management. * - * Once idle workers are rebound, busy workers can be rebound as they - * finish executing their current work items. Queueing the rebind work at - * the head of their scheduled lists is enough. Note that nr_running will - * be properbly bumped as busy workers rebind. + * Busy workers can rebind after they finish their current work items. + * Queueing the rebind work item at the head of the scheduled list is + * enough. Note that nr_running will be properly bumped as busy workers + * rebind. * - * On return, all workers are guaranteed to either be bound or have rebind - * work item scheduled. + * On return, all non-manager workers are scheduled for rebind - see + * manage_workers() for the manager special case. Any idle worker + * including the manager will not appear on @idle_list until rebind is + * complete, making local wake-ups safe. */ static void rebind_workers(struct global_cwq *gcwq) - __releases(&gcwq->lock) __acquires(&gcwq->lock) { - struct idle_rebind idle_rebind; struct worker_pool *pool; - struct worker *worker; + struct worker *worker, *n; struct hlist_node *pos; int i; @@ -1707,46 +1697,29 @@ static void rebind_workers(struct global_cwq *gcwq) for_each_worker_pool(pool, gcwq) lockdep_assert_held(&pool->manager_mutex); - /* - * Rebind idle workers. Interlocked both ways. We wait for - * workers to rebind via @idle_rebind.done. Workers will wait for - * us to finish up by watching %WORKER_REBIND. - */ - init_completion(&idle_rebind.done); -retry: - idle_rebind.cnt = 1; - INIT_COMPLETION(idle_rebind.done); - - /* set REBIND and kick idle ones, we'll wait for these later */ + /* set REBIND and kick idle ones */ for_each_worker_pool(pool, gcwq) { - list_for_each_entry(worker, &pool->idle_list, entry) { + list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { unsigned long worker_flags = worker->flags; - if (worker->flags & WORKER_REBIND) - continue; - /* morph UNBOUND to REBIND atomically */ worker_flags &= ~WORKER_UNBOUND; worker_flags |= WORKER_REBIND; ACCESS_ONCE(worker->flags) = worker_flags; - idle_rebind.cnt++; - worker->idle_rebind = &idle_rebind; + /* + * idle workers should be off @pool->idle_list + * until rebind is complete to avoid receiving + * premature local wake-ups. + */ + list_del_init(&worker->entry); /* worker_thread() will call idle_worker_rebind() */ wake_up_process(worker->task); } } - if (--idle_rebind.cnt) { - spin_unlock_irq(&gcwq->lock); - wait_for_completion(&idle_rebind.done); - spin_lock_irq(&gcwq->lock); - /* busy ones might have become idle while waiting, retry */ - goto retry; - } - - /* all idle workers are rebound, rebind busy workers */ + /* rebind busy workers */ for_each_busy_worker(worker, i, pos, gcwq) { unsigned long worker_flags = worker->flags; struct work_struct *rebind_work = &worker->rebind_work; @@ -1776,34 +1749,6 @@ retry: worker->scheduled.next, work_color_to_flags(WORK_NO_COLOR)); } - - /* - * All idle workers are rebound and waiting for %WORKER_REBIND to - * be cleared inside idle_worker_rebind(). Clear and release. - * Clearing %WORKER_REBIND from this foreign context is safe - * because these workers are still guaranteed to be idle. - * - * We need to make sure all idle workers passed WORKER_REBIND wait - * in idle_worker_rebind() before returning; otherwise, workers can - * get stuck at the wait if hotplug cycle repeats. - */ - idle_rebind.cnt = 1; - INIT_COMPLETION(idle_rebind.done); - - for_each_worker_pool(pool, gcwq) { - list_for_each_entry(worker, &pool->idle_list, entry) { - worker->flags &= ~WORKER_REBIND; - idle_rebind.cnt++; - } - } - - wake_up_all(&gcwq->rebind_hold); - - if (--idle_rebind.cnt) { - spin_unlock_irq(&gcwq->lock); - wait_for_completion(&idle_rebind.done); - spin_lock_irq(&gcwq->lock); - } } static struct worker *alloc_worker(void) @@ -3916,8 +3861,6 @@ static int __init init_workqueues(void) mutex_init(&pool->manager_mutex); ida_init(&pool->worker_ida); } - - init_waitqueue_head(&gcwq->rebind_hold); } /* create the initial worker */ -- cgit v1.2.3-71-gd317 From eab6d82843ee1df244f8847d1bf8bb89160ec4aa Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 09:59:22 -0700 Subject: workqueue: WORKER_REBIND is no longer necessary for busy rebinding Because the old unbind/rebinding implementation wasn't atomic w.r.t. GCWQ_DISASSOCIATED manipulation which is protected by global_cwq->lock, we had to use two flags, WORKER_UNBOUND and WORKER_REBIND, to avoid incorrectly losing all NOT_RUNNING bits with back-to-back CPU hotplug operations; otherwise, completion of rebinding while another unbinding is in progress could clear UNBIND prematurely. Now that both unbind/rebinding are atomic w.r.t. GCWQ_DISASSOCIATED, there's no need to use two flags. Just one is enough. Don't use WORKER_REBIND for busy rebinding. tj: Updated description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 770c1a8128bf..794724efb733 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1649,16 +1649,8 @@ static void busy_worker_rebind_fn(struct work_struct *work) struct worker *worker = container_of(work, struct worker, rebind_work); struct global_cwq *gcwq = worker->pool->gcwq; - worker_maybe_bind_and_lock(worker); - - /* - * %WORKER_REBIND must be cleared even if the above binding failed; - * otherwise, we may confuse the next CPU_UP cycle or oops / get - * stuck by calling idle_worker_rebind() prematurely. If CPU went - * down again inbetween, %WORKER_UNBOUND would be set, so clearing - * %WORKER_REBIND is always safe. - */ - worker_clr_flags(worker, WORKER_REBIND); + if (worker_maybe_bind_and_lock(worker)) + worker_clr_flags(worker, WORKER_UNBOUND); spin_unlock_irq(&gcwq->lock); } @@ -1721,15 +1713,9 @@ static void rebind_workers(struct global_cwq *gcwq) /* rebind busy workers */ for_each_busy_worker(worker, i, pos, gcwq) { - unsigned long worker_flags = worker->flags; struct work_struct *rebind_work = &worker->rebind_work; struct workqueue_struct *wq; - /* morph UNBOUND to REBIND atomically */ - worker_flags &= ~WORKER_UNBOUND; - worker_flags |= WORKER_REBIND; - ACCESS_ONCE(worker->flags) = worker_flags; - if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(rebind_work))) continue; -- cgit v1.2.3-71-gd317 From 5f7dabfd5cb115937afb4649e4c73b02f927f6ae Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 09:59:23 -0700 Subject: workqueue: WORKER_REBIND is no longer necessary for idle rebinding Now both worker destruction and idle rebinding remove the worker from idle list while it's still idle, so list_empty(&worker->entry) can be used to test whether either is pending and WORKER_DIE to distinguish between the two instead making WORKER_REBIND unnecessary. Use list_empty(&worker->entry) to determine whether destruction or rebinding is pending. This simplifies worker state transitions. WORKER_REBIND is not needed anymore. Remove it. tj: Updated comments and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 794724efb733..cdc6bfc84b78 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -73,11 +73,10 @@ enum { WORKER_DIE = 1 << 1, /* die die die */ WORKER_IDLE = 1 << 2, /* is idle */ WORKER_PREP = 1 << 3, /* preparing to run works */ - WORKER_REBIND = 1 << 5, /* mom is home, come back */ WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ WORKER_UNBOUND = 1 << 7, /* worker is unbound */ - WORKER_NOT_RUNNING = WORKER_PREP | WORKER_REBIND | WORKER_UNBOUND | + WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | WORKER_CPU_INTENSIVE, NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ @@ -1618,20 +1617,15 @@ __acquires(&gcwq->lock) /* * Rebind an idle @worker to its CPU. worker_thread() will test - * %WORKER_REBIND before leaving idle and call this function. + * list_empty(@worker->entry) before leaving idle and call this function. */ static void idle_worker_rebind(struct worker *worker) { struct global_cwq *gcwq = worker->pool->gcwq; - /* - * CPU may go down again inbetween. If rebinding fails, reinstate - * UNBOUND. We're off idle_list and nobody else can do it for us. - */ - if (!worker_maybe_bind_and_lock(worker)) - worker->flags |= WORKER_UNBOUND; - - worker_clr_flags(worker, WORKER_REBIND); + /* CPU may go down again inbetween, clear UNBOUND only on success */ + if (worker_maybe_bind_and_lock(worker)) + worker_clr_flags(worker, WORKER_UNBOUND); /* rebind complete, become available again */ list_add(&worker->entry, &worker->pool->idle_list); @@ -1689,16 +1683,9 @@ static void rebind_workers(struct global_cwq *gcwq) for_each_worker_pool(pool, gcwq) lockdep_assert_held(&pool->manager_mutex); - /* set REBIND and kick idle ones */ + /* dequeue and kick idle ones */ for_each_worker_pool(pool, gcwq) { list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { - unsigned long worker_flags = worker->flags; - - /* morph UNBOUND to REBIND atomically */ - worker_flags &= ~WORKER_UNBOUND; - worker_flags |= WORKER_REBIND; - ACCESS_ONCE(worker->flags) = worker_flags; - /* * idle workers should be off @pool->idle_list * until rebind is complete to avoid receiving @@ -1706,7 +1693,10 @@ static void rebind_workers(struct global_cwq *gcwq) */ list_del_init(&worker->entry); - /* worker_thread() will call idle_worker_rebind() */ + /* + * worker_thread() will see the above dequeuing + * and call idle_worker_rebind(). + */ wake_up_process(worker->task); } } @@ -2176,7 +2166,7 @@ __acquires(&gcwq->lock) * necessary to avoid spurious warnings from rescuers servicing the * unbound or a disassociated gcwq. */ - WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) && + WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && !(gcwq->flags & GCWQ_DISASSOCIATED) && raw_smp_processor_id() != gcwq->cpu); @@ -2300,18 +2290,17 @@ static int worker_thread(void *__worker) woke_up: spin_lock_irq(&gcwq->lock); - /* - * DIE can be set only while idle and REBIND set while busy has - * @worker->rebind_work scheduled. Checking here is enough. - */ - if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) { + /* we are off idle list if destruction or rebind is requested */ + if (unlikely(list_empty(&worker->entry))) { spin_unlock_irq(&gcwq->lock); + /* if DIE is set, destruction is requested */ if (worker->flags & WORKER_DIE) { worker->task->flags &= ~PF_WQ_WORKER; return 0; } + /* otherwise, rebind */ idle_worker_rebind(worker); goto woke_up; } -- cgit v1.2.3-71-gd317 From b2eb83d123c1cc9f96a8e452b26a6ebe631b3ad7 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 09:59:23 -0700 Subject: workqueue: rename manager_mutex to assoc_mutex Now that manager_mutex's role has changed from synchronizing manager role to excluding hotplug against manager, the name is misleading. As it is protecting the CPU-association of the gcwq now, rename it to assoc_mutex. This patch is pure rename and doesn't introduce any functional change. tj: Updated comments and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cdc6bfc84b78..e651239f1ece 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -58,7 +58,7 @@ enum { * be executing on any CPU. The gcwq behaves as an unbound one. * * Note that DISASSOCIATED can be flipped only while holding - * managership of all pools on the gcwq to avoid changing binding + * assoc_mutex of all pools on the gcwq to avoid changing binding * state while create_worker() is in progress. */ GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */ @@ -165,7 +165,7 @@ struct worker_pool { struct timer_list idle_timer; /* L: worker idle timeout */ struct timer_list mayday_timer; /* L: SOS timer for workers */ - struct mutex manager_mutex; /* mutex manager should hold */ + struct mutex assoc_mutex; /* protect GCWQ_DISASSOCIATED */ struct ida worker_ida; /* L: for worker IDs */ }; @@ -1681,7 +1681,7 @@ static void rebind_workers(struct global_cwq *gcwq) lockdep_assert_held(&gcwq->lock); for_each_worker_pool(pool, gcwq) - lockdep_assert_held(&pool->manager_mutex); + lockdep_assert_held(&pool->assoc_mutex); /* dequeue and kick idle ones */ for_each_worker_pool(pool, gcwq) { @@ -2081,22 +2081,22 @@ static bool manage_workers(struct worker *worker) * grab %POOL_MANAGING_WORKERS to achieve this because that can * lead to idle worker depletion (all become busy thinking someone * else is managing) which in turn can result in deadlock under - * extreme circumstances. Use @pool->manager_mutex to synchronize + * extreme circumstances. Use @pool->assoc_mutex to synchronize * manager against CPU hotplug. * - * manager_mutex would always be free unless CPU hotplug is in + * assoc_mutex would always be free unless CPU hotplug is in * progress. trylock first without dropping @gcwq->lock. */ - if (unlikely(!mutex_trylock(&pool->manager_mutex))) { + if (unlikely(!mutex_trylock(&pool->assoc_mutex))) { spin_unlock_irq(&pool->gcwq->lock); - mutex_lock(&pool->manager_mutex); + mutex_lock(&pool->assoc_mutex); /* * CPU hotplug could have happened while we were waiting - * for manager_mutex. Hotplug itself can't handle us + * for assoc_mutex. Hotplug itself can't handle us * because manager isn't either on idle or busy list, and * @gcwq's state and ours could have deviated. * - * As hotplug is now excluded via manager_mutex, we can + * As hotplug is now excluded via assoc_mutex, we can * simply try to bind. It will succeed or fail depending * on @gcwq's current state. Try it and adjust * %WORKER_UNBOUND accordingly. @@ -2119,7 +2119,7 @@ static bool manage_workers(struct worker *worker) ret |= maybe_create_worker(pool); pool->flags &= ~POOL_MANAGING_WORKERS; - mutex_unlock(&pool->manager_mutex); + mutex_unlock(&pool->assoc_mutex); return ret; } @@ -3474,23 +3474,23 @@ EXPORT_SYMBOL_GPL(work_busy); */ /* claim manager positions of all pools */ -static void gcwq_claim_management_and_lock(struct global_cwq *gcwq) +static void gcwq_claim_assoc_and_lock(struct global_cwq *gcwq) { struct worker_pool *pool; for_each_worker_pool(pool, gcwq) - mutex_lock_nested(&pool->manager_mutex, pool - gcwq->pools); + mutex_lock_nested(&pool->assoc_mutex, pool - gcwq->pools); spin_lock_irq(&gcwq->lock); } /* release manager positions */ -static void gcwq_release_management_and_unlock(struct global_cwq *gcwq) +static void gcwq_release_assoc_and_unlock(struct global_cwq *gcwq) { struct worker_pool *pool; spin_unlock_irq(&gcwq->lock); for_each_worker_pool(pool, gcwq) - mutex_unlock(&pool->manager_mutex); + mutex_unlock(&pool->assoc_mutex); } static void gcwq_unbind_fn(struct work_struct *work) @@ -3503,7 +3503,7 @@ static void gcwq_unbind_fn(struct work_struct *work) BUG_ON(gcwq->cpu != smp_processor_id()); - gcwq_claim_management_and_lock(gcwq); + gcwq_claim_assoc_and_lock(gcwq); /* * We've claimed all manager positions. Make all workers unbound @@ -3520,7 +3520,7 @@ static void gcwq_unbind_fn(struct work_struct *work) gcwq->flags |= GCWQ_DISASSOCIATED; - gcwq_release_management_and_unlock(gcwq); + gcwq_release_assoc_and_unlock(gcwq); /* * Call schedule() so that we cross rq->lock and thus can guarantee @@ -3576,10 +3576,10 @@ static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: - gcwq_claim_management_and_lock(gcwq); + gcwq_claim_assoc_and_lock(gcwq); gcwq->flags &= ~GCWQ_DISASSOCIATED; rebind_workers(gcwq); - gcwq_release_management_and_unlock(gcwq); + gcwq_release_assoc_and_unlock(gcwq); break; } return NOTIFY_OK; @@ -3833,7 +3833,7 @@ static int __init init_workqueues(void) setup_timer(&pool->mayday_timer, gcwq_mayday_timeout, (unsigned long)pool); - mutex_init(&pool->manager_mutex); + mutex_init(&pool->assoc_mutex); ida_init(&pool->worker_ida); } } -- cgit v1.2.3-71-gd317 From 9fdf9b73d61c87a9c16f101bb8bbe069d13046f5 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 09:59:23 -0700 Subject: workqueue: use __cpuinit instead of __devinit for cpu callbacks For workqueue hotplug callbacks, it makes less sense to use __devinit which discards the memory after boot if !HOTPLUG. __cpuinit, which discards the memory after boot if !HOTPLUG_CPU fits better. tj: Updated description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e651239f1ece..942bb750a650 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3548,7 +3548,7 @@ static void gcwq_unbind_fn(struct work_struct *work) * Workqueues should be brought up before normal priority CPU notifiers. * This will be registered high priority CPU notifier. */ -static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, +static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -3589,7 +3589,7 @@ static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, * Workqueues should be brought down after normal priority CPU notifiers. * This will be registered as low priority CPU notifier. */ -static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, +static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { -- cgit v1.2.3-71-gd317 From a5b4e57d7cc07cb28ccf16de0876a4770ae84920 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 09:59:23 -0700 Subject: workqueue: use hotcpu_notifier() for workqueue_cpu_down_callback() workqueue_cpu_down_callback() is used only if HOTPLUG_CPU=y, so hotcpu_notifier() fits better than cpu_notifier(). When HOTPLUG_CPU=y, hotcpu_notifier() and cpu_notifier() are the same. When HOTPLUG_CPU=n, if we use cpu_notifier(), workqueue_cpu_down_callback() will be called during boot to do nothing, and the memory of workqueue_cpu_down_callback() and gcwq_unbind_fn() will be discarded after boot. If we use hotcpu_notifier(), we can avoid the no-op call of workqueue_cpu_down_callback() and the memory of workqueue_cpu_down_callback() and gcwq_unbind_fn() will be discard at build time: $ ls -l kernel/workqueue.o.cpu_notifier kernel/workqueue.o.hotcpu_notifier -rw-rw-r-- 1 laijs laijs 484080 Sep 15 11:31 kernel/workqueue.o.cpu_notifier -rw-rw-r-- 1 laijs laijs 478240 Sep 15 11:31 kernel/workqueue.o.hotcpu_notifier $ size kernel/workqueue.o.cpu_notifier kernel/workqueue.o.hotcpu_notifier text data bss dec hex filename 18513 2387 1221 22121 5669 kernel/workqueue.o.cpu_notifier 18082 2355 1221 21658 549a kernel/workqueue.o.hotcpu_notifier tj: Updated description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 942bb750a650..48becaba1c94 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3807,7 +3807,7 @@ static int __init init_workqueues(void) WORK_CPU_LAST); cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); - cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); + hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); /* initialize gcwqs */ for_each_gcwq_cpu(cpu) { -- cgit v1.2.3-71-gd317 From 3aa62497594430ea522050b75c033f71f2c60ee6 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 10:40:00 -0700 Subject: workqueue: fix possible stall on try_to_grab_pending() of a delayed work item Currently, when try_to_grab_pending() grabs a delayed work item, it leaves its linked work items alone on the delayed_works. The linked work items are always NO_COLOR and will cause future cwq_activate_first_delayed() increase cwq->nr_active incorrectly, and may cause the whole cwq to stall. For example, state: cwq->max_active = 1, cwq->nr_active = 1 one work in cwq->pool, many in cwq->delayed_works. step1: try_to_grab_pending() removes a work item from delayed_works but leaves its NO_COLOR linked work items on it. step2: Later on, cwq_activate_first_delayed() activates the linked work item increasing ->nr_active. step3: cwq->nr_active = 1, but all activated work items of the cwq are NO_COLOR. When they finish, cwq->nr_active will not be decreased due to NO_COLOR, and no further work items will be activated from cwq->delayed_works. the cwq stalls. Fix it by ensuring the target work item is activated before stealing PENDING in try_to_grab_pending(). This ensures that all the linked work items are activated without incorrectly bumping cwq->nr_active. tj: Updated comment and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo Cc: stable@kernel.org --- kernel/workqueue.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 48becaba1c94..d2fe8e77ceb7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -977,10 +977,9 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, *nextp = n; } -static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +static void cwq_activate_delayed_work(struct work_struct *work) { - struct work_struct *work = list_first_entry(&cwq->delayed_works, - struct work_struct, entry); + struct cpu_workqueue_struct *cwq = get_work_cwq(work); trace_workqueue_activate_work(work); move_linked_works(work, &cwq->pool->worklist, NULL); @@ -988,6 +987,14 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) cwq->nr_active++; } +static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +{ + struct work_struct *work = list_first_entry(&cwq->delayed_works, + struct work_struct, entry); + + cwq_activate_delayed_work(work); +} + /** * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest @@ -1106,6 +1113,18 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, smp_rmb(); if (gcwq == get_work_gcwq(work)) { debug_work_deactivate(work); + + /* + * A delayed work item cannot be grabbed directly + * because it might have linked NO_COLOR work items + * which, if left on the delayed_list, will confuse + * cwq->nr_active management later on and cause + * stall. Make sure the work item is activated + * before grabbing. + */ + if (*work_data_bits(work) & WORK_STRUCT_DELAYED) + cwq_activate_delayed_work(work); + list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work), -- cgit v1.2.3-71-gd317 From b3f9f405a21a29c06c31fb2d6ab36ef9ba7c027b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 18 Sep 2012 10:40:00 -0700 Subject: workqueue: remove @delayed from cwq_dec_nr_in_flight() @delayed is now always false for all callers, remove it. tj: Updated description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d2fe8e77ceb7..3e324aae3c98 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -999,7 +999,6 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest * @color: color of work which left the queue - * @delayed: for a delayed work * * A work either has completed or is removed from pending queue, * decrement nr_in_flight of its cwq and handle workqueue flushing. @@ -1007,8 +1006,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * CONTEXT: * spin_lock_irq(gcwq->lock). */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, - bool delayed) +static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) @@ -1016,13 +1014,11 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, cwq->nr_in_flight[color]--; - if (!delayed) { - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { - /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); - } + cwq->nr_active--; + if (!list_empty(&cwq->delayed_works)) { + /* one down, submit a delayed one */ + if (cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); } /* is flush in progress and are we at the flushing tip? */ @@ -1127,8 +1123,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work), - *work_data_bits(work) & WORK_STRUCT_DELAYED); + get_work_color(work)); spin_unlock(&gcwq->lock); return 1; @@ -2264,7 +2259,7 @@ __acquires(&gcwq->lock) hlist_del_init(&worker->hentry); worker->current_work = NULL; worker->current_cwq = NULL; - cwq_dec_nr_in_flight(cwq, work_color, false); + cwq_dec_nr_in_flight(cwq, work_color); } /** -- cgit v1.2.3-71-gd317 From 9f4bd4cddbb50d7617353102e10ce511c5ef6df2 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 19 Sep 2012 10:40:48 -0700 Subject: workqueue: introduce cwq_set_max_active() helper for thaw_workqueues() Using a helper instead of open code makes thaw_workqueues() clearer. The helper will also be used by the next patch. tj: Slight update to comment and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3e324aae3c98..b5d722548ffd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3366,6 +3366,26 @@ void destroy_workqueue(struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(destroy_workqueue); +/** + * cwq_set_max_active - adjust max_active of a cwq + * @cwq: target cpu_workqueue_struct + * @max_active: new max_active value. + * + * Set @cwq->max_active to @max_active and activate delayed works if + * increased. + * + * CONTEXT: + * spin_lock_irq(gcwq->lock). + */ +static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active) +{ + cwq->max_active = max_active; + + while (!list_empty(&cwq->delayed_works) && + cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); +} + /** * workqueue_set_max_active - adjust max_active of a workqueue * @wq: target workqueue @@ -3792,11 +3812,7 @@ void thaw_workqueues(void) continue; /* restore max_active and repopulate worklist */ - cwq->max_active = wq->saved_max_active; - - while (!list_empty(&cwq->delayed_works) && - cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + cwq_set_max_active(cwq, wq->saved_max_active); } for_each_worker_pool(pool, gcwq) -- cgit v1.2.3-71-gd317 From 70369b117a8fc5ac18a635ced23ee49f8e722e7b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 19 Sep 2012 10:40:48 -0700 Subject: workqueue: use cwq_set_max_active() helper for workqueue_set_max_active() workqueue_set_max_active() may increase ->max_active without activating delayed works and may make the activation order differ from the queueing order. Both aren't strictly bugs but the resulting behavior could be a bit odd. To make things more consistent, use cwq_set_max_active() helper which immediately makes use of the newly increased max_mactive if there are delayed work items and also keeps the activation order. tj: Slight update to description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b5d722548ffd..4f5c61f8b0e7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3413,7 +3413,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) if (!(wq->flags & WQ_FREEZABLE) || !(gcwq->flags & GCWQ_FREEZING)) - get_cwq(gcwq->cpu, wq)->max_active = max_active; + cwq_set_max_active(get_cwq(gcwq->cpu, wq), max_active); spin_unlock_irq(&gcwq->lock); } -- cgit v1.2.3-71-gd317 From 7c6e72e46c9ea4a88f3f8ba96edce9db4bd48726 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Sep 2012 10:03:19 -0700 Subject: workqueue: remove spurious WARN_ON_ONCE(in_irq()) from try_to_grab_pending() e0aecdd874 ("workqueue: use irqsafe timer for delayed_work") made try_to_grab_pending() safe to use from irq context but forgot to remove WARN_ON_ONCE(in_irq()). Remove it. Signed-off-by: Tejun Heo Reported-by: Fengguang Wu --- kernel/workqueue.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4f5c61f8b0e7..143fd8c751f4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1070,8 +1070,6 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, { struct global_cwq *gcwq; - WARN_ON_ONCE(in_irq()); - local_irq_save(*flags); /* try to steal the timer if it exists */ -- cgit v1.2.3-71-gd317 From 59a93c27c4892f04dfd8f91f8b64d0d6eae43e6e Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Thu, 9 Aug 2012 00:37:27 -0700 Subject: alarmtimer: Implement minimum alarm interval for allowing suspend alarmtimer suspend return -EBUSY if the next alarm will fire in less than 2 seconds. This allows one RTC seconds tick to occur subsequent to this check before the alarm wakeup time is set, ensuring the wakeup time is still in the future (assuming the RTC does not tick one more second prior to setting the alarm). If suspend is rejected due to an imminent alarm, hold a wakeup source for 2 seconds to process the alarm prior to reattempting suspend. If setting the alarm incurs an -ETIME for an alarm set in the past, or any other problem setting the alarm, abort suspend and hold a wakelock for 1 second while the alarm is allowed to be serviced or other hopefully transient conditions preventing the alarm clear up. Signed-off-by: Todd Poynor Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index aa27d391bfc8..54e7145c5414 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -46,6 +46,8 @@ static struct alarm_base { static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); +static struct wakeup_source *ws; + #ifdef CONFIG_RTC_CLASS /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; @@ -250,6 +252,7 @@ static int alarmtimer_suspend(struct device *dev) unsigned long flags; struct rtc_device *rtc; int i; + int ret; spin_lock_irqsave(&freezer_delta_lock, flags); min = freezer_delta; @@ -279,8 +282,10 @@ static int alarmtimer_suspend(struct device *dev) if (min.tv64 == 0) return 0; - /* XXX - Should we enforce a minimum sleep time? */ - WARN_ON(min.tv64 < NSEC_PER_SEC); + if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { + __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); + return -EBUSY; + } /* Setup an rtc timer to fire that far in the future */ rtc_timer_cancel(rtc, &rtctimer); @@ -288,9 +293,11 @@ static int alarmtimer_suspend(struct device *dev) now = rtc_tm_to_ktime(tm); now = ktime_add(now, min); - rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); - - return 0; + /* Set alarm, if in the past reject suspend briefly to handle */ + ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); + if (ret < 0) + __pm_wakeup_event(ws, MSEC_PER_SEC); + return ret; } #else static int alarmtimer_suspend(struct device *dev) @@ -821,6 +828,7 @@ static int __init alarmtimer_init(void) error = PTR_ERR(pdev); goto out_drv; } + ws = wakeup_source_register("alarmtimer"); return 0; out_drv: -- cgit v1.2.3-71-gd317 From dae373be9fec6f850159a05af3a1c36236a70d43 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 13 Sep 2012 19:12:16 -0400 Subject: alarmtimer: Use hrtimer per-alarm instead of per-base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arve Hjønnevåg reported numerous crashes from the "BUG_ON(timer->state != HRTIMER_STATE_CALLBACK)" check in __run_hrtimer after it called alarmtimer_fired. It ends up the alarmtimer code was not properly handling possible failures of hrtimer_try_to_cancel, and because these faulres occur when the underlying base hrtimer is being run, this limits the ability to properly handle modifications to any alarmtimers on that base. Because much of the logic duplicates the hrtimer logic, it seems that we might as well have a per-alarmtimer hrtimer, and avoid the extra complextity of trying to multiplex many alarmtimers off of one hrtimer. Thus this patch moves the hrtimer to the alarm structure and simplifies the management logic. Changelog: v2: * Includes a fix for double alarm_start calls found by Arve Cc: Arve Hjønnevåg Cc: Colin Cross Cc: Thomas Gleixner Reported-by: Arve Hjønnevåg Tested-by: Arve Hjønnevåg Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 3 +- kernel/time/alarmtimer.c | 94 +++++++++++++--------------------------------- 2 files changed, 29 insertions(+), 68 deletions(-) (limited to 'kernel') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 96c5c249b086..f122c9fbf8c7 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -35,6 +35,7 @@ enum alarmtimer_restart { */ struct alarm { struct timerqueue_node node; + struct hrtimer timer; enum alarmtimer_restart (*function)(struct alarm *, ktime_t now); enum alarmtimer_type type; int state; @@ -43,7 +44,7 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); -void alarm_start(struct alarm *alarm, ktime_t start); +int alarm_start(struct alarm *alarm, ktime_t start); int alarm_try_to_cancel(struct alarm *alarm); int alarm_cancel(struct alarm *alarm); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 54e7145c5414..b1560ebe759f 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -37,7 +37,6 @@ static struct alarm_base { spinlock_t lock; struct timerqueue_head timerqueue; - struct hrtimer timer; ktime_t (*gettime)(void); clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; @@ -132,21 +131,17 @@ static inline void alarmtimer_rtc_timer_init(void) { } * @base: pointer to the base where the timer is being run * @alarm: pointer to alarm being enqueued. * - * Adds alarm to a alarm_base timerqueue and if necessary sets - * an hrtimer to run. + * Adds alarm to a alarm_base timerqueue * * Must hold base->lock when calling. */ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) { + if (alarm->state & ALARMTIMER_STATE_ENQUEUED) + timerqueue_del(&base->timerqueue, &alarm->node); + timerqueue_add(&base->timerqueue, &alarm->node); alarm->state |= ALARMTIMER_STATE_ENQUEUED; - - if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { - hrtimer_try_to_cancel(&base->timer); - hrtimer_start(&base->timer, alarm->node.expires, - HRTIMER_MODE_ABS); - } } /** @@ -154,28 +149,17 @@ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) * @base: pointer to the base where the timer is running * @alarm: pointer to alarm being removed * - * Removes alarm to a alarm_base timerqueue and if necessary sets - * a new timer to run. + * Removes alarm to a alarm_base timerqueue * * Must hold base->lock when calling. */ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) { - struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); - if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) return; timerqueue_del(&base->timerqueue, &alarm->node); alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; - - if (next == &alarm->node) { - hrtimer_try_to_cancel(&base->timer); - next = timerqueue_getnext(&base->timerqueue); - if (!next) - return; - hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS); - } } @@ -190,42 +174,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) */ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) { - struct alarm_base *base = container_of(timer, struct alarm_base, timer); - struct timerqueue_node *next; + struct alarm *alarm = container_of(timer, struct alarm, timer); + struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; - ktime_t now; int ret = HRTIMER_NORESTART; int restart = ALARMTIMER_NORESTART; spin_lock_irqsave(&base->lock, flags); - now = base->gettime(); - while ((next = timerqueue_getnext(&base->timerqueue))) { - struct alarm *alarm; - ktime_t expired = next->expires; - - if (expired.tv64 > now.tv64) - break; - - alarm = container_of(next, struct alarm, node); - - timerqueue_del(&base->timerqueue, &alarm->node); - alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; - - alarm->state |= ALARMTIMER_STATE_CALLBACK; - spin_unlock_irqrestore(&base->lock, flags); - if (alarm->function) - restart = alarm->function(alarm, now); - spin_lock_irqsave(&base->lock, flags); - alarm->state &= ~ALARMTIMER_STATE_CALLBACK; + alarmtimer_remove(base, alarm); + spin_unlock_irqrestore(&base->lock, flags); - if (restart != ALARMTIMER_NORESTART) { - timerqueue_add(&base->timerqueue, &alarm->node); - alarm->state |= ALARMTIMER_STATE_ENQUEUED; - } - } + if (alarm->function) + restart = alarm->function(alarm, base->gettime()); - if (next) { - hrtimer_set_expires(&base->timer, next->expires); + spin_lock_irqsave(&base->lock, flags); + if (restart != ALARMTIMER_NORESTART) { + hrtimer_set_expires(&alarm->timer, alarm->node.expires); + alarmtimer_enqueue(base, alarm); ret = HRTIMER_RESTART; } spin_unlock_irqrestore(&base->lock, flags); @@ -331,6 +296,9 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { timerqueue_init(&alarm->node); + hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, + HRTIMER_MODE_ABS); + alarm->timer.function = alarmtimer_fired; alarm->function = function; alarm->type = type; alarm->state = ALARMTIMER_STATE_INACTIVE; @@ -341,17 +309,19 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, * @alarm: ptr to alarm to set * @start: time to run the alarm */ -void alarm_start(struct alarm *alarm, ktime_t start) +int alarm_start(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; + int ret; spin_lock_irqsave(&base->lock, flags); - if (alarmtimer_active(alarm)) - alarmtimer_remove(base, alarm); alarm->node.expires = start; alarmtimer_enqueue(base, alarm); + ret = hrtimer_start(&alarm->timer, alarm->node.expires, + HRTIMER_MODE_ABS); spin_unlock_irqrestore(&base->lock, flags); + return ret; } /** @@ -365,18 +335,12 @@ int alarm_try_to_cancel(struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; - int ret = -1; - spin_lock_irqsave(&base->lock, flags); - - if (alarmtimer_callback_running(alarm)) - goto out; + int ret; - if (alarmtimer_is_queued(alarm)) { + spin_lock_irqsave(&base->lock, flags); + ret = hrtimer_try_to_cancel(&alarm->timer); + if (ret >= 0) alarmtimer_remove(base, alarm); - ret = 1; - } else - ret = 0; -out: spin_unlock_irqrestore(&base->lock, flags); return ret; } @@ -809,10 +773,6 @@ static int __init alarmtimer_init(void) for (i = 0; i < ALARM_NUMTYPE; i++) { timerqueue_init_head(&alarm_bases[i].timerqueue); spin_lock_init(&alarm_bases[i].lock); - hrtimer_init(&alarm_bases[i].timer, - alarm_bases[i].base_clockid, - HRTIMER_MODE_ABS); - alarm_bases[i].timer.function = alarmtimer_fired; } error = alarmtimer_rtc_interface_setup(); -- cgit v1.2.3-71-gd317 From a65bcc12ad74b3efe78847945a1e36cfcbcbc4e6 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 13 Sep 2012 19:25:22 -0400 Subject: alarmtimer: Rename alarmtimer_remove to alarmtimer_dequeue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that alarmtimer_remove has been simplified, change its name to _dequeue to better match its paired _enqueue function. Cc: Arve Hjønnevåg Cc: Colin Cross Cc: Thomas Gleixner Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index b1560ebe759f..f11d83b12949 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -145,7 +145,7 @@ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) } /** - * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue + * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue * @base: pointer to the base where the timer is running * @alarm: pointer to alarm being removed * @@ -153,7 +153,7 @@ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) * * Must hold base->lock when calling. */ -static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) +static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm) { if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) return; @@ -181,7 +181,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) int restart = ALARMTIMER_NORESTART; spin_lock_irqsave(&base->lock, flags); - alarmtimer_remove(base, alarm); + alarmtimer_dequeue(base, alarm); spin_unlock_irqrestore(&base->lock, flags); if (alarm->function) @@ -340,7 +340,7 @@ int alarm_try_to_cancel(struct alarm *alarm) spin_lock_irqsave(&base->lock, flags); ret = hrtimer_try_to_cancel(&alarm->timer); if (ret >= 0) - alarmtimer_remove(base, alarm); + alarmtimer_dequeue(base, alarm); spin_unlock_irqrestore(&base->lock, flags); return ret; } -- cgit v1.2.3-71-gd317 From b3c869d35b9b014f63ac0beacd31c57372084d01 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 4 Sep 2012 12:42:27 -0400 Subject: jiffies: Remove compile time assumptions about CLOCK_TICK_RATE CLOCK_TICK_RATE is used to accurately caclulate exactly how a tick will be at a given HZ. This is useful, because while we'd expect NSEC_PER_SEC/HZ, the underlying hardware will have some granularity limit, so we won't be able to have exactly HZ ticks per second. This slight error can cause timekeeping quality problems when using the jiffies or other jiffies driven clocksources. Thus we currently use compile time CLOCK_TICK_RATE value to generate SHIFTED_HZ and NSEC_PER_JIFFIES, which we then use to adjust the jiffies clocksource to correct this error. Unfortunately though, since CLOCK_TICK_RATE is a compile time value, and the jiffies clocksource is registered very early during boot, there are a number of cases where there are different possible hardware timers that have different tick rates. This causes problems in cases like ARM where there are numerous different types of hardware, each having their own compile-time CLOCK_TICK_RATE, making it hard to accurately support different hardware with a single kernel. For the most part, this doesn't matter all that much, as not too many systems actually utilize the jiffies or jiffies driven clocksource. Usually there are other highres clocksources who's granularity error is negligable. Even so, we have some complicated calcualtions that we do everywhere to handle these edge cases. This patch removes the compile time SHIFTED_HZ value, and introduces a register_refined_jiffies() function. This results in the default jiffies clock as being assumed a perfect HZ freq, and allows archtectures that care about jiffies accuracy to call register_refined_jiffies() with the tick rate, specified dynamically at boot. This allows us, where necessary, to not have a compile time CLOCK_TICK_RATE constant, simplifies the jiffies code, and still provides a way to have an accurate jiffies clock. NOTE: Since this patch does not add register_refinied_jiffies() calls for every arch, it may cause time quality regressions in some cases. Its likely these will not be noticable, but if they are an issue, adding the following to the end of setup_arch() should resolve the regression: register_refinied_jiffies(CLOCK_TICK_RATE) Cc: Catalin Marinas Cc: Arnd Bergmann Cc: Richard Cochran Cc: Prarit Bhargava Cc: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/kernel/setup.c | 3 +++ include/linux/jiffies.h | 15 ++------------- kernel/time/jiffies.c | 32 +++++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80e1b95..4062f15bfbdd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -68,6 +68,7 @@ #include #include #include +#include #include