From d5de4ddb1bc430289bede76c0d87cabee93f749a Mon Sep 17 00:00:00 2001 From: Tomasz Buchert Date: Wed, 27 Oct 2010 15:33:32 -0700 Subject: cgroup_freezer: unnecessary test in cgroup_freezing_or_frozen() The root freezer_state is always CGROUP_THAWED so we can remove the special case from the code. The test itself can be handy and is extracted to static function. Signed-off-by: Tomasz Buchert Cc: Matt Helsley Cc: Paul Menage Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index ce71ed53e88f..321e2a007d25 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -48,20 +48,19 @@ static inline struct freezer *task_freezer(struct task_struct *task) struct freezer, css); } -int cgroup_freezing_or_frozen(struct task_struct *task) +static inline int __cgroup_freezing_or_frozen(struct task_struct *task) { - struct freezer *freezer; - enum freezer_state state; + enum freezer_state state = task_freezer(task)->state; + return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); +} +int cgroup_freezing_or_frozen(struct task_struct *task) +{ + int result; task_lock(task); - freezer = task_freezer(task); - if (!freezer->css.cgroup->parent) - state = CGROUP_THAWED; /* root cgroup can't be frozen */ - else - state = freezer->state; + result = __cgroup_freezing_or_frozen(task); task_unlock(task); - - return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); + return result; } /* -- cgit v1.2.3-71-gd317 From 0bdba580ab052a21e3eda2764ed22d9ee962392b Mon Sep 17 00:00:00 2001 From: Tomasz Buchert Date: Wed, 27 Oct 2010 15:33:33 -0700 Subject: cgroup_freezer: fix can_attach() to prohibit moving from/to freezing/frozen cgroups It is possible to move a task from its cgroup even if this group is 'FREEZING'. This results in a nasty bug - the moved task will become frozen OUTSIDE its original cgroup and will remain in a permanent 'D' state. This patch allows to migrate the task only between THAWED cgroups. This behavior was observed and easily reproduced on a single core laptop. Notice that reproducibility depends highly on the machine used. Program and instructions how to reproduce the bug can be fetched from: http://pentium.hopto.org/~thinred/repos/linux-misc/freezer_bug.c Signed-off-by: Tomasz Buchert Cc: Matt Helsley Cc: Paul Menage Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 321e2a007d25..c287627bed3d 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -173,24 +173,25 @@ static int freezer_can_attach(struct cgroup_subsys *ss, /* * Anything frozen can't move or be moved to/from. - * - * Since orig_freezer->state == FROZEN means that @task has been - * frozen, so it's sufficient to check the latter condition. */ - if (is_task_frozen_enough(task)) + freezer = cgroup_freezer(new_cgroup); + if (freezer->state != CGROUP_THAWED) return -EBUSY; - freezer = cgroup_freezer(new_cgroup); - if (freezer->state == CGROUP_FROZEN) + rcu_read_lock(); + if (__cgroup_freezing_or_frozen(task)) { + rcu_read_unlock(); return -EBUSY; + } + rcu_read_unlock(); if (threadgroup) { struct task_struct *c; rcu_read_lock(); list_for_each_entry_rcu(c, &task->thread_group, thread_group) { - if (is_task_frozen_enough(c)) { + if (__cgroup_freezing_or_frozen(c)) { rcu_read_unlock(); return -EBUSY; } -- cgit v1.2.3-71-gd317 From 2d3cbf8bc852ac1bc3d098186143c5973f87b753 Mon Sep 17 00:00:00 2001 From: Tomasz Buchert Date: Wed, 27 Oct 2010 15:33:34 -0700 Subject: cgroup_freezer: update_freezer_state() does incorrect state transitions There are 4 state transitions possible for a freezer. Only FREEZING -> FROZEN transaction is done lazily. This patch allows update_freezer_state only to perform this transaction and renames the function to update_if_frozen. Moreover is_task_frozen_enough function is removed and its every occurence is replaced with frozen(). Therefore for a group to become FROZEN every task must be frozen. The previous version could trigger a following bug: When cgroup is in the process of freezing (but none of its tasks are frozen yet), update_freezer_state() (called from freezer_read or freezer_write) would incorrectly report that a group is 'THAWED' (because nfrozen = 0), allowing the transaction FREEZING -> THAWED without writing anything to 'freezer.state'. This is incorrect according to the documentation. This could result in a 'THAWED' cgroup with frozen tasks inside. A code to reproduce this bug is available here: http://pentium.hopto.org/~thinred/repos/linux-misc/freezer_bug2.c [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Tomasz Buchert Cc: Matt Helsley Cc: Paul Menage Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index c287627bed3d..e7bebb7c6c38 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -153,13 +153,6 @@ static void freezer_destroy(struct cgroup_subsys *ss, kfree(cgroup_freezer(cgroup)); } -/* Task is frozen or will freeze immediately when next it gets woken */ -static bool is_task_frozen_enough(struct task_struct *task) -{ - return frozen(task) || - (task_is_stopped_or_traced(task) && freezing(task)); -} - /* * The call to cgroup_lock() in the freezer.state write method prevents * a write to that file racing against an attach, and hence the @@ -236,31 +229,30 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) /* * caller must hold freezer->lock */ -static void update_freezer_state(struct cgroup *cgroup, +static void update_if_frozen(struct cgroup *cgroup, struct freezer *freezer) { struct cgroup_iter it; struct task_struct *task; unsigned int nfrozen = 0, ntotal = 0; + enum freezer_state old_state = freezer->state; cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { ntotal++; - if (is_task_frozen_enough(task)) + if (frozen(task)) nfrozen++; } - /* - * Transition to FROZEN when no new tasks can be added ensures - * that we never exist in the FROZEN state while there are unfrozen - * tasks. - */ - if (nfrozen == ntotal) - freezer->state = CGROUP_FROZEN; - else if (nfrozen > 0) - freezer->state = CGROUP_FREEZING; - else - freezer->state = CGROUP_THAWED; + if (old_state == CGROUP_THAWED) { + BUG_ON(nfrozen > 0); + } else if (old_state == CGROUP_FREEZING) { + if (nfrozen == ntotal) + freezer->state = CGROUP_FROZEN; + } else { /* old_state == CGROUP_FROZEN */ + BUG_ON(nfrozen != ntotal); + } + cgroup_iter_end(cgroup, &it); } @@ -279,7 +271,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft, if (state == CGROUP_FREEZING) { /* We change from FREEZING to FROZEN lazily if the cgroup was * only partially frozen when we exitted write. */ - update_freezer_state(cgroup, freezer); + update_if_frozen(cgroup, freezer); state = freezer->state; } spin_unlock_irq(&freezer->lock); @@ -301,7 +293,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) while ((task = cgroup_iter_next(cgroup, &it))) { if (!freeze_task(task, true)) continue; - if (is_task_frozen_enough(task)) + if (frozen(task)) continue; if (!freezing(task) && !freezer_should_skip(task)) num_cant_freeze_now++; @@ -335,7 +327,7 @@ static int freezer_change_state(struct cgroup *cgroup, spin_lock_irq(&freezer->lock); - update_freezer_state(cgroup, freezer); + update_if_frozen(cgroup, freezer); if (goal_state == freezer->state) goto out; -- cgit v1.2.3-71-gd317 From 97978e6d1f2da0073416870410459694fbdbfd9b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:35 -0700 Subject: cgroup: add clone_children control file The ns_cgroup is a control group interacting with the namespaces. When a new namespace is created, a corresponding cgroup is automatically created too. The cgroup name is the pid of the process who did 'unshare' or the child of 'clone'. This cgroup is tied with the namespace because it prevents a process to escape the control group and use the post_clone callback, so the child cgroup inherits the values of the parent cgroup. Unfortunately, the more we use this cgroup and the more we are facing problems with it: (1) when a process unshares, the cgroup name may conflict with a previous cgroup with the same pid, so unshare or clone return -EEXIST (2) the cgroup creation is out of control because there may have an application creating several namespaces where the system will automatically create several cgroups in his back and let them on the cgroupfs (eg. a vrf based on the network namespace). (3) the mix of (1) and (2) force an administrator to regularly check and clean these cgroups. This patchset removes the ns_cgroup by adding a new flag to the cgroup and the cgroupfs mount option. It enables the copy of the parent cgroup when a child cgroup is created. We can then safely remove the ns_cgroup as this flag brings a compatibility. We have now to manually create and add the task to a cgroup, which is consistent with the cgroup framework. This patch: Sent as an answer to a previous thread around the ns_cgroup. https://lists.linux-foundation.org/pipermail/containers/2009-June/018627.html It adds a control file 'clone_children' for a cgroup. This control file is a boolean specifying if the child cgroup should be a clone of the parent cgroup or not. The default value is 'false'. This flag makes the child cgroup to call the post_clone callback of all the subsystem, if it is available. At present, the cpuset is the only one which had implemented the post_clone callback. The option can be set at mount time by specifying the 'clone_children' mount option. Signed-off-by: Daniel Lezcano Signed-off-by: Serge E. Hallyn Cc: Eric W. Biederman Acked-by: Paul Menage Reviewed-by: Li Zefan Cc: Jamal Hadi Salim Cc: Matt Helsley Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 14 ++++++++++++-- include/linux/cgroup.h | 4 ++++ kernel/cgroup.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index b34823ff1646..190018b0c649 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -18,7 +18,8 @@ CONTENTS: 1.2 Why are cgroups needed ? 1.3 How are cgroups implemented ? 1.4 What does notify_on_release do ? - 1.5 How do I use cgroups ? + 1.5 What does clone_children do ? + 1.6 How do I use cgroups ? 2. Usage Examples and Syntax 2.1 Basic Usage 2.2 Attaching processes @@ -293,7 +294,16 @@ notify_on_release in the root cgroup at system boot is disabled value of their parents notify_on_release setting. The default value of a cgroup hierarchy's release_agent path is empty. -1.5 How do I use cgroups ? +1.5 What does clone_children do ? +--------------------------------- + +If the clone_children flag is enabled (1) in a cgroup, then all +cgroups created beneath will call the post_clone callbacks for each +subsystem of the newly created cgroup. Usually when this callback is +implemented for a subsystem, it copies the values of the parent +subsystem, this is the case for the cpuset. + +1.6 How do I use cgroups ? -------------------------- To start a new job that is to be contained within a cgroup, using diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 709dfb901d11..ed4ba111bc8d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -154,6 +154,10 @@ enum { * A thread in rmdir() is wating for this cgroup. */ CGRP_WAIT_ON_RMDIR, + /* + * Clone cgroup values when creating a new child cgroup + */ + CGRP_CLONE_CHILDREN, }; /* which pidlist file are we talking about? */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9270d532ec3c..4b218a46ddd3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } +static int clone_children(const struct cgroup *cgrp) +{ + return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); +} + /* * for_each_subsys() allows you to iterate on each subsystem attached to * an active hierarchy @@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noprefix"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); + if (clone_children(&root->top_cgroup)) + seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); mutex_unlock(&cgroup_mutex); @@ -1050,6 +1057,7 @@ struct cgroup_sb_opts { unsigned long subsys_bits; unsigned long flags; char *release_agent; + bool clone_children; char *name; /* User explicitly requested empty subsystem */ bool none; @@ -1097,6 +1105,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) opts->none = true; } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); + } else if (!strcmp(token, "clone_children")) { + opts->clone_children = true; } else if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) @@ -1355,6 +1365,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) strcpy(root->release_agent_path, opts->release_agent); if (opts->name) strcpy(root->name, opts->name); + if (opts->clone_children) + set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); return root; } @@ -3173,6 +3185,23 @@ fail: return ret; } +static u64 cgroup_clone_children_read(struct cgroup *cgrp, + struct cftype *cft) +{ + return clone_children(cgrp); +} + +static int cgroup_clone_children_write(struct cgroup *cgrp, + struct cftype *cft, + u64 val) +{ + if (val) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + else + clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + return 0; +} + /* * for the common functions, 'private' gives the type of file */ @@ -3203,6 +3232,11 @@ static struct cftype files[] = { .write_string = cgroup_write_event_control, .mode = S_IWUGO, }, + { + .name = "cgroup.clone_children", + .read_u64 = cgroup_clone_children_read, + .write_u64 = cgroup_clone_children_write, + }, }; static struct cftype cft_release_agent = { @@ -3332,6 +3366,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + if (clone_children(parent)) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); @@ -3346,6 +3383,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, goto err_destroy; } /* At error, ->destroy() callback has to free assigned ID. */ + if (clone_children(parent) && ss->post_clone) + ss->post_clone(ss, cgrp); } cgroup_lock_hierarchy(root); -- cgit v1.2.3-71-gd317 From 32a8cf235e2f192eb002755076994525cdbaa35a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:37 -0700 Subject: cgroup: make the mount options parsing more accurate Current behavior: ================= (1) When we mount a cgroup, we can specify the 'all' option which means to enable all the cgroup subsystems. This is the default option when no option is specified. (2) If we want to mount a cgroup with a subset of the supported cgroup subsystems, we have to specify a subsystems name list for the mount option. (3) If we specify another option like 'noprefix' or 'release_agent', the actual code wants the 'all' or a subsystem name option specified also. Not critical but a bit not friendly as we should assume (1) in this case. (4) Logically, the 'all' option is mutually exclusive with a subsystem name, but this is not detected. In other words: succeed : mount -t cgroup -o all,freezer cgroup /cgroup => is it 'all' or 'freezer' ? fails : mount -t cgroup -o noprefix cgroup /cgroup => succeed if we do '-o noprefix,all' The following patches consolidate a bit the mount options check. New behavior: ============= (1) untouched (2) untouched (3) the 'all' option will be by default when specifying other than a subsystem name option (4) raises an error In other words: fails : mount -t cgroup -o all,freezer cgroup /cgroup succeed : mount -t cgroup -o noprefix cgroup /cgroup For the sake of lisibility, the if ... then ... else ... if ... indentation when parsing the options has been changed to: if ... then ... continue fi Signed-off-by: Daniel Lezcano Signed-off-by: Serge E. Hallyn Reviewed-by: Li Zefan Reviewed-by: Paul Menage Cc: Eric W. Biederman Cc: Jamal Hadi Salim Cc: Matt Helsley Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 90 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4b218a46ddd3..3e6517e51fd3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1074,7 +1074,8 @@ struct cgroup_sb_opts { */ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { - char *token, *o = data ?: "all"; + char *token, *o = data; + bool all_ss = false, one_ss = false; unsigned long mask = (unsigned long)-1; int i; bool module_pin_failed = false; @@ -1090,24 +1091,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) while ((token = strsep(&o, ",")) != NULL) { if (!*token) return -EINVAL; - if (!strcmp(token, "all")) { - /* Add all non-disabled subsystems */ - opts->subsys_bits = 0; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup_subsys *ss = subsys[i]; - if (ss == NULL) - continue; - if (!ss->disabled) - opts->subsys_bits |= 1ul << i; - } - } else if (!strcmp(token, "none")) { + if (!strcmp(token, "none")) { /* Explicitly have no subsystems */ opts->none = true; - } else if (!strcmp(token, "noprefix")) { + continue; + } + if (!strcmp(token, "all")) { + /* Mutually exclusive option 'all' + subsystem name */ + if (one_ss) + return -EINVAL; + all_ss = true; + continue; + } + if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); - } else if (!strcmp(token, "clone_children")) { + continue; + } + if (!strcmp(token, "clone_children")) { opts->clone_children = true; - } else if (!strncmp(token, "release_agent=", 14)) { + continue; + } + if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) return -EINVAL; @@ -1115,7 +1119,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); if (!opts->release_agent) return -ENOMEM; - } else if (!strncmp(token, "name=", 5)) { + continue; + } + if (!strncmp(token, "name=", 5)) { const char *name = token + 5; /* Can't specify an empty name */ if (!strlen(name)) @@ -1137,20 +1143,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) GFP_KERNEL); if (!opts->name) return -ENOMEM; - } else { - struct cgroup_subsys *ss; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - ss = subsys[i]; - if (ss == NULL) - continue; - if (!strcmp(token, ss->name)) { - if (!ss->disabled) - set_bit(i, &opts->subsys_bits); - break; - } - } - if (i == CGROUP_SUBSYS_COUNT) - return -ENOENT; + + continue; + } + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss == NULL) + continue; + if (strcmp(token, ss->name)) + continue; + if (ss->disabled) + continue; + + /* Mutually exclusive option 'all' + subsystem name */ + if (all_ss) + return -EINVAL; + set_bit(i, &opts->subsys_bits); + one_ss = true; + + break; + } + if (i == CGROUP_SUBSYS_COUNT) + return -ENOENT; + } + + /* + * If the 'all' option was specified select all the subsystems, + * otherwise 'all, 'none' and a subsystem name options were not + * specified, let's default to 'all' + */ + if (all_ss || (!all_ss && !one_ss && !opts->none)) { + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss == NULL) + continue; + if (ss->disabled) + continue; + set_bit(i, &opts->subsys_bits); } } -- cgit v1.2.3-71-gd317 From f4a2589feaef0a9b737a3e582b37ee96695bb25f Mon Sep 17 00:00:00 2001 From: Evgeny Kuznetsov Date: Wed, 27 Oct 2010 15:33:37 -0700 Subject: cgroups: add check for strcpy destination string overflow Function "strcpy" is used without check for maximum allowed source string length and could cause destination string overflow. Check for string length is added before using "strcpy". Function now is return error if source string length is more than a maximum. akpm: presently considered NotABug, but add the check for general future-safeness and robustness. Signed-off-by: Evgeny Kuznetsov Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3e6517e51fd3..5cf366965d0c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1922,6 +1922,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, const char *buffer) { BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + if (strlen(buffer) >= PATH_MAX) + return -EINVAL; if (!cgroup_lock_live_group(cgrp)) return -ENODEV; strcpy(cgrp->root->release_agent_path, buffer); -- cgit v1.2.3-71-gd317 From 45531757b45cae0ce64c5aff08c2534d5a0fa3e7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:38 -0700 Subject: cgroup: notify ns_cgroup deprecated The ns_cgroup will be removed very soon. Let's warn, for this version, ns_cgroup is deprecated. Make ns_cgroup and clone_children exclusive. If the clone_children is set and the ns_cgroup is mounted, let's fail with EINVAL when the ns_cgroup subsys is created (a printk will help the user to understand why the creation fails). Update the feature remove schedule file with the deprecated ns_cgroup. Signed-off-by: Daniel Lezcano Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 17 +++++++++++++++++ kernel/ns_cgroup.c | 8 ++++++++ 2 files changed, 25 insertions(+) (limited to 'kernel') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index d2af87ba96e1..f3da8c0a3af2 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -526,6 +526,23 @@ Who: FUJITA Tomonori ---------------------------- +What: namespace cgroup (ns_cgroup) +When: 2.6.38 +Why: The ns_cgroup leads to some problems: + * cgroup creation is out-of-control + * cgroup name can conflict when pids are looping + * it is not possible to have a single process handling + a lot of namespaces without falling in a exponential creation time + * we may want to create a namespace without creating a cgroup + + The ns_cgroup is replaced by a compatibility flag 'clone_children', + where a newly created cgroup will copy the parent cgroup values. + The userspace has to manually create a cgroup and add a task to + the 'tasks' file. +Who: Daniel Lezcano + +---------------------------- + What: iwlwifi disable_hw_scan module parameters When: 2.6.40 Why: Hareware scan is the prefer method for iwlwifi devices for diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 2a5dfec8efe0..2c98ad94ba0e 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c @@ -85,6 +85,14 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, return ERR_PTR(-EPERM); if (!cgroup_is_descendant(cgroup, current)) return ERR_PTR(-EPERM); + if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) { + printk("ns_cgroup can't be created with parent " + "'clone_children' set.\n"); + return ERR_PTR(-EINVAL); + } + + printk_once("ns_cgroup deprecated: consider using the " + "'clone_children' flag without the ns_cgroup.\n"); ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); if (!ns_cgroup) -- cgit v1.2.3-71-gd317 From c4b5ed250eebf854d40f27b43362c80f115cb57a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:44 -0700 Subject: ptrace: annotate lock context change on exit_ptrace() exit_ptrace() releases and regrabs tasklist_lock but was missing proper annotation. Add it. Signed-off-by: Namhyung Kim Acked-by: Roland McGrath Cc: Ingo Molnar Cc: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f34d798ef4a2..4afd9b86cc0b 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -329,6 +329,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data) * and reacquire the lock. */ void exit_ptrace(struct task_struct *tracer) + __releases(&tasklist_lock) + __acquires(&tasklist_lock) { struct task_struct *p, *n; LIST_HEAD(ptrace_dead); -- cgit v1.2.3-71-gd317 From 4abf986960ecda6a87fc2f795aacf888a2f0127e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:45 -0700 Subject: ptrace: change signature of sys_ptrace() and friends Since userspace API of ptrace syscall defines @addr and @data as void pointers, it would be more appropriate to define them as unsigned long in kernel. Therefore related functions are changed also. 'unsigned long' is typically used in other places in kernel as an opaque data type and that using this helps cleaning up a lot of warnings from sparse. Suggested-by: Arnd Bergmann Signed-off-by: Namhyung Kim Acked-by: Arnd Bergmann Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 9 ++++++--- include/linux/syscalls.h | 3 ++- kernel/ptrace.c | 16 ++++++++++------ 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 4272521e29e9..67a4cd77c352 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -108,7 +108,8 @@ extern int ptrace_attach(struct task_struct *tsk); extern int ptrace_detach(struct task_struct *, unsigned int); extern void ptrace_disable(struct task_struct *); extern int ptrace_check_attach(struct task_struct *task, int kill); -extern int ptrace_request(struct task_struct *child, long request, long addr, long data); +extern int ptrace_request(struct task_struct *child, long request, + unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); @@ -132,8 +133,10 @@ static inline void ptrace_unlink(struct task_struct *child) __ptrace_unlink(child); } -int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data); -int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); +int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, + unsigned long data); +int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, + unsigned long data); /** * task_ptrace - return %PT_* flags that apply to a task diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e6319d18a55d..cacc27a0e285 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -701,7 +701,8 @@ asmlinkage long sys_nfsservctl(int cmd, asmlinkage long sys_syslog(int type, char __user *buf, int len); asmlinkage long sys_uselib(const char __user *library); asmlinkage long sys_ni_syscall(void); -asmlinkage long sys_ptrace(long request, long pid, long addr, long data); +asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, + unsigned long data); asmlinkage long sys_add_key(const char __user *_type, const char __user *_description, diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4afd9b86cc0b..06981a8b271b 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -404,7 +404,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds return copied; } -static int ptrace_setoptions(struct task_struct *child, long data) +static int ptrace_setoptions(struct task_struct *child, unsigned long data) { child->ptrace &= ~PT_TRACE_MASK; @@ -483,7 +483,8 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) #define is_sysemu_singlestep(request) 0 #endif -static int ptrace_resume(struct task_struct *child, long request, long data) +static int ptrace_resume(struct task_struct *child, long request, + unsigned long data) { if (!valid_signal(data)) return -EIO; @@ -560,7 +561,7 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, #endif int ptrace_request(struct task_struct *child, long request, - long addr, long data) + unsigned long addr, unsigned long data) { int ret = -EIO; siginfo_t siginfo; @@ -693,7 +694,8 @@ static struct task_struct *ptrace_get_task_struct(pid_t pid) #define arch_ptrace_attach(child) do { } while (0) #endif -SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) +SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, + unsigned long, data) { struct task_struct *child; long ret; @@ -734,7 +736,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) return ret; } -int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) +int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, + unsigned long data) { unsigned long tmp; int copied; @@ -745,7 +748,8 @@ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) return put_user(tmp, (unsigned long __user *)data); } -int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) +int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, + unsigned long data) { int copied; -- cgit v1.2.3-71-gd317 From 9fed81dc40f5a1ac2783bcc78d4029873be72894 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:46 -0700 Subject: ptrace: cleanup ptrace_request() Use new 'datavp' and 'datalp' variables to remove unnecesary castings. Signed-off-by: Namhyung Kim Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 06981a8b271b..ea7ce0215cd1 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -565,6 +565,8 @@ int ptrace_request(struct task_struct *child, long request, { int ret = -EIO; siginfo_t siginfo; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = datavp; switch (request) { case PTRACE_PEEKTEXT: @@ -581,19 +583,17 @@ int ptrace_request(struct task_struct *child, long request, ret = ptrace_setoptions(child, data); break; case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, (unsigned long __user *) data); + ret = put_user(child->ptrace_message, datalp); break; case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) - ret = copy_siginfo_to_user((siginfo_t __user *) data, - &siginfo); + ret = copy_siginfo_to_user(datavp, &siginfo); break; case PTRACE_SETSIGINFO: - if (copy_from_user(&siginfo, (siginfo_t __user *) data, - sizeof siginfo)) + if (copy_from_user(&siginfo, datavp, sizeof siginfo)) ret = -EFAULT; else ret = ptrace_setsiginfo(child, &siginfo); @@ -624,7 +624,7 @@ int ptrace_request(struct task_struct *child, long request, } mmput(mm); - ret = put_user(tmp, (unsigned long __user *) data); + ret = put_user(tmp, datalp); break; } #endif @@ -653,7 +653,7 @@ int ptrace_request(struct task_struct *child, long request, case PTRACE_SETREGSET: { struct iovec kiov; - struct iovec __user *uiov = (struct iovec __user *) data; + struct iovec __user *uiov = datavp; if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) return -EFAULT; -- cgit v1.2.3-71-gd317 From b8ed374e202e23caaf9bd77dcadc9de6447faaa8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:06 -0700 Subject: signals: annotate lock_task_sighand() lock_task_sighand() grabs sighand->siglock in case of returning non-NULL but unlock_task_sighand() releases it unconditionally. This leads sparse to complain about the lock context imbalance. Rename and wrap lock_task_sighand() using __cond_lock() macro to make sparse happy. Suggested-by: Eric Dumazet Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 ++++++++- kernel/signal.c | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 393ce94e54b7..3ff5c8519abd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2236,9 +2236,16 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk, +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags); +#define lock_task_sighand(tsk, flags) \ +({ struct sighand_struct *__ss; \ + __cond_lock(&(tsk)->sighand->siglock, \ + (__ss = __lock_task_sighand(tsk, flags))); \ + __ss; \ +}) \ + static inline void unlock_task_sighand(struct task_struct *tsk, unsigned long *flags) { diff --git a/kernel/signal.c b/kernel/signal.c index 919562c3d6b7..e921409b85a9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p) return count; } -struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) +struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) { struct sighand_struct *sighand; -- cgit v1.2.3-71-gd317 From b84011508360d6885a9d95a235ec77d56f133377 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:07 -0700 Subject: signals: annotate lock context change on ptrace_stop() ptrace_stop() releases and regrabs current->sighand->siglock but was missing proper annotation. Add it. Signed-off-by: Namhyung Kim Acked-by: Roland McGrath Cc: Ingo Molnar Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index e921409b85a9..4e3cff10fdce 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1618,6 +1618,8 @@ static int sigkill_pending(struct task_struct *tsk) * is gone, we keep current->exit_code unless clear_code. */ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) + __releases(¤t->sighand->siglock) + __acquires(¤t->sighand->siglock) { if (arch_ptrace_stop_needed(exit_code, info)) { /* -- cgit v1.2.3-71-gd317 From 9b1bf12d5d51bca178dea21b04a0805e29d60cf1 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 27 Oct 2010 15:34:08 -0700 Subject: signals: move cred_guard_mutex from task_struct to signal_struct Oleg Nesterov pointed out we have to prevent multiple-threads-inside-exec itself and we can reuse ->cred_guard_mutex for it. Yes, concurrent execve() has no worth. Let's move ->cred_guard_mutex from task_struct to signal_struct. It naturally prevent multiple-threads-inside-exec. Signed-off-by: KOSAKI Motohiro Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- fs/proc/base.c | 8 ++++---- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 7 ++++--- include/linux/tracehook.h | 2 +- kernel/cred.c | 4 +--- kernel/fork.c | 2 ++ kernel/ptrace.c | 4 ++-- 8 files changed, 21 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/fs/exec.c b/fs/exec.c index 3aa75b8888a1..9722909c4d88 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1083,14 +1083,14 @@ EXPORT_SYMBOL(setup_new_exec); */ int prepare_bprm_creds(struct linux_binprm *bprm) { - if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) return -ERESTARTNOINTR; bprm->cred = prepare_exec_creds(); if (likely(bprm->cred)) return 0; - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); return -ENOMEM; } @@ -1098,7 +1098,7 @@ void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } kfree(bprm); @@ -1119,13 +1119,13 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. */ security_bprm_committed_creds(bprm); - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_guard_mutex to protect against + * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b094c1c8465..f3d02ca461ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; - if (mutex_lock_killable(&task->cred_guard_mutex)) + if (mutex_lock_killable(&task->signal->cred_guard_mutex)) return NULL; mm = get_task_mm(task); @@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) mmput(mm); mm = NULL; } - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); return mm; } @@ -2354,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out_free; /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->cred_guard_mutex); + length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); if (length < 0) goto out_free; length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out_free: free_page((unsigned long) page); out: diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2fea6c8ef6ba..1f8c06ce0fa6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -29,6 +29,8 @@ extern struct fs_struct init_fs; .running = 0, \ .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ }, \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ } extern struct nsproxy init_nsproxy; @@ -145,8 +147,6 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .cred_guard_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ff5c8519abd..be7adb7588e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -626,6 +626,10 @@ struct signal_struct { int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ }; /* Context switch must be unlocked if interrupts are to be enabled */ @@ -1305,9 +1309,6 @@ struct task_struct { * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 10db0102a890..3a2e66d88a32 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -150,7 +150,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) * * Return %LSM_UNSAFE_* bits applied to an exec because of tracing. * - * @task->cred_guard_mutex is held by the caller through the do_execve(). + * @task->signal->cred_guard_mutex is held by the caller through the do_execve(). */ static inline int tracehook_unsafe_exec(struct task_struct *task) { diff --git a/kernel/cred.c b/kernel/cred.c index 9a3e22641fe7..6a1aa004e376 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds); /* * Prepare credentials for current to perform an execve() - * - The caller must hold current->cred_guard_mutex + * - The caller must hold ->cred_guard_mutex */ struct cred *prepare_exec_creds(void) { @@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; - mutex_init(&p->cred_guard_mutex); - if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/fork.c b/kernel/fork.c index e87aaaaf5131..3b159c5991b7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -908,6 +908,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; + mutex_init(&sig->cred_guard_mutex); + return 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ea7ce0215cd1..99bbaa3e5b0d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task) * under ptrace. */ retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(&task->cred_guard_mutex)) + if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); @@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task) unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out: return retval; } -- cgit v1.2.3-71-gd317 From d16e15f5b029fc7d03540ba0e5fb23b0abb0ebe0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:10 -0700 Subject: exit: add lock context annotation on find_new_reaper() find_new_reaper() releases and regrabs tasklist_lock but was missing proper annotations. Add it. This remove following sparse warning: warning: context imbalance in 'find_new_reaper' - unexpected unlock Signed-off-by: Namhyung Kim Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 894179a32ec1..b194febf5799 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -703,6 +703,8 @@ static void exit_mm(struct task_struct * tsk) * space. */ static struct task_struct *find_new_reaper(struct task_struct *father) + __releases(&tasklist_lock) + __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *thread; -- cgit v1.2.3-71-gd317 From 478735e38887077ac77a9756121b6ce0cb956e2f Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:34:15 -0700 Subject: /proc/stat: fix scalability of irq sum of all cpu In /proc/stat, the number of per-IRQ event is shown by making a sum each irq's events on all cpus. But we can make use of kstat_irqs(). kstat_irqs() do the same calculation, If !CONFIG_GENERIC_HARDIRQ, it's not a big cost. (Both of the number of cpus and irqs are small.) If a system is very big and CONFIG_GENERIC_HARDIRQ, it does for_each_irq() for_each_cpu() - look up a radix tree - read desc->irq_stat[cpu] This seems not efficient. This patch adds kstat_irqs() for CONFIG_GENRIC_HARDIRQ and change the calculation as for_each_irq() look up radix tree for_each_cpu() - read desc->irq_stat[cpu] This reduces cost. A test on (4096cpusp, 256 nodes, 4592 irqs) host (by Jack Steiner) %time cat /proc/stat > /dev/null Before Patch: 2.459 sec After Patch : .561 sec [akpm@linux-foundation.org: unexport kstat_irqs, coding-style tweaks] [akpm@linux-foundation.org: fix unused variable 'per_irq_sum'] Signed-off-by: KAMEZAWA Hiroyuki Tested-by: Jack Steiner Acked-by: Jack Steiner Cc: Yinghai Lu Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 10 ++-------- include/linux/kernel_stat.h | 4 ++++ kernel/irq/irqdesc.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index b80c620565bf..e15a19c93bae 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v) u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec boottime; - unsigned int per_irq_sum; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -108,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for_each_irq_nr(j) { - per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); - - seq_printf(p, " %u", per_irq_sum); - } + for_each_irq_nr(j) + seq_printf(p, " %u", kstat_irqs(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8b9b89085530..ad54c846911b 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -86,6 +86,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ +#ifndef CONFIG_GENERIC_HARDIRQS static inline unsigned int kstat_irqs(unsigned int irq) { unsigned int sum = 0; @@ -96,6 +97,9 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } +#else +extern unsigned int kstat_irqs(unsigned int irq); +#endif /* * Number of interrupts per cpu, since bootup diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9d917ff72675..9988d03797f5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -393,3 +393,18 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) struct irq_desc *desc = irq_to_desc(irq); return desc ? desc->kstat_irqs[cpu] : 0; } + +#ifdef CONFIG_GENERIC_HARDIRQS +unsigned int kstat_irqs(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + int cpu; + int sum = 0; + + if (!desc) + return 0; + for_each_possible_cpu(cpu) + sum += desc->kstat_irqs[cpu]; + return sum; +} +#endif /* CONFIG_GENERIC_HARDIRQS */ -- cgit v1.2.3-71-gd317 From 85893120699f8bae8caa12a8ee18ab5fceac978e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 27 Oct 2010 15:34:43 -0700 Subject: delayacct: align to 8 byte boundary on 64-bit systems prepare_reply() sets up an skb for the response. The payload contains: +--------------------------------+ | genlmsghdr - 4 bytes | +--------------------------------+ | NLA header - 4 bytes | /* Aggregate header */ +-+------------------------------+ | | NLA header - 4 bytes | /* PID header */ | +------------------------------+ | | pid/tgid - 4 bytes | | +------------------------------+ | | NLA header - 4 bytes | /* stats header */ | + -----------------------------+ <- oops. aligned on 4 byte boundary | | struct taskstats - 328 bytes | +-+------------------------------+ The start of the taskstats struct must be 8 byte aligned on IA64 (and other systems with 8 byte alignment rules for 64-bit types) or runtime alignment warnings will be issued. This patch pads the pid/tgid field out to sizeof(long), which forces the alignment of taskstats. The getdelays userspace code is ok with this since it assumes 32-bit pid/tgid and then honors that header's length field. An array is used to avoid exposing kernel memory contents to userspace in the response. Signed-off-by: Jeff Mahoney Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 11281d5792bd..5a651aa63d61 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -360,6 +360,12 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) struct nlattr *na, *ret; int aggr; + /* If we don't pad, we end up with alignment on a 4 byte boundary. + * This causes lots of runtime warnings on systems requiring 8 byte + * alignment */ + u32 pids[2] = { pid, 0 }; + int pid_size = ALIGN(sizeof(pid), sizeof(long)); + aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; @@ -367,7 +373,7 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) na = nla_nest_start(skb, aggr); if (!na) goto err; - if (nla_put(skb, type, sizeof(pid), &pid) < 0) + if (nla_put(skb, type, pid_size, pids) < 0) goto err; ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); if (!ret) -- cgit v1.2.3-71-gd317 From 9323312592cca636d7c2580dc85fa4846efa86a2 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:44 -0700 Subject: taskstats: separate taskstats commands Move each taskstats command into a single function. This makes the code more readable and makes it easier to add new commands. Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 118 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 40 deletions(-) (limited to 'kernel') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 5a651aa63d61..9970cae04f15 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -430,39 +430,46 @@ err: return rc; } -static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) +static int cmd_attr_register_cpumask(struct genl_info *info) { - int rc; - struct sk_buff *rep_skb; - struct taskstats *stats; - size_t size; cpumask_var_t mask; + int rc; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) - goto free_return_rc; - if (rc == 0) { - rc = add_del_listener(info->snd_pid, mask, REGISTER); - goto free_return_rc; - } + goto out; + rc = add_del_listener(info->snd_pid, mask, REGISTER); +out: + free_cpumask_var(mask); + return rc; +} +static int cmd_attr_deregister_cpumask(struct genl_info *info) +{ + cpumask_var_t mask; + int rc; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) - goto free_return_rc; - if (rc == 0) { - rc = add_del_listener(info->snd_pid, mask, DEREGISTER); -free_return_rc: - free_cpumask_var(mask); - return rc; - } + goto out; + rc = add_del_listener(info->snd_pid, mask, DEREGISTER); +out: free_cpumask_var(mask); + return rc; +} + +static int cmd_attr_pid(struct genl_info *info) +{ + struct taskstats *stats; + struct sk_buff *rep_skb; + size_t size; + u32 pid; + int rc; - /* - * Size includes space for nested attributes - */ size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); @@ -471,33 +478,64 @@ free_return_rc: return rc; rc = -EINVAL; - if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { - u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); - stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); - if (!stats) - goto err; - - rc = fill_pid(pid, NULL, stats); - if (rc < 0) - goto err; - } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { - u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); - stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); - if (!stats) - goto err; - - rc = fill_tgid(tgid, NULL, stats); - if (rc < 0) - goto err; - } else + pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); + if (!stats) + goto err; + + rc = fill_pid(pid, NULL, stats); + if (rc < 0) + goto err; + return send_reply(rep_skb, info); +err: + nlmsg_free(rep_skb); + return rc; +} + +static int cmd_attr_tgid(struct genl_info *info) +{ + struct taskstats *stats; + struct sk_buff *rep_skb; + size_t size; + u32 tgid; + int rc; + + size = nla_total_size(sizeof(u32)) + + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + + rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); + if (rc < 0) + return rc; + + rc = -EINVAL; + tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); + if (!stats) goto err; + rc = fill_tgid(tgid, NULL, stats); + if (rc < 0) + goto err; return send_reply(rep_skb, info); err: nlmsg_free(rep_skb); return rc; } +static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) +{ + if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) + return cmd_attr_register_cpumask(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) + return cmd_attr_deregister_cpumask(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) + return cmd_attr_pid(info); + else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) + return cmd_attr_tgid(info); + else + return -EINVAL; +} + static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; -- cgit v1.2.3-71-gd317 From 3d9e0cf1fe007b88db55d43dfdb6839e1a029ca5 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:44 -0700 Subject: taskstats: split fill_pid function Separate the finding of a task_struct by pid or tgid from filling the taskstats data. This makes the code more readable. Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 50 +++++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 9970cae04f15..c8231fb15708 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -175,22 +175,8 @@ static void send_cpu_listeners(struct sk_buff *skb, up_write(&listeners->sem); } -static int fill_pid(pid_t pid, struct task_struct *tsk, - struct taskstats *stats) +static void fill_stats(struct task_struct *tsk, struct taskstats *stats) { - int rc = 0; - - if (!tsk) { - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); - if (!tsk) - return -ESRCH; - } else - get_task_struct(tsk); - memset(stats, 0, sizeof(*stats)); /* * Each accounting subsystem adds calls to its functions to @@ -209,17 +195,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); +} - /* Define err: label here if needed */ - put_task_struct(tsk); - return rc; +static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) +{ + struct task_struct *tsk; + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return -ESRCH; + fill_stats(tsk, stats); + put_task_struct(tsk); + return 0; } -static int fill_tgid(pid_t tgid, struct task_struct *first, - struct taskstats *stats) +static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) { - struct task_struct *tsk; + struct task_struct *tsk, *first; unsigned long flags; int rc = -ESRCH; @@ -228,8 +224,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, * leaders who are already counted with the dead tasks */ rcu_read_lock(); - if (!first) - first = find_task_by_vpid(tgid); + first = find_task_by_vpid(tgid); if (!first || !lock_task_sighand(first, &flags)) goto out; @@ -268,7 +263,6 @@ out: return rc; } - static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; @@ -483,7 +477,7 @@ static int cmd_attr_pid(struct genl_info *info) if (!stats) goto err; - rc = fill_pid(pid, NULL, stats); + rc = fill_stats_for_pid(pid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); @@ -513,7 +507,7 @@ static int cmd_attr_tgid(struct genl_info *info) if (!stats) goto err; - rc = fill_tgid(tgid, NULL, stats); + rc = fill_stats_for_tgid(tgid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); @@ -599,9 +593,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) if (!stats) goto err; - rc = fill_pid(-1, tsk, stats); - if (rc < 0) - goto err; + fill_stats(tsk, stats); /* * Doesn't matter if tsk is the leader or the last group member leaving -- cgit v1.2.3-71-gd317 From d57af9b2142f31a39dcfdeb30776baadfc802827 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:45 -0700 Subject: taskstats: use real microsecond granularity for CPU times The taskstats interface uses microsecond granularity for the user and system time values. The conversion from cputime to the taskstats values uses the cputime_to_msecs primitive which effectively limits the granularity to milliseconds. Add the cputime_to_usecs primitive for architectures that have better, more precise CPU time values. Remove cputime_to_msecs primitive because there are no more users left. Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Cc: Luck Tony Cc: Shailabh Nagar Cc: Martin Schwidefsky Cc: Oleg Nesterov Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Shailabh Nagar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/cputime.h | 6 +++--- arch/powerpc/include/asm/cputime.h | 12 ++++++------ arch/s390/include/asm/cputime.h | 10 +++++----- include/asm-generic/cputime.h | 6 +++--- kernel/tsacct.c | 10 ++++------ 5 files changed, 21 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 7fa8a8594660..6073b187528a 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -56,10 +56,10 @@ typedef u64 cputime64_t; #define jiffies64_to_cputime64(__jif) ((__jif) * (NSEC_PER_SEC / HZ)) /* - * Convert cputime <-> milliseconds + * Convert cputime <-> microseconds */ -#define cputime_to_msecs(__ct) ((__ct) / NSEC_PER_MSEC) -#define msecs_to_cputime(__msecs) ((__msecs) * NSEC_PER_MSEC) +#define cputime_to_usecs(__ct) ((__ct) / NSEC_PER_USEC) +#define usecs_to_cputime(__usecs) ((__usecs) * NSEC_PER_USEC) /* * Convert cputime <-> seconds diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 8bdc6a9e5773..1cf20bdfbeca 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -124,23 +124,23 @@ static inline u64 cputime64_to_jiffies64(const cputime_t ct) } /* - * Convert cputime <-> milliseconds + * Convert cputime <-> microseconds */ extern u64 __cputime_msec_factor; -static inline unsigned long cputime_to_msecs(const cputime_t ct) +static inline unsigned long cputime_to_usecs(const cputime_t ct) { - return mulhdu(ct, __cputime_msec_factor); + return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC; } -static inline cputime_t msecs_to_cputime(const unsigned long ms) +static inline cputime_t usecs_to_cputime(const unsigned long us) { cputime_t ct; unsigned long sec; /* have to be a little careful about overflow */ - ct = ms % 1000; - sec = ms / 1000; + ct = us % 1000000; + sec = us / 1000000; if (ct) { ct *= tb_ticks_per_sec; do_div(ct, 1000); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 8b1a52a137c5..40e2ab0fa3f0 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -73,18 +73,18 @@ cputime64_to_jiffies64(cputime64_t cputime) } /* - * Convert cputime to milliseconds and back. + * Convert cputime to microseconds and back. */ static inline unsigned int -cputime_to_msecs(const cputime_t cputime) +cputime_to_usecs(const cputime_t cputime) { - return cputime_div(cputime, 4096000); + return cputime_div(cputime, 4096); } static inline cputime_t -msecs_to_cputime(const unsigned int m) +usecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 4096000; + return (cputime_t) m * 4096; } /* diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index ca0f239f0e13..2bcc5c7c22a6 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -33,10 +33,10 @@ typedef u64 cputime64_t; /* - * Convert cputime to milliseconds and back. + * Convert cputime to microseconds and back. */ -#define cputime_to_msecs(__ct) jiffies_to_msecs(__ct) -#define msecs_to_cputime(__msecs) msecs_to_jiffies(__msecs) +#define cputime_to_usecs(__ct) jiffies_to_usecs(__ct); +#define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs); /* * Convert cputime to seconds and back. diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 0a67e041edf8..24dc60d9fa1f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -63,12 +63,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_ppid = pid_alive(tsk) ? rcu_dereference(tsk->real_parent)->tgid : 0; rcu_read_unlock(); - stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; - stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; - stats->ac_utimescaled = - cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC; - stats->ac_stimescaled = - cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC; + stats->ac_utime = cputime_to_usecs(tsk->utime); + stats->ac_stime = cputime_to_usecs(tsk->stime); + stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); + stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; -- cgit v1.2.3-71-gd317 From 5de1cb2d0f1c1e5475d2bedf65b76828f8cdde22 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 27 Oct 2010 15:34:52 -0700 Subject: kernel/resource.c: handle reinsertion of an already-inserted resource If the same resource is inserted to the resource tree (maybe not on purpose), a dead loop will be created. In this situation, The kernel does not report any warning or error :( The command below will show a endless print. #cat /proc/iomem [akpm@linux-foundation.org: add WARN_ON()] Signed-off-by: Huang Shijie Cc: Jesse Barnes Cc: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 7b36976e5dea..9c9841cb6902 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -453,6 +453,8 @@ static struct resource * __insert_resource(struct resource *parent, struct resou if (first == parent) return first; + if (WARN_ON(first == new)) /* duplicated insertion */ + return first; if ((first->start > new->start) || (first->end < new->end)) break; -- cgit v1.2.3-71-gd317 From 61d8e11e519ee7912ab59610fba1aaf08e3c1d84 Mon Sep 17 00:00:00 2001 From: Zimny Lech Date: Wed, 27 Oct 2010 15:34:53 -0700 Subject: Remove duplicate includes from many files Signed-off-by: Zimny Lech Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-tegra/timer.c | 1 - arch/arm/plat-nomadik/include/plat/ste_dma40.h | 1 - arch/tile/kernel/setup.c | 2 -- arch/x86/mm/init_64.c | 1 - arch/x86/xen/enlighten.c | 1 - drivers/media/IR/lirc_dev.c | 1 - drivers/platform/x86/intel_pmic_gpio.c | 1 - include/linux/virtio_9p.h | 1 - kernel/trace/trace_kprobe.c | 1 - 9 files changed, 10 deletions(-) (limited to 'kernel') diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c index 2f420210d406..9057d6fd1d31 100644 --- a/arch/arm/mach-tegra/timer.c +++ b/arch/arm/mach-tegra/timer.c @@ -27,7 +27,6 @@ #include #include -#include #include #include diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h index 5fbde4b8dc12..93a812672d9a 100644 --- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h +++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h @@ -14,7 +14,6 @@ #include #include #include -#include /* dev types for memcpy */ #define STEDMA40_DEV_DST_MEMORY (-1) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index f3a50e74f9a4..ae51cad12da0 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 84346200e783..71a59296af80 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,7 +51,6 @@ #include #include #include -#include static int __init parse_direct_gbpages_off(char *arg) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 44ab12dc2a12..0cd12db0b142 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/IR/lirc_dev.c b/drivers/media/IR/lirc_dev.c index 0acf6396e068..202581808bdc 100644 --- a/drivers/media/IR/lirc_dev.c +++ b/drivers/media/IR/lirc_dev.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c index f540ff96c53f..e61db9dfebef 100644 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ b/drivers/platform/x86/intel_pmic_gpio.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 1faa80d92f05..e68b439b2860 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -5,7 +5,6 @@ #include #include #include -#include /* The feature bitmap for virtio 9P */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8d2852baa4a..2dec9bcde8b4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include "trace.h" -- cgit v1.2.3-71-gd317