From fb00aca474405f4fa8a8519c3179fed722eabd83 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Nov 2013 14:43:43 +0100 Subject: rtmutex: Turn the plist into an rb-tree Turn the pi-chains from plist to rb-tree, in the rt_mutex code, and provide a proper comparison function for -deadline and -priority tasks. This is done mainly because: - classical prio field of the plist is just an int, which might not be enough for representing a deadline; - manipulating such a list would become O(nr_deadline_tasks), which might be to much, as the number of -deadline task increases. Therefore, an rb-tree is used, and tasks are queued in it according to the following logic: - among two -priority (i.e., SCHED_BATCH/OTHER/RR/FIFO) tasks, the one with the higher (lower, actually!) prio wins; - among a -priority and a -deadline task, the latter always wins; - among two -deadline tasks, the one with the earliest deadline wins. Queueing and dequeueing functions are changed accordingly, for both the list of a task's pi-waiters and the list of tasks blocked on a pi-lock. Signed-off-by: Peter Zijlstra Signed-off-by: Dario Faggioli Signed-off-by: Juri Lelli Signed-off-again-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1383831828-15501-10-git-send-email-juri.lelli@gmail.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b0ed422e4e4a..f0e52383a001 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -154,6 +155,14 @@ extern struct task_group root_task_group; #define INIT_TASK_COMM "swapper" +#ifdef CONFIG_RT_MUTEXES +# define INIT_RT_MUTEXES(tsk) \ + .pi_waiters = RB_ROOT, \ + .pi_waiters_leftmost = NULL, +#else +# define INIT_RT_MUTEXES(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -221,6 +230,7 @@ extern struct task_group root_task_group; INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ INIT_CPUSET_SEQ(tsk) \ + INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ } -- cgit v1.2.3-71-gd317 From 4440e8548153e9e6d56db9abe6f3bc0e5b9eb74f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 27 Nov 2013 17:35:17 -0500 Subject: audit: convert all sessionid declaration to unsigned int Right now the sessionid value in the kernel is a combination of u32, int, and unsigned int. Just use unsigned int throughout. Signed-off-by: Eric Paris Signed-off-by: Richard Guy Briggs Signed-off-by: Eric Paris --- drivers/tty/tty_audit.c | 2 +- include/linux/init_task.h | 2 +- include/net/netlabel.h | 2 +- include/net/xfrm.h | 20 ++++++++++---------- kernel/audit.c | 2 +- kernel/auditfilter.c | 2 +- kernel/auditsc.c | 2 +- net/xfrm/xfrm_policy.c | 8 ++++---- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 12 ++++++------ 10 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index a4fdce74f883..b0e540137e39 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -67,7 +67,7 @@ static void tty_audit_log(const char *description, int major, int minor, struct task_struct *tsk = current; uid_t uid = from_kuid(&init_user_ns, task_uid(tsk)); uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(tsk)); - u32 sessionid = audit_get_sessionid(tsk); + unsigned int sessionid = audit_get_sessionid(tsk); ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_TTY); if (ab) { diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5cd0f0949927..a143df5ee548 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -95,7 +95,7 @@ extern struct group_info init_groups; #ifdef CONFIG_AUDITSYSCALL #define INIT_IDS \ .loginuid = INVALID_UID, \ - .sessionid = -1, + .sessionid = (unsigned int)-1, #else #define INIT_IDS #endif diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 2c95d55f7914..97e6dcaf12bb 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -111,7 +111,7 @@ struct cipso_v4_doi; struct netlbl_audit { u32 secid; kuid_t loginuid; - u32 sessionid; + unsigned int sessionid; }; /* diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e253bf0cc7ef..f8d32b908423 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -679,7 +679,7 @@ struct xfrm_spi_skb_cb { struct xfrm_audit { u32 secid; kuid_t loginuid; - u32 sessionid; + unsigned int sessionid; }; #ifdef CONFIG_AUDITSYSCALL @@ -697,7 +697,7 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op) return audit_buf; } -static inline void xfrm_audit_helper_usrinfo(kuid_t auid, u32 ses, u32 secid, +static inline void xfrm_audit_helper_usrinfo(kuid_t auid, unsigned int ses, u32 secid, struct audit_buffer *audit_buf) { char *secctx; @@ -714,13 +714,13 @@ static inline void xfrm_audit_helper_usrinfo(kuid_t auid, u32 ses, u32 secid, } extern void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, u32 ses, u32 secid); + kuid_t auid, unsigned int ses, u32 secid); extern void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, u32 ses, u32 secid); + kuid_t auid, unsigned int ses, u32 secid); extern void xfrm_audit_state_add(struct xfrm_state *x, int result, - kuid_t auid, u32 ses, u32 secid); + kuid_t auid, unsigned int ses, u32 secid); extern void xfrm_audit_state_delete(struct xfrm_state *x, int result, - kuid_t auid, u32 ses, u32 secid); + kuid_t auid, unsigned int ses, u32 secid); extern void xfrm_audit_state_replay_overflow(struct xfrm_state *x, struct sk_buff *skb); extern void xfrm_audit_state_replay(struct xfrm_state *x, @@ -733,22 +733,22 @@ extern void xfrm_audit_state_icvfail(struct xfrm_state *x, #else static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, u32 ses, u32 secid) + kuid_t auid, unsigned int ses, u32 secid) { } static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, u32 ses, u32 secid) + kuid_t auid, unsigned int ses, u32 secid) { } static inline void xfrm_audit_state_add(struct xfrm_state *x, int result, - kuid_t auid, u32 ses, u32 secid) + kuid_t auid, unsigned int ses, u32 secid) { } static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result, - kuid_t auid, u32 ses, u32 secid) + kuid_t auid, unsigned int ses, u32 secid) { } diff --git a/kernel/audit.c b/kernel/audit.c index 25e4ed016793..ce8514f9aa6c 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1611,7 +1611,7 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix, void audit_log_session_info(struct audit_buffer *ab) { - u32 sessionid = audit_get_sessionid(current); + unsigned int sessionid = audit_get_sessionid(current); uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current)); audit_log_format(ab, " auid=%u ses=%u", auid, sessionid); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 6cc8240b7aaf..629834aa4ca4 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1005,7 +1005,7 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re { struct audit_buffer *ab; uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current)); - u32 sessionid = audit_get_sessionid(current); + unsigned int sessionid = audit_get_sessionid(current); if (!audit_enabled) return; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 05634b3ba244..5c237343cd9b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2011,7 +2011,7 @@ int audit_set_loginuid(kuid_t loginuid) /* are we setting or clearing? */ if (uid_valid(loginuid)) - sessionid = atomic_inc_return(&session_id); + sessionid = (unsigned int)atomic_inc_return(&session_id); task->sessionid = sessionid; task->loginuid = loginuid; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 76e1873811d4..767c74a91db3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2899,12 +2899,12 @@ static void xfrm_policy_fini(struct net *net) flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY audit_info.loginuid = INVALID_UID; - audit_info.sessionid = -1; + audit_info.sessionid = (unsigned int)-1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif audit_info.loginuid = INVALID_UID; - audit_info.sessionid = -1; + audit_info.sessionid = (unsigned int)-1; audit_info.secid = 0; xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); @@ -3010,7 +3010,7 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, u32 sessionid, u32 secid) + kuid_t auid, unsigned int sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -3025,7 +3025,7 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, u32 sessionid, u32 secid) + kuid_t auid, unsigned int sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b9c3f9e943a9..dbf0719df5b0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2043,7 +2043,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); audit_info.loginuid = INVALID_UID; - audit_info.sessionid = -1; + audit_info.sessionid = (unsigned int)-1; audit_info.secid = 0; xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); flush_work(&net->xfrm.state_gc_work); @@ -2109,7 +2109,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } void xfrm_audit_state_add(struct xfrm_state *x, int result, - kuid_t auid, u32 sessionid, u32 secid) + kuid_t auid, unsigned int sessionid, u32 secid) { struct audit_buffer *audit_buf; @@ -2124,7 +2124,7 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result, EXPORT_SYMBOL_GPL(xfrm_audit_state_add); void xfrm_audit_state_delete(struct xfrm_state *x, int result, - kuid_t auid, u32 sessionid, u32 secid) + kuid_t auid, unsigned int sessionid, u32 secid) { struct audit_buffer *audit_buf; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f964d4c00ffb..ec97e13743e6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -600,7 +600,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err; struct km_event c; kuid_t loginuid = audit_get_loginuid(current); - u32 sessionid = audit_get_sessionid(current); + unsigned int sessionid = audit_get_sessionid(current); u32 sid; err = verify_newsa_info(p, attrs); @@ -679,7 +679,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); kuid_t loginuid = audit_get_loginuid(current); - u32 sessionid = audit_get_sessionid(current); + unsigned int sessionid = audit_get_sessionid(current); u32 sid; x = xfrm_user_state_lookup(net, p, attrs, &err); @@ -1405,7 +1405,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, int err; int excl; kuid_t loginuid = audit_get_loginuid(current); - u32 sessionid = audit_get_sessionid(current); + unsigned int sessionid = audit_get_sessionid(current); u32 sid; err = verify_newpolicy_info(p); @@ -1663,7 +1663,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, } } else { kuid_t loginuid = audit_get_loginuid(current); - u32 sessionid = audit_get_sessionid(current); + unsigned int sessionid = audit_get_sessionid(current); u32 sid; security_task_getsecid(current, &sid); @@ -1959,7 +1959,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { kuid_t loginuid = audit_get_loginuid(current); - u32 sessionid = audit_get_sessionid(current); + unsigned int sessionid = audit_get_sessionid(current); u32 sid; security_task_getsecid(current, &sid); @@ -2002,7 +2002,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (ue->hard) { kuid_t loginuid = audit_get_loginuid(current); - u32 sessionid = audit_get_sessionid(current); + unsigned int sessionid = audit_get_sessionid(current); u32 sid; security_task_getsecid(current, &sid); -- cgit v1.2.3-71-gd317 From 0c740d0afc3bff0a097ad03a1c8df92757516f5c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 21 Jan 2014 15:49:56 -0800 Subject: introduce for_each_thread() to replace the buggy while_each_thread() while_each_thread() and next_thread() should die, almost every lockless usage is wrong. 1. Unless g == current, the lockless while_each_thread() is not safe. while_each_thread(g, t) can loop forever if g exits, next_thread() can't reach the unhashed thread in this case. Note that this can happen even if g is the group leader, it can exec. 2. Even if while_each_thread() itself was correct, people often use it wrongly. It was never safe to just take rcu_read_lock() and loop unless you verify that pid_alive(g) == T, even the first next_thread() can point to the already freed/reused memory. This patch adds signal_struct->thread_head and task->thread_node to create the normal rcu-safe list with the stable head. The new for_each_thread(g, t) helper is always safe under rcu_read_lock() as long as this task_struct can't go away. Note: of course it is ugly to have both task_struct->thread_node and the old task_struct->thread_group, we will kill it later, after we change the users of while_each_thread() to use for_each_thread(). Perhaps we can kill it even before we convert all users, we can reimplement next_thread(t) using the new thread_head/thread_node. But we can't do this right now because this will lead to subtle behavioural changes. For example, do/while_each_thread() always sees at least one task, while for_each_thread() can do nothing if the whole thread group has died. Or thread_group_empty(), currently its semantics is not clear unless thread_group_leader(p) and we need to audit the callers before we can change it. So this patch adds the new interface which has to coexist with the old one for some time, hopefully the next changes will be more or less straightforward and the old one will go away soon. Signed-off-by: Oleg Nesterov Reviewed-by: Sergey Dyasly Tested-by: Sergey Dyasly Reviewed-by: Sameer Nanda Acked-by: David Rientjes Cc: "Eric W. Biederman" Cc: Frederic Weisbecker Cc: Mandeep Singh Baines Cc: "Ma, Xindong" Cc: Michal Hocko Cc: "Tu, Xiaobing" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 2 ++ include/linux/sched.h | 12 ++++++++++++ kernel/exit.c | 1 + kernel/fork.c | 7 +++++++ 4 files changed, 22 insertions(+) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f0e52383a001..1516a8ff8f92 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -41,6 +41,7 @@ extern struct fs_struct init_fs; #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ + .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ .shared_pending = { \ .list = LIST_HEAD_INIT(sig.shared_pending.list), \ @@ -222,6 +223,7 @@ extern struct task_group root_task_group; [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ .thread_group = LIST_HEAD_INIT(tsk.thread_group), \ + .thread_node = LIST_HEAD_INIT(init_signals.thread_head), \ INIT_IDS \ INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ diff --git a/include/linux/sched.h b/include/linux/sched.h index ffccdad050b5..485234d2fd42 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -549,6 +549,7 @@ struct signal_struct { atomic_t sigcnt; atomic_t live; int nr_threads; + struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ @@ -1271,6 +1272,7 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; + struct list_head thread_node; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ @@ -2341,6 +2343,16 @@ extern bool current_is_single_threaded(void); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; diff --git a/kernel/exit.c b/kernel/exit.c index a949819055d5..1e77fc645317 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead) __this_cpu_dec(process_counts); } list_del_rcu(&p->thread_group); + list_del_rcu(&p->thread_node); } /* diff --git a/kernel/fork.c b/kernel/fork.c index 294189fc7ac8..2f11bbe376b0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1035,6 +1035,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nr_threads = 1; atomic_set(&sig->live, 1); atomic_set(&sig->sigcnt, 1); + + /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ + sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); + tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); + init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); @@ -1474,6 +1479,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, atomic_inc(¤t->signal->sigcnt); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); + list_add_tail_rcu(&p->thread_node, + &p->signal->thread_head); } attach_pid(p, PIDTYPE_PID); nr_threads++; -- cgit v1.2.3-71-gd317