core_sched.c (6773B)
1// SPDX-License-Identifier: GPL-2.0-only 2 3/* 4 * A simple wrapper around refcount. An allocated sched_core_cookie's 5 * address is used to compute the cookie of the task. 6 */ 7struct sched_core_cookie { 8 refcount_t refcnt; 9}; 10 11static unsigned long sched_core_alloc_cookie(void) 12{ 13 struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL); 14 if (!ck) 15 return 0; 16 17 refcount_set(&ck->refcnt, 1); 18 sched_core_get(); 19 20 return (unsigned long)ck; 21} 22 23static void sched_core_put_cookie(unsigned long cookie) 24{ 25 struct sched_core_cookie *ptr = (void *)cookie; 26 27 if (ptr && refcount_dec_and_test(&ptr->refcnt)) { 28 kfree(ptr); 29 sched_core_put(); 30 } 31} 32 33static unsigned long sched_core_get_cookie(unsigned long cookie) 34{ 35 struct sched_core_cookie *ptr = (void *)cookie; 36 37 if (ptr) 38 refcount_inc(&ptr->refcnt); 39 40 return cookie; 41} 42 43/* 44 * sched_core_update_cookie - replace the cookie on a task 45 * @p: the task to update 46 * @cookie: the new cookie 47 * 48 * Effectively exchange the task cookie; caller is responsible for lifetimes on 49 * both ends. 50 * 51 * Returns: the old cookie 52 */ 53static unsigned long sched_core_update_cookie(struct task_struct *p, 54 unsigned long cookie) 55{ 56 unsigned long old_cookie; 57 struct rq_flags rf; 58 struct rq *rq; 59 bool enqueued; 60 61 rq = task_rq_lock(p, &rf); 62 63 /* 64 * Since creating a cookie implies sched_core_get(), and we cannot set 65 * a cookie until after we've created it, similarly, we cannot destroy 66 * a cookie until after we've removed it, we must have core scheduling 67 * enabled here. 68 */ 69 SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq)); 70 71 enqueued = sched_core_enqueued(p); 72 if (enqueued) 73 sched_core_dequeue(rq, p, DEQUEUE_SAVE); 74 75 old_cookie = p->core_cookie; 76 p->core_cookie = cookie; 77 78 if (enqueued) 79 sched_core_enqueue(rq, p); 80 81 /* 82 * If task is currently running, it may not be compatible anymore after 83 * the cookie change, so enter the scheduler on its CPU to schedule it 84 * away. 85 * 86 * Note that it is possible that as a result of this cookie change, the 87 * core has now entered/left forced idle state. Defer accounting to the 88 * next scheduling edge, rather than always forcing a reschedule here. 89 */ 90 if (task_running(rq, p)) 91 resched_curr(rq); 92 93 task_rq_unlock(rq, p, &rf); 94 95 return old_cookie; 96} 97 98static unsigned long sched_core_clone_cookie(struct task_struct *p) 99{ 100 unsigned long cookie, flags; 101 102 raw_spin_lock_irqsave(&p->pi_lock, flags); 103 cookie = sched_core_get_cookie(p->core_cookie); 104 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 105 106 return cookie; 107} 108 109void sched_core_fork(struct task_struct *p) 110{ 111 RB_CLEAR_NODE(&p->core_node); 112 p->core_cookie = sched_core_clone_cookie(current); 113} 114 115void sched_core_free(struct task_struct *p) 116{ 117 sched_core_put_cookie(p->core_cookie); 118} 119 120static void __sched_core_set(struct task_struct *p, unsigned long cookie) 121{ 122 cookie = sched_core_get_cookie(cookie); 123 cookie = sched_core_update_cookie(p, cookie); 124 sched_core_put_cookie(cookie); 125} 126 127/* Called from prctl interface: PR_SCHED_CORE */ 128int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, 129 unsigned long uaddr) 130{ 131 unsigned long cookie = 0, id = 0; 132 struct task_struct *task, *p; 133 struct pid *grp; 134 int err = 0; 135 136 if (!static_branch_likely(&sched_smt_present)) 137 return -ENODEV; 138 139 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID); 140 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID); 141 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID); 142 143 if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 || 144 (cmd != PR_SCHED_CORE_GET && uaddr)) 145 return -EINVAL; 146 147 rcu_read_lock(); 148 if (pid == 0) { 149 task = current; 150 } else { 151 task = find_task_by_vpid(pid); 152 if (!task) { 153 rcu_read_unlock(); 154 return -ESRCH; 155 } 156 } 157 get_task_struct(task); 158 rcu_read_unlock(); 159 160 /* 161 * Check if this process has the right to modify the specified 162 * process. Use the regular "ptrace_may_access()" checks. 163 */ 164 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { 165 err = -EPERM; 166 goto out; 167 } 168 169 switch (cmd) { 170 case PR_SCHED_CORE_GET: 171 if (type != PIDTYPE_PID || uaddr & 7) { 172 err = -EINVAL; 173 goto out; 174 } 175 cookie = sched_core_clone_cookie(task); 176 if (cookie) { 177 /* XXX improve ? */ 178 ptr_to_hashval((void *)cookie, &id); 179 } 180 err = put_user(id, (u64 __user *)uaddr); 181 goto out; 182 183 case PR_SCHED_CORE_CREATE: 184 cookie = sched_core_alloc_cookie(); 185 if (!cookie) { 186 err = -ENOMEM; 187 goto out; 188 } 189 break; 190 191 case PR_SCHED_CORE_SHARE_TO: 192 cookie = sched_core_clone_cookie(current); 193 break; 194 195 case PR_SCHED_CORE_SHARE_FROM: 196 if (type != PIDTYPE_PID) { 197 err = -EINVAL; 198 goto out; 199 } 200 cookie = sched_core_clone_cookie(task); 201 __sched_core_set(current, cookie); 202 goto out; 203 204 default: 205 err = -EINVAL; 206 goto out; 207 }; 208 209 if (type == PIDTYPE_PID) { 210 __sched_core_set(task, cookie); 211 goto out; 212 } 213 214 read_lock(&tasklist_lock); 215 grp = task_pid_type(task, type); 216 217 do_each_pid_thread(grp, type, p) { 218 if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) { 219 err = -EPERM; 220 goto out_tasklist; 221 } 222 } while_each_pid_thread(grp, type, p); 223 224 do_each_pid_thread(grp, type, p) { 225 __sched_core_set(p, cookie); 226 } while_each_pid_thread(grp, type, p); 227out_tasklist: 228 read_unlock(&tasklist_lock); 229 230out: 231 sched_core_put_cookie(cookie); 232 put_task_struct(task); 233 return err; 234} 235 236#ifdef CONFIG_SCHEDSTATS 237 238/* REQUIRES: rq->core's clock recently updated. */ 239void __sched_core_account_forceidle(struct rq *rq) 240{ 241 const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq)); 242 u64 delta, now = rq_clock(rq->core); 243 struct rq *rq_i; 244 struct task_struct *p; 245 int i; 246 247 lockdep_assert_rq_held(rq); 248 249 WARN_ON_ONCE(!rq->core->core_forceidle_count); 250 251 if (rq->core->core_forceidle_start == 0) 252 return; 253 254 delta = now - rq->core->core_forceidle_start; 255 if (unlikely((s64)delta <= 0)) 256 return; 257 258 rq->core->core_forceidle_start = now; 259 260 if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) { 261 /* can't be forced idle without a running task */ 262 } else if (rq->core->core_forceidle_count > 1 || 263 rq->core->core_forceidle_occupation > 1) { 264 /* 265 * For larger SMT configurations, we need to scale the charged 266 * forced idle amount since there can be more than one forced 267 * idle sibling and more than one running cookied task. 268 */ 269 delta *= rq->core->core_forceidle_count; 270 delta = div_u64(delta, rq->core->core_forceidle_occupation); 271 } 272 273 for_each_cpu(i, smt_mask) { 274 rq_i = cpu_rq(i); 275 p = rq_i->core_pick ?: rq_i->curr; 276 277 if (p == rq_i->idle) 278 continue; 279 280 __schedstat_add(p->stats.core_forceidle_sum, delta); 281 } 282} 283 284void __sched_core_tick(struct rq *rq) 285{ 286 if (!rq->core->core_forceidle_count) 287 return; 288 289 if (rq != rq->core) 290 update_rq_clock(rq->core); 291 292 __sched_core_account_forceidle(rq); 293} 294 295#endif /* CONFIG_SCHEDSTATS */