From e9a2ea5a1ba09c35258f3663842fb8d8cf2e00c2 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Sat, 22 Oct 2016 06:19:49 -0700 Subject: cacheinfo: Introduce cache id Cache management software needs an id for each instance of a cache of a particular type. The current cacheinfo structure does not provide any information about the underlying hardware so there is no way to expose it. Hardware with cache management features provides means (cpuid, enumeration etc.) to retrieve the hardware id of a particular cache instance. Cache instances which share hardware have the same hardware id. Add an 'id' field to struct cacheinfo to store this information. Expose this information under the /sys/devices/system/cpu/cpu*/cache/index*/ directory as well. Signed-off-by: Fenghua Yu Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: "David Carrillo-Cisneros" Cc: "Sai Prakhya" Cc: "Peter Zijlstra" Cc: "Stephane Eranian" Cc: "Dave Hansen" Cc: "Shaohua Li" Cc: "Nilay Vaish" Cc: "Vikas Shivappa" Cc: "Ingo Molnar" Cc: "Borislav Petkov" Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1477142405-32078-3-git-send-email-fenghua.yu@intel.com Signed-off-by: Thomas Gleixner --- include/linux/cacheinfo.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2189935075b4..0bcbb674da9d 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -18,6 +18,7 @@ enum cache_type { /** * struct cacheinfo - represent a cache leaf node + * @id: This cache's id. It is unique among caches with the same (type, level). * @type: type of the cache - data, inst or unified * @level: represents the hierarchy in the multi-level cache * @coherency_line_size: size of each cache line usually representing @@ -44,6 +45,7 @@ enum cache_type { * keeping, the remaining members form the core properties of the cache */ struct cacheinfo { + unsigned int id; enum cache_type type; unsigned int level; unsigned int coherency_line_size; @@ -61,6 +63,7 @@ struct cacheinfo { #define CACHE_WRITE_ALLOCATE BIT(3) #define CACHE_ALLOCATE_POLICY_MASK \ (CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE) +#define CACHE_ID BIT(4) struct device_node *of_node; bool disable_sysfs; -- cgit v1.2.3-71-gd317 From 5ff193fbde20df5d80fec367cea3e7856c057320 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 28 Oct 2016 15:04:42 -0700 Subject: x86/intel_rdt: Add basic resctrl filesystem support Use kernfs as basis for our user interface filesystem. This patch supports mount/umount, and one mount parameter "cdp" to enable code/data prioritization (though all we do at this point is ensure that the system can support CDP). The file system is not populated yet in this patch. [ tglx: Fixed up a few nits and added cdp handling in case of error ] Signed-off-by: Fenghua Yu Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: "Shaohua Li" Cc: "Sai Prakhya" Cc: "Peter Zijlstra" Cc: "Stephane Eranian" Cc: "Dave Hansen" Cc: "David Carrillo-Cisneros" Cc: "Nilay Vaish" Cc: "Vikas Shivappa" Cc: "Ingo Molnar" Cc: "Borislav Petkov" Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1477692289-37412-4-git-send-email-fenghua.yu@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/intel_rdt.h | 26 +++ arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/intel_rdt.c | 8 +- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 271 +++++++++++++++++++++++++++++++ include/uapi/linux/magic.h | 1 + 5 files changed, 306 insertions(+), 2 deletions(-) create mode 100644 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c (limited to 'include') diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h index c0d0a6e6448c..09d00e6f7ad6 100644 --- a/arch/x86/include/asm/intel_rdt.h +++ b/arch/x86/include/asm/intel_rdt.h @@ -1,9 +1,31 @@ #ifndef _ASM_X86_INTEL_RDT_H #define _ASM_X86_INTEL_RDT_H +#include + +#define IA32_L3_QOS_CFG 0xc81 #define IA32_L3_CBM_BASE 0xc90 #define IA32_L2_CBM_BASE 0xd10 +#define L3_QOS_CDP_ENABLE 0x01ULL + +/** + * struct rdtgroup - store rdtgroup's data in resctrl file system. + * @kn: kernfs node + * @rdtgroup_list: linked list for all rdtgroups + * @closid: closid for this rdtgroup + */ +struct rdtgroup { + struct kernfs_node *kn; + struct list_head rdtgroup_list; + int closid; +}; + +/* List of all resource groups */ +extern struct list_head rdt_all_groups; + +int __init rdtgroup_init(void); + /** * struct rdt_resource - attributes of an RDT resource * @enabled: Is this feature enabled on this machine @@ -68,6 +90,10 @@ struct msr_param { extern struct mutex rdtgroup_mutex; extern struct rdt_resource rdt_resources_all[]; +extern struct rdtgroup rdtgroup_default; +DECLARE_STATIC_KEY_FALSE(rdt_enable_key); + +int __init rdtgroup_init(void); enum { RDT_RESOURCE_L3, diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cf4bfd030c0c..b4334e86c1a9 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_INTEL_RDT_A) += intel_rdt.o +obj-$(CONFIG_INTEL_RDT_A) += intel_rdt.o intel_rdt_rdtgroup.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 3d4b397a1181..9d95414f7117 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -361,7 +361,7 @@ static int intel_rdt_offline_cpu(unsigned int cpu) static int __init intel_rdt_late_init(void) { struct rdt_resource *r; - int state; + int state, ret; if (!get_rdt_resources()) return -ENODEV; @@ -372,6 +372,12 @@ static int __init intel_rdt_late_init(void) if (state < 0) return state; + ret = rdtgroup_init(); + if (ret) { + cpuhp_remove_state(state); + return ret; + } + for_each_capable_rdt_resource(r) pr_info("Intel RDT %s allocation detected\n", r->name); diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c new file mode 100644 index 000000000000..106e4ce8f7e0 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -0,0 +1,271 @@ +/* + * User interface for Resource Alloction in Resource Director Technology(RDT) + * + * Copyright (C) 2016 Intel Corporation + * + * Author: Fenghua Yu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * More information about RDT be found in the Intel (R) x86 Architecture + * Software Developer Manual. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include + +#include + +DEFINE_STATIC_KEY_FALSE(rdt_enable_key); +struct kernfs_root *rdt_root; +struct rdtgroup rdtgroup_default; +LIST_HEAD(rdt_all_groups); + +static void l3_qos_cfg_update(void *arg) +{ + bool *enable = arg; + + wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); +} + +static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) +{ + cpumask_var_t cpu_mask; + struct rdt_domain *d; + int cpu; + + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + + list_for_each_entry(d, &r->domains, list) { + /* Pick one CPU from each domain instance to update MSR */ + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + } + cpu = get_cpu(); + /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ + if (cpumask_test_cpu(cpu, cpu_mask)) + l3_qos_cfg_update(&enable); + /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ + smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1); + put_cpu(); + + free_cpumask_var(cpu_mask); + + return 0; +} + +static int cdp_enable(void) +{ + struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA]; + struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE]; + struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + int ret; + + if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable) + return -EINVAL; + + ret = set_l3_qos_cfg(r_l3, true); + if (!ret) { + r_l3->enabled = false; + r_l3data->enabled = true; + r_l3code->enabled = true; + } + return ret; +} + +static void cdp_disable(void) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + + r->enabled = r->capable; + + if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) { + rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false; + rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false; + set_l3_qos_cfg(r, false); + } +} + +static int parse_rdtgroupfs_options(char *data) +{ + char *token, *o = data; + int ret = 0; + + while ((token = strsep(&o, ",")) != NULL) { + if (!*token) + return -EINVAL; + + if (!strcmp(token, "cdp")) + ret = cdp_enable(); + } + + return ret; +} + +static struct dentry *rdt_mount(struct file_system_type *fs_type, + int flags, const char *unused_dev_name, + void *data) +{ + struct dentry *dentry; + int ret; + + mutex_lock(&rdtgroup_mutex); + /* + * resctrl file system can only be mounted once. + */ + if (static_branch_unlikely(&rdt_enable_key)) { + dentry = ERR_PTR(-EBUSY); + goto out; + } + + ret = parse_rdtgroupfs_options(data); + if (ret) { + dentry = ERR_PTR(ret); + goto out_cdp; + } + + dentry = kernfs_mount(fs_type, flags, rdt_root, + RDTGROUP_SUPER_MAGIC, NULL); + if (IS_ERR(dentry)) + goto out_cdp; + + static_branch_enable(&rdt_enable_key); + goto out; + +out_cdp: + cdp_disable(); +out: + mutex_unlock(&rdtgroup_mutex); + + return dentry; +} + +static int reset_all_cbms(struct rdt_resource *r) +{ + struct msr_param msr_param; + cpumask_var_t cpu_mask; + struct rdt_domain *d; + int i, cpu; + + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + + msr_param.res = r; + msr_param.low = 0; + msr_param.high = r->num_closid; + + /* + * Disable resource control for this resource by setting all + * CBMs in all domains to the maximum mask value. Pick one CPU + * from each domain to update the MSRs below. + */ + list_for_each_entry(d, &r->domains, list) { + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + + for (i = 0; i < r->num_closid; i++) + d->cbm[i] = r->max_cbm; + } + cpu = get_cpu(); + /* Update CBM on this cpu if it's in cpu_mask. */ + if (cpumask_test_cpu(cpu, cpu_mask)) + rdt_cbm_update(&msr_param); + /* Update CBM on all other cpus in cpu_mask. */ + smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1); + put_cpu(); + + free_cpumask_var(cpu_mask); + + return 0; +} + +static void rdt_kill_sb(struct super_block *sb) +{ + struct rdt_resource *r; + + mutex_lock(&rdtgroup_mutex); + + /*Put everything back to default values. */ + for_each_enabled_rdt_resource(r) + reset_all_cbms(r); + cdp_disable(); + static_branch_disable(&rdt_enable_key); + kernfs_kill_sb(sb); + mutex_unlock(&rdtgroup_mutex); +} + +static struct file_system_type rdt_fs_type = { + .name = "resctrl", + .mount = rdt_mount, + .kill_sb = rdt_kill_sb, +}; + +static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { +}; + +static int __init rdtgroup_setup_root(void) +{ + rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, + KERNFS_ROOT_CREATE_DEACTIVATED, + &rdtgroup_default); + if (IS_ERR(rdt_root)) + return PTR_ERR(rdt_root); + + mutex_lock(&rdtgroup_mutex); + + rdtgroup_default.closid = 0; + list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); + + rdtgroup_default.kn = rdt_root->kn; + kernfs_activate(rdtgroup_default.kn); + + mutex_unlock(&rdtgroup_mutex); + + return 0; +} + +/* + * rdtgroup_init - rdtgroup initialization + * + * Setup resctrl file system including set up root, create mount point, + * register rdtgroup filesystem, and initialize files under root directory. + * + * Return: 0 on success or -errno + */ +int __init rdtgroup_init(void) +{ + int ret = 0; + + ret = rdtgroup_setup_root(); + if (ret) + return ret; + + ret = sysfs_create_mount_point(fs_kobj, "resctrl"); + if (ret) + goto cleanup_root; + + ret = register_filesystem(&rdt_fs_type); + if (ret) + goto cleanup_mountpoint; + + return 0; + +cleanup_mountpoint: + sysfs_remove_mount_point(fs_kobj, "resctrl"); +cleanup_root: + kernfs_destroy_root(rdt_root); + + return ret; +} diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 9bd559472c92..e230af2e6855 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -57,6 +57,7 @@ #define CGROUP_SUPER_MAGIC 0x27e0eb #define CGROUP2_SUPER_MAGIC 0x63677270 +#define RDTGROUP_SUPER_MAGIC 0x7655821 #define STACK_END_MAGIC 0x57AC6E9D -- cgit v1.2.3-71-gd317 From e02737d5b82640497637d18428e2793bb7f02881 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 28 Oct 2016 15:04:46 -0700 Subject: x86/intel_rdt: Add tasks files The root directory all subdirectories are automatically populated with a read/write (mode 0644) file named "tasks". When read it will show all the task IDs assigned to the resource group. Tasks can be added (one at a time) to a group by writing the task ID to the file. E.g. Membership in a resource group is indicated by a new field in the task_struct "int closid" which holds the CLOSID for each task. The default resource group uses CLOSID=0 which means that all existing tasks when the resctrl file system is mounted belong to the default group. If a group is removed, tasks which are members of that group are moved to the default group. Signed-off-by: Fenghua Yu Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: "Shaohua Li" Cc: "Sai Prakhya" Cc: "Peter Zijlstra" Cc: "Stephane Eranian" Cc: "Dave Hansen" Cc: "David Carrillo-Cisneros" Cc: "Nilay Vaish" Cc: "Vikas Shivappa" Cc: "Ingo Molnar" Cc: "Borislav Petkov" Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1477692289-37412-8-git-send-email-fenghua.yu@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 180 +++++++++++++++++++++++++++++++ include/linux/sched.h | 3 + 2 files changed, 183 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index e05a18685fc8..5cc0865f2908 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -267,6 +268,162 @@ unlock: return ret ?: nbytes; } +struct task_move_callback { + struct callback_head work; + struct rdtgroup *rdtgrp; +}; + +static void move_myself(struct callback_head *head) +{ + struct task_move_callback *callback; + struct rdtgroup *rdtgrp; + + callback = container_of(head, struct task_move_callback, work); + rdtgrp = callback->rdtgrp; + + /* + * If resource group was deleted before this task work callback + * was invoked, then assign the task to root group and free the + * resource group. + */ + if (atomic_dec_and_test(&rdtgrp->waitcount) && + (rdtgrp->flags & RDT_DELETED)) { + current->closid = 0; + kfree(rdtgrp); + } + + kfree(callback); +} + +static int __rdtgroup_move_task(struct task_struct *tsk, + struct rdtgroup *rdtgrp) +{ + struct task_move_callback *callback; + int ret; + + callback = kzalloc(sizeof(*callback), GFP_KERNEL); + if (!callback) + return -ENOMEM; + callback->work.func = move_myself; + callback->rdtgrp = rdtgrp; + + /* + * Take a refcount, so rdtgrp cannot be freed before the + * callback has been invoked. + */ + atomic_inc(&rdtgrp->waitcount); + ret = task_work_add(tsk, &callback->work, true); + if (ret) { + /* + * Task is exiting. Drop the refcount and free the callback. + * No need to check the refcount as the group cannot be + * deleted before the write function unlocks rdtgroup_mutex. + */ + atomic_dec(&rdtgrp->waitcount); + kfree(callback); + } else { + tsk->closid = rdtgrp->closid; + } + return ret; +} + +static int rdtgroup_task_write_permission(struct task_struct *task, + struct kernfs_open_file *of) +{ + const struct cred *tcred = get_task_cred(task); + const struct cred *cred = current_cred(); + int ret = 0; + + /* + * Even if we're attaching all tasks in the thread group, we only + * need to check permissions on one of them. + */ + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) + ret = -EPERM; + + put_cred(tcred); + return ret; +} + +static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, + struct kernfs_open_file *of) +{ + struct task_struct *tsk; + int ret; + + rcu_read_lock(); + if (pid) { + tsk = find_task_by_vpid(pid); + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + } else { + tsk = current; + } + + get_task_struct(tsk); + rcu_read_unlock(); + + ret = rdtgroup_task_write_permission(tsk, of); + if (!ret) + ret = __rdtgroup_move_task(tsk, rdtgrp); + + put_task_struct(tsk); + return ret; +} + +static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + pid_t pid; + + if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) + return -EINVAL; + rdtgrp = rdtgroup_kn_lock_live(of->kn); + + if (rdtgrp) + ret = rdtgroup_move_task(pid, rdtgrp, of); + else + ret = -ENOENT; + + rdtgroup_kn_unlock(of->kn); + + return ret ?: nbytes; +} + +static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) +{ + struct task_struct *p, *t; + + rcu_read_lock(); + for_each_process_thread(p, t) { + if (t->closid == r->closid) + seq_printf(s, "%d\n", t->pid); + } + rcu_read_unlock(); +} + +static int rdtgroup_tasks_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) + show_rdt_tasks(rdtgrp, s); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + /* Files in each rdtgroup */ static struct rftype rdtgroup_base_files[] = { { @@ -276,6 +433,13 @@ static struct rftype rdtgroup_base_files[] = { .write = rdtgroup_cpus_write, .seq_show = rdtgroup_cpus_show, }, + { + .name = "tasks", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_tasks_write, + .seq_show = rdtgroup_tasks_show, + }, }; static int rdt_num_closids_show(struct kernfs_open_file *of, @@ -592,6 +756,13 @@ static void rdt_reset_pqr_assoc_closid(void *v) static void rmdir_all_sub(void) { struct rdtgroup *rdtgrp, *tmp; + struct task_struct *p, *t; + + /* move all tasks to default resource group */ + read_lock(&tasklist_lock); + for_each_process_thread(p, t) + t->closid = 0; + read_unlock(&tasklist_lock); get_cpu(); /* Reset PQR_ASSOC MSR on this cpu. */ @@ -712,6 +883,7 @@ out_unlock: static int rdtgroup_rmdir(struct kernfs_node *kn) { + struct task_struct *p, *t; struct rdtgroup *rdtgrp; int cpu, ret = 0; @@ -721,6 +893,14 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) return -ENOENT; } + /* Give any tasks back to the default group */ + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { + if (t->closid == rdtgrp->closid) + t->closid = 0; + } + read_unlock(&tasklist_lock); + /* Give any CPUs back to the default group */ cpumask_or(&rdtgroup_default.cpu_mask, &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); diff --git a/include/linux/sched.h b/include/linux/sched.h index 348f51b0ec92..c8f4152e7265 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1791,6 +1791,9 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif +#ifdef CONFIG_INTEL_RDT_A + int closid; +#endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT -- cgit v1.2.3-71-gd317