cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

cmf.c (32470B)


      1// SPDX-License-Identifier: GPL-2.0+
      2/*
      3 * Linux on zSeries Channel Measurement Facility support
      4 *
      5 * Copyright IBM Corp. 2000, 2006
      6 *
      7 * Authors: Arnd Bergmann <arndb@de.ibm.com>
      8 *	    Cornelia Huck <cornelia.huck@de.ibm.com>
      9 *
     10 * original idea from Natarajan Krishnaswami <nkrishna@us.ibm.com>
     11 */
     12
     13#define KMSG_COMPONENT "cio"
     14#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
     15
     16#include <linux/memblock.h>
     17#include <linux/device.h>
     18#include <linux/init.h>
     19#include <linux/list.h>
     20#include <linux/export.h>
     21#include <linux/moduleparam.h>
     22#include <linux/slab.h>
     23#include <linux/timex.h>	/* get_tod_clock() */
     24
     25#include <asm/ccwdev.h>
     26#include <asm/cio.h>
     27#include <asm/cmb.h>
     28#include <asm/div64.h>
     29
     30#include "cio.h"
     31#include "css.h"
     32#include "device.h"
     33#include "ioasm.h"
     34#include "chsc.h"
     35
     36/*
     37 * parameter to enable cmf during boot, possible uses are:
     38 *  "s390cmf" -- enable cmf and allocate 2 MB of ram so measuring can be
     39 *               used on any subchannel
     40 *  "s390cmf=<num>" -- enable cmf and allocate enough memory to measure
     41 *                     <num> subchannel, where <num> is an integer
     42 *                     between 1 and 65535, default is 1024
     43 */
     44#define ARGSTRING "s390cmf"
     45
     46/* indices for READCMB */
     47enum cmb_index {
     48	avg_utilization = -1,
     49 /* basic and exended format: */
     50	cmb_ssch_rsch_count = 0,
     51	cmb_sample_count,
     52	cmb_device_connect_time,
     53	cmb_function_pending_time,
     54	cmb_device_disconnect_time,
     55	cmb_control_unit_queuing_time,
     56	cmb_device_active_only_time,
     57 /* extended format only: */
     58	cmb_device_busy_time,
     59	cmb_initial_command_response_time,
     60};
     61
     62/**
     63 * enum cmb_format - types of supported measurement block formats
     64 *
     65 * @CMF_BASIC:      traditional channel measurement blocks supported
     66 *		    by all machines that we run on
     67 * @CMF_EXTENDED:   improved format that was introduced with the z990
     68 *		    machine
     69 * @CMF_AUTODETECT: default: use extended format when running on a machine
     70 *		    supporting extended format, otherwise fall back to
     71 *		    basic format
     72 */
     73enum cmb_format {
     74	CMF_BASIC,
     75	CMF_EXTENDED,
     76	CMF_AUTODETECT = -1,
     77};
     78
     79/*
     80 * format - actual format for all measurement blocks
     81 *
     82 * The format module parameter can be set to a value of 0 (zero)
     83 * or 1, indicating basic or extended format as described for
     84 * enum cmb_format.
     85 */
     86static int format = CMF_AUTODETECT;
     87module_param(format, bint, 0444);
     88
     89/**
     90 * struct cmb_operations - functions to use depending on cmb_format
     91 *
     92 * Most of these functions operate on a struct ccw_device. There is only
     93 * one instance of struct cmb_operations because the format of the measurement
     94 * data is guaranteed to be the same for every ccw_device.
     95 *
     96 * @alloc:	allocate memory for a channel measurement block,
     97 *		either with the help of a special pool or with kmalloc
     98 * @free:	free memory allocated with @alloc
     99 * @set:	enable or disable measurement
    100 * @read:	read a measurement entry at an index
    101 * @readall:	read a measurement block in a common format
    102 * @reset:	clear the data in the associated measurement block and
    103 *		reset its time stamp
    104 */
    105struct cmb_operations {
    106	int  (*alloc)  (struct ccw_device *);
    107	void (*free)   (struct ccw_device *);
    108	int  (*set)    (struct ccw_device *, u32);
    109	u64  (*read)   (struct ccw_device *, int);
    110	int  (*readall)(struct ccw_device *, struct cmbdata *);
    111	void (*reset)  (struct ccw_device *);
    112/* private: */
    113	struct attribute_group *attr_group;
    114};
    115static struct cmb_operations *cmbops;
    116
    117struct cmb_data {
    118	void *hw_block;   /* Pointer to block updated by hardware */
    119	void *last_block; /* Last changed block copied from hardware block */
    120	int size;	  /* Size of hw_block and last_block */
    121	unsigned long long last_update;  /* when last_block was updated */
    122};
    123
    124/*
    125 * Our user interface is designed in terms of nanoseconds,
    126 * while the hardware measures total times in its own
    127 * unit.
    128 */
    129static inline u64 time_to_nsec(u32 value)
    130{
    131	return ((u64)value) * 128000ull;
    132}
    133
    134/*
    135 * Users are usually interested in average times,
    136 * not accumulated time.
    137 * This also helps us with atomicity problems
    138 * when reading sinlge values.
    139 */
    140static inline u64 time_to_avg_nsec(u32 value, u32 count)
    141{
    142	u64 ret;
    143
    144	/* no samples yet, avoid division by 0 */
    145	if (count == 0)
    146		return 0;
    147
    148	/* value comes in units of 128 µsec */
    149	ret = time_to_nsec(value);
    150	do_div(ret, count);
    151
    152	return ret;
    153}
    154
    155#define CMF_OFF 0
    156#define CMF_ON	2
    157
    158/*
    159 * Activate or deactivate the channel monitor. When area is NULL,
    160 * the monitor is deactivated. The channel monitor needs to
    161 * be active in order to measure subchannels, which also need
    162 * to be enabled.
    163 */
    164static inline void cmf_activate(void *area, unsigned int onoff)
    165{
    166	/* activate channel measurement */
    167	asm volatile(
    168		"	lgr	1,%[r1]\n"
    169		"	lgr	2,%[mbo]\n"
    170		"	schm\n"
    171		:
    172		: [r1] "d" ((unsigned long)onoff), [mbo] "d" (area)
    173		: "1", "2");
    174}
    175
    176static int set_schib(struct ccw_device *cdev, u32 mme, int mbfc,
    177		     unsigned long address)
    178{
    179	struct subchannel *sch = to_subchannel(cdev->dev.parent);
    180	int ret;
    181
    182	sch->config.mme = mme;
    183	sch->config.mbfc = mbfc;
    184	/* address can be either a block address or a block index */
    185	if (mbfc)
    186		sch->config.mba = address;
    187	else
    188		sch->config.mbi = address;
    189
    190	ret = cio_commit_config(sch);
    191	if (!mme && ret == -ENODEV) {
    192		/*
    193		 * The task was to disable measurement block updates but
    194		 * the subchannel is already gone. Report success.
    195		 */
    196		ret = 0;
    197	}
    198	return ret;
    199}
    200
    201struct set_schib_struct {
    202	u32 mme;
    203	int mbfc;
    204	unsigned long address;
    205	wait_queue_head_t wait;
    206	int ret;
    207};
    208
    209#define CMF_PENDING 1
    210#define SET_SCHIB_TIMEOUT (10 * HZ)
    211
    212static int set_schib_wait(struct ccw_device *cdev, u32 mme,
    213			  int mbfc, unsigned long address)
    214{
    215	struct set_schib_struct set_data;
    216	int ret = -ENODEV;
    217
    218	spin_lock_irq(cdev->ccwlock);
    219	if (!cdev->private->cmb)
    220		goto out;
    221
    222	ret = set_schib(cdev, mme, mbfc, address);
    223	if (ret != -EBUSY)
    224		goto out;
    225
    226	/* if the device is not online, don't even try again */
    227	if (cdev->private->state != DEV_STATE_ONLINE)
    228		goto out;
    229
    230	init_waitqueue_head(&set_data.wait);
    231	set_data.mme = mme;
    232	set_data.mbfc = mbfc;
    233	set_data.address = address;
    234	set_data.ret = CMF_PENDING;
    235
    236	cdev->private->state = DEV_STATE_CMFCHANGE;
    237	cdev->private->cmb_wait = &set_data;
    238	spin_unlock_irq(cdev->ccwlock);
    239
    240	ret = wait_event_interruptible_timeout(set_data.wait,
    241					       set_data.ret != CMF_PENDING,
    242					       SET_SCHIB_TIMEOUT);
    243	spin_lock_irq(cdev->ccwlock);
    244	if (ret <= 0) {
    245		if (set_data.ret == CMF_PENDING) {
    246			set_data.ret = (ret == 0) ? -ETIME : ret;
    247			if (cdev->private->state == DEV_STATE_CMFCHANGE)
    248				cdev->private->state = DEV_STATE_ONLINE;
    249		}
    250	}
    251	cdev->private->cmb_wait = NULL;
    252	ret = set_data.ret;
    253out:
    254	spin_unlock_irq(cdev->ccwlock);
    255	return ret;
    256}
    257
    258void retry_set_schib(struct ccw_device *cdev)
    259{
    260	struct set_schib_struct *set_data = cdev->private->cmb_wait;
    261
    262	if (!set_data)
    263		return;
    264
    265	set_data->ret = set_schib(cdev, set_data->mme, set_data->mbfc,
    266				  set_data->address);
    267	wake_up(&set_data->wait);
    268}
    269
    270static int cmf_copy_block(struct ccw_device *cdev)
    271{
    272	struct subchannel *sch = to_subchannel(cdev->dev.parent);
    273	struct cmb_data *cmb_data;
    274	void *hw_block;
    275
    276	if (cio_update_schib(sch))
    277		return -ENODEV;
    278
    279	if (scsw_fctl(&sch->schib.scsw) & SCSW_FCTL_START_FUNC) {
    280		/* Don't copy if a start function is in progress. */
    281		if ((!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_SUSPENDED)) &&
    282		    (scsw_actl(&sch->schib.scsw) &
    283		     (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT)) &&
    284		    (!(scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_SEC_STATUS)))
    285			return -EBUSY;
    286	}
    287	cmb_data = cdev->private->cmb;
    288	hw_block = cmb_data->hw_block;
    289	memcpy(cmb_data->last_block, hw_block, cmb_data->size);
    290	cmb_data->last_update = get_tod_clock();
    291	return 0;
    292}
    293
    294struct copy_block_struct {
    295	wait_queue_head_t wait;
    296	int ret;
    297};
    298
    299static int cmf_cmb_copy_wait(struct ccw_device *cdev)
    300{
    301	struct copy_block_struct copy_block;
    302	int ret = -ENODEV;
    303
    304	spin_lock_irq(cdev->ccwlock);
    305	if (!cdev->private->cmb)
    306		goto out;
    307
    308	ret = cmf_copy_block(cdev);
    309	if (ret != -EBUSY)
    310		goto out;
    311
    312	if (cdev->private->state != DEV_STATE_ONLINE)
    313		goto out;
    314
    315	init_waitqueue_head(&copy_block.wait);
    316	copy_block.ret = CMF_PENDING;
    317
    318	cdev->private->state = DEV_STATE_CMFUPDATE;
    319	cdev->private->cmb_wait = &copy_block;
    320	spin_unlock_irq(cdev->ccwlock);
    321
    322	ret = wait_event_interruptible(copy_block.wait,
    323				       copy_block.ret != CMF_PENDING);
    324	spin_lock_irq(cdev->ccwlock);
    325	if (ret) {
    326		if (copy_block.ret == CMF_PENDING) {
    327			copy_block.ret = -ERESTARTSYS;
    328			if (cdev->private->state == DEV_STATE_CMFUPDATE)
    329				cdev->private->state = DEV_STATE_ONLINE;
    330		}
    331	}
    332	cdev->private->cmb_wait = NULL;
    333	ret = copy_block.ret;
    334out:
    335	spin_unlock_irq(cdev->ccwlock);
    336	return ret;
    337}
    338
    339void cmf_retry_copy_block(struct ccw_device *cdev)
    340{
    341	struct copy_block_struct *copy_block = cdev->private->cmb_wait;
    342
    343	if (!copy_block)
    344		return;
    345
    346	copy_block->ret = cmf_copy_block(cdev);
    347	wake_up(&copy_block->wait);
    348}
    349
    350static void cmf_generic_reset(struct ccw_device *cdev)
    351{
    352	struct cmb_data *cmb_data;
    353
    354	spin_lock_irq(cdev->ccwlock);
    355	cmb_data = cdev->private->cmb;
    356	if (cmb_data) {
    357		memset(cmb_data->last_block, 0, cmb_data->size);
    358		/*
    359		 * Need to reset hw block as well to make the hardware start
    360		 * from 0 again.
    361		 */
    362		memset(cmb_data->hw_block, 0, cmb_data->size);
    363		cmb_data->last_update = 0;
    364	}
    365	cdev->private->cmb_start_time = get_tod_clock();
    366	spin_unlock_irq(cdev->ccwlock);
    367}
    368
    369/**
    370 * struct cmb_area - container for global cmb data
    371 *
    372 * @mem:	pointer to CMBs (only in basic measurement mode)
    373 * @list:	contains a linked list of all subchannels
    374 * @num_channels: number of channels to be measured
    375 * @lock:	protect concurrent access to @mem and @list
    376 */
    377struct cmb_area {
    378	struct cmb *mem;
    379	struct list_head list;
    380	int num_channels;
    381	spinlock_t lock;
    382};
    383
    384static struct cmb_area cmb_area = {
    385	.lock = __SPIN_LOCK_UNLOCKED(cmb_area.lock),
    386	.list = LIST_HEAD_INIT(cmb_area.list),
    387	.num_channels  = 1024,
    388};
    389
    390/* ****** old style CMB handling ********/
    391
    392/*
    393 * Basic channel measurement blocks are allocated in one contiguous
    394 * block of memory, which can not be moved as long as any channel
    395 * is active. Therefore, a maximum number of subchannels needs to
    396 * be defined somewhere. This is a module parameter, defaulting to
    397 * a reasonable value of 1024, or 32 kb of memory.
    398 * Current kernels don't allow kmalloc with more than 128kb, so the
    399 * maximum is 4096.
    400 */
    401
    402module_param_named(maxchannels, cmb_area.num_channels, uint, 0444);
    403
    404/**
    405 * struct cmb - basic channel measurement block
    406 * @ssch_rsch_count: number of ssch and rsch
    407 * @sample_count: number of samples
    408 * @device_connect_time: time of device connect
    409 * @function_pending_time: time of function pending
    410 * @device_disconnect_time: time of device disconnect
    411 * @control_unit_queuing_time: time of control unit queuing
    412 * @device_active_only_time: time of device active only
    413 * @reserved: unused in basic measurement mode
    414 *
    415 * The measurement block as used by the hardware. The fields are described
    416 * further in z/Architecture Principles of Operation, chapter 17.
    417 *
    418 * The cmb area made up from these blocks must be a contiguous array and may
    419 * not be reallocated or freed.
    420 * Only one cmb area can be present in the system.
    421 */
    422struct cmb {
    423	u16 ssch_rsch_count;
    424	u16 sample_count;
    425	u32 device_connect_time;
    426	u32 function_pending_time;
    427	u32 device_disconnect_time;
    428	u32 control_unit_queuing_time;
    429	u32 device_active_only_time;
    430	u32 reserved[2];
    431};
    432
    433/*
    434 * Insert a single device into the cmb_area list.
    435 * Called with cmb_area.lock held from alloc_cmb.
    436 */
    437static int alloc_cmb_single(struct ccw_device *cdev,
    438			    struct cmb_data *cmb_data)
    439{
    440	struct cmb *cmb;
    441	struct ccw_device_private *node;
    442	int ret;
    443
    444	spin_lock_irq(cdev->ccwlock);
    445	if (!list_empty(&cdev->private->cmb_list)) {
    446		ret = -EBUSY;
    447		goto out;
    448	}
    449
    450	/*
    451	 * Find first unused cmb in cmb_area.mem.
    452	 * This is a little tricky: cmb_area.list
    453	 * remains sorted by ->cmb->hw_data pointers.
    454	 */
    455	cmb = cmb_area.mem;
    456	list_for_each_entry(node, &cmb_area.list, cmb_list) {
    457		struct cmb_data *data;
    458		data = node->cmb;
    459		if ((struct cmb*)data->hw_block > cmb)
    460			break;
    461		cmb++;
    462	}
    463	if (cmb - cmb_area.mem >= cmb_area.num_channels) {
    464		ret = -ENOMEM;
    465		goto out;
    466	}
    467
    468	/* insert new cmb */
    469	list_add_tail(&cdev->private->cmb_list, &node->cmb_list);
    470	cmb_data->hw_block = cmb;
    471	cdev->private->cmb = cmb_data;
    472	ret = 0;
    473out:
    474	spin_unlock_irq(cdev->ccwlock);
    475	return ret;
    476}
    477
    478static int alloc_cmb(struct ccw_device *cdev)
    479{
    480	int ret;
    481	struct cmb *mem;
    482	ssize_t size;
    483	struct cmb_data *cmb_data;
    484
    485	/* Allocate private cmb_data. */
    486	cmb_data = kzalloc(sizeof(struct cmb_data), GFP_KERNEL);
    487	if (!cmb_data)
    488		return -ENOMEM;
    489
    490	cmb_data->last_block = kzalloc(sizeof(struct cmb), GFP_KERNEL);
    491	if (!cmb_data->last_block) {
    492		kfree(cmb_data);
    493		return -ENOMEM;
    494	}
    495	cmb_data->size = sizeof(struct cmb);
    496	spin_lock(&cmb_area.lock);
    497
    498	if (!cmb_area.mem) {
    499		/* there is no user yet, so we need a new area */
    500		size = sizeof(struct cmb) * cmb_area.num_channels;
    501		WARN_ON(!list_empty(&cmb_area.list));
    502
    503		spin_unlock(&cmb_area.lock);
    504		mem = (void*)__get_free_pages(GFP_KERNEL | GFP_DMA,
    505				 get_order(size));
    506		spin_lock(&cmb_area.lock);
    507
    508		if (cmb_area.mem) {
    509			/* ok, another thread was faster */
    510			free_pages((unsigned long)mem, get_order(size));
    511		} else if (!mem) {
    512			/* no luck */
    513			ret = -ENOMEM;
    514			goto out;
    515		} else {
    516			/* everything ok */
    517			memset(mem, 0, size);
    518			cmb_area.mem = mem;
    519			cmf_activate(cmb_area.mem, CMF_ON);
    520		}
    521	}
    522
    523	/* do the actual allocation */
    524	ret = alloc_cmb_single(cdev, cmb_data);
    525out:
    526	spin_unlock(&cmb_area.lock);
    527	if (ret) {
    528		kfree(cmb_data->last_block);
    529		kfree(cmb_data);
    530	}
    531	return ret;
    532}
    533
    534static void free_cmb(struct ccw_device *cdev)
    535{
    536	struct ccw_device_private *priv;
    537	struct cmb_data *cmb_data;
    538
    539	spin_lock(&cmb_area.lock);
    540	spin_lock_irq(cdev->ccwlock);
    541
    542	priv = cdev->private;
    543	cmb_data = priv->cmb;
    544	priv->cmb = NULL;
    545	if (cmb_data)
    546		kfree(cmb_data->last_block);
    547	kfree(cmb_data);
    548	list_del_init(&priv->cmb_list);
    549
    550	if (list_empty(&cmb_area.list)) {
    551		ssize_t size;
    552		size = sizeof(struct cmb) * cmb_area.num_channels;
    553		cmf_activate(NULL, CMF_OFF);
    554		free_pages((unsigned long)cmb_area.mem, get_order(size));
    555		cmb_area.mem = NULL;
    556	}
    557	spin_unlock_irq(cdev->ccwlock);
    558	spin_unlock(&cmb_area.lock);
    559}
    560
    561static int set_cmb(struct ccw_device *cdev, u32 mme)
    562{
    563	u16 offset;
    564	struct cmb_data *cmb_data;
    565	unsigned long flags;
    566
    567	spin_lock_irqsave(cdev->ccwlock, flags);
    568	if (!cdev->private->cmb) {
    569		spin_unlock_irqrestore(cdev->ccwlock, flags);
    570		return -EINVAL;
    571	}
    572	cmb_data = cdev->private->cmb;
    573	offset = mme ? (struct cmb *)cmb_data->hw_block - cmb_area.mem : 0;
    574	spin_unlock_irqrestore(cdev->ccwlock, flags);
    575
    576	return set_schib_wait(cdev, mme, 0, offset);
    577}
    578
    579/* calculate utilization in 0.1 percent units */
    580static u64 __cmb_utilization(u64 device_connect_time, u64 function_pending_time,
    581			     u64 device_disconnect_time, u64 start_time)
    582{
    583	u64 utilization, elapsed_time;
    584
    585	utilization = time_to_nsec(device_connect_time +
    586				   function_pending_time +
    587				   device_disconnect_time);
    588
    589	elapsed_time = get_tod_clock() - start_time;
    590	elapsed_time = tod_to_ns(elapsed_time);
    591	elapsed_time /= 1000;
    592
    593	return elapsed_time ? (utilization / elapsed_time) : 0;
    594}
    595
    596static u64 read_cmb(struct ccw_device *cdev, int index)
    597{
    598	struct cmb_data *cmb_data;
    599	unsigned long flags;
    600	struct cmb *cmb;
    601	u64 ret = 0;
    602	u32 val;
    603
    604	spin_lock_irqsave(cdev->ccwlock, flags);
    605	cmb_data = cdev->private->cmb;
    606	if (!cmb_data)
    607		goto out;
    608
    609	cmb = cmb_data->hw_block;
    610	switch (index) {
    611	case avg_utilization:
    612		ret = __cmb_utilization(cmb->device_connect_time,
    613					cmb->function_pending_time,
    614					cmb->device_disconnect_time,
    615					cdev->private->cmb_start_time);
    616		goto out;
    617	case cmb_ssch_rsch_count:
    618		ret = cmb->ssch_rsch_count;
    619		goto out;
    620	case cmb_sample_count:
    621		ret = cmb->sample_count;
    622		goto out;
    623	case cmb_device_connect_time:
    624		val = cmb->device_connect_time;
    625		break;
    626	case cmb_function_pending_time:
    627		val = cmb->function_pending_time;
    628		break;
    629	case cmb_device_disconnect_time:
    630		val = cmb->device_disconnect_time;
    631		break;
    632	case cmb_control_unit_queuing_time:
    633		val = cmb->control_unit_queuing_time;
    634		break;
    635	case cmb_device_active_only_time:
    636		val = cmb->device_active_only_time;
    637		break;
    638	default:
    639		goto out;
    640	}
    641	ret = time_to_avg_nsec(val, cmb->sample_count);
    642out:
    643	spin_unlock_irqrestore(cdev->ccwlock, flags);
    644	return ret;
    645}
    646
    647static int readall_cmb(struct ccw_device *cdev, struct cmbdata *data)
    648{
    649	struct cmb *cmb;
    650	struct cmb_data *cmb_data;
    651	u64 time;
    652	unsigned long flags;
    653	int ret;
    654
    655	ret = cmf_cmb_copy_wait(cdev);
    656	if (ret < 0)
    657		return ret;
    658	spin_lock_irqsave(cdev->ccwlock, flags);
    659	cmb_data = cdev->private->cmb;
    660	if (!cmb_data) {
    661		ret = -ENODEV;
    662		goto out;
    663	}
    664	if (cmb_data->last_update == 0) {
    665		ret = -EAGAIN;
    666		goto out;
    667	}
    668	cmb = cmb_data->last_block;
    669	time = cmb_data->last_update - cdev->private->cmb_start_time;
    670
    671	memset(data, 0, sizeof(struct cmbdata));
    672
    673	/* we only know values before device_busy_time */
    674	data->size = offsetof(struct cmbdata, device_busy_time);
    675
    676	data->elapsed_time = tod_to_ns(time);
    677
    678	/* copy data to new structure */
    679	data->ssch_rsch_count = cmb->ssch_rsch_count;
    680	data->sample_count = cmb->sample_count;
    681
    682	/* time fields are converted to nanoseconds while copying */
    683	data->device_connect_time = time_to_nsec(cmb->device_connect_time);
    684	data->function_pending_time = time_to_nsec(cmb->function_pending_time);
    685	data->device_disconnect_time =
    686		time_to_nsec(cmb->device_disconnect_time);
    687	data->control_unit_queuing_time
    688		= time_to_nsec(cmb->control_unit_queuing_time);
    689	data->device_active_only_time
    690		= time_to_nsec(cmb->device_active_only_time);
    691	ret = 0;
    692out:
    693	spin_unlock_irqrestore(cdev->ccwlock, flags);
    694	return ret;
    695}
    696
    697static void reset_cmb(struct ccw_device *cdev)
    698{
    699	cmf_generic_reset(cdev);
    700}
    701
    702static int cmf_enabled(struct ccw_device *cdev)
    703{
    704	int enabled;
    705
    706	spin_lock_irq(cdev->ccwlock);
    707	enabled = !!cdev->private->cmb;
    708	spin_unlock_irq(cdev->ccwlock);
    709
    710	return enabled;
    711}
    712
    713static struct attribute_group cmf_attr_group;
    714
    715static struct cmb_operations cmbops_basic = {
    716	.alloc	= alloc_cmb,
    717	.free	= free_cmb,
    718	.set	= set_cmb,
    719	.read	= read_cmb,
    720	.readall    = readall_cmb,
    721	.reset	    = reset_cmb,
    722	.attr_group = &cmf_attr_group,
    723};
    724
    725/* ******** extended cmb handling ********/
    726
    727/**
    728 * struct cmbe - extended channel measurement block
    729 * @ssch_rsch_count: number of ssch and rsch
    730 * @sample_count: number of samples
    731 * @device_connect_time: time of device connect
    732 * @function_pending_time: time of function pending
    733 * @device_disconnect_time: time of device disconnect
    734 * @control_unit_queuing_time: time of control unit queuing
    735 * @device_active_only_time: time of device active only
    736 * @device_busy_time: time of device busy
    737 * @initial_command_response_time: initial command response time
    738 * @reserved: unused
    739 *
    740 * The measurement block as used by the hardware. May be in any 64 bit physical
    741 * location.
    742 * The fields are described further in z/Architecture Principles of Operation,
    743 * third edition, chapter 17.
    744 */
    745struct cmbe {
    746	u32 ssch_rsch_count;
    747	u32 sample_count;
    748	u32 device_connect_time;
    749	u32 function_pending_time;
    750	u32 device_disconnect_time;
    751	u32 control_unit_queuing_time;
    752	u32 device_active_only_time;
    753	u32 device_busy_time;
    754	u32 initial_command_response_time;
    755	u32 reserved[7];
    756} __packed __aligned(64);
    757
    758static struct kmem_cache *cmbe_cache;
    759
    760static int alloc_cmbe(struct ccw_device *cdev)
    761{
    762	struct cmb_data *cmb_data;
    763	struct cmbe *cmbe;
    764	int ret = -ENOMEM;
    765
    766	cmbe = kmem_cache_zalloc(cmbe_cache, GFP_KERNEL);
    767	if (!cmbe)
    768		return ret;
    769
    770	cmb_data = kzalloc(sizeof(*cmb_data), GFP_KERNEL);
    771	if (!cmb_data)
    772		goto out_free;
    773
    774	cmb_data->last_block = kzalloc(sizeof(struct cmbe), GFP_KERNEL);
    775	if (!cmb_data->last_block)
    776		goto out_free;
    777
    778	cmb_data->size = sizeof(*cmbe);
    779	cmb_data->hw_block = cmbe;
    780
    781	spin_lock(&cmb_area.lock);
    782	spin_lock_irq(cdev->ccwlock);
    783	if (cdev->private->cmb)
    784		goto out_unlock;
    785
    786	cdev->private->cmb = cmb_data;
    787
    788	/* activate global measurement if this is the first channel */
    789	if (list_empty(&cmb_area.list))
    790		cmf_activate(NULL, CMF_ON);
    791	list_add_tail(&cdev->private->cmb_list, &cmb_area.list);
    792
    793	spin_unlock_irq(cdev->ccwlock);
    794	spin_unlock(&cmb_area.lock);
    795	return 0;
    796
    797out_unlock:
    798	spin_unlock_irq(cdev->ccwlock);
    799	spin_unlock(&cmb_area.lock);
    800	ret = -EBUSY;
    801out_free:
    802	if (cmb_data)
    803		kfree(cmb_data->last_block);
    804	kfree(cmb_data);
    805	kmem_cache_free(cmbe_cache, cmbe);
    806
    807	return ret;
    808}
    809
    810static void free_cmbe(struct ccw_device *cdev)
    811{
    812	struct cmb_data *cmb_data;
    813
    814	spin_lock(&cmb_area.lock);
    815	spin_lock_irq(cdev->ccwlock);
    816	cmb_data = cdev->private->cmb;
    817	cdev->private->cmb = NULL;
    818	if (cmb_data) {
    819		kfree(cmb_data->last_block);
    820		kmem_cache_free(cmbe_cache, cmb_data->hw_block);
    821	}
    822	kfree(cmb_data);
    823
    824	/* deactivate global measurement if this is the last channel */
    825	list_del_init(&cdev->private->cmb_list);
    826	if (list_empty(&cmb_area.list))
    827		cmf_activate(NULL, CMF_OFF);
    828	spin_unlock_irq(cdev->ccwlock);
    829	spin_unlock(&cmb_area.lock);
    830}
    831
    832static int set_cmbe(struct ccw_device *cdev, u32 mme)
    833{
    834	unsigned long mba;
    835	struct cmb_data *cmb_data;
    836	unsigned long flags;
    837
    838	spin_lock_irqsave(cdev->ccwlock, flags);
    839	if (!cdev->private->cmb) {
    840		spin_unlock_irqrestore(cdev->ccwlock, flags);
    841		return -EINVAL;
    842	}
    843	cmb_data = cdev->private->cmb;
    844	mba = mme ? (unsigned long) cmb_data->hw_block : 0;
    845	spin_unlock_irqrestore(cdev->ccwlock, flags);
    846
    847	return set_schib_wait(cdev, mme, 1, mba);
    848}
    849
    850static u64 read_cmbe(struct ccw_device *cdev, int index)
    851{
    852	struct cmb_data *cmb_data;
    853	unsigned long flags;
    854	struct cmbe *cmb;
    855	u64 ret = 0;
    856	u32 val;
    857
    858	spin_lock_irqsave(cdev->ccwlock, flags);
    859	cmb_data = cdev->private->cmb;
    860	if (!cmb_data)
    861		goto out;
    862
    863	cmb = cmb_data->hw_block;
    864	switch (index) {
    865	case avg_utilization:
    866		ret = __cmb_utilization(cmb->device_connect_time,
    867					cmb->function_pending_time,
    868					cmb->device_disconnect_time,
    869					cdev->private->cmb_start_time);
    870		goto out;
    871	case cmb_ssch_rsch_count:
    872		ret = cmb->ssch_rsch_count;
    873		goto out;
    874	case cmb_sample_count:
    875		ret = cmb->sample_count;
    876		goto out;
    877	case cmb_device_connect_time:
    878		val = cmb->device_connect_time;
    879		break;
    880	case cmb_function_pending_time:
    881		val = cmb->function_pending_time;
    882		break;
    883	case cmb_device_disconnect_time:
    884		val = cmb->device_disconnect_time;
    885		break;
    886	case cmb_control_unit_queuing_time:
    887		val = cmb->control_unit_queuing_time;
    888		break;
    889	case cmb_device_active_only_time:
    890		val = cmb->device_active_only_time;
    891		break;
    892	case cmb_device_busy_time:
    893		val = cmb->device_busy_time;
    894		break;
    895	case cmb_initial_command_response_time:
    896		val = cmb->initial_command_response_time;
    897		break;
    898	default:
    899		goto out;
    900	}
    901	ret = time_to_avg_nsec(val, cmb->sample_count);
    902out:
    903	spin_unlock_irqrestore(cdev->ccwlock, flags);
    904	return ret;
    905}
    906
    907static int readall_cmbe(struct ccw_device *cdev, struct cmbdata *data)
    908{
    909	struct cmbe *cmb;
    910	struct cmb_data *cmb_data;
    911	u64 time;
    912	unsigned long flags;
    913	int ret;
    914
    915	ret = cmf_cmb_copy_wait(cdev);
    916	if (ret < 0)
    917		return ret;
    918	spin_lock_irqsave(cdev->ccwlock, flags);
    919	cmb_data = cdev->private->cmb;
    920	if (!cmb_data) {
    921		ret = -ENODEV;
    922		goto out;
    923	}
    924	if (cmb_data->last_update == 0) {
    925		ret = -EAGAIN;
    926		goto out;
    927	}
    928	time = cmb_data->last_update - cdev->private->cmb_start_time;
    929
    930	memset (data, 0, sizeof(struct cmbdata));
    931
    932	/* we only know values before device_busy_time */
    933	data->size = offsetof(struct cmbdata, device_busy_time);
    934
    935	data->elapsed_time = tod_to_ns(time);
    936
    937	cmb = cmb_data->last_block;
    938	/* copy data to new structure */
    939	data->ssch_rsch_count = cmb->ssch_rsch_count;
    940	data->sample_count = cmb->sample_count;
    941
    942	/* time fields are converted to nanoseconds while copying */
    943	data->device_connect_time = time_to_nsec(cmb->device_connect_time);
    944	data->function_pending_time = time_to_nsec(cmb->function_pending_time);
    945	data->device_disconnect_time =
    946		time_to_nsec(cmb->device_disconnect_time);
    947	data->control_unit_queuing_time
    948		= time_to_nsec(cmb->control_unit_queuing_time);
    949	data->device_active_only_time
    950		= time_to_nsec(cmb->device_active_only_time);
    951	data->device_busy_time = time_to_nsec(cmb->device_busy_time);
    952	data->initial_command_response_time
    953		= time_to_nsec(cmb->initial_command_response_time);
    954
    955	ret = 0;
    956out:
    957	spin_unlock_irqrestore(cdev->ccwlock, flags);
    958	return ret;
    959}
    960
    961static void reset_cmbe(struct ccw_device *cdev)
    962{
    963	cmf_generic_reset(cdev);
    964}
    965
    966static struct attribute_group cmf_attr_group_ext;
    967
    968static struct cmb_operations cmbops_extended = {
    969	.alloc	    = alloc_cmbe,
    970	.free	    = free_cmbe,
    971	.set	    = set_cmbe,
    972	.read	    = read_cmbe,
    973	.readall    = readall_cmbe,
    974	.reset	    = reset_cmbe,
    975	.attr_group = &cmf_attr_group_ext,
    976};
    977
    978static ssize_t cmb_show_attr(struct device *dev, char *buf, enum cmb_index idx)
    979{
    980	return sprintf(buf, "%lld\n",
    981		(unsigned long long) cmf_read(to_ccwdev(dev), idx));
    982}
    983
    984static ssize_t cmb_show_avg_sample_interval(struct device *dev,
    985					    struct device_attribute *attr,
    986					    char *buf)
    987{
    988	struct ccw_device *cdev = to_ccwdev(dev);
    989	unsigned long count;
    990	long interval;
    991
    992	count = cmf_read(cdev, cmb_sample_count);
    993	spin_lock_irq(cdev->ccwlock);
    994	if (count) {
    995		interval = get_tod_clock() - cdev->private->cmb_start_time;
    996		interval = tod_to_ns(interval);
    997		interval /= count;
    998	} else
    999		interval = -1;
   1000	spin_unlock_irq(cdev->ccwlock);
   1001	return sprintf(buf, "%ld\n", interval);
   1002}
   1003
   1004static ssize_t cmb_show_avg_utilization(struct device *dev,
   1005					struct device_attribute *attr,
   1006					char *buf)
   1007{
   1008	unsigned long u = cmf_read(to_ccwdev(dev), avg_utilization);
   1009
   1010	return sprintf(buf, "%02lu.%01lu%%\n", u / 10, u % 10);
   1011}
   1012
   1013#define cmf_attr(name) \
   1014static ssize_t show_##name(struct device *dev, \
   1015			   struct device_attribute *attr, char *buf)	\
   1016{ return cmb_show_attr((dev), buf, cmb_##name); } \
   1017static DEVICE_ATTR(name, 0444, show_##name, NULL);
   1018
   1019#define cmf_attr_avg(name) \
   1020static ssize_t show_avg_##name(struct device *dev, \
   1021			       struct device_attribute *attr, char *buf) \
   1022{ return cmb_show_attr((dev), buf, cmb_##name); } \
   1023static DEVICE_ATTR(avg_##name, 0444, show_avg_##name, NULL);
   1024
   1025cmf_attr(ssch_rsch_count);
   1026cmf_attr(sample_count);
   1027cmf_attr_avg(device_connect_time);
   1028cmf_attr_avg(function_pending_time);
   1029cmf_attr_avg(device_disconnect_time);
   1030cmf_attr_avg(control_unit_queuing_time);
   1031cmf_attr_avg(device_active_only_time);
   1032cmf_attr_avg(device_busy_time);
   1033cmf_attr_avg(initial_command_response_time);
   1034
   1035static DEVICE_ATTR(avg_sample_interval, 0444, cmb_show_avg_sample_interval,
   1036		   NULL);
   1037static DEVICE_ATTR(avg_utilization, 0444, cmb_show_avg_utilization, NULL);
   1038
/*
 * NULL-terminated attribute list without the device_busy_time and
 * initial_command_response_time entries that cmf_attributes_ext adds.
 * NOTE(review): presumably the set used for the basic format -- confirm
 * against cmbops_basic.
 */
static struct attribute *cmf_attributes[] = {
	&dev_attr_avg_sample_interval.attr,
	&dev_attr_avg_utilization.attr,
	&dev_attr_ssch_rsch_count.attr,
	&dev_attr_sample_count.attr,
	&dev_attr_avg_device_connect_time.attr,
	&dev_attr_avg_function_pending_time.attr,
	&dev_attr_avg_device_disconnect_time.attr,
	&dev_attr_avg_control_unit_queuing_time.attr,
	&dev_attr_avg_device_active_only_time.attr,
	NULL,
};

/* Attribute group named "cmf" wrapping cmf_attributes. */
static struct attribute_group cmf_attr_group = {
	.name  = "cmf",
	.attrs = cmf_attributes,
};
   1056
/*
 * NULL-terminated attribute list for the extended format: everything in
 * cmf_attributes plus avg_device_busy_time and
 * avg_initial_command_response_time.
 */
static struct attribute *cmf_attributes_ext[] = {
	&dev_attr_avg_sample_interval.attr,
	&dev_attr_avg_utilization.attr,
	&dev_attr_ssch_rsch_count.attr,
	&dev_attr_sample_count.attr,
	&dev_attr_avg_device_connect_time.attr,
	&dev_attr_avg_function_pending_time.attr,
	&dev_attr_avg_device_disconnect_time.attr,
	&dev_attr_avg_control_unit_queuing_time.attr,
	&dev_attr_avg_device_active_only_time.attr,
	&dev_attr_avg_device_busy_time.attr,
	&dev_attr_avg_initial_command_response_time.attr,
	NULL,
};

/*
 * Attribute group named "cmf" wrapping cmf_attributes_ext; referenced by
 * cmbops_extended.attr_group.
 */
static struct attribute_group cmf_attr_group_ext = {
	.name  = "cmf",
	.attrs = cmf_attributes_ext,
};
   1076
   1077static ssize_t cmb_enable_show(struct device *dev,
   1078			       struct device_attribute *attr,
   1079			       char *buf)
   1080{
   1081	struct ccw_device *cdev = to_ccwdev(dev);
   1082
   1083	return sprintf(buf, "%d\n", cmf_enabled(cdev));
   1084}
   1085
   1086static ssize_t cmb_enable_store(struct device *dev,
   1087				struct device_attribute *attr, const char *buf,
   1088				size_t c)
   1089{
   1090	struct ccw_device *cdev = to_ccwdev(dev);
   1091	unsigned long val;
   1092	int ret;
   1093
   1094	ret = kstrtoul(buf, 16, &val);
   1095	if (ret)
   1096		return ret;
   1097
   1098	switch (val) {
   1099	case 0:
   1100		ret = disable_cmf(cdev);
   1101		break;
   1102	case 1:
   1103		ret = enable_cmf(cdev);
   1104		break;
   1105	default:
   1106		ret = -EINVAL;
   1107	}
   1108
   1109	return ret ? ret : c;
   1110}
   1111DEVICE_ATTR_RW(cmb_enable);
   1112
   1113/**
   1114 * enable_cmf() - switch on the channel measurement for a specific device
   1115 *  @cdev:	The ccw device to be enabled
   1116 *
   1117 *  Enable channel measurements for @cdev. If this is called on a device
   1118 *  for which channel measurement is already enabled a reset of the
   1119 *  measurement data is triggered.
   1120 *  Returns: %0 for success or a negative error value.
   1121 *  Context:
   1122 *    non-atomic
   1123 */
   1124int enable_cmf(struct ccw_device *cdev)
   1125{
   1126	int ret = 0;
   1127
   1128	device_lock(&cdev->dev);
   1129	if (cmf_enabled(cdev)) {
   1130		cmbops->reset(cdev);
   1131		goto out_unlock;
   1132	}
   1133	get_device(&cdev->dev);
   1134	ret = cmbops->alloc(cdev);
   1135	if (ret)
   1136		goto out;
   1137	cmbops->reset(cdev);
   1138	ret = sysfs_create_group(&cdev->dev.kobj, cmbops->attr_group);
   1139	if (ret) {
   1140		cmbops->free(cdev);
   1141		goto out;
   1142	}
   1143	ret = cmbops->set(cdev, 2);
   1144	if (ret) {
   1145		sysfs_remove_group(&cdev->dev.kobj, cmbops->attr_group);
   1146		cmbops->free(cdev);
   1147	}
   1148out:
   1149	if (ret)
   1150		put_device(&cdev->dev);
   1151out_unlock:
   1152	device_unlock(&cdev->dev);
   1153	return ret;
   1154}
   1155
   1156/**
   1157 * __disable_cmf() - switch off the channel measurement for a specific device
   1158 *  @cdev:	The ccw device to be disabled
   1159 *
   1160 *  Returns: %0 for success or a negative error value.
   1161 *
   1162 *  Context:
   1163 *    non-atomic, device_lock() held.
   1164 */
   1165int __disable_cmf(struct ccw_device *cdev)
   1166{
   1167	int ret;
   1168
   1169	ret = cmbops->set(cdev, 0);
   1170	if (ret)
   1171		return ret;
   1172
   1173	sysfs_remove_group(&cdev->dev.kobj, cmbops->attr_group);
   1174	cmbops->free(cdev);
   1175	put_device(&cdev->dev);
   1176
   1177	return ret;
   1178}
   1179
   1180/**
   1181 * disable_cmf() - switch off the channel measurement for a specific device
   1182 *  @cdev:	The ccw device to be disabled
   1183 *
   1184 *  Returns: %0 for success or a negative error value.
   1185 *
   1186 *  Context:
   1187 *    non-atomic
   1188 */
   1189int disable_cmf(struct ccw_device *cdev)
   1190{
   1191	int ret;
   1192
   1193	device_lock(&cdev->dev);
   1194	ret = __disable_cmf(cdev);
   1195	device_unlock(&cdev->dev);
   1196
   1197	return ret;
   1198}
   1199
   1200/**
   1201 * cmf_read() - read one value from the current channel measurement block
   1202 * @cdev:	the channel to be read
   1203 * @index:	the index of the value to be read
   1204 *
   1205 * Returns: The value read or %0 if the value cannot be read.
   1206 *
   1207 *  Context:
   1208 *    any
   1209 */
   1210u64 cmf_read(struct ccw_device *cdev, int index)
   1211{
   1212	return cmbops->read(cdev, index);
   1213}
   1214
   1215/**
   1216 * cmf_readall() - read the current channel measurement block
   1217 * @cdev:	the channel to be read
   1218 * @data:	a pointer to a data block that will be filled
   1219 *
   1220 * Returns: %0 on success, a negative error value otherwise.
   1221 *
   1222 *  Context:
   1223 *    any
   1224 */
   1225int cmf_readall(struct ccw_device *cdev, struct cmbdata *data)
   1226{
   1227	return cmbops->readall(cdev, data);
   1228}
   1229
   1230/* Reenable cmf when a disconnected device becomes available again. */
   1231int cmf_reenable(struct ccw_device *cdev)
   1232{
   1233	cmbops->reset(cdev);
   1234	return cmbops->set(cdev, 2);
   1235}
   1236
   1237/**
   1238 * cmf_reactivate() - reactivate measurement block updates
   1239 *
   1240 * Use this during resume from hibernate.
   1241 */
   1242void cmf_reactivate(void)
   1243{
   1244	spin_lock(&cmb_area.lock);
   1245	if (!list_empty(&cmb_area.list))
   1246		cmf_activate(cmb_area.mem, CMF_ON);
   1247	spin_unlock(&cmb_area.lock);
   1248}
   1249
   1250static int __init init_cmbe(void)
   1251{
   1252	cmbe_cache = kmem_cache_create("cmbe_cache", sizeof(struct cmbe),
   1253				       __alignof__(struct cmbe), 0, NULL);
   1254
   1255	return cmbe_cache ? 0 : -ENOMEM;
   1256}
   1257
   1258static int __init init_cmf(void)
   1259{
   1260	char *format_string;
   1261	char *detect_string;
   1262	int ret;
   1263
   1264	/*
   1265	 * If the user did not give a parameter, see if we are running on a
   1266	 * machine supporting extended measurement blocks, otherwise fall back
   1267	 * to basic mode.
   1268	 */
   1269	if (format == CMF_AUTODETECT) {
   1270		if (!css_general_characteristics.ext_mb) {
   1271			format = CMF_BASIC;
   1272		} else {
   1273			format = CMF_EXTENDED;
   1274		}
   1275		detect_string = "autodetected";
   1276	} else {
   1277		detect_string = "parameter";
   1278	}
   1279
   1280	switch (format) {
   1281	case CMF_BASIC:
   1282		format_string = "basic";
   1283		cmbops = &cmbops_basic;
   1284		break;
   1285	case CMF_EXTENDED:
   1286		format_string = "extended";
   1287		cmbops = &cmbops_extended;
   1288
   1289		ret = init_cmbe();
   1290		if (ret)
   1291			return ret;
   1292		break;
   1293	default:
   1294		return -EINVAL;
   1295	}
   1296	pr_info("Channel measurement facility initialized using format "
   1297		"%s (mode %s)\n", format_string, detect_string);
   1298	return 0;
   1299}
   1300device_initcall(init_cmf);
   1301
/* Entry points of the measurement facility exported to GPL modules. */
EXPORT_SYMBOL_GPL(enable_cmf);
EXPORT_SYMBOL_GPL(disable_cmf);
EXPORT_SYMBOL_GPL(cmf_read);
EXPORT_SYMBOL_GPL(cmf_readall);