cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

drivetemp.c (17340B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Hwmon client for disk and solid state drives with temperature sensors
      4 * Copyright (C) 2019 Zodiac Inflight Innovations
      5 *
      6 * With input from:
      7 *    Hwmon client for S.M.A.R.T. hard disk drives with temperature sensors.
      8 *    (C) 2018 Linus Walleij
      9 *
     10 *    hwmon: Driver for SCSI/ATA temperature sensors
     11 *    by Constantin Baranov <const@mimas.ru>, submitted September 2009
     12 *
     13 * This driver supports reporting the temperature of SATA drives. It can be
     14 * easily extended to report the temperature of SCSI drives.
     15 *
     16 * The primary means to read drive temperatures and temperature limits
     17 * for ATA drives is the SCT Command Transport feature set as specified in
     18 * ATA8-ACS.
     19 * It can be used to read the current drive temperature, temperature limits,
     20 * and historic minimum and maximum temperatures. The SCT Command Transport
     21 * feature set is documented in "AT Attachment 8 - ATA/ATAPI Command Set
     22 * (ATA8-ACS)".
     23 *
     24 * If the SCT Command Transport feature set is not available, drive temperatures
     25 * may be readable through SMART attributes. Since SMART attributes are not well
     26 * defined, this method is only used as fallback mechanism.
     27 *
     28 * There are three SMART attributes which may report drive temperatures.
     29 * Those are defined as follows (from
     30 * http://www.cropel.com/library/smart-attribute-list.aspx).
     31 *
     32 * 190	Temperature	Temperature, monitored by a sensor somewhere inside
     33 *			the drive. Raw value typically holds the actual
     34 *			temperature (hexadecimal) in its rightmost two digits.
     35 *
     36 * 194	Temperature	Temperature, monitored by a sensor somewhere inside
     37 *			the drive. Raw value typically holds the actual
     38 *			temperature (hexadecimal) in its rightmost two digits.
     39 *
     40 * 231	Temperature	Temperature, monitored by a sensor somewhere inside
     41 *			the drive. Raw value typically holds the actual
     42 *			temperature (hexadecimal) in its rightmost two digits.
     43 *
     44 * Wikipedia defines attributes a bit differently.
     45 *
     46 * 190	Temperature	Value is equal to (100-temp. °C), allowing manufacturer
     47 *	Difference or	to set a minimum threshold which corresponds to a
     48 *	Airflow		maximum temperature. This also follows the convention of
     49 *	Temperature	100 being a best-case value and lower values being
     50 *			undesirable. However, some older drives may instead
     51 *			report raw Temperature (identical to 0xC2) or
     52 *			Temperature minus 50 here.
     53 * 194	Temperature or	Indicates the device temperature, if the appropriate
     54 *	Temperature	sensor is fitted. Lowest byte of the raw value contains
     55 *	Celsius		the exact temperature value (Celsius degrees).
     56 * 231	Life Left	Indicates the approximate SSD life left, in terms of
     57 *	(SSDs) or	program/erase cycles or available reserved blocks.
     58 *	Temperature	A normalized value of 100 represents a new drive, with
     59 *			a threshold value at 10 indicating a need for
     60 *			replacement. A value of 0 may mean that the drive is
     61 *			operating in read-only mode to allow data recovery.
     62 *			Previously (pre-2010) occasionally used for Drive
     63 *			Temperature (more typically reported at 0xC2).
     64 *
     65 * Common denominator is that the first raw byte reports the temperature
     66 * in degrees C on almost all drives. Some drives may report a fractional
     67 * temperature in the second raw byte.
     68 *
     69 * Known exceptions (from libatasmart):
     70 * - SAMSUNG SV0412H and SAMSUNG SV1204H report the temperature in 10th
     71 *   degrees C in the first two raw bytes.
     72 * - A few Maxtor drives report an unknown or bad value in attribute 194.
     73 * - Certain Apple SSD drives report an unknown value in attribute 190.
     74 *   Only certain firmware versions are affected.
     75 *
     76 * Those exceptions affect older ATA drives and are currently ignored.
     77 * Also, the second raw byte (possibly reporting the fractional temperature)
     78 * is currently ignored.
     79 *
     80 * Many drives also report temperature limits in additional SMART data raw
     81 * bytes. The format of those is not well defined and varies widely.
     82 * The driver does not currently attempt to report those limits.
     83 *
     84 * According to data in smartmontools, attribute 231 is rarely used to report
     85 * drive temperatures. At the same time, several drives report SSD life left
     86 * in attribute 231, but do not support temperature sensors. For this reason,
     87 * attribute 231 is currently ignored.
     88 *
     89 * Following above definitions, temperatures are reported as follows.
     90 * - If SCT Command Transport is supported, it is used to read the
     91 *   temperature and, if available, temperature limits.
     92 * - Otherwise, if SMART attribute 194 is supported, it is used to read
     93 *   the temperature.
     94 * - Otherwise, if SMART attribute 190 is supported, it is used to read
     95 *   the temperature.
     96 */
     97
     98#include <linux/ata.h>
     99#include <linux/bits.h>
    100#include <linux/device.h>
    101#include <linux/hwmon.h>
    102#include <linux/kernel.h>
    103#include <linux/list.h>
    104#include <linux/module.h>
    105#include <linux/mutex.h>
    106#include <scsi/scsi_cmnd.h>
    107#include <scsi/scsi_device.h>
    108#include <scsi/scsi_driver.h>
    109#include <scsi/scsi_proto.h>
    110
/*
 * Per-drive driver state. One instance is allocated in drivetemp_add() for
 * each accepted SCSI device and linked into drivetemp_devlist until
 * drivetemp_remove() frees it.
 */
struct drivetemp_data {
	struct list_head list;		/* list of instantiated devices */
	struct mutex lock;		/* protect data buffer accesses */
	struct scsi_device *sdev;	/* SCSI device */
	struct device *dev;		/* instantiating device */
	struct device *hwdev;		/* hardware monitoring device */
	u8 smartdata[ATA_SECT_SIZE];	/* local buffer */
	/* read method selected at identify time: SCT or SMART based */
	int (*get_temp)(struct drivetemp_data *st, u32 attr, long *val);
	bool have_temp_lowest;		/* lowest temp in SCT status */
	bool have_temp_highest;		/* highest temp in SCT status */
	bool have_temp_min;		/* have min temp */
	bool have_temp_max;		/* have max temp */
	bool have_temp_lcrit;		/* have lower critical limit */
	bool have_temp_crit;		/* have critical limit */
	/* limits below hold temp_from_sct() results (raw byte * 1000) */
	int temp_min;			/* min temp */
	int temp_max;			/* max temp */
	int temp_lcrit;			/* lower critical limit */
	int temp_crit;			/* critical limit */
};
    130
/*
 * All drivetemp_data instances created by drivetemp_add(); searched by
 * drivetemp_remove() to find the entry belonging to a departing device.
 */
static LIST_HEAD(drivetemp_devlist);
    132
/* Number of 12-byte attribute entries scanned in the SMART value table */
#define ATA_MAX_SMART_ATTRS	30
/* SMART attribute ids that may carry the drive temperature */
#define SMART_TEMP_PROP_190	190
#define SMART_TEMP_PROP_194	194

/* Log address used to read the SCT status and to write SCT commands */
#define SCT_STATUS_REQ_ADDR	0xe0
#define  SCT_STATUS_VERSION_LOW		0	/* log byte offsets */
#define  SCT_STATUS_VERSION_HIGH	1
#define  SCT_STATUS_TEMP		200
#define  SCT_STATUS_TEMP_LOWEST		201
#define  SCT_STATUS_TEMP_HIGHEST	202
/* Log address read back after an SCT data table command was written */
#define SCT_READ_LOG_ADDR	0xe1
/* Values passed as the ATA feature together with ATA_CMD_SMART */
#define  SMART_READ_LOG			0xd5
#define  SMART_WRITE_LOG		0xd6

/* Raw SCT temperature byte meaning "no valid temperature" */
#define INVALID_TEMP		0x80

/* True if the raw SCT temperature byte is not the INVALID_TEMP marker */
#define temp_is_valid(temp)	((temp) != INVALID_TEMP)
/* Interpret the raw byte as a signed 8-bit value and scale it by 1000 */
#define temp_from_sct(temp)	(((s8)(temp)) * 1000)
    151
    152static inline bool ata_id_smart_supported(u16 *id)
    153{
    154	return id[ATA_ID_COMMAND_SET_1] & BIT(0);
    155}
    156
    157static inline bool ata_id_smart_enabled(u16 *id)
    158{
    159	return id[ATA_ID_CFS_ENABLE_1] & BIT(0);
    160}
    161
    162static int drivetemp_scsi_command(struct drivetemp_data *st,
    163				 u8 ata_command, u8 feature,
    164				 u8 lba_low, u8 lba_mid, u8 lba_high)
    165{
    166	u8 scsi_cmd[MAX_COMMAND_SIZE];
    167	int data_dir;
    168
    169	memset(scsi_cmd, 0, sizeof(scsi_cmd));
    170	scsi_cmd[0] = ATA_16;
    171	if (ata_command == ATA_CMD_SMART && feature == SMART_WRITE_LOG) {
    172		scsi_cmd[1] = (5 << 1);	/* PIO Data-out */
    173		/*
    174		 * No off.line or cc, write to dev, block count in sector count
    175		 * field.
    176		 */
    177		scsi_cmd[2] = 0x06;
    178		data_dir = DMA_TO_DEVICE;
    179	} else {
    180		scsi_cmd[1] = (4 << 1);	/* PIO Data-in */
    181		/*
    182		 * No off.line or cc, read from dev, block count in sector count
    183		 * field.
    184		 */
    185		scsi_cmd[2] = 0x0e;
    186		data_dir = DMA_FROM_DEVICE;
    187	}
    188	scsi_cmd[4] = feature;
    189	scsi_cmd[6] = 1;	/* 1 sector */
    190	scsi_cmd[8] = lba_low;
    191	scsi_cmd[10] = lba_mid;
    192	scsi_cmd[12] = lba_high;
    193	scsi_cmd[14] = ata_command;
    194
    195	return scsi_execute_req(st->sdev, scsi_cmd, data_dir,
    196				st->smartdata, ATA_SECT_SIZE, NULL, HZ, 5,
    197				NULL);
    198}
    199
    200static int drivetemp_ata_command(struct drivetemp_data *st, u8 feature,
    201				 u8 select)
    202{
    203	return drivetemp_scsi_command(st, ATA_CMD_SMART, feature, select,
    204				     ATA_SMART_LBAM_PASS, ATA_SMART_LBAH_PASS);
    205}
    206
    207static int drivetemp_get_smarttemp(struct drivetemp_data *st, u32 attr,
    208				  long *temp)
    209{
    210	u8 *buf = st->smartdata;
    211	bool have_temp = false;
    212	u8 temp_raw;
    213	u8 csum;
    214	int err;
    215	int i;
    216
    217	err = drivetemp_ata_command(st, ATA_SMART_READ_VALUES, 0);
    218	if (err)
    219		return err;
    220
    221	/* Checksum the read value table */
    222	csum = 0;
    223	for (i = 0; i < ATA_SECT_SIZE; i++)
    224		csum += buf[i];
    225	if (csum) {
    226		dev_dbg(&st->sdev->sdev_gendev,
    227			"checksum error reading SMART values\n");
    228		return -EIO;
    229	}
    230
    231	for (i = 0; i < ATA_MAX_SMART_ATTRS; i++) {
    232		u8 *attr = buf + i * 12;
    233		int id = attr[2];
    234
    235		if (!id)
    236			continue;
    237
    238		if (id == SMART_TEMP_PROP_190) {
    239			temp_raw = attr[7];
    240			have_temp = true;
    241		}
    242		if (id == SMART_TEMP_PROP_194) {
    243			temp_raw = attr[7];
    244			have_temp = true;
    245			break;
    246		}
    247	}
    248
    249	if (have_temp) {
    250		*temp = temp_raw * 1000;
    251		return 0;
    252	}
    253
    254	return -ENXIO;
    255}
    256
    257static int drivetemp_get_scttemp(struct drivetemp_data *st, u32 attr, long *val)
    258{
    259	u8 *buf = st->smartdata;
    260	int err;
    261
    262	err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR);
    263	if (err)
    264		return err;
    265	switch (attr) {
    266	case hwmon_temp_input:
    267		if (!temp_is_valid(buf[SCT_STATUS_TEMP]))
    268			return -ENODATA;
    269		*val = temp_from_sct(buf[SCT_STATUS_TEMP]);
    270		break;
    271	case hwmon_temp_lowest:
    272		if (!temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]))
    273			return -ENODATA;
    274		*val = temp_from_sct(buf[SCT_STATUS_TEMP_LOWEST]);
    275		break;
    276	case hwmon_temp_highest:
    277		if (!temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]))
    278			return -ENODATA;
    279		*val = temp_from_sct(buf[SCT_STATUS_TEMP_HIGHEST]);
    280		break;
    281	default:
    282		err = -EINVAL;
    283		break;
    284	}
    285	return err;
    286}
    287
static const char * const sct_avoid_models[] = {
/*
 * These drives will have WRITE FPDMA QUEUED command timeouts and sometimes just
 * freeze until power-cycled under heavy write loads when their temperature is
 * getting polled in SCT mode. The SMART mode seems to be fine, though.
 *
 * While only the 3 TB model (DT01ACA3) was actually caught exhibiting the
 * problem let's play safe here to avoid data corruption and ban the whole
 * DT01ACAx family.
 *
 * The models from this array are prefix-matched.
 */
	"TOSHIBA DT01ACA",
};
    302
    303static bool drivetemp_sct_avoid(struct drivetemp_data *st)
    304{
    305	struct scsi_device *sdev = st->sdev;
    306	unsigned int ctr;
    307
    308	if (!sdev->model)
    309		return false;
    310
    311	/*
    312	 * The "model" field contains just the raw SCSI INQUIRY response
    313	 * "product identification" field, which has a width of 16 bytes.
    314	 * This field is space-filled, but is NOT NULL-terminated.
    315	 */
    316	for (ctr = 0; ctr < ARRAY_SIZE(sct_avoid_models); ctr++)
    317		if (!strncmp(sdev->model, sct_avoid_models[ctr],
    318			     strlen(sct_avoid_models[ctr])))
    319			return true;
    320
    321	return false;
    322}
    323
/*
 * Decide whether the device is a SATA drive we can monitor and pick the
 * method used to read its temperature.
 *
 * SCT is tried first (unless the model is on the avoid list): the SCT status
 * log must read successfully, carry version 2 or 3 and hold a valid current
 * temperature. If the drive also advertises SCT data tables, the temperature
 * limits are read once and cached in @st. If SCT is not usable, SMART is
 * used as fallback, provided it is supported and enabled.
 *
 * Returns 0 with st->get_temp set on success, -ENODEV if the device is not
 * suitable, or the result of the initial SMART temperature read.
 */
static int drivetemp_identify_sata(struct drivetemp_data *st)
{
	struct scsi_device *sdev = st->sdev;
	u8 *buf = st->smartdata;
	struct scsi_vpd *vpd;
	bool is_ata, is_sata;
	bool have_sct_data_table;
	bool have_sct_temp;
	bool have_smart;
	bool have_sct;
	u16 *ata_id;
	u16 version;
	long temp;
	int err;

	/* SCSI-ATA Translation present? */
	rcu_read_lock();
	vpd = rcu_dereference(sdev->vpd_pg89);

	/*
	 * Verify that ATA IDENTIFY DEVICE data is included in ATA Information
	 * VPD and that the drive implements the SATA protocol.
	 */
	if (!vpd || vpd->len < 572 || vpd->data[56] != ATA_CMD_ID_ATA ||
	    vpd->data[36] != 0x34) {
		rcu_read_unlock();
		return -ENODEV;
	}
	/* Extract everything needed from the VPD page before leaving RCU */
	ata_id = (u16 *)&vpd->data[60];
	is_ata = ata_id_is_ata(ata_id);
	is_sata = ata_id_is_sata(ata_id);
	have_sct = ata_id_sct_supported(ata_id);
	have_sct_data_table = ata_id_sct_data_tables(ata_id);
	have_smart = ata_id_smart_supported(ata_id) &&
				ata_id_smart_enabled(ata_id);

	rcu_read_unlock();

	/* bail out if this is not a SATA device */
	if (!is_ata || !is_sata)
		return -ENODEV;

	if (have_sct && drivetemp_sct_avoid(st)) {
		dev_notice(&sdev->sdev_gendev,
			   "will avoid using SCT for temperature monitoring\n");
		have_sct = false;
	}

	if (!have_sct)
		goto skip_sct;

	/* Probe the SCT status log; any failure falls back to SMART */
	err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR);
	if (err)
		goto skip_sct;

	version = (buf[SCT_STATUS_VERSION_HIGH] << 8) |
		  buf[SCT_STATUS_VERSION_LOW];
	if (version != 2 && version != 3)
		goto skip_sct;

	have_sct_temp = temp_is_valid(buf[SCT_STATUS_TEMP]);
	if (!have_sct_temp)
		goto skip_sct;

	st->have_temp_lowest = temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]);
	st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]);

	if (!have_sct_data_table)
		goto skip_sct_data;

	/* Request and read temperature history table */
	memset(buf, '\0', sizeof(st->smartdata));
	buf[0] = 5;	/* data table command */
	buf[2] = 1;	/* read table */
	buf[4] = 2;	/* temperature history table */

	err = drivetemp_ata_command(st, SMART_WRITE_LOG, SCT_STATUS_REQ_ADDR);
	if (err)
		goto skip_sct_data;

	err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_READ_LOG_ADDR);
	if (err)
		goto skip_sct_data;

	/*
	 * Temperature limits per AT Attachment 8 -
	 * ATA/ATAPI Command Set (ATA8-ACS)
	 */
	st->have_temp_max = temp_is_valid(buf[6]);
	st->have_temp_crit = temp_is_valid(buf[7]);
	st->have_temp_min = temp_is_valid(buf[8]);
	st->have_temp_lcrit = temp_is_valid(buf[9]);

	st->temp_max = temp_from_sct(buf[6]);
	st->temp_crit = temp_from_sct(buf[7]);
	st->temp_min = temp_from_sct(buf[8]);
	st->temp_lcrit = temp_from_sct(buf[9]);

skip_sct_data:
	/*
	 * Every jump to this label happens after have_sct_temp was set and
	 * found to be true, so the SCT reader is always selected here.
	 */
	if (have_sct_temp) {
		st->get_temp = drivetemp_get_scttemp;
		return 0;
	}
skip_sct:
	if (!have_smart)
		return -ENODEV;
	st->get_temp = drivetemp_get_smarttemp;
	/* Do one SMART read now; its result decides whether we accept */
	return drivetemp_get_smarttemp(st, hwmon_temp_input, &temp);
}
    433
    434static int drivetemp_identify(struct drivetemp_data *st)
    435{
    436	struct scsi_device *sdev = st->sdev;
    437
    438	/* Bail out immediately if there is no inquiry data */
    439	if (!sdev->inquiry || sdev->inquiry_len < 16)
    440		return -ENODEV;
    441
    442	/* Disk device? */
    443	if (sdev->type != TYPE_DISK && sdev->type != TYPE_ZBC)
    444		return -ENODEV;
    445
    446	return drivetemp_identify_sata(st);
    447}
    448
    449static int drivetemp_read(struct device *dev, enum hwmon_sensor_types type,
    450			 u32 attr, int channel, long *val)
    451{
    452	struct drivetemp_data *st = dev_get_drvdata(dev);
    453	int err = 0;
    454
    455	if (type != hwmon_temp)
    456		return -EINVAL;
    457
    458	switch (attr) {
    459	case hwmon_temp_input:
    460	case hwmon_temp_lowest:
    461	case hwmon_temp_highest:
    462		mutex_lock(&st->lock);
    463		err = st->get_temp(st, attr, val);
    464		mutex_unlock(&st->lock);
    465		break;
    466	case hwmon_temp_lcrit:
    467		*val = st->temp_lcrit;
    468		break;
    469	case hwmon_temp_min:
    470		*val = st->temp_min;
    471		break;
    472	case hwmon_temp_max:
    473		*val = st->temp_max;
    474		break;
    475	case hwmon_temp_crit:
    476		*val = st->temp_crit;
    477		break;
    478	default:
    479		err = -EINVAL;
    480		break;
    481	}
    482	return err;
    483}
    484
    485static umode_t drivetemp_is_visible(const void *data,
    486				   enum hwmon_sensor_types type,
    487				   u32 attr, int channel)
    488{
    489	const struct drivetemp_data *st = data;
    490
    491	switch (type) {
    492	case hwmon_temp:
    493		switch (attr) {
    494		case hwmon_temp_input:
    495			return 0444;
    496		case hwmon_temp_lowest:
    497			if (st->have_temp_lowest)
    498				return 0444;
    499			break;
    500		case hwmon_temp_highest:
    501			if (st->have_temp_highest)
    502				return 0444;
    503			break;
    504		case hwmon_temp_min:
    505			if (st->have_temp_min)
    506				return 0444;
    507			break;
    508		case hwmon_temp_max:
    509			if (st->have_temp_max)
    510				return 0444;
    511			break;
    512		case hwmon_temp_lcrit:
    513			if (st->have_temp_lcrit)
    514				return 0444;
    515			break;
    516		case hwmon_temp_crit:
    517			if (st->have_temp_crit)
    518				return 0444;
    519			break;
    520		default:
    521			break;
    522		}
    523		break;
    524	default:
    525		break;
    526	}
    527	return 0;
    528}
    529
/*
 * Channels offered to the hwmon core: one chip channel and one temperature
 * channel listing every attribute drivetemp_is_visible() knows about.
 */
static const struct hwmon_channel_info *drivetemp_info[] = {
	HWMON_CHANNEL_INFO(chip,
			   HWMON_C_REGISTER_TZ),
	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT |
			   HWMON_T_LOWEST | HWMON_T_HIGHEST |
			   HWMON_T_MIN | HWMON_T_MAX |
			   HWMON_T_LCRIT | HWMON_T_CRIT),
	NULL
};
    539
/* hwmon callbacks; no write callback is provided */
static const struct hwmon_ops drivetemp_ops = {
	.is_visible = drivetemp_is_visible,
	.read = drivetemp_read,
};
    544
/* Chip description passed to hwmon_device_register_with_info() */
static const struct hwmon_chip_info drivetemp_chip_info = {
	.ops = &drivetemp_ops,
	.info = drivetemp_info,
};
    549
    550/*
    551 * The device argument points to sdev->sdev_dev. Its parent is
    552 * sdev->sdev_gendev, which we can use to get the scsi_device pointer.
    553 */
    554static int drivetemp_add(struct device *dev, struct class_interface *intf)
    555{
    556	struct scsi_device *sdev = to_scsi_device(dev->parent);
    557	struct drivetemp_data *st;
    558	int err;
    559
    560	st = kzalloc(sizeof(*st), GFP_KERNEL);
    561	if (!st)
    562		return -ENOMEM;
    563
    564	st->sdev = sdev;
    565	st->dev = dev;
    566	mutex_init(&st->lock);
    567
    568	if (drivetemp_identify(st)) {
    569		err = -ENODEV;
    570		goto abort;
    571	}
    572
    573	st->hwdev = hwmon_device_register_with_info(dev->parent, "drivetemp",
    574						    st, &drivetemp_chip_info,
    575						    NULL);
    576	if (IS_ERR(st->hwdev)) {
    577		err = PTR_ERR(st->hwdev);
    578		goto abort;
    579	}
    580
    581	list_add(&st->list, &drivetemp_devlist);
    582	return 0;
    583
    584abort:
    585	kfree(st);
    586	return err;
    587}
    588
    589static void drivetemp_remove(struct device *dev, struct class_interface *intf)
    590{
    591	struct drivetemp_data *st, *tmp;
    592
    593	list_for_each_entry_safe(st, tmp, &drivetemp_devlist, list) {
    594		if (st->dev == dev) {
    595			list_del(&st->list);
    596			hwmon_device_unregister(st->hwdev);
    597			kfree(st);
    598			break;
    599		}
    600	}
    601}
    602
/* Add/remove callbacks registered with the SCSI layer at module init */
static struct class_interface drivetemp_interface = {
	.add_dev = drivetemp_add,
	.remove_dev = drivetemp_remove,
};
    607
/* Module entry point: register the class interface with the SCSI layer. */
static int __init drivetemp_init(void)
{
	return scsi_register_interface(&drivetemp_interface);
}
    612
/* Module exit point: unregister the class interface again. */
static void __exit drivetemp_exit(void)
{
	scsi_unregister_interface(&drivetemp_interface);
}
    617
module_init(drivetemp_init);
module_exit(drivetemp_exit);

/*
 * NOTE(review): the author address reads "linus@roeck-us.net"; confirm
 * whether "linux@roeck-us.net" was intended before changing the string.
 */
MODULE_AUTHOR("Guenter Roeck <linus@roeck-us.net>");
MODULE_DESCRIPTION("Hard drive temperature monitor");
MODULE_LICENSE("GPL");