sd_zbc.c (27165B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * SCSI Zoned Block commands 4 * 5 * Copyright (C) 2014-2015 SUSE Linux GmbH 6 * Written by: Hannes Reinecke <hare@suse.de> 7 * Modified by: Damien Le Moal <damien.lemoal@hgst.com> 8 * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com> 9 */ 10 11#include <linux/blkdev.h> 12#include <linux/vmalloc.h> 13#include <linux/sched/mm.h> 14#include <linux/mutex.h> 15 16#include <asm/unaligned.h> 17 18#include <scsi/scsi.h> 19#include <scsi/scsi_cmnd.h> 20 21#include "sd.h" 22 23/** 24 * sd_zbc_get_zone_wp_offset - Get zone write pointer offset. 25 * @zone: Zone for which to return the write pointer offset. 26 * 27 * Return: offset of the write pointer from the start of the zone. 28 */ 29static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone) 30{ 31 if (zone->type == ZBC_ZONE_TYPE_CONV) 32 return 0; 33 34 switch (zone->cond) { 35 case BLK_ZONE_COND_IMP_OPEN: 36 case BLK_ZONE_COND_EXP_OPEN: 37 case BLK_ZONE_COND_CLOSED: 38 return zone->wp - zone->start; 39 case BLK_ZONE_COND_FULL: 40 return zone->len; 41 case BLK_ZONE_COND_EMPTY: 42 case BLK_ZONE_COND_OFFLINE: 43 case BLK_ZONE_COND_READONLY: 44 default: 45 /* 46 * Offline and read-only zones do not have a valid 47 * write pointer. Use 0 as for an empty zone. 48 */ 49 return 0; 50 } 51} 52 53/* Whether or not a SCSI zone descriptor describes a gap zone. */ 54static bool sd_zbc_is_gap_zone(const u8 buf[64]) 55{ 56 return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP; 57} 58 59/** 60 * sd_zbc_parse_report - Parse a SCSI zone descriptor 61 * @sdkp: SCSI disk pointer. 62 * @buf: SCSI zone descriptor. 63 * @idx: Index of the zone relative to the first zone reported by the current 64 * sd_zbc_report_zones() call. 65 * @cb: Callback function pointer. 66 * @data: Second argument passed to @cb. 67 * 68 * Return: Value returned by @cb. 69 * 70 * Convert a SCSI zone descriptor into struct blk_zone format. Additionally, 71 * call @cb(blk_zone, @data). 72 */ 73static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64], 74 unsigned int idx, report_zones_cb cb, void *data) 75{ 76 struct scsi_device *sdp = sdkp->device; 77 struct blk_zone zone = { 0 }; 78 sector_t start_lba, gran; 79 int ret; 80 81 if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf))) 82 return -EINVAL; 83 84 zone.type = buf[0] & 0x0f; 85 zone.cond = (buf[1] >> 4) & 0xf; 86 if (buf[1] & 0x01) 87 zone.reset = 1; 88 if (buf[1] & 0x02) 89 zone.non_seq = 1; 90 91 start_lba = get_unaligned_be64(&buf[16]); 92 zone.start = logical_to_sectors(sdp, start_lba); 93 zone.capacity = logical_to_sectors(sdp, get_unaligned_be64(&buf[8])); 94 zone.len = zone.capacity; 95 if (sdkp->zone_starting_lba_gran) { 96 gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran); 97 if (zone.len > gran) { 98 sd_printk(KERN_ERR, sdkp, 99 "Invalid zone at LBA %llu with capacity %llu and length %llu; granularity = %llu\n", 100 start_lba, 101 sectors_to_logical(sdp, zone.capacity), 102 sectors_to_logical(sdp, zone.len), 103 sectors_to_logical(sdp, gran)); 104 return -EINVAL; 105 } 106 /* 107 * Use the starting LBA granularity instead of the zone length 108 * obtained from the REPORT ZONES command. 109 */ 110 zone.len = gran; 111 } 112 if (zone.cond == ZBC_ZONE_COND_FULL) 113 zone.wp = zone.start + zone.len; 114 else 115 zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24])); 116 117 ret = cb(&zone, idx, data); 118 if (ret) 119 return ret; 120 121 if (sdkp->rev_wp_offset) 122 sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone); 123 124 return 0; 125} 126 127/** 128 * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command. 129 * @sdkp: The target disk 130 * @buf: vmalloc-ed buffer to use for the reply 131 * @buflen: the buffer size 132 * @lba: Start LBA of the report 133 * @partial: Do partial report 134 * 135 * For internal use during device validation. 136 * Using partial=true can significantly speed up execution of a report zones 137 * command because the disk does not have to count all possible report matching 138 * zones and will only report the count of zones fitting in the command reply 139 * buffer. 140 */ 141static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf, 142 unsigned int buflen, sector_t lba, 143 bool partial) 144{ 145 struct scsi_device *sdp = sdkp->device; 146 const int timeout = sdp->request_queue->rq_timeout; 147 struct scsi_sense_hdr sshdr; 148 unsigned char cmd[16]; 149 unsigned int rep_len; 150 int result; 151 152 memset(cmd, 0, 16); 153 cmd[0] = ZBC_IN; 154 cmd[1] = ZI_REPORT_ZONES; 155 put_unaligned_be64(lba, &cmd[2]); 156 put_unaligned_be32(buflen, &cmd[10]); 157 if (partial) 158 cmd[14] = ZBC_REPORT_ZONE_PARTIAL; 159 160 result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, 161 buf, buflen, &sshdr, 162 timeout, SD_MAX_RETRIES, NULL); 163 if (result) { 164 sd_printk(KERN_ERR, sdkp, 165 "REPORT ZONES start lba %llu failed\n", lba); 166 sd_print_result(sdkp, "REPORT ZONES", result); 167 if (result > 0 && scsi_sense_valid(&sshdr)) 168 sd_print_sense_hdr(sdkp, &sshdr); 169 return -EIO; 170 } 171 172 rep_len = get_unaligned_be32(&buf[0]); 173 if (rep_len < 64) { 174 sd_printk(KERN_ERR, sdkp, 175 "REPORT ZONES report invalid length %u\n", 176 rep_len); 177 return -EIO; 178 } 179 180 return 0; 181} 182 183/** 184 * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply. 185 * @sdkp: The target disk 186 * @nr_zones: Maximum number of zones to report 187 * @buflen: Size of the buffer allocated 188 * 189 * Try to allocate a reply buffer for the number of requested zones. 190 * The size of the buffer allocated may be smaller than requested to 191 * satify the device constraint (max_hw_sectors, max_segments, etc). 192 * 193 * Return the address of the allocated buffer and update @buflen with 194 * the size of the allocated buffer. 195 */ 196static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, 197 unsigned int nr_zones, size_t *buflen) 198{ 199 struct request_queue *q = sdkp->disk->queue; 200 size_t bufsize; 201 void *buf; 202 203 /* 204 * Report zone buffer size should be at most 64B times the number of 205 * zones requested plus the 64B reply header, but should be aligned 206 * to SECTOR_SIZE for ATA devices. 207 * Make sure that this size does not exceed the hardware capabilities. 208 * Furthermore, since the report zone command cannot be split, make 209 * sure that the allocated buffer can always be mapped by limiting the 210 * number of pages allocated to the HBA max segments limit. 211 */ 212 nr_zones = min(nr_zones, sdkp->zone_info.nr_zones); 213 bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE); 214 bufsize = min_t(size_t, bufsize, 215 queue_max_hw_sectors(q) << SECTOR_SHIFT); 216 bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); 217 218 while (bufsize >= SECTOR_SIZE) { 219 buf = __vmalloc(bufsize, 220 GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY); 221 if (buf) { 222 *buflen = bufsize; 223 return buf; 224 } 225 bufsize = rounddown(bufsize >> 1, SECTOR_SIZE); 226 } 227 228 return NULL; 229} 230 231/** 232 * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors. 233 * @sdkp: The target disk 234 */ 235static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) 236{ 237 return logical_to_sectors(sdkp->device, sdkp->zone_info.zone_blocks); 238} 239 240/** 241 * sd_zbc_report_zones - SCSI .report_zones() callback. 242 * @disk: Disk to report zones for. 243 * @sector: Start sector. 244 * @nr_zones: Maximum number of zones to report. 245 * @cb: Callback function called to report zone information. 246 * @data: Second argument passed to @cb. 247 * 248 * Called by the block layer to iterate over zone information. See also the 249 * disk->fops->report_zones() calls in block/blk-zoned.c. 250 */ 251int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, 252 unsigned int nr_zones, report_zones_cb cb, void *data) 253{ 254 struct scsi_disk *sdkp = scsi_disk(disk); 255 sector_t lba = sectors_to_logical(sdkp->device, sector); 256 unsigned int nr, i; 257 unsigned char *buf; 258 u64 zone_length, start_lba; 259 size_t offset, buflen = 0; 260 int zone_idx = 0; 261 int ret; 262 263 if (!sd_is_zoned(sdkp)) 264 /* Not a zoned device */ 265 return -EOPNOTSUPP; 266 267 if (!sdkp->capacity) 268 /* Device gone or invalid */ 269 return -ENODEV; 270 271 buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen); 272 if (!buf) 273 return -ENOMEM; 274 275 while (zone_idx < nr_zones && lba < sdkp->capacity) { 276 ret = sd_zbc_do_report_zones(sdkp, buf, buflen, lba, true); 277 if (ret) 278 goto out; 279 280 offset = 0; 281 nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64); 282 if (!nr) 283 break; 284 285 for (i = 0; i < nr && zone_idx < nr_zones; i++) { 286 offset += 64; 287 start_lba = get_unaligned_be64(&buf[offset + 16]); 288 zone_length = get_unaligned_be64(&buf[offset + 8]); 289 if ((zone_idx == 0 && 290 (lba < start_lba || 291 lba >= start_lba + zone_length)) || 292 (zone_idx > 0 && start_lba != lba) || 293 start_lba + zone_length < start_lba) { 294 sd_printk(KERN_ERR, sdkp, 295 "Zone %d at LBA %llu is invalid: %llu + %llu\n", 296 zone_idx, lba, start_lba, zone_length); 297 ret = -EINVAL; 298 goto out; 299 } 300 lba = start_lba + zone_length; 301 if (sd_zbc_is_gap_zone(&buf[offset])) { 302 if (sdkp->zone_starting_lba_gran) 303 continue; 304 sd_printk(KERN_ERR, sdkp, 305 "Gap zone without constant LBA offsets\n"); 306 ret = -EINVAL; 307 goto out; 308 } 309 310 ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx, 311 cb, data); 312 if (ret) 313 goto out; 314 315 zone_idx++; 316 } 317 } 318 319 ret = zone_idx; 320out: 321 kvfree(buf); 322 return ret; 323} 324 325static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd) 326{ 327 struct request *rq = scsi_cmd_to_rq(cmd); 328 struct scsi_disk *sdkp = scsi_disk(rq->q->disk); 329 sector_t sector = blk_rq_pos(rq); 330 331 if (!sd_is_zoned(sdkp)) 332 /* Not a zoned device */ 333 return BLK_STS_IOERR; 334 335 if (sdkp->device->changed) 336 return BLK_STS_IOERR; 337 338 if (sector & (sd_zbc_zone_sectors(sdkp) - 1)) 339 /* Unaligned request */ 340 return BLK_STS_IOERR; 341 342 return BLK_STS_OK; 343} 344 345#define SD_ZBC_INVALID_WP_OFST (~0u) 346#define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1) 347 348static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx, 349 void *data) 350{ 351 struct scsi_disk *sdkp = data; 352 353 lockdep_assert_held(&sdkp->zones_wp_offset_lock); 354 355 sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone); 356 357 return 0; 358} 359 360/* 361 * An attempt to append a zone triggered an invalid write pointer error. 362 * Reread the write pointer of the zone(s) in which the append failed. 363 */ 364static void sd_zbc_update_wp_offset_workfn(struct work_struct *work) 365{ 366 struct scsi_disk *sdkp; 367 unsigned long flags; 368 sector_t zno; 369 int ret; 370 371 sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work); 372 373 spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); 374 for (zno = 0; zno < sdkp->zone_info.nr_zones; zno++) { 375 if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) 376 continue; 377 378 spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); 379 ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf, 380 SD_BUF_SIZE, 381 zno * sdkp->zone_info.zone_blocks, true); 382 spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); 383 if (!ret) 384 sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64, 385 zno, sd_zbc_update_wp_offset_cb, 386 sdkp); 387 } 388 spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); 389 390 scsi_device_put(sdkp->device); 391} 392 393/** 394 * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command. 395 * @cmd: the command to setup 396 * @lba: the LBA to patch 397 * @nr_blocks: the number of LBAs to be written 398 * 399 * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND. 400 * @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and 401 * patching of the lba for an emulated ZONE_APPEND command. 402 * 403 * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will 404 * schedule a REPORT ZONES command and return BLK_STS_IOERR. 405 */ 406blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, 407 unsigned int nr_blocks) 408{ 409 struct request *rq = scsi_cmd_to_rq(cmd); 410 struct scsi_disk *sdkp = scsi_disk(rq->q->disk); 411 unsigned int wp_offset, zno = blk_rq_zone_no(rq); 412 unsigned long flags; 413 blk_status_t ret; 414 415 ret = sd_zbc_cmnd_checks(cmd); 416 if (ret != BLK_STS_OK) 417 return ret; 418 419 if (!blk_rq_zone_is_seq(rq)) 420 return BLK_STS_IOERR; 421 422 /* Unlock of the write lock will happen in sd_zbc_complete() */ 423 if (!blk_req_zone_write_trylock(rq)) 424 return BLK_STS_ZONE_RESOURCE; 425 426 spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); 427 wp_offset = sdkp->zones_wp_offset[zno]; 428 switch (wp_offset) { 429 case SD_ZBC_INVALID_WP_OFST: 430 /* 431 * We are about to schedule work to update a zone write pointer 432 * offset, which will cause the zone append command to be 433 * requeued. So make sure that the scsi device does not go away 434 * while the work is being processed. 435 */ 436 if (scsi_device_get(sdkp->device)) { 437 ret = BLK_STS_IOERR; 438 break; 439 } 440 sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST; 441 schedule_work(&sdkp->zone_wp_offset_work); 442 fallthrough; 443 case SD_ZBC_UPDATING_WP_OFST: 444 ret = BLK_STS_DEV_RESOURCE; 445 break; 446 default: 447 wp_offset = sectors_to_logical(sdkp->device, wp_offset); 448 if (wp_offset + nr_blocks > sdkp->zone_info.zone_blocks) { 449 ret = BLK_STS_IOERR; 450 break; 451 } 452 453 *lba += wp_offset; 454 } 455 spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); 456 if (ret) 457 blk_req_zone_write_unlock(rq); 458 return ret; 459} 460 461/** 462 * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations 463 * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH. 464 * @cmd: the command to setup 465 * @op: Operation to be performed 466 * @all: All zones control 467 * 468 * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL, 469 * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests. 470 */ 471blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, 472 unsigned char op, bool all) 473{ 474 struct request *rq = scsi_cmd_to_rq(cmd); 475 sector_t sector = blk_rq_pos(rq); 476 struct scsi_disk *sdkp = scsi_disk(rq->q->disk); 477 sector_t block = sectors_to_logical(sdkp->device, sector); 478 blk_status_t ret; 479 480 ret = sd_zbc_cmnd_checks(cmd); 481 if (ret != BLK_STS_OK) 482 return ret; 483 484 cmd->cmd_len = 16; 485 memset(cmd->cmnd, 0, cmd->cmd_len); 486 cmd->cmnd[0] = ZBC_OUT; 487 cmd->cmnd[1] = op; 488 if (all) 489 cmd->cmnd[14] = 0x1; 490 else 491 put_unaligned_be64(block, &cmd->cmnd[2]); 492 493 rq->timeout = SD_TIMEOUT; 494 cmd->sc_data_direction = DMA_NONE; 495 cmd->transfersize = 0; 496 cmd->allowed = 0; 497 498 return BLK_STS_OK; 499} 500 501static bool sd_zbc_need_zone_wp_update(struct request *rq) 502{ 503 switch (req_op(rq)) { 504 case REQ_OP_ZONE_APPEND: 505 case REQ_OP_ZONE_FINISH: 506 case REQ_OP_ZONE_RESET: 507 case REQ_OP_ZONE_RESET_ALL: 508 return true; 509 case REQ_OP_WRITE: 510 case REQ_OP_WRITE_ZEROES: 511 return blk_rq_zone_is_seq(rq); 512 default: 513 return false; 514 } 515} 516 517/** 518 * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion 519 * @cmd: Completed command 520 * @good_bytes: Command reply bytes 521 * 522 * Called from sd_zbc_complete() to handle the update of the cached zone write 523 * pointer value in case an update is needed. 524 */ 525static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, 526 unsigned int good_bytes) 527{ 528 int result = cmd->result; 529 struct request *rq = scsi_cmd_to_rq(cmd); 530 struct scsi_disk *sdkp = scsi_disk(rq->q->disk); 531 unsigned int zno = blk_rq_zone_no(rq); 532 enum req_opf op = req_op(rq); 533 unsigned long flags; 534 535 /* 536 * If we got an error for a command that needs updating the write 537 * pointer offset cache, we must mark the zone wp offset entry as 538 * invalid to force an update from disk the next time a zone append 539 * command is issued. 540 */ 541 spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); 542 543 if (result && op != REQ_OP_ZONE_RESET_ALL) { 544 if (op == REQ_OP_ZONE_APPEND) { 545 /* Force complete completion (no retry) */ 546 good_bytes = 0; 547 scsi_set_resid(cmd, blk_rq_bytes(rq)); 548 } 549 550 /* 551 * Force an update of the zone write pointer offset on 552 * the next zone append access. 553 */ 554 if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) 555 sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST; 556 goto unlock_wp_offset; 557 } 558 559 switch (op) { 560 case REQ_OP_ZONE_APPEND: 561 rq->__sector += sdkp->zones_wp_offset[zno]; 562 fallthrough; 563 case REQ_OP_WRITE_ZEROES: 564 case REQ_OP_WRITE: 565 if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp)) 566 sdkp->zones_wp_offset[zno] += 567 good_bytes >> SECTOR_SHIFT; 568 break; 569 case REQ_OP_ZONE_RESET: 570 sdkp->zones_wp_offset[zno] = 0; 571 break; 572 case REQ_OP_ZONE_FINISH: 573 sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp); 574 break; 575 case REQ_OP_ZONE_RESET_ALL: 576 memset(sdkp->zones_wp_offset, 0, 577 sdkp->zone_info.nr_zones * sizeof(unsigned int)); 578 break; 579 default: 580 break; 581 } 582 583unlock_wp_offset: 584 spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); 585 586 return good_bytes; 587} 588 589/** 590 * sd_zbc_complete - ZBC command post processing. 591 * @cmd: Completed command 592 * @good_bytes: Command reply bytes 593 * @sshdr: command sense header 594 * 595 * Called from sd_done() to handle zone commands errors and updates to the 596 * device queue zone write pointer offset cahce. 597 */ 598unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, 599 struct scsi_sense_hdr *sshdr) 600{ 601 int result = cmd->result; 602 struct request *rq = scsi_cmd_to_rq(cmd); 603 604 if (op_is_zone_mgmt(req_op(rq)) && 605 result && 606 sshdr->sense_key == ILLEGAL_REQUEST && 607 sshdr->asc == 0x24) { 608 /* 609 * INVALID FIELD IN CDB error: a zone management command was 610 * attempted on a conventional zone. Nothing to worry about, 611 * so be quiet about the error. 612 */ 613 rq->rq_flags |= RQF_QUIET; 614 } else if (sd_zbc_need_zone_wp_update(rq)) 615 good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes); 616 617 if (req_op(rq) == REQ_OP_ZONE_APPEND) 618 blk_req_zone_write_unlock(rq); 619 620 return good_bytes; 621} 622 623/** 624 * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics 625 * @sdkp: Target disk 626 * @buf: Buffer where to store the VPD page data 627 * 628 * Read VPD page B6, get information and check that reads are unconstrained. 629 */ 630static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, 631 unsigned char *buf) 632{ 633 u64 zone_starting_lba_gran; 634 635 if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) { 636 sd_printk(KERN_NOTICE, sdkp, 637 "Read zoned characteristics VPD page failed\n"); 638 return -ENODEV; 639 } 640 641 if (sdkp->device->type != TYPE_ZBC) { 642 /* Host-aware */ 643 sdkp->urswrz = 1; 644 sdkp->zones_optimal_open = get_unaligned_be32(&buf[8]); 645 sdkp->zones_optimal_nonseq = get_unaligned_be32(&buf[12]); 646 sdkp->zones_max_open = 0; 647 return 0; 648 } 649 650 /* Host-managed */ 651 sdkp->urswrz = buf[4] & 1; 652 sdkp->zones_optimal_open = 0; 653 sdkp->zones_optimal_nonseq = 0; 654 sdkp->zones_max_open = get_unaligned_be32(&buf[16]); 655 /* Check zone alignment method */ 656 switch (buf[23] & 0xf) { 657 case 0: 658 case ZBC_CONSTANT_ZONE_LENGTH: 659 /* Use zone length */ 660 break; 661 case ZBC_CONSTANT_ZONE_START_OFFSET: 662 zone_starting_lba_gran = get_unaligned_be64(&buf[24]); 663 if (zone_starting_lba_gran == 0 || 664 !is_power_of_2(zone_starting_lba_gran) || 665 logical_to_sectors(sdkp->device, zone_starting_lba_gran) > 666 UINT_MAX) { 667 sd_printk(KERN_ERR, sdkp, 668 "Invalid zone starting LBA granularity %llu\n", 669 zone_starting_lba_gran); 670 return -ENODEV; 671 } 672 sdkp->zone_starting_lba_gran = zone_starting_lba_gran; 673 break; 674 default: 675 sd_printk(KERN_ERR, sdkp, "Invalid zone alignment method\n"); 676 return -ENODEV; 677 } 678 679 /* 680 * Check for unconstrained reads: host-managed devices with 681 * constrained reads (drives failing read after write pointer) 682 * are not supported. 683 */ 684 if (!sdkp->urswrz) { 685 if (sdkp->first_scan) 686 sd_printk(KERN_NOTICE, sdkp, 687 "constrained reads devices are not supported\n"); 688 return -ENODEV; 689 } 690 691 return 0; 692} 693 694/** 695 * sd_zbc_check_capacity - Check the device capacity 696 * @sdkp: Target disk 697 * @buf: command buffer 698 * @zblocks: zone size in logical blocks 699 * 700 * Get the device zone size and check that the device capacity as reported 701 * by READ CAPACITY matches the max_lba value (plus one) of the report zones 702 * command reply for devices with RC_BASIS == 0. 703 * 704 * Returns 0 upon success or an error code upon failure. 705 */ 706static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf, 707 u32 *zblocks) 708{ 709 u64 zone_blocks; 710 sector_t max_lba; 711 unsigned char *rec; 712 int ret; 713 714 /* Do a report zone to get max_lba and the size of the first zone */ 715 ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false); 716 if (ret) 717 return ret; 718 719 if (sdkp->rc_basis == 0) { 720 /* The max_lba field is the capacity of this device */ 721 max_lba = get_unaligned_be64(&buf[8]); 722 if (sdkp->capacity != max_lba + 1) { 723 if (sdkp->first_scan) 724 sd_printk(KERN_WARNING, sdkp, 725 "Changing capacity from %llu to max LBA+1 %llu\n", 726 (unsigned long long)sdkp->capacity, 727 (unsigned long long)max_lba + 1); 728 sdkp->capacity = max_lba + 1; 729 } 730 } 731 732 if (sdkp->zone_starting_lba_gran == 0) { 733 /* Get the size of the first reported zone */ 734 rec = buf + 64; 735 zone_blocks = get_unaligned_be64(&rec[8]); 736 if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) { 737 if (sdkp->first_scan) 738 sd_printk(KERN_NOTICE, sdkp, 739 "Zone size too large\n"); 740 return -EFBIG; 741 } 742 } else { 743 zone_blocks = sdkp->zone_starting_lba_gran; 744 } 745 746 if (!is_power_of_2(zone_blocks)) { 747 sd_printk(KERN_ERR, sdkp, 748 "Zone size %llu is not a power of two.\n", 749 zone_blocks); 750 return -EINVAL; 751 } 752 753 *zblocks = zone_blocks; 754 755 return 0; 756} 757 758static void sd_zbc_print_zones(struct scsi_disk *sdkp) 759{ 760 if (!sd_is_zoned(sdkp) || !sdkp->capacity) 761 return; 762 763 if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1)) 764 sd_printk(KERN_NOTICE, sdkp, 765 "%u zones of %u logical blocks + 1 runt zone\n", 766 sdkp->zone_info.nr_zones - 1, 767 sdkp->zone_info.zone_blocks); 768 else 769 sd_printk(KERN_NOTICE, sdkp, 770 "%u zones of %u logical blocks\n", 771 sdkp->zone_info.nr_zones, 772 sdkp->zone_info.zone_blocks); 773} 774 775static int sd_zbc_init_disk(struct scsi_disk *sdkp) 776{ 777 sdkp->zones_wp_offset = NULL; 778 spin_lock_init(&sdkp->zones_wp_offset_lock); 779 sdkp->rev_wp_offset = NULL; 780 mutex_init(&sdkp->rev_mutex); 781 INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn); 782 sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL); 783 if (!sdkp->zone_wp_update_buf) 784 return -ENOMEM; 785 786 return 0; 787} 788 789void sd_zbc_free_zone_info(struct scsi_disk *sdkp) 790{ 791 if (!sdkp->zone_wp_update_buf) 792 return; 793 794 /* Serialize against revalidate zones */ 795 mutex_lock(&sdkp->rev_mutex); 796 797 kvfree(sdkp->zones_wp_offset); 798 sdkp->zones_wp_offset = NULL; 799 kfree(sdkp->zone_wp_update_buf); 800 sdkp->zone_wp_update_buf = NULL; 801 802 sdkp->early_zone_info = (struct zoned_disk_info){ }; 803 sdkp->zone_info = (struct zoned_disk_info){ }; 804 805 mutex_unlock(&sdkp->rev_mutex); 806} 807 808static void sd_zbc_revalidate_zones_cb(struct gendisk *disk) 809{ 810 struct scsi_disk *sdkp = scsi_disk(disk); 811 812 swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset); 813} 814 815/* 816 * Call blk_revalidate_disk_zones() if any of the zoned disk properties have 817 * changed that make it necessary to call that function. Called by 818 * sd_revalidate_disk() after the gendisk capacity has been set. 819 */ 820int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) 821{ 822 struct gendisk *disk = sdkp->disk; 823 struct request_queue *q = disk->queue; 824 u32 zone_blocks = sdkp->early_zone_info.zone_blocks; 825 unsigned int nr_zones = sdkp->early_zone_info.nr_zones; 826 u32 max_append; 827 int ret = 0; 828 unsigned int flags; 829 830 /* 831 * For all zoned disks, initialize zone append emulation data if not 832 * already done. This is necessary also for host-aware disks used as 833 * regular disks due to the presence of partitions as these partitions 834 * may be deleted and the disk zoned model changed back from 835 * BLK_ZONED_NONE to BLK_ZONED_HA. 836 */ 837 if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) { 838 ret = sd_zbc_init_disk(sdkp); 839 if (ret) 840 return ret; 841 } 842 843 /* 844 * There is nothing to do for regular disks, including host-aware disks 845 * that have partitions. 846 */ 847 if (!blk_queue_is_zoned(q)) 848 return 0; 849 850 /* 851 * Make sure revalidate zones are serialized to ensure exclusive 852 * updates of the scsi disk data. 853 */ 854 mutex_lock(&sdkp->rev_mutex); 855 856 if (sdkp->zone_info.zone_blocks == zone_blocks && 857 sdkp->zone_info.nr_zones == nr_zones && 858 disk->queue->nr_zones == nr_zones) 859 goto unlock; 860 861 flags = memalloc_noio_save(); 862 sdkp->zone_info.zone_blocks = zone_blocks; 863 sdkp->zone_info.nr_zones = nr_zones; 864 sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL); 865 if (!sdkp->rev_wp_offset) { 866 ret = -ENOMEM; 867 memalloc_noio_restore(flags); 868 goto unlock; 869 } 870 871 ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb); 872 873 memalloc_noio_restore(flags); 874 kvfree(sdkp->rev_wp_offset); 875 sdkp->rev_wp_offset = NULL; 876 877 if (ret) { 878 sdkp->zone_info = (struct zoned_disk_info){ }; 879 sdkp->capacity = 0; 880 goto unlock; 881 } 882 883 max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks), 884 q->limits.max_segments << (PAGE_SHIFT - 9)); 885 max_append = min_t(u32, max_append, queue_max_hw_sectors(q)); 886 887 blk_queue_max_zone_append_sectors(q, max_append); 888 889 sd_zbc_print_zones(sdkp); 890 891unlock: 892 mutex_unlock(&sdkp->rev_mutex); 893 894 return ret; 895} 896 897/** 898 * sd_zbc_read_zones - Read zone information and update the request queue 899 * @sdkp: SCSI disk pointer. 900 * @buf: 512 byte buffer used for storing SCSI command output. 901 * 902 * Read zone information and update the request queue zone characteristics and 903 * also the zoned device information in *sdkp. Called by sd_revalidate_disk() 904 * before the gendisk capacity has been set. 905 */ 906int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) 907{ 908 struct gendisk *disk = sdkp->disk; 909 struct request_queue *q = disk->queue; 910 unsigned int nr_zones; 911 u32 zone_blocks = 0; 912 int ret; 913 914 if (!sd_is_zoned(sdkp)) { 915 /* 916 * Device managed or normal SCSI disk, no special handling 917 * required. Nevertheless, free the disk zone information in 918 * case the device type changed. 919 */ 920 sd_zbc_free_zone_info(sdkp); 921 return 0; 922 } 923 924 /* READ16/WRITE16 is mandatory for ZBC disks */ 925 sdkp->device->use_16_for_rw = 1; 926 sdkp->device->use_10_for_rw = 0; 927 928 if (!blk_queue_is_zoned(q)) { 929 /* 930 * This can happen for a host aware disk with partitions. 931 * The block device zone model was already cleared by 932 * blk_queue_set_zoned(). Only free the scsi disk zone 933 * information and exit early. 934 */ 935 sd_zbc_free_zone_info(sdkp); 936 return 0; 937 } 938 939 /* Check zoned block device characteristics (unconstrained reads) */ 940 ret = sd_zbc_check_zoned_characteristics(sdkp, buf); 941 if (ret) 942 goto err; 943 944 /* Check the device capacity reported by report zones */ 945 ret = sd_zbc_check_capacity(sdkp, buf, &zone_blocks); 946 if (ret != 0) 947 goto err; 948 949 /* The drive satisfies the kernel restrictions: set it up */ 950 blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); 951 blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); 952 if (sdkp->zones_max_open == U32_MAX) 953 blk_queue_max_open_zones(q, 0); 954 else 955 blk_queue_max_open_zones(q, sdkp->zones_max_open); 956 blk_queue_max_active_zones(q, 0); 957 nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks); 958 959 /* 960 * Per ZBC and ZAC specifications, writes in sequential write required 961 * zones of host-managed devices must be aligned to the device physical 962 * block size. 963 */ 964 if (blk_queue_zoned_model(q) == BLK_ZONED_HM) 965 blk_queue_zone_write_granularity(q, sdkp->physical_block_size); 966 967 sdkp->early_zone_info.nr_zones = nr_zones; 968 sdkp->early_zone_info.zone_blocks = zone_blocks; 969 970 return 0; 971 972err: 973 sdkp->capacity = 0; 974 975 return ret; 976}