/* transport_rdma.c (60711B) */
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Copyright (C) 2017, Microsoft Corporation. 4 * Copyright (C) 2018, LG Electronics. 5 * 6 * Author(s): Long Li <longli@microsoft.com>, 7 * Hyunchul Lee <hyc.lee@gmail.com> 8 */ 9 10#define SUBMOD_NAME "smb_direct" 11 12#include <linux/kthread.h> 13#include <linux/list.h> 14#include <linux/mempool.h> 15#include <linux/highmem.h> 16#include <linux/scatterlist.h> 17#include <rdma/ib_verbs.h> 18#include <rdma/rdma_cm.h> 19#include <rdma/rw.h> 20 21#include "glob.h" 22#include "connection.h" 23#include "smb_common.h" 24#include "smbstatus.h" 25#include "transport_rdma.h" 26 27#define SMB_DIRECT_PORT_IWARP 5445 28#define SMB_DIRECT_PORT_INFINIBAND 445 29 30#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100) 31 32/* SMB_DIRECT negotiation timeout in seconds */ 33#define SMB_DIRECT_NEGOTIATE_TIMEOUT 120 34 35#define SMB_DIRECT_MAX_SEND_SGES 8 36#define SMB_DIRECT_MAX_RECV_SGES 1 37 38/* 39 * Default maximum number of RDMA read/write outstanding on this connection 40 * This value is possibly decreased during QP creation on hardware limit 41 */ 42#define SMB_DIRECT_CM_INITIATOR_DEPTH 8 43 44/* Maximum number of retries on data transfer operations */ 45#define SMB_DIRECT_CM_RETRY 6 46/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */ 47#define SMB_DIRECT_CM_RNR_RETRY 0 48 49/* 50 * User configurable initial values per SMB_DIRECT transport connection 51 * as defined in [MS-SMBD] 3.1.1.1 52 * Those may change after a SMB_DIRECT negotiation 53 */ 54 55/* Set 445 port to SMB Direct port by default */ 56static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND; 57 58/* The local peer's maximum number of credits to grant to the peer */ 59static int smb_direct_receive_credit_max = 255; 60 61/* The remote peer's credit request of local peer */ 62static int smb_direct_send_credit_target = 255; 63 64/* The maximum single message size can be sent to remote peer */ 65static int smb_direct_max_send_size = 8192; 66 
67/* The maximum fragmented upper-layer payload receive size supported */ 68static int smb_direct_max_fragmented_recv_size = 1024 * 1024; 69 70/* The maximum single-message size which can be received */ 71static int smb_direct_max_receive_size = 8192; 72 73static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; 74 75static LIST_HEAD(smb_direct_device_list); 76static DEFINE_RWLOCK(smb_direct_device_lock); 77 78struct smb_direct_device { 79 struct ib_device *ib_dev; 80 struct list_head list; 81}; 82 83static struct smb_direct_listener { 84 struct rdma_cm_id *cm_id; 85} smb_direct_listener; 86 87static struct workqueue_struct *smb_direct_wq; 88 89enum smb_direct_status { 90 SMB_DIRECT_CS_NEW = 0, 91 SMB_DIRECT_CS_CONNECTED, 92 SMB_DIRECT_CS_DISCONNECTING, 93 SMB_DIRECT_CS_DISCONNECTED, 94}; 95 96struct smb_direct_transport { 97 struct ksmbd_transport transport; 98 99 enum smb_direct_status status; 100 bool full_packet_received; 101 wait_queue_head_t wait_status; 102 103 struct rdma_cm_id *cm_id; 104 struct ib_cq *send_cq; 105 struct ib_cq *recv_cq; 106 struct ib_pd *pd; 107 struct ib_qp *qp; 108 109 int max_send_size; 110 int max_recv_size; 111 int max_fragmented_send_size; 112 int max_fragmented_recv_size; 113 int max_rdma_rw_size; 114 115 spinlock_t reassembly_queue_lock; 116 struct list_head reassembly_queue; 117 int reassembly_data_length; 118 int reassembly_queue_length; 119 int first_entry_offset; 120 wait_queue_head_t wait_reassembly_queue; 121 122 spinlock_t receive_credit_lock; 123 int recv_credits; 124 int count_avail_recvmsg; 125 int recv_credit_max; 126 int recv_credit_target; 127 128 spinlock_t recvmsg_queue_lock; 129 struct list_head recvmsg_queue; 130 131 spinlock_t empty_recvmsg_queue_lock; 132 struct list_head empty_recvmsg_queue; 133 134 int send_credit_target; 135 atomic_t send_credits; 136 spinlock_t lock_new_recv_credits; 137 int new_recv_credits; 138 int max_rw_credits; 139 int pages_per_rw_credit; 140 atomic_t rw_credits; 141 142 
wait_queue_head_t wait_send_credits; 143 wait_queue_head_t wait_rw_credits; 144 145 mempool_t *sendmsg_mempool; 146 struct kmem_cache *sendmsg_cache; 147 mempool_t *recvmsg_mempool; 148 struct kmem_cache *recvmsg_cache; 149 150 wait_queue_head_t wait_send_pending; 151 atomic_t send_pending; 152 153 struct delayed_work post_recv_credits_work; 154 struct work_struct send_immediate_work; 155 struct work_struct disconnect_work; 156 157 bool negotiation_requested; 158}; 159 160#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) 161 162enum { 163 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, 164 SMB_DIRECT_MSG_DATA_TRANSFER 165}; 166 167static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; 168 169struct smb_direct_send_ctx { 170 struct list_head msg_list; 171 int wr_cnt; 172 bool need_invalidate_rkey; 173 unsigned int remote_key; 174}; 175 176struct smb_direct_sendmsg { 177 struct smb_direct_transport *transport; 178 struct ib_send_wr wr; 179 struct list_head list; 180 int num_sge; 181 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES]; 182 struct ib_cqe cqe; 183 u8 packet[]; 184}; 185 186struct smb_direct_recvmsg { 187 struct smb_direct_transport *transport; 188 struct list_head list; 189 int type; 190 struct ib_sge sge; 191 struct ib_cqe cqe; 192 bool first_segment; 193 u8 packet[]; 194}; 195 196struct smb_direct_rdma_rw_msg { 197 struct smb_direct_transport *t; 198 struct ib_cqe cqe; 199 int status; 200 struct completion *completion; 201 struct list_head list; 202 struct rdma_rw_ctx rw_ctx; 203 struct sg_table sgt; 204 struct scatterlist sg_list[]; 205}; 206 207void init_smbd_max_io_size(unsigned int sz) 208{ 209 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); 210 smb_direct_max_read_write_size = sz; 211} 212 213unsigned int get_smbd_max_read_write_size(void) 214{ 215 return smb_direct_max_read_write_size; 216} 217 218static inline int get_buf_page_count(void *buf, int size) 219{ 220 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 221 
(uintptr_t)buf / PAGE_SIZE; 222} 223 224static void smb_direct_destroy_pools(struct smb_direct_transport *transport); 225static void smb_direct_post_recv_credits(struct work_struct *work); 226static int smb_direct_post_send_data(struct smb_direct_transport *t, 227 struct smb_direct_send_ctx *send_ctx, 228 struct kvec *iov, int niov, 229 int remaining_data_length); 230 231static inline struct smb_direct_transport * 232smb_trans_direct_transfort(struct ksmbd_transport *t) 233{ 234 return container_of(t, struct smb_direct_transport, transport); 235} 236 237static inline void 238*smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg) 239{ 240 return (void *)recvmsg->packet; 241} 242 243static inline bool is_receive_credit_post_required(int receive_credits, 244 int avail_recvmsg_count) 245{ 246 return receive_credits <= (smb_direct_receive_credit_max >> 3) && 247 avail_recvmsg_count >= (receive_credits >> 2); 248} 249 250static struct 251smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) 252{ 253 struct smb_direct_recvmsg *recvmsg = NULL; 254 255 spin_lock(&t->recvmsg_queue_lock); 256 if (!list_empty(&t->recvmsg_queue)) { 257 recvmsg = list_first_entry(&t->recvmsg_queue, 258 struct smb_direct_recvmsg, 259 list); 260 list_del(&recvmsg->list); 261 } 262 spin_unlock(&t->recvmsg_queue_lock); 263 return recvmsg; 264} 265 266static void put_recvmsg(struct smb_direct_transport *t, 267 struct smb_direct_recvmsg *recvmsg) 268{ 269 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, 270 recvmsg->sge.length, DMA_FROM_DEVICE); 271 272 spin_lock(&t->recvmsg_queue_lock); 273 list_add(&recvmsg->list, &t->recvmsg_queue); 274 spin_unlock(&t->recvmsg_queue_lock); 275} 276 277static struct 278smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t) 279{ 280 struct smb_direct_recvmsg *recvmsg = NULL; 281 282 spin_lock(&t->empty_recvmsg_queue_lock); 283 if (!list_empty(&t->empty_recvmsg_queue)) { 284 recvmsg = 
list_first_entry(&t->empty_recvmsg_queue, 285 struct smb_direct_recvmsg, list); 286 list_del(&recvmsg->list); 287 } 288 spin_unlock(&t->empty_recvmsg_queue_lock); 289 return recvmsg; 290} 291 292static void put_empty_recvmsg(struct smb_direct_transport *t, 293 struct smb_direct_recvmsg *recvmsg) 294{ 295 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, 296 recvmsg->sge.length, DMA_FROM_DEVICE); 297 298 spin_lock(&t->empty_recvmsg_queue_lock); 299 list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue); 300 spin_unlock(&t->empty_recvmsg_queue_lock); 301} 302 303static void enqueue_reassembly(struct smb_direct_transport *t, 304 struct smb_direct_recvmsg *recvmsg, 305 int data_length) 306{ 307 spin_lock(&t->reassembly_queue_lock); 308 list_add_tail(&recvmsg->list, &t->reassembly_queue); 309 t->reassembly_queue_length++; 310 /* 311 * Make sure reassembly_data_length is updated after list and 312 * reassembly_queue_length are updated. On the dequeue side 313 * reassembly_data_length is checked without a lock to determine 314 * if reassembly_queue_length and list is up to date 315 */ 316 virt_wmb(); 317 t->reassembly_data_length += data_length; 318 spin_unlock(&t->reassembly_queue_lock); 319} 320 321static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t) 322{ 323 if (!list_empty(&t->reassembly_queue)) 324 return list_first_entry(&t->reassembly_queue, 325 struct smb_direct_recvmsg, list); 326 else 327 return NULL; 328} 329 330static void smb_direct_disconnect_rdma_work(struct work_struct *work) 331{ 332 struct smb_direct_transport *t = 333 container_of(work, struct smb_direct_transport, 334 disconnect_work); 335 336 if (t->status == SMB_DIRECT_CS_CONNECTED) { 337 t->status = SMB_DIRECT_CS_DISCONNECTING; 338 rdma_disconnect(t->cm_id); 339 } 340} 341 342static void 343smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t) 344{ 345 if (t->status == SMB_DIRECT_CS_CONNECTED) 346 queue_work(smb_direct_wq, 
&t->disconnect_work); 347} 348 349static void smb_direct_send_immediate_work(struct work_struct *work) 350{ 351 struct smb_direct_transport *t = container_of(work, 352 struct smb_direct_transport, send_immediate_work); 353 354 if (t->status != SMB_DIRECT_CS_CONNECTED) 355 return; 356 357 smb_direct_post_send_data(t, NULL, NULL, 0, 0); 358} 359 360static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) 361{ 362 struct smb_direct_transport *t; 363 struct ksmbd_conn *conn; 364 365 t = kzalloc(sizeof(*t), GFP_KERNEL); 366 if (!t) 367 return NULL; 368 369 t->cm_id = cm_id; 370 cm_id->context = t; 371 372 t->status = SMB_DIRECT_CS_NEW; 373 init_waitqueue_head(&t->wait_status); 374 375 spin_lock_init(&t->reassembly_queue_lock); 376 INIT_LIST_HEAD(&t->reassembly_queue); 377 t->reassembly_data_length = 0; 378 t->reassembly_queue_length = 0; 379 init_waitqueue_head(&t->wait_reassembly_queue); 380 init_waitqueue_head(&t->wait_send_credits); 381 init_waitqueue_head(&t->wait_rw_credits); 382 383 spin_lock_init(&t->receive_credit_lock); 384 spin_lock_init(&t->recvmsg_queue_lock); 385 INIT_LIST_HEAD(&t->recvmsg_queue); 386 387 spin_lock_init(&t->empty_recvmsg_queue_lock); 388 INIT_LIST_HEAD(&t->empty_recvmsg_queue); 389 390 init_waitqueue_head(&t->wait_send_pending); 391 atomic_set(&t->send_pending, 0); 392 393 spin_lock_init(&t->lock_new_recv_credits); 394 395 INIT_DELAYED_WORK(&t->post_recv_credits_work, 396 smb_direct_post_recv_credits); 397 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work); 398 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work); 399 400 conn = ksmbd_conn_alloc(); 401 if (!conn) 402 goto err; 403 conn->transport = KSMBD_TRANS(t); 404 KSMBD_TRANS(t)->conn = conn; 405 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; 406 return t; 407err: 408 kfree(t); 409 return NULL; 410} 411 412static void free_transport(struct smb_direct_transport *t) 413{ 414 struct smb_direct_recvmsg *recvmsg; 415 416 
wake_up_interruptible(&t->wait_send_credits); 417 418 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); 419 wait_event(t->wait_send_pending, 420 atomic_read(&t->send_pending) == 0); 421 422 cancel_work_sync(&t->disconnect_work); 423 cancel_delayed_work_sync(&t->post_recv_credits_work); 424 cancel_work_sync(&t->send_immediate_work); 425 426 if (t->qp) { 427 ib_drain_qp(t->qp); 428 ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); 429 ib_destroy_qp(t->qp); 430 } 431 432 ksmbd_debug(RDMA, "drain the reassembly queue\n"); 433 do { 434 spin_lock(&t->reassembly_queue_lock); 435 recvmsg = get_first_reassembly(t); 436 if (recvmsg) { 437 list_del(&recvmsg->list); 438 spin_unlock(&t->reassembly_queue_lock); 439 put_recvmsg(t, recvmsg); 440 } else { 441 spin_unlock(&t->reassembly_queue_lock); 442 } 443 } while (recvmsg); 444 t->reassembly_data_length = 0; 445 446 if (t->send_cq) 447 ib_free_cq(t->send_cq); 448 if (t->recv_cq) 449 ib_free_cq(t->recv_cq); 450 if (t->pd) 451 ib_dealloc_pd(t->pd); 452 if (t->cm_id) 453 rdma_destroy_id(t->cm_id); 454 455 smb_direct_destroy_pools(t); 456 ksmbd_conn_free(KSMBD_TRANS(t)->conn); 457 kfree(t); 458} 459 460static struct smb_direct_sendmsg 461*smb_direct_alloc_sendmsg(struct smb_direct_transport *t) 462{ 463 struct smb_direct_sendmsg *msg; 464 465 msg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL); 466 if (!msg) 467 return ERR_PTR(-ENOMEM); 468 msg->transport = t; 469 INIT_LIST_HEAD(&msg->list); 470 msg->num_sge = 0; 471 return msg; 472} 473 474static void smb_direct_free_sendmsg(struct smb_direct_transport *t, 475 struct smb_direct_sendmsg *msg) 476{ 477 int i; 478 479 if (msg->num_sge > 0) { 480 ib_dma_unmap_single(t->cm_id->device, 481 msg->sge[0].addr, msg->sge[0].length, 482 DMA_TO_DEVICE); 483 for (i = 1; i < msg->num_sge; i++) 484 ib_dma_unmap_page(t->cm_id->device, 485 msg->sge[i].addr, msg->sge[i].length, 486 DMA_TO_DEVICE); 487 } 488 mempool_free(msg, t->sendmsg_mempool); 489} 490 491static int 
smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg) 492{ 493 switch (recvmsg->type) { 494 case SMB_DIRECT_MSG_DATA_TRANSFER: { 495 struct smb_direct_data_transfer *req = 496 (struct smb_direct_data_transfer *)recvmsg->packet; 497 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet 498 + le32_to_cpu(req->data_offset)); 499 ksmbd_debug(RDMA, 500 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", 501 le16_to_cpu(req->credits_granted), 502 le16_to_cpu(req->credits_requested), 503 req->data_length, req->remaining_data_length, 504 hdr->ProtocolId, hdr->Command); 505 break; 506 } 507 case SMB_DIRECT_MSG_NEGOTIATE_REQ: { 508 struct smb_direct_negotiate_req *req = 509 (struct smb_direct_negotiate_req *)recvmsg->packet; 510 ksmbd_debug(RDMA, 511 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", 512 le16_to_cpu(req->min_version), 513 le16_to_cpu(req->max_version), 514 le16_to_cpu(req->credits_requested), 515 le32_to_cpu(req->preferred_send_size), 516 le32_to_cpu(req->max_receive_size), 517 le32_to_cpu(req->max_fragmented_size)); 518 if (le16_to_cpu(req->min_version) > 0x0100 || 519 le16_to_cpu(req->max_version) < 0x0100) 520 return -EOPNOTSUPP; 521 if (le16_to_cpu(req->credits_requested) <= 0 || 522 le32_to_cpu(req->max_receive_size) <= 128 || 523 le32_to_cpu(req->max_fragmented_size) <= 524 128 * 1024) 525 return -ECONNABORTED; 526 527 break; 528 } 529 default: 530 return -EINVAL; 531 } 532 return 0; 533} 534 535static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 536{ 537 struct smb_direct_recvmsg *recvmsg; 538 struct smb_direct_transport *t; 539 540 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe); 541 t = recvmsg->transport; 542 543 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 544 if (wc->status != IB_WC_WR_FLUSH_ERR) { 545 pr_err("Recv error. 
status='%s (%d)' opcode=%d\n", 546 ib_wc_status_msg(wc->status), wc->status, 547 wc->opcode); 548 smb_direct_disconnect_rdma_connection(t); 549 } 550 put_empty_recvmsg(t, recvmsg); 551 return; 552 } 553 554 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", 555 ib_wc_status_msg(wc->status), wc->status, 556 wc->opcode); 557 558 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, 559 recvmsg->sge.length, DMA_FROM_DEVICE); 560 561 switch (recvmsg->type) { 562 case SMB_DIRECT_MSG_NEGOTIATE_REQ: 563 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { 564 put_empty_recvmsg(t, recvmsg); 565 return; 566 } 567 t->negotiation_requested = true; 568 t->full_packet_received = true; 569 t->status = SMB_DIRECT_CS_CONNECTED; 570 enqueue_reassembly(t, recvmsg, 0); 571 wake_up_interruptible(&t->wait_status); 572 break; 573 case SMB_DIRECT_MSG_DATA_TRANSFER: { 574 struct smb_direct_data_transfer *data_transfer = 575 (struct smb_direct_data_transfer *)recvmsg->packet; 576 unsigned int data_length; 577 int avail_recvmsg_count, receive_credits; 578 579 if (wc->byte_len < 580 offsetof(struct smb_direct_data_transfer, padding)) { 581 put_empty_recvmsg(t, recvmsg); 582 return; 583 } 584 585 data_length = le32_to_cpu(data_transfer->data_length); 586 if (data_length) { 587 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + 588 (u64)data_length) { 589 put_empty_recvmsg(t, recvmsg); 590 return; 591 } 592 593 if (t->full_packet_received) 594 recvmsg->first_segment = true; 595 596 if (le32_to_cpu(data_transfer->remaining_data_length)) 597 t->full_packet_received = false; 598 else 599 t->full_packet_received = true; 600 601 enqueue_reassembly(t, recvmsg, (int)data_length); 602 wake_up_interruptible(&t->wait_reassembly_queue); 603 604 spin_lock(&t->receive_credit_lock); 605 receive_credits = --(t->recv_credits); 606 avail_recvmsg_count = t->count_avail_recvmsg; 607 spin_unlock(&t->receive_credit_lock); 608 } else { 609 put_empty_recvmsg(t, recvmsg); 
610 611 spin_lock(&t->receive_credit_lock); 612 receive_credits = --(t->recv_credits); 613 avail_recvmsg_count = ++(t->count_avail_recvmsg); 614 spin_unlock(&t->receive_credit_lock); 615 } 616 617 t->recv_credit_target = 618 le16_to_cpu(data_transfer->credits_requested); 619 atomic_add(le16_to_cpu(data_transfer->credits_granted), 620 &t->send_credits); 621 622 if (le16_to_cpu(data_transfer->flags) & 623 SMB_DIRECT_RESPONSE_REQUESTED) 624 queue_work(smb_direct_wq, &t->send_immediate_work); 625 626 if (atomic_read(&t->send_credits) > 0) 627 wake_up_interruptible(&t->wait_send_credits); 628 629 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) 630 mod_delayed_work(smb_direct_wq, 631 &t->post_recv_credits_work, 0); 632 break; 633 } 634 default: 635 break; 636 } 637} 638 639static int smb_direct_post_recv(struct smb_direct_transport *t, 640 struct smb_direct_recvmsg *recvmsg) 641{ 642 struct ib_recv_wr wr; 643 int ret; 644 645 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device, 646 recvmsg->packet, t->max_recv_size, 647 DMA_FROM_DEVICE); 648 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr); 649 if (ret) 650 return ret; 651 recvmsg->sge.length = t->max_recv_size; 652 recvmsg->sge.lkey = t->pd->local_dma_lkey; 653 recvmsg->cqe.done = recv_done; 654 655 wr.wr_cqe = &recvmsg->cqe; 656 wr.next = NULL; 657 wr.sg_list = &recvmsg->sge; 658 wr.num_sge = 1; 659 660 ret = ib_post_recv(t->qp, &wr, NULL); 661 if (ret) { 662 pr_err("Can't post recv: %d\n", ret); 663 ib_dma_unmap_single(t->cm_id->device, 664 recvmsg->sge.addr, recvmsg->sge.length, 665 DMA_FROM_DEVICE); 666 smb_direct_disconnect_rdma_connection(t); 667 return ret; 668 } 669 return ret; 670} 671 672static int smb_direct_read(struct ksmbd_transport *t, char *buf, 673 unsigned int size) 674{ 675 struct smb_direct_recvmsg *recvmsg; 676 struct smb_direct_data_transfer *data_transfer; 677 int to_copy, to_read, data_read, offset; 678 u32 data_length, remaining_data_length, 
data_offset; 679 int rc; 680 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 681 682again: 683 if (st->status != SMB_DIRECT_CS_CONNECTED) { 684 pr_err("disconnected\n"); 685 return -ENOTCONN; 686 } 687 688 /* 689 * No need to hold the reassembly queue lock all the time as we are 690 * the only one reading from the front of the queue. The transport 691 * may add more entries to the back of the queue at the same time 692 */ 693 if (st->reassembly_data_length >= size) { 694 int queue_length; 695 int queue_removed = 0; 696 697 /* 698 * Need to make sure reassembly_data_length is read before 699 * reading reassembly_queue_length and calling 700 * get_first_reassembly. This call is lock free 701 * as we never read at the end of the queue which are being 702 * updated in SOFTIRQ as more data is received 703 */ 704 virt_rmb(); 705 queue_length = st->reassembly_queue_length; 706 data_read = 0; 707 to_read = size; 708 offset = st->first_entry_offset; 709 while (data_read < size) { 710 recvmsg = get_first_reassembly(st); 711 data_transfer = smb_direct_recvmsg_payload(recvmsg); 712 data_length = le32_to_cpu(data_transfer->data_length); 713 remaining_data_length = 714 le32_to_cpu(data_transfer->remaining_data_length); 715 data_offset = le32_to_cpu(data_transfer->data_offset); 716 717 /* 718 * The upper layer expects RFC1002 length at the 719 * beginning of the payload. Return it to indicate 720 * the total length of the packet. This minimize the 721 * change to upper layer packet processing logic. 
This 722 * will be eventually remove when an intermediate 723 * transport layer is added 724 */ 725 if (recvmsg->first_segment && size == 4) { 726 unsigned int rfc1002_len = 727 data_length + remaining_data_length; 728 *((__be32 *)buf) = cpu_to_be32(rfc1002_len); 729 data_read = 4; 730 recvmsg->first_segment = false; 731 ksmbd_debug(RDMA, 732 "returning rfc1002 length %d\n", 733 rfc1002_len); 734 goto read_rfc1002_done; 735 } 736 737 to_copy = min_t(int, data_length - offset, to_read); 738 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset, 739 to_copy); 740 741 /* move on to the next buffer? */ 742 if (to_copy == data_length - offset) { 743 queue_length--; 744 /* 745 * No need to lock if we are not at the 746 * end of the queue 747 */ 748 if (queue_length) { 749 list_del(&recvmsg->list); 750 } else { 751 spin_lock_irq(&st->reassembly_queue_lock); 752 list_del(&recvmsg->list); 753 spin_unlock_irq(&st->reassembly_queue_lock); 754 } 755 queue_removed++; 756 put_recvmsg(st, recvmsg); 757 offset = 0; 758 } else { 759 offset += to_copy; 760 } 761 762 to_read -= to_copy; 763 data_read += to_copy; 764 } 765 766 spin_lock_irq(&st->reassembly_queue_lock); 767 st->reassembly_data_length -= data_read; 768 st->reassembly_queue_length -= queue_removed; 769 spin_unlock_irq(&st->reassembly_queue_lock); 770 771 spin_lock(&st->receive_credit_lock); 772 st->count_avail_recvmsg += queue_removed; 773 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) { 774 spin_unlock(&st->receive_credit_lock); 775 mod_delayed_work(smb_direct_wq, 776 &st->post_recv_credits_work, 0); 777 } else { 778 spin_unlock(&st->receive_credit_lock); 779 } 780 781 st->first_entry_offset = offset; 782 ksmbd_debug(RDMA, 783 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 784 data_read, st->reassembly_data_length, 785 st->first_entry_offset); 786read_rfc1002_done: 787 return data_read; 788 } 789 790 ksmbd_debug(RDMA, "wait_event on 
more data\n"); 791 rc = wait_event_interruptible(st->wait_reassembly_queue, 792 st->reassembly_data_length >= size || 793 st->status != SMB_DIRECT_CS_CONNECTED); 794 if (rc) 795 return -EINTR; 796 797 goto again; 798} 799 800static void smb_direct_post_recv_credits(struct work_struct *work) 801{ 802 struct smb_direct_transport *t = container_of(work, 803 struct smb_direct_transport, post_recv_credits_work.work); 804 struct smb_direct_recvmsg *recvmsg; 805 int receive_credits, credits = 0; 806 int ret; 807 int use_free = 1; 808 809 spin_lock(&t->receive_credit_lock); 810 receive_credits = t->recv_credits; 811 spin_unlock(&t->receive_credit_lock); 812 813 if (receive_credits < t->recv_credit_target) { 814 while (true) { 815 if (use_free) 816 recvmsg = get_free_recvmsg(t); 817 else 818 recvmsg = get_empty_recvmsg(t); 819 if (!recvmsg) { 820 if (use_free) { 821 use_free = 0; 822 continue; 823 } else { 824 break; 825 } 826 } 827 828 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; 829 recvmsg->first_segment = false; 830 831 ret = smb_direct_post_recv(t, recvmsg); 832 if (ret) { 833 pr_err("Can't post recv: %d\n", ret); 834 put_recvmsg(t, recvmsg); 835 break; 836 } 837 credits++; 838 } 839 } 840 841 spin_lock(&t->receive_credit_lock); 842 t->recv_credits += credits; 843 t->count_avail_recvmsg -= credits; 844 spin_unlock(&t->receive_credit_lock); 845 846 spin_lock(&t->lock_new_recv_credits); 847 t->new_recv_credits += credits; 848 spin_unlock(&t->lock_new_recv_credits); 849 850 if (credits) 851 queue_work(smb_direct_wq, &t->send_immediate_work); 852} 853 854static void send_done(struct ib_cq *cq, struct ib_wc *wc) 855{ 856 struct smb_direct_sendmsg *sendmsg, *sibling; 857 struct smb_direct_transport *t; 858 struct list_head *pos, *prev, *end; 859 860 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe); 861 t = sendmsg->transport; 862 863 ksmbd_debug(RDMA, "Send completed. 
status='%s (%d)', opcode=%d\n", 864 ib_wc_status_msg(wc->status), wc->status, 865 wc->opcode); 866 867 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 868 pr_err("Send error. status='%s (%d)', opcode=%d\n", 869 ib_wc_status_msg(wc->status), wc->status, 870 wc->opcode); 871 smb_direct_disconnect_rdma_connection(t); 872 } 873 874 if (atomic_dec_and_test(&t->send_pending)) 875 wake_up(&t->wait_send_pending); 876 877 /* iterate and free the list of messages in reverse. the list's head 878 * is invalid. 879 */ 880 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next; 881 prev != end; pos = prev, prev = prev->prev) { 882 sibling = container_of(pos, struct smb_direct_sendmsg, list); 883 smb_direct_free_sendmsg(t, sibling); 884 } 885 886 sibling = container_of(pos, struct smb_direct_sendmsg, list); 887 smb_direct_free_sendmsg(t, sibling); 888} 889 890static int manage_credits_prior_sending(struct smb_direct_transport *t) 891{ 892 int new_credits; 893 894 spin_lock(&t->lock_new_recv_credits); 895 new_credits = t->new_recv_credits; 896 t->new_recv_credits = 0; 897 spin_unlock(&t->lock_new_recv_credits); 898 899 return new_credits; 900} 901 902static int smb_direct_post_send(struct smb_direct_transport *t, 903 struct ib_send_wr *wr) 904{ 905 int ret; 906 907 atomic_inc(&t->send_pending); 908 ret = ib_post_send(t->qp, wr, NULL); 909 if (ret) { 910 pr_err("failed to post send: %d\n", ret); 911 if (atomic_dec_and_test(&t->send_pending)) 912 wake_up(&t->wait_send_pending); 913 smb_direct_disconnect_rdma_connection(t); 914 } 915 return ret; 916} 917 918static void smb_direct_send_ctx_init(struct smb_direct_transport *t, 919 struct smb_direct_send_ctx *send_ctx, 920 bool need_invalidate_rkey, 921 unsigned int remote_key) 922{ 923 INIT_LIST_HEAD(&send_ctx->msg_list); 924 send_ctx->wr_cnt = 0; 925 send_ctx->need_invalidate_rkey = need_invalidate_rkey; 926 send_ctx->remote_key = remote_key; 927} 928 929static int smb_direct_flush_send_list(struct 
smb_direct_transport *t, 930 struct smb_direct_send_ctx *send_ctx, 931 bool is_last) 932{ 933 struct smb_direct_sendmsg *first, *last; 934 int ret; 935 936 if (list_empty(&send_ctx->msg_list)) 937 return 0; 938 939 first = list_first_entry(&send_ctx->msg_list, 940 struct smb_direct_sendmsg, 941 list); 942 last = list_last_entry(&send_ctx->msg_list, 943 struct smb_direct_sendmsg, 944 list); 945 946 last->wr.send_flags = IB_SEND_SIGNALED; 947 last->wr.wr_cqe = &last->cqe; 948 if (is_last && send_ctx->need_invalidate_rkey) { 949 last->wr.opcode = IB_WR_SEND_WITH_INV; 950 last->wr.ex.invalidate_rkey = send_ctx->remote_key; 951 } 952 953 ret = smb_direct_post_send(t, &first->wr); 954 if (!ret) { 955 smb_direct_send_ctx_init(t, send_ctx, 956 send_ctx->need_invalidate_rkey, 957 send_ctx->remote_key); 958 } else { 959 atomic_add(send_ctx->wr_cnt, &t->send_credits); 960 wake_up(&t->wait_send_credits); 961 list_for_each_entry_safe(first, last, &send_ctx->msg_list, 962 list) { 963 smb_direct_free_sendmsg(t, first); 964 } 965 } 966 return ret; 967} 968 969static int wait_for_credits(struct smb_direct_transport *t, 970 wait_queue_head_t *waitq, atomic_t *total_credits, 971 int needed) 972{ 973 int ret; 974 975 do { 976 if (atomic_sub_return(needed, total_credits) >= 0) 977 return 0; 978 979 atomic_add(needed, total_credits); 980 ret = wait_event_interruptible(*waitq, 981 atomic_read(total_credits) >= needed || 982 t->status != SMB_DIRECT_CS_CONNECTED); 983 984 if (t->status != SMB_DIRECT_CS_CONNECTED) 985 return -ENOTCONN; 986 else if (ret < 0) 987 return ret; 988 } while (true); 989} 990 991static int wait_for_send_credits(struct smb_direct_transport *t, 992 struct smb_direct_send_ctx *send_ctx) 993{ 994 int ret; 995 996 if (send_ctx && 997 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) { 998 ret = smb_direct_flush_send_list(t, send_ctx, false); 999 if (ret) 1000 return ret; 1001 } 1002 1003 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 
1); 1004} 1005 1006static int wait_for_rw_credits(struct smb_direct_transport *t, int credits) 1007{ 1008 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits); 1009} 1010 1011static int calc_rw_credits(struct smb_direct_transport *t, 1012 char *buf, unsigned int len) 1013{ 1014 return DIV_ROUND_UP(get_buf_page_count(buf, len), 1015 t->pages_per_rw_credit); 1016} 1017 1018static int smb_direct_create_header(struct smb_direct_transport *t, 1019 int size, int remaining_data_length, 1020 struct smb_direct_sendmsg **sendmsg_out) 1021{ 1022 struct smb_direct_sendmsg *sendmsg; 1023 struct smb_direct_data_transfer *packet; 1024 int header_length; 1025 int ret; 1026 1027 sendmsg = smb_direct_alloc_sendmsg(t); 1028 if (IS_ERR(sendmsg)) 1029 return PTR_ERR(sendmsg); 1030 1031 /* Fill in the packet header */ 1032 packet = (struct smb_direct_data_transfer *)sendmsg->packet; 1033 packet->credits_requested = cpu_to_le16(t->send_credit_target); 1034 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1035 1036 packet->flags = 0; 1037 packet->reserved = 0; 1038 if (!size) 1039 packet->data_offset = 0; 1040 else 1041 packet->data_offset = cpu_to_le32(24); 1042 packet->data_length = cpu_to_le32(size); 1043 packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1044 packet->padding = 0; 1045 1046 ksmbd_debug(RDMA, 1047 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1048 le16_to_cpu(packet->credits_requested), 1049 le16_to_cpu(packet->credits_granted), 1050 le32_to_cpu(packet->data_offset), 1051 le32_to_cpu(packet->data_length), 1052 le32_to_cpu(packet->remaining_data_length)); 1053 1054 /* Map the packet to DMA */ 1055 header_length = sizeof(struct smb_direct_data_transfer); 1056 /* If this is a packet without payload, don't send padding */ 1057 if (!size) 1058 header_length = 1059 offsetof(struct smb_direct_data_transfer, padding); 1060 1061 sendmsg->sge[0].addr = 
ib_dma_map_single(t->cm_id->device, 1062 (void *)packet, 1063 header_length, 1064 DMA_TO_DEVICE); 1065 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1066 if (ret) { 1067 smb_direct_free_sendmsg(t, sendmsg); 1068 return ret; 1069 } 1070 1071 sendmsg->num_sge = 1; 1072 sendmsg->sge[0].length = header_length; 1073 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1074 1075 *sendmsg_out = sendmsg; 1076 return 0; 1077} 1078 1079static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) 1080{ 1081 bool high = is_vmalloc_addr(buf); 1082 struct page *page; 1083 int offset, len; 1084 int i = 0; 1085 1086 if (size <= 0 || nentries < get_buf_page_count(buf, size)) 1087 return -EINVAL; 1088 1089 offset = offset_in_page(buf); 1090 buf -= offset; 1091 while (size > 0) { 1092 len = min_t(int, PAGE_SIZE - offset, size); 1093 if (high) 1094 page = vmalloc_to_page(buf); 1095 else 1096 page = kmap_to_page(buf); 1097 1098 if (!sg_list) 1099 return -EINVAL; 1100 sg_set_page(sg_list, page, len, offset); 1101 sg_list = sg_next(sg_list); 1102 1103 buf += PAGE_SIZE; 1104 size -= len; 1105 offset = 0; 1106 i++; 1107 } 1108 return i; 1109} 1110 1111static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, 1112 struct scatterlist *sg_list, int nentries, 1113 enum dma_data_direction dir) 1114{ 1115 int npages; 1116 1117 npages = get_sg_list(buf, size, sg_list, nentries); 1118 if (npages < 0) 1119 return -EINVAL; 1120 return ib_dma_map_sg(device, sg_list, npages, dir); 1121} 1122 1123static int post_sendmsg(struct smb_direct_transport *t, 1124 struct smb_direct_send_ctx *send_ctx, 1125 struct smb_direct_sendmsg *msg) 1126{ 1127 int i; 1128 1129 for (i = 0; i < msg->num_sge; i++) 1130 ib_dma_sync_single_for_device(t->cm_id->device, 1131 msg->sge[i].addr, msg->sge[i].length, 1132 DMA_TO_DEVICE); 1133 1134 msg->cqe.done = send_done; 1135 msg->wr.opcode = IB_WR_SEND; 1136 msg->wr.sg_list = &msg->sge[0]; 1137 msg->wr.num_sge = 
msg->num_sge; 1138 msg->wr.next = NULL; 1139 1140 if (send_ctx) { 1141 msg->wr.wr_cqe = NULL; 1142 msg->wr.send_flags = 0; 1143 if (!list_empty(&send_ctx->msg_list)) { 1144 struct smb_direct_sendmsg *last; 1145 1146 last = list_last_entry(&send_ctx->msg_list, 1147 struct smb_direct_sendmsg, 1148 list); 1149 last->wr.next = &msg->wr; 1150 } 1151 list_add_tail(&msg->list, &send_ctx->msg_list); 1152 send_ctx->wr_cnt++; 1153 return 0; 1154 } 1155 1156 msg->wr.wr_cqe = &msg->cqe; 1157 msg->wr.send_flags = IB_SEND_SIGNALED; 1158 return smb_direct_post_send(t, &msg->wr); 1159} 1160 1161static int smb_direct_post_send_data(struct smb_direct_transport *t, 1162 struct smb_direct_send_ctx *send_ctx, 1163 struct kvec *iov, int niov, 1164 int remaining_data_length) 1165{ 1166 int i, j, ret; 1167 struct smb_direct_sendmsg *msg; 1168 int data_length; 1169 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1]; 1170 1171 ret = wait_for_send_credits(t, send_ctx); 1172 if (ret) 1173 return ret; 1174 1175 data_length = 0; 1176 for (i = 0; i < niov; i++) 1177 data_length += iov[i].iov_len; 1178 1179 ret = smb_direct_create_header(t, data_length, remaining_data_length, 1180 &msg); 1181 if (ret) { 1182 atomic_inc(&t->send_credits); 1183 return ret; 1184 } 1185 1186 for (i = 0; i < niov; i++) { 1187 struct ib_sge *sge; 1188 int sg_cnt; 1189 1190 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1); 1191 sg_cnt = get_mapped_sg_list(t->cm_id->device, 1192 iov[i].iov_base, iov[i].iov_len, 1193 sg, SMB_DIRECT_MAX_SEND_SGES - 1, 1194 DMA_TO_DEVICE); 1195 if (sg_cnt <= 0) { 1196 pr_err("failed to map buffer\n"); 1197 ret = -ENOMEM; 1198 goto err; 1199 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) { 1200 pr_err("buffer not fitted into sges\n"); 1201 ret = -E2BIG; 1202 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt, 1203 DMA_TO_DEVICE); 1204 goto err; 1205 } 1206 1207 for (j = 0; j < sg_cnt; j++) { 1208 sge = &msg->sge[msg->num_sge]; 1209 sge->addr = sg_dma_address(&sg[j]); 1210 sge->length 
= sg_dma_len(&sg[j]); 1211 sge->lkey = t->pd->local_dma_lkey; 1212 msg->num_sge++; 1213 } 1214 } 1215 1216 ret = post_sendmsg(t, send_ctx, msg); 1217 if (ret) 1218 goto err; 1219 return 0; 1220err: 1221 smb_direct_free_sendmsg(t, msg); 1222 atomic_inc(&t->send_credits); 1223 return ret; 1224} 1225 1226static int smb_direct_writev(struct ksmbd_transport *t, 1227 struct kvec *iov, int niovs, int buflen, 1228 bool need_invalidate, unsigned int remote_key) 1229{ 1230 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1231 int remaining_data_length; 1232 int start, i, j; 1233 int max_iov_size = st->max_send_size - 1234 sizeof(struct smb_direct_data_transfer); 1235 int ret; 1236 struct kvec vec; 1237 struct smb_direct_send_ctx send_ctx; 1238 1239 if (st->status != SMB_DIRECT_CS_CONNECTED) 1240 return -ENOTCONN; 1241 1242 //FIXME: skip RFC1002 header.. 1243 buflen -= 4; 1244 iov[0].iov_base += 4; 1245 iov[0].iov_len -= 4; 1246 1247 remaining_data_length = buflen; 1248 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); 1249 1250 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key); 1251 start = i = 0; 1252 buflen = 0; 1253 while (true) { 1254 buflen += iov[i].iov_len; 1255 if (buflen > max_iov_size) { 1256 if (i > start) { 1257 remaining_data_length -= 1258 (buflen - iov[i].iov_len); 1259 ret = smb_direct_post_send_data(st, &send_ctx, 1260 &iov[start], i - start, 1261 remaining_data_length); 1262 if (ret) 1263 goto done; 1264 } else { 1265 /* iov[start] is too big, break it */ 1266 int nvec = (buflen + max_iov_size - 1) / 1267 max_iov_size; 1268 1269 for (j = 0; j < nvec; j++) { 1270 vec.iov_base = 1271 (char *)iov[start].iov_base + 1272 j * max_iov_size; 1273 vec.iov_len = 1274 min_t(int, max_iov_size, 1275 buflen - max_iov_size * j); 1276 remaining_data_length -= vec.iov_len; 1277 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1, 1278 remaining_data_length); 1279 if (ret) 1280 goto done; 1281 } 1282 i++; 1283 if (i == 
niovs) 1284 break; 1285 } 1286 start = i; 1287 buflen = 0; 1288 } else { 1289 i++; 1290 if (i == niovs) { 1291 /* send out all remaining vecs */ 1292 remaining_data_length -= buflen; 1293 ret = smb_direct_post_send_data(st, &send_ctx, 1294 &iov[start], i - start, 1295 remaining_data_length); 1296 if (ret) 1297 goto done; 1298 break; 1299 } 1300 } 1301 } 1302 1303done: 1304 ret = smb_direct_flush_send_list(st, &send_ctx, true); 1305 1306 /* 1307 * As an optimization, we don't wait for individual I/O to finish 1308 * before sending the next one. 1309 * Send them all and wait for pending send count to get to 0 1310 * that means all the I/Os have been out and we are good to return 1311 */ 1312 1313 wait_event(st->wait_send_pending, 1314 atomic_read(&st->send_pending) == 0); 1315 return ret; 1316} 1317 1318static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, 1319 struct smb_direct_rdma_rw_msg *msg, 1320 enum dma_data_direction dir) 1321{ 1322 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, 1323 msg->sgt.sgl, msg->sgt.nents, dir); 1324 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1325 kfree(msg); 1326} 1327 1328static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, 1329 enum dma_data_direction dir) 1330{ 1331 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe, 1332 struct smb_direct_rdma_rw_msg, cqe); 1333 struct smb_direct_transport *t = msg->t; 1334 1335 if (wc->status != IB_WC_SUCCESS) { 1336 msg->status = -EIO; 1337 pr_err("read/write error. 
opcode = %d, status = %s(%d)\n", 1338 wc->opcode, ib_wc_status_msg(wc->status), wc->status); 1339 if (wc->status != IB_WC_WR_FLUSH_ERR) 1340 smb_direct_disconnect_rdma_connection(t); 1341 } 1342 1343 complete(msg->completion); 1344} 1345 1346static void read_done(struct ib_cq *cq, struct ib_wc *wc) 1347{ 1348 read_write_done(cq, wc, DMA_FROM_DEVICE); 1349} 1350 1351static void write_done(struct ib_cq *cq, struct ib_wc *wc) 1352{ 1353 read_write_done(cq, wc, DMA_TO_DEVICE); 1354} 1355 1356static int smb_direct_rdma_xmit(struct smb_direct_transport *t, 1357 void *buf, int buf_len, 1358 struct smb2_buffer_desc_v1 *desc, 1359 unsigned int desc_len, 1360 bool is_read) 1361{ 1362 struct smb_direct_rdma_rw_msg *msg, *next_msg; 1363 int i, ret; 1364 DECLARE_COMPLETION_ONSTACK(completion); 1365 struct ib_send_wr *first_wr; 1366 LIST_HEAD(msg_list); 1367 char *desc_buf; 1368 int credits_needed; 1369 unsigned int desc_buf_len; 1370 size_t total_length = 0; 1371 1372 if (t->status != SMB_DIRECT_CS_CONNECTED) 1373 return -ENOTCONN; 1374 1375 /* calculate needed credits */ 1376 credits_needed = 0; 1377 desc_buf = buf; 1378 for (i = 0; i < desc_len / sizeof(*desc); i++) { 1379 desc_buf_len = le32_to_cpu(desc[i].length); 1380 1381 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); 1382 desc_buf += desc_buf_len; 1383 total_length += desc_buf_len; 1384 if (desc_buf_len == 0 || total_length > buf_len || 1385 total_length > t->max_rdma_rw_size) 1386 return -EINVAL; 1387 } 1388 1389 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", 1390 is_read ? 
"read" : "write", buf_len, credits_needed); 1391 1392 ret = wait_for_rw_credits(t, credits_needed); 1393 if (ret < 0) 1394 return ret; 1395 1396 /* build rdma_rw_ctx for each descriptor */ 1397 desc_buf = buf; 1398 for (i = 0; i < desc_len / sizeof(*desc); i++) { 1399 msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) + 1400 sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL); 1401 if (!msg) { 1402 ret = -ENOMEM; 1403 goto out; 1404 } 1405 1406 desc_buf_len = le32_to_cpu(desc[i].length); 1407 1408 msg->t = t; 1409 msg->cqe.done = is_read ? read_done : write_done; 1410 msg->completion = &completion; 1411 1412 msg->sgt.sgl = &msg->sg_list[0]; 1413 ret = sg_alloc_table_chained(&msg->sgt, 1414 get_buf_page_count(desc_buf, desc_buf_len), 1415 msg->sg_list, SG_CHUNK_SIZE); 1416 if (ret) { 1417 kfree(msg); 1418 ret = -ENOMEM; 1419 goto out; 1420 } 1421 1422 ret = get_sg_list(desc_buf, desc_buf_len, 1423 msg->sgt.sgl, msg->sgt.orig_nents); 1424 if (ret < 0) { 1425 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1426 kfree(msg); 1427 goto out; 1428 } 1429 1430 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, 1431 msg->sgt.sgl, 1432 get_buf_page_count(desc_buf, desc_buf_len), 1433 0, 1434 le64_to_cpu(desc[i].offset), 1435 le32_to_cpu(desc[i].token), 1436 is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); 1437 if (ret < 0) { 1438 pr_err("failed to init rdma_rw_ctx: %d\n", ret); 1439 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 1440 kfree(msg); 1441 goto out; 1442 } 1443 1444 list_add_tail(&msg->list, &msg_list); 1445 desc_buf += desc_buf_len; 1446 } 1447 1448 /* concatenate work requests of rdma_rw_ctxs */ 1449 first_wr = NULL; 1450 list_for_each_entry_reverse(msg, &msg_list, list) { 1451 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, 1452 &msg->cqe, first_wr); 1453 } 1454 1455 ret = ib_post_send(t->qp, first_wr, NULL); 1456 if (ret) { 1457 pr_err("failed to post send wr for RDMA R/W: %d\n", ret); 1458 goto out; 1459 } 1460 1461 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list); 1462 wait_for_completion(&completion); 1463 ret = msg->status; 1464out: 1465 list_for_each_entry_safe(msg, next_msg, &msg_list, list) { 1466 list_del(&msg->list); 1467 smb_direct_free_rdma_rw_msg(t, msg, 1468 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); 1469 } 1470 atomic_add(credits_needed, &t->rw_credits); 1471 wake_up(&t->wait_rw_credits); 1472 return ret; 1473} 1474 1475static int smb_direct_rdma_write(struct ksmbd_transport *t, 1476 void *buf, unsigned int buflen, 1477 struct smb2_buffer_desc_v1 *desc, 1478 unsigned int desc_len) 1479{ 1480 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1481 desc, desc_len, false); 1482} 1483 1484static int smb_direct_rdma_read(struct ksmbd_transport *t, 1485 void *buf, unsigned int buflen, 1486 struct smb2_buffer_desc_v1 *desc, 1487 unsigned int desc_len) 1488{ 1489 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, 1490 desc, desc_len, true); 1491} 1492 1493static void smb_direct_disconnect(struct ksmbd_transport *t) 1494{ 1495 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1496 1497 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id); 1498 1499 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1500 
wait_event_interruptible(st->wait_status, 1501 st->status == SMB_DIRECT_CS_DISCONNECTED); 1502 free_transport(st); 1503} 1504 1505static void smb_direct_shutdown(struct ksmbd_transport *t) 1506{ 1507 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1508 1509 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id); 1510 1511 smb_direct_disconnect_rdma_work(&st->disconnect_work); 1512} 1513 1514static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, 1515 struct rdma_cm_event *event) 1516{ 1517 struct smb_direct_transport *t = cm_id->context; 1518 1519 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n", 1520 cm_id, rdma_event_msg(event->event), event->event); 1521 1522 switch (event->event) { 1523 case RDMA_CM_EVENT_ESTABLISHED: { 1524 t->status = SMB_DIRECT_CS_CONNECTED; 1525 wake_up_interruptible(&t->wait_status); 1526 break; 1527 } 1528 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1529 case RDMA_CM_EVENT_DISCONNECTED: { 1530 t->status = SMB_DIRECT_CS_DISCONNECTED; 1531 wake_up_interruptible(&t->wait_status); 1532 wake_up_interruptible(&t->wait_reassembly_queue); 1533 wake_up(&t->wait_send_credits); 1534 break; 1535 } 1536 case RDMA_CM_EVENT_CONNECT_ERROR: { 1537 t->status = SMB_DIRECT_CS_DISCONNECTED; 1538 wake_up_interruptible(&t->wait_status); 1539 break; 1540 } 1541 default: 1542 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n", 1543 cm_id, rdma_event_msg(event->event), 1544 event->event); 1545 break; 1546 } 1547 return 0; 1548} 1549 1550static void smb_direct_qpair_handler(struct ib_event *event, void *context) 1551{ 1552 struct smb_direct_transport *t = context; 1553 1554 ksmbd_debug(RDMA, "Received QP event. 
cm_id=%p, event=%s (%d)\n", 1555 t->cm_id, ib_event_msg(event->event), event->event); 1556 1557 switch (event->event) { 1558 case IB_EVENT_CQ_ERR: 1559 case IB_EVENT_QP_FATAL: 1560 smb_direct_disconnect_rdma_connection(t); 1561 break; 1562 default: 1563 break; 1564 } 1565} 1566 1567static int smb_direct_send_negotiate_response(struct smb_direct_transport *t, 1568 int failed) 1569{ 1570 struct smb_direct_sendmsg *sendmsg; 1571 struct smb_direct_negotiate_resp *resp; 1572 int ret; 1573 1574 sendmsg = smb_direct_alloc_sendmsg(t); 1575 if (IS_ERR(sendmsg)) 1576 return -ENOMEM; 1577 1578 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet; 1579 if (failed) { 1580 memset(resp, 0, sizeof(*resp)); 1581 resp->min_version = cpu_to_le16(0x0100); 1582 resp->max_version = cpu_to_le16(0x0100); 1583 resp->status = STATUS_NOT_SUPPORTED; 1584 } else { 1585 resp->status = STATUS_SUCCESS; 1586 resp->min_version = SMB_DIRECT_VERSION_LE; 1587 resp->max_version = SMB_DIRECT_VERSION_LE; 1588 resp->negotiated_version = SMB_DIRECT_VERSION_LE; 1589 resp->reserved = 0; 1590 resp->credits_requested = 1591 cpu_to_le16(t->send_credit_target); 1592 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t)); 1593 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size); 1594 resp->preferred_send_size = cpu_to_le32(t->max_send_size); 1595 resp->max_receive_size = cpu_to_le32(t->max_recv_size); 1596 resp->max_fragmented_size = 1597 cpu_to_le32(t->max_fragmented_recv_size); 1598 } 1599 1600 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device, 1601 (void *)resp, sizeof(*resp), 1602 DMA_TO_DEVICE); 1603 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr); 1604 if (ret) { 1605 smb_direct_free_sendmsg(t, sendmsg); 1606 return ret; 1607 } 1608 1609 sendmsg->num_sge = 1; 1610 sendmsg->sge[0].length = sizeof(*resp); 1611 sendmsg->sge[0].lkey = t->pd->local_dma_lkey; 1612 1613 ret = post_sendmsg(t, NULL, sendmsg); 1614 if (ret) { 1615 smb_direct_free_sendmsg(t, 
sendmsg); 1616 return ret; 1617 } 1618 1619 wait_event(t->wait_send_pending, 1620 atomic_read(&t->send_pending) == 0); 1621 return 0; 1622} 1623 1624static int smb_direct_accept_client(struct smb_direct_transport *t) 1625{ 1626 struct rdma_conn_param conn_param; 1627 struct ib_port_immutable port_immutable; 1628 u32 ird_ord_hdr[2]; 1629 int ret; 1630 1631 memset(&conn_param, 0, sizeof(conn_param)); 1632 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom, 1633 SMB_DIRECT_CM_INITIATOR_DEPTH); 1634 conn_param.responder_resources = 0; 1635 1636 t->cm_id->device->ops.get_port_immutable(t->cm_id->device, 1637 t->cm_id->port_num, 1638 &port_immutable); 1639 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 1640 ird_ord_hdr[0] = conn_param.responder_resources; 1641 ird_ord_hdr[1] = 1; 1642 conn_param.private_data = ird_ord_hdr; 1643 conn_param.private_data_len = sizeof(ird_ord_hdr); 1644 } else { 1645 conn_param.private_data = NULL; 1646 conn_param.private_data_len = 0; 1647 } 1648 conn_param.retry_count = SMB_DIRECT_CM_RETRY; 1649 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; 1650 conn_param.flow_control = 0; 1651 1652 ret = rdma_accept(t->cm_id, &conn_param); 1653 if (ret) { 1654 pr_err("error at rdma_accept: %d\n", ret); 1655 return ret; 1656 } 1657 return 0; 1658} 1659 1660static int smb_direct_prepare_negotiation(struct smb_direct_transport *t) 1661{ 1662 int ret; 1663 struct smb_direct_recvmsg *recvmsg; 1664 1665 recvmsg = get_free_recvmsg(t); 1666 if (!recvmsg) 1667 return -ENOMEM; 1668 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ; 1669 1670 ret = smb_direct_post_recv(t, recvmsg); 1671 if (ret) { 1672 pr_err("Can't post recv: %d\n", ret); 1673 goto out_err; 1674 } 1675 1676 t->negotiation_requested = false; 1677 ret = smb_direct_accept_client(t); 1678 if (ret) { 1679 pr_err("Can't accept client\n"); 1680 goto out_err; 1681 } 1682 1683 smb_direct_post_recv_credits(&t->post_recv_credits_work.work); 1684 return 0; 
1685out_err: 1686 put_recvmsg(t, recvmsg); 1687 return ret; 1688} 1689 1690static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t) 1691{ 1692 return min_t(unsigned int, 1693 t->cm_id->device->attrs.max_fast_reg_page_list_len, 1694 256); 1695} 1696 1697static int smb_direct_init_params(struct smb_direct_transport *t, 1698 struct ib_qp_cap *cap) 1699{ 1700 struct ib_device *device = t->cm_id->device; 1701 int max_send_sges, max_rw_wrs, max_send_wrs; 1702 unsigned int max_sge_per_wr, wrs_per_credit; 1703 1704 /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, 1705 * SMB2 response could be mapped. 1706 */ 1707 t->max_send_size = smb_direct_max_send_size; 1708 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3; 1709 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) { 1710 pr_err("max_send_size %d is too large\n", t->max_send_size); 1711 return -EINVAL; 1712 } 1713 1714 /* Calculate the number of work requests for RDMA R/W. 1715 * The maximum number of pages which can be registered 1716 * with one Memory region can be transferred with one 1717 * R/W credit. And at least 4 work requests for each credit 1718 * are needed for MR registration, RDMA R/W, local & remote 1719 * MR invalidation. 
1720 */ 1721 t->max_rdma_rw_size = smb_direct_max_read_write_size; 1722 t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t); 1723 t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size, 1724 (t->pages_per_rw_credit - 1) * 1725 PAGE_SIZE); 1726 1727 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, 1728 device->attrs.max_sge_rd); 1729 max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, 1730 max_send_sges); 1731 wrs_per_credit = max_t(unsigned int, 4, 1732 DIV_ROUND_UP(t->pages_per_rw_credit, 1733 max_sge_per_wr) + 1); 1734 max_rw_wrs = t->max_rw_credits * wrs_per_credit; 1735 1736 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs; 1737 if (max_send_wrs > device->attrs.max_cqe || 1738 max_send_wrs > device->attrs.max_qp_wr) { 1739 pr_err("consider lowering send_credit_target = %d\n", 1740 smb_direct_send_credit_target); 1741 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 1742 device->attrs.max_cqe, device->attrs.max_qp_wr); 1743 return -EINVAL; 1744 } 1745 1746 if (smb_direct_receive_credit_max > device->attrs.max_cqe || 1747 smb_direct_receive_credit_max > device->attrs.max_qp_wr) { 1748 pr_err("consider lowering receive_credit_max = %d\n", 1749 smb_direct_receive_credit_max); 1750 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", 1751 device->attrs.max_cqe, device->attrs.max_qp_wr); 1752 return -EINVAL; 1753 } 1754 1755 if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) { 1756 pr_err("warning: device max_recv_sge = %d too small\n", 1757 device->attrs.max_recv_sge); 1758 return -EINVAL; 1759 } 1760 1761 t->recv_credits = 0; 1762 t->count_avail_recvmsg = 0; 1763 1764 t->recv_credit_max = smb_direct_receive_credit_max; 1765 t->recv_credit_target = 10; 1766 t->new_recv_credits = 0; 1767 1768 t->send_credit_target = smb_direct_send_credit_target; 1769 atomic_set(&t->send_credits, 0); 1770 atomic_set(&t->rw_credits, t->max_rw_credits); 1771 1772 t->max_send_size = 
smb_direct_max_send_size; 1773 t->max_recv_size = smb_direct_max_receive_size; 1774 t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; 1775 1776 cap->max_send_wr = max_send_wrs; 1777 cap->max_recv_wr = t->recv_credit_max; 1778 cap->max_send_sge = max_sge_per_wr; 1779 cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES; 1780 cap->max_inline_data = 0; 1781 cap->max_rdma_ctxs = t->max_rw_credits; 1782 return 0; 1783} 1784 1785static void smb_direct_destroy_pools(struct smb_direct_transport *t) 1786{ 1787 struct smb_direct_recvmsg *recvmsg; 1788 1789 while ((recvmsg = get_free_recvmsg(t))) 1790 mempool_free(recvmsg, t->recvmsg_mempool); 1791 while ((recvmsg = get_empty_recvmsg(t))) 1792 mempool_free(recvmsg, t->recvmsg_mempool); 1793 1794 mempool_destroy(t->recvmsg_mempool); 1795 t->recvmsg_mempool = NULL; 1796 1797 kmem_cache_destroy(t->recvmsg_cache); 1798 t->recvmsg_cache = NULL; 1799 1800 mempool_destroy(t->sendmsg_mempool); 1801 t->sendmsg_mempool = NULL; 1802 1803 kmem_cache_destroy(t->sendmsg_cache); 1804 t->sendmsg_cache = NULL; 1805} 1806 1807static int smb_direct_create_pools(struct smb_direct_transport *t) 1808{ 1809 char name[80]; 1810 int i; 1811 struct smb_direct_recvmsg *recvmsg; 1812 1813 snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t); 1814 t->sendmsg_cache = kmem_cache_create(name, 1815 sizeof(struct smb_direct_sendmsg) + 1816 sizeof(struct smb_direct_negotiate_resp), 1817 0, SLAB_HWCACHE_ALIGN, NULL); 1818 if (!t->sendmsg_cache) 1819 return -ENOMEM; 1820 1821 t->sendmsg_mempool = mempool_create(t->send_credit_target, 1822 mempool_alloc_slab, mempool_free_slab, 1823 t->sendmsg_cache); 1824 if (!t->sendmsg_mempool) 1825 goto err; 1826 1827 snprintf(name, sizeof(name), "smb_direct_resp_%p", t); 1828 t->recvmsg_cache = kmem_cache_create(name, 1829 sizeof(struct smb_direct_recvmsg) + 1830 t->max_recv_size, 1831 0, SLAB_HWCACHE_ALIGN, NULL); 1832 if (!t->recvmsg_cache) 1833 goto err; 1834 1835 t->recvmsg_mempool = 1836 
mempool_create(t->recv_credit_max, mempool_alloc_slab, 1837 mempool_free_slab, t->recvmsg_cache); 1838 if (!t->recvmsg_mempool) 1839 goto err; 1840 1841 INIT_LIST_HEAD(&t->recvmsg_queue); 1842 1843 for (i = 0; i < t->recv_credit_max; i++) { 1844 recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL); 1845 if (!recvmsg) 1846 goto err; 1847 recvmsg->transport = t; 1848 list_add(&recvmsg->list, &t->recvmsg_queue); 1849 } 1850 t->count_avail_recvmsg = t->recv_credit_max; 1851 1852 return 0; 1853err: 1854 smb_direct_destroy_pools(t); 1855 return -ENOMEM; 1856} 1857 1858static int smb_direct_create_qpair(struct smb_direct_transport *t, 1859 struct ib_qp_cap *cap) 1860{ 1861 int ret; 1862 struct ib_qp_init_attr qp_attr; 1863 int pages_per_rw; 1864 1865 t->pd = ib_alloc_pd(t->cm_id->device, 0); 1866 if (IS_ERR(t->pd)) { 1867 pr_err("Can't create RDMA PD\n"); 1868 ret = PTR_ERR(t->pd); 1869 t->pd = NULL; 1870 return ret; 1871 } 1872 1873 t->send_cq = ib_alloc_cq(t->cm_id->device, t, 1874 smb_direct_send_credit_target + cap->max_rdma_ctxs, 1875 0, IB_POLL_WORKQUEUE); 1876 if (IS_ERR(t->send_cq)) { 1877 pr_err("Can't create RDMA send CQ\n"); 1878 ret = PTR_ERR(t->send_cq); 1879 t->send_cq = NULL; 1880 goto err; 1881 } 1882 1883 t->recv_cq = ib_alloc_cq(t->cm_id->device, t, 1884 t->recv_credit_max, 0, IB_POLL_WORKQUEUE); 1885 if (IS_ERR(t->recv_cq)) { 1886 pr_err("Can't create RDMA recv CQ\n"); 1887 ret = PTR_ERR(t->recv_cq); 1888 t->recv_cq = NULL; 1889 goto err; 1890 } 1891 1892 memset(&qp_attr, 0, sizeof(qp_attr)); 1893 qp_attr.event_handler = smb_direct_qpair_handler; 1894 qp_attr.qp_context = t; 1895 qp_attr.cap = *cap; 1896 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1897 qp_attr.qp_type = IB_QPT_RC; 1898 qp_attr.send_cq = t->send_cq; 1899 qp_attr.recv_cq = t->recv_cq; 1900 qp_attr.port_num = ~0; 1901 1902 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr); 1903 if (ret) { 1904 pr_err("Can't create RDMA QP: %d\n", ret); 1905 goto err; 1906 } 1907 1908 t->qp = t->cm_id->qp; 
1909 t->cm_id->event_handler = smb_direct_cm_handler; 1910 1911 pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; 1912 if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { 1913 ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, 1914 t->max_rw_credits, IB_MR_TYPE_MEM_REG, 1915 t->pages_per_rw_credit, 0); 1916 if (ret) { 1917 pr_err("failed to init mr pool count %d pages %d\n", 1918 t->max_rw_credits, t->pages_per_rw_credit); 1919 goto err; 1920 } 1921 } 1922 1923 return 0; 1924err: 1925 if (t->qp) { 1926 ib_destroy_qp(t->qp); 1927 t->qp = NULL; 1928 } 1929 if (t->recv_cq) { 1930 ib_destroy_cq(t->recv_cq); 1931 t->recv_cq = NULL; 1932 } 1933 if (t->send_cq) { 1934 ib_destroy_cq(t->send_cq); 1935 t->send_cq = NULL; 1936 } 1937 if (t->pd) { 1938 ib_dealloc_pd(t->pd); 1939 t->pd = NULL; 1940 } 1941 return ret; 1942} 1943 1944static int smb_direct_prepare(struct ksmbd_transport *t) 1945{ 1946 struct smb_direct_transport *st = smb_trans_direct_transfort(t); 1947 struct smb_direct_recvmsg *recvmsg; 1948 struct smb_direct_negotiate_req *req; 1949 int ret; 1950 1951 ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); 1952 ret = wait_event_interruptible_timeout(st->wait_status, 1953 st->negotiation_requested || 1954 st->status == SMB_DIRECT_CS_DISCONNECTED, 1955 SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ); 1956 if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED) 1957 return ret < 0 ? 
ret : -ETIMEDOUT; 1958 1959 recvmsg = get_first_reassembly(st); 1960 if (!recvmsg) 1961 return -ECONNABORTED; 1962 1963 ret = smb_direct_check_recvmsg(recvmsg); 1964 if (ret == -ECONNABORTED) 1965 goto out; 1966 1967 req = (struct smb_direct_negotiate_req *)recvmsg->packet; 1968 st->max_recv_size = min_t(int, st->max_recv_size, 1969 le32_to_cpu(req->preferred_send_size)); 1970 st->max_send_size = min_t(int, st->max_send_size, 1971 le32_to_cpu(req->max_receive_size)); 1972 st->max_fragmented_send_size = 1973 le32_to_cpu(req->max_fragmented_size); 1974 st->max_fragmented_recv_size = 1975 (st->recv_credit_max * st->max_recv_size) / 2; 1976 1977 ret = smb_direct_send_negotiate_response(st, ret); 1978out: 1979 spin_lock_irq(&st->reassembly_queue_lock); 1980 st->reassembly_queue_length--; 1981 list_del(&recvmsg->list); 1982 spin_unlock_irq(&st->reassembly_queue_lock); 1983 put_recvmsg(st, recvmsg); 1984 1985 return ret; 1986} 1987 1988static int smb_direct_connect(struct smb_direct_transport *st) 1989{ 1990 int ret; 1991 struct ib_qp_cap qp_cap; 1992 1993 ret = smb_direct_init_params(st, &qp_cap); 1994 if (ret) { 1995 pr_err("Can't configure RDMA parameters\n"); 1996 return ret; 1997 } 1998 1999 ret = smb_direct_create_pools(st); 2000 if (ret) { 2001 pr_err("Can't init RDMA pool: %d\n", ret); 2002 return ret; 2003 } 2004 2005 ret = smb_direct_create_qpair(st, &qp_cap); 2006 if (ret) { 2007 pr_err("Can't accept RDMA client: %d\n", ret); 2008 return ret; 2009 } 2010 2011 ret = smb_direct_prepare_negotiation(st); 2012 if (ret) { 2013 pr_err("Can't negotiate: %d\n", ret); 2014 return ret; 2015 } 2016 return 0; 2017} 2018 2019static bool rdma_frwr_is_supported(struct ib_device_attr *attrs) 2020{ 2021 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 2022 return false; 2023 if (attrs->max_fast_reg_page_list_len == 0) 2024 return false; 2025 return true; 2026} 2027 2028static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) 2029{ 2030 struct 
smb_direct_transport *t; 2031 int ret; 2032 2033 if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) { 2034 ksmbd_debug(RDMA, 2035 "Fast Registration Work Requests is not supported. device capabilities=%llx\n", 2036 new_cm_id->device->attrs.device_cap_flags); 2037 return -EPROTONOSUPPORT; 2038 } 2039 2040 t = alloc_transport(new_cm_id); 2041 if (!t) 2042 return -ENOMEM; 2043 2044 ret = smb_direct_connect(t); 2045 if (ret) 2046 goto out_err; 2047 2048 KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop, 2049 KSMBD_TRANS(t)->conn, "ksmbd:r%u", 2050 smb_direct_port); 2051 if (IS_ERR(KSMBD_TRANS(t)->handler)) { 2052 ret = PTR_ERR(KSMBD_TRANS(t)->handler); 2053 pr_err("Can't start thread\n"); 2054 goto out_err; 2055 } 2056 2057 return 0; 2058out_err: 2059 free_transport(t); 2060 return ret; 2061} 2062 2063static int smb_direct_listen_handler(struct rdma_cm_id *cm_id, 2064 struct rdma_cm_event *event) 2065{ 2066 switch (event->event) { 2067 case RDMA_CM_EVENT_CONNECT_REQUEST: { 2068 int ret = smb_direct_handle_connect_request(cm_id); 2069 2070 if (ret) { 2071 pr_err("Can't create transport: %d\n", ret); 2072 return ret; 2073 } 2074 2075 ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n", 2076 cm_id); 2077 break; 2078 } 2079 default: 2080 pr_err("Unexpected listen event. 
cm_id=%p, event=%s (%d)\n", 2081 cm_id, rdma_event_msg(event->event), event->event); 2082 break; 2083 } 2084 return 0; 2085} 2086 2087static int smb_direct_listen(int port) 2088{ 2089 int ret; 2090 struct rdma_cm_id *cm_id; 2091 struct sockaddr_in sin = { 2092 .sin_family = AF_INET, 2093 .sin_addr.s_addr = htonl(INADDR_ANY), 2094 .sin_port = htons(port), 2095 }; 2096 2097 cm_id = rdma_create_id(&init_net, smb_direct_listen_handler, 2098 &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC); 2099 if (IS_ERR(cm_id)) { 2100 pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id)); 2101 return PTR_ERR(cm_id); 2102 } 2103 2104 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); 2105 if (ret) { 2106 pr_err("Can't bind: %d\n", ret); 2107 goto err; 2108 } 2109 2110 smb_direct_listener.cm_id = cm_id; 2111 2112 ret = rdma_listen(cm_id, 10); 2113 if (ret) { 2114 pr_err("Can't listen: %d\n", ret); 2115 goto err; 2116 } 2117 return 0; 2118err: 2119 smb_direct_listener.cm_id = NULL; 2120 rdma_destroy_id(cm_id); 2121 return ret; 2122} 2123 2124static int smb_direct_ib_client_add(struct ib_device *ib_dev) 2125{ 2126 struct smb_direct_device *smb_dev; 2127 2128 /* Set 5445 port if device type is iWARP(No IB) */ 2129 if (ib_dev->node_type != RDMA_NODE_IB_CA) 2130 smb_direct_port = SMB_DIRECT_PORT_IWARP; 2131 2132 if (!ib_dev->ops.get_netdev || 2133 !rdma_frwr_is_supported(&ib_dev->attrs)) 2134 return 0; 2135 2136 smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL); 2137 if (!smb_dev) 2138 return -ENOMEM; 2139 smb_dev->ib_dev = ib_dev; 2140 2141 write_lock(&smb_direct_device_lock); 2142 list_add(&smb_dev->list, &smb_direct_device_list); 2143 write_unlock(&smb_direct_device_lock); 2144 2145 ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name); 2146 return 0; 2147} 2148 2149static void smb_direct_ib_client_remove(struct ib_device *ib_dev, 2150 void *client_data) 2151{ 2152 struct smb_direct_device *smb_dev, *tmp; 2153 2154 write_lock(&smb_direct_device_lock); 2155 
list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) { 2156 if (smb_dev->ib_dev == ib_dev) { 2157 list_del(&smb_dev->list); 2158 kfree(smb_dev); 2159 break; 2160 } 2161 } 2162 write_unlock(&smb_direct_device_lock); 2163} 2164 2165static struct ib_client smb_direct_ib_client = { 2166 .name = "ksmbd_smb_direct_ib", 2167 .add = smb_direct_ib_client_add, 2168 .remove = smb_direct_ib_client_remove, 2169}; 2170 2171int ksmbd_rdma_init(void) 2172{ 2173 int ret; 2174 2175 smb_direct_listener.cm_id = NULL; 2176 2177 ret = ib_register_client(&smb_direct_ib_client); 2178 if (ret) { 2179 pr_err("failed to ib_register_client\n"); 2180 return ret; 2181 } 2182 2183 /* When a client is running out of send credits, the credits are 2184 * granted by the server's sending a packet using this queue. 2185 * This avoids the situation that a clients cannot send packets 2186 * for lack of credits 2187 */ 2188 smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq", 2189 WQ_HIGHPRI | WQ_MEM_RECLAIM, 0); 2190 if (!smb_direct_wq) 2191 return -ENOMEM; 2192 2193 ret = smb_direct_listen(smb_direct_port); 2194 if (ret) { 2195 destroy_workqueue(smb_direct_wq); 2196 smb_direct_wq = NULL; 2197 pr_err("Can't listen: %d\n", ret); 2198 return ret; 2199 } 2200 2201 ksmbd_debug(RDMA, "init RDMA listener. 
cm_id=%p\n", 2202 smb_direct_listener.cm_id); 2203 return 0; 2204} 2205 2206void ksmbd_rdma_destroy(void) 2207{ 2208 if (!smb_direct_listener.cm_id) 2209 return; 2210 2211 ib_unregister_client(&smb_direct_ib_client); 2212 rdma_destroy_id(smb_direct_listener.cm_id); 2213 2214 smb_direct_listener.cm_id = NULL; 2215 2216 if (smb_direct_wq) { 2217 destroy_workqueue(smb_direct_wq); 2218 smb_direct_wq = NULL; 2219 } 2220} 2221 2222bool ksmbd_rdma_capable_netdev(struct net_device *netdev) 2223{ 2224 struct smb_direct_device *smb_dev; 2225 int i; 2226 bool rdma_capable = false; 2227 2228 read_lock(&smb_direct_device_lock); 2229 list_for_each_entry(smb_dev, &smb_direct_device_list, list) { 2230 for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) { 2231 struct net_device *ndev; 2232 2233 ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev, 2234 i + 1); 2235 if (!ndev) 2236 continue; 2237 2238 if (ndev == netdev) { 2239 dev_put(ndev); 2240 rdma_capable = true; 2241 goto out; 2242 } 2243 dev_put(ndev); 2244 } 2245 } 2246out: 2247 read_unlock(&smb_direct_device_lock); 2248 2249 if (rdma_capable == false) { 2250 struct ib_device *ibdev; 2251 2252 ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); 2253 if (ibdev) { 2254 if (rdma_frwr_is_supported(&ibdev->attrs)) 2255 rdma_capable = true; 2256 ib_device_put(ibdev); 2257 } 2258 } 2259 2260 return rdma_capable; 2261} 2262 2263static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { 2264 .prepare = smb_direct_prepare, 2265 .disconnect = smb_direct_disconnect, 2266 .shutdown = smb_direct_shutdown, 2267 .writev = smb_direct_writev, 2268 .read = smb_direct_read, 2269 .rdma_read = smb_direct_rdma_read, 2270 .rdma_write = smb_direct_rdma_write, 2271};