libcxgb_ppm.c (13831B)
1/* 2 * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager 3 * 4 * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 * 34 * Written by: Karen Xie (kxie@chelsio.com) 35 */ 36 37#define DRV_NAME "libcxgb" 38#define pr_fmt(fmt) DRV_NAME ": " fmt 39 40#include <linux/kernel.h> 41#include <linux/module.h> 42#include <linux/errno.h> 43#include <linux/types.h> 44#include <linux/debugfs.h> 45#include <linux/export.h> 46#include <linux/list.h> 47#include <linux/skbuff.h> 48#include <linux/pci.h> 49#include <linux/scatterlist.h> 50 51#include "libcxgb_ppm.h" 52 53/* Direct Data Placement - 54 * Directly place the iSCSI Data-In or Data-Out PDU's payload into 55 * pre-posted final destination host-memory buffers based on the 56 * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) 57 * in Data-Out PDUs. The host memory address is programmed into 58 * h/w in the format of pagepod entries. The location of the 59 * pagepod entry is encoded into ddp tag which is used as the base 60 * for ITT/TTT. 61 */ 62 63/* Direct-Data Placement page size adjustment 64 */ 65int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz) 66{ 67 struct cxgbi_tag_format *tformat = &ppm->tformat; 68 int i; 69 70 for (i = 0; i < DDP_PGIDX_MAX; i++) { 71 if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT + 72 tformat->pgsz_order[i])) { 73 pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n", 74 __func__, ppm->ndev->name, pgsz, i); 75 return i; 76 } 77 } 78 pr_info("ippm: ddp page size %lu not supported.\n", pgsz); 79 return DDP_PGIDX_MAX; 80} 81 82/* DDP setup & teardown 83 */ 84static int ppm_find_unused_entries(unsigned long *bmap, 85 unsigned int max_ppods, 86 unsigned int start, 87 unsigned int nr, 88 unsigned int align_mask) 89{ 90 unsigned long i; 91 92 i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask); 93 94 if (unlikely(i >= max_ppods) && (start > nr)) 95 i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1, 96 align_mask); 97 if (unlikely(i >= max_ppods)) 98 return -ENOSPC; 99 100 bitmap_set(bmap, i, nr); 101 return (int)i; 102} 103 104static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count, 105 unsigned long caller_data) 106{ 107 struct cxgbi_ppod_data *pdata = ppm->ppod_data + i; 108 109 pdata->caller_data = caller_data; 110 pdata->npods = count; 111 112 if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1)) 113 pdata->color = 0; 114 else 115 pdata->color++; 116} 117 118static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count, 119 unsigned long caller_data) 120{ 121 struct cxgbi_ppm_pool *pool; 122 unsigned int cpu; 123 int i; 124 125 if (!ppm->pool) 126 return -EINVAL; 127 128 cpu = get_cpu(); 129 pool = per_cpu_ptr(ppm->pool, cpu); 130 spin_lock_bh(&pool->lock); 131 put_cpu(); 132 133 i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max, 134 pool->next, count, 0); 135 if (i < 0) { 136 pool->next = 0; 137 spin_unlock_bh(&pool->lock); 138 return -ENOSPC; 139 } 140 141 pool->next = i + count; 142 if (pool->next >= ppm->pool_index_max) 143 pool->next = 0; 144 145 spin_unlock_bh(&pool->lock); 146 147 pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n", 148 __func__, cpu, i, count, i + cpu * ppm->pool_index_max, 149 pool->next); 150 151 i += cpu * ppm->pool_index_max; 152 ppm_mark_entries(ppm, i, count, caller_data); 153 154 return i; 155} 156 157static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count, 158 unsigned long caller_data) 159{ 160 int i; 161 162 spin_lock_bh(&ppm->map_lock); 163 i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max, 164 ppm->next, count, 0); 165 if (i < 0) { 166 ppm->next = 0; 167 spin_unlock_bh(&ppm->map_lock); 168 pr_debug("ippm: NO suitable entries %u available.\n", 169 count); 170 return -ENOSPC; 171 } 172 173 ppm->next = i + count; 174 if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram)) 175 ppm->next = 0; 176 else if (ppm->next >= ppm->bmap_index_max) 177 ppm->next = 0; 178 179 spin_unlock_bh(&ppm->map_lock); 180 181 pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n", 182 __func__, i, count, i + ppm->pool_rsvd, ppm->next, 183 caller_data); 184 185 i += ppm->pool_rsvd; 186 ppm_mark_entries(ppm, i, count, caller_data); 187 188 return i; 189} 190 191static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count) 192{ 193 pr_debug("%s: idx %d + %d.\n", __func__, i, count); 194 195 if (i < ppm->pool_rsvd) { 196 unsigned int cpu; 197 struct cxgbi_ppm_pool *pool; 198 199 cpu = i / ppm->pool_index_max; 200 i %= ppm->pool_index_max; 201 202 pool = per_cpu_ptr(ppm->pool, cpu); 203 spin_lock_bh(&pool->lock); 204 bitmap_clear(pool->bmap, i, count); 205 206 if (i < pool->next) 207 pool->next = i; 208 spin_unlock_bh(&pool->lock); 209 210 pr_debug("%s: cpu %u, idx %d, next %u.\n", 211 __func__, cpu, i, pool->next); 212 } else { 213 spin_lock_bh(&ppm->map_lock); 214 215 i -= ppm->pool_rsvd; 216 bitmap_clear(ppm->ppod_bmap, i, count); 217 218 if (i < ppm->next) 219 ppm->next = i; 220 spin_unlock_bh(&ppm->map_lock); 221 222 pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next); 223 } 224} 225 226void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx) 227{ 228 struct cxgbi_ppod_data *pdata; 229 230 if (idx >= ppm->ppmax) { 231 pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax); 232 return; 233 } 234 235 pdata = ppm->ppod_data + idx; 236 if (!pdata->npods) { 237 pr_warn("ippm: idx %u, npods 0.\n", idx); 238 return; 239 } 240 241 pr_debug("release idx %u, npods %u.\n", idx, pdata->npods); 242 ppm_unmark_entries(ppm, idx, pdata->npods); 243} 244EXPORT_SYMBOL(cxgbi_ppm_ppod_release); 245 246int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages, 247 u32 per_tag_pg_idx, u32 *ppod_idx, 248 u32 *ddp_tag, unsigned long caller_data) 249{ 250 struct cxgbi_ppod_data *pdata; 251 unsigned int npods; 252 int idx = -1; 253 unsigned int hwidx; 254 u32 tag; 255 256 npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT; 257 if (!npods) { 258 pr_warn("%s: pages %u -> npods %u, full.\n", 259 __func__, nr_pages, npods); 260 return -EINVAL; 261 } 262 263 /* grab from cpu pool first */ 264 idx = ppm_get_cpu_entries(ppm, npods, caller_data); 265 /* try the general pool */ 266 if (idx < 0) 267 idx = ppm_get_entries(ppm, npods, caller_data); 268 if (idx < 0) { 269 pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n", 270 nr_pages, npods, ppm->next, caller_data); 271 return idx; 272 } 273 274 pdata = ppm->ppod_data + idx; 275 hwidx = ppm->base_idx + idx; 276 277 tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color); 278 279 if (per_tag_pg_idx) 280 tag |= (per_tag_pg_idx << 30) & 0xC0000000; 281 282 *ppod_idx = idx; 283 *ddp_tag = tag; 284 285 pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n", 286 nr_pages, tag, idx, npods, caller_data); 287 288 return npods; 289} 290EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve); 291 292void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag, 293 unsigned int tid, unsigned int offset, 294 unsigned int length, 295 struct cxgbi_pagepod_hdr *hdr) 296{ 297 /* The ddp tag in pagepod should be with bit 31:30 set to 0. 298 * The ddp Tag on the wire should be with non-zero 31:30 to the peer 299 */ 300 tag &= 0x3FFFFFFF; 301 302 hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid)); 303 304 hdr->rsvd = 0; 305 hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask); 306 hdr->max_offset = htonl(length); 307 hdr->page_offset = htonl(offset); 308 309 pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n", 310 tag, tid, length, offset); 311} 312EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr); 313 314static void ppm_free(struct cxgbi_ppm *ppm) 315{ 316 vfree(ppm); 317} 318 319static void ppm_destroy(struct kref *kref) 320{ 321 struct cxgbi_ppm *ppm = container_of(kref, 322 struct cxgbi_ppm, 323 refcnt); 324 pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n", 325 ppm->ndev->name, ppm); 326 327 *ppm->ppm_pp = NULL; 328 329 free_percpu(ppm->pool); 330 ppm_free(ppm); 331} 332 333int cxgbi_ppm_release(struct cxgbi_ppm *ppm) 334{ 335 if (ppm) { 336 int rv; 337 338 rv = kref_put(&ppm->refcnt, ppm_destroy); 339 return rv; 340 } 341 return 1; 342} 343EXPORT_SYMBOL(cxgbi_ppm_release); 344 345static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, 346 unsigned int *pcpu_ppmax) 347{ 348 struct cxgbi_ppm_pool *pools; 349 unsigned int ppmax = (*total) / num_possible_cpus(); 350 unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3; 351 unsigned int bmap; 352 unsigned int alloc_sz; 353 unsigned int count = 0; 354 unsigned int cpu; 355 356 /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */ 357 if (ppmax > max) 358 ppmax = max; 359 360 /* pool size must be multiple of unsigned long */ 361 bmap = ppmax / BITS_PER_TYPE(unsigned long); 362 if (!bmap) 363 return NULL; 364 365 ppmax = (bmap * sizeof(unsigned long)) << 3; 366 367 alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; 368 pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool)); 369 370 if (!pools) 371 return NULL; 372 373 for_each_possible_cpu(cpu) { 374 struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu); 375 376 memset(ppool, 0, alloc_sz); 377 spin_lock_init(&ppool->lock); 378 count += ppmax; 379 } 380 381 *total = count; 382 *pcpu_ppmax = ppmax; 383 384 return pools; 385} 386 387int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, 388 struct pci_dev *pdev, void *lldev, 389 struct cxgbi_tag_format *tformat, unsigned int iscsi_size, 390 unsigned int llimit, unsigned int start, 391 unsigned int reserve_factor, unsigned int iscsi_edram_start, 392 unsigned int iscsi_edram_size) 393{ 394 struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp); 395 struct cxgbi_ppm_pool *pool = NULL; 396 unsigned int pool_index_max = 0; 397 unsigned int ppmax_pool = 0; 398 unsigned int ppod_bmap_size; 399 unsigned int alloc_sz; 400 unsigned int ppmax; 401 402 if (!iscsi_edram_start) 403 iscsi_edram_size = 0; 404 405 if (iscsi_edram_size && 406 ((iscsi_edram_start + iscsi_edram_size) != start)) { 407 pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x " 408 "size 0x%x DDR start 0x%x\n", 409 iscsi_edram_start, iscsi_edram_size, start); 410 return -EINVAL; 411 } 412 413 if (iscsi_edram_size) { 414 reserve_factor = 0; 415 start = iscsi_edram_start; 416 } 417 418 ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT; 419 420 if (ppm) { 421 pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", 422 ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax); 423 kref_get(&ppm->refcnt); 424 return 1; 425 } 426 427 if (reserve_factor) { 428 ppmax_pool = ppmax / reserve_factor; 429 pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max); 430 if (!pool) { 431 ppmax_pool = 0; 432 reserve_factor = 0; 433 } 434 435 pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n", 436 ndev->name, ppmax, ppmax_pool, pool_index_max); 437 } 438 439 ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool); 440 alloc_sz = sizeof(struct cxgbi_ppm) + 441 ppmax * (sizeof(struct cxgbi_ppod_data)) + 442 ppod_bmap_size * sizeof(unsigned long); 443 444 ppm = vzalloc(alloc_sz); 445 if (!ppm) 446 goto release_ppm_pool; 447 448 ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]); 449 450 if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) { 451 unsigned int start = ppmax - ppmax_pool; 452 unsigned int end = ppod_bmap_size >> 3; 453 454 bitmap_set(ppm->ppod_bmap, ppmax, end - start); 455 pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n", 456 __func__, ppmax, ppmax_pool, ppod_bmap_size, start, 457 end); 458 } 459 if (iscsi_edram_size) { 460 unsigned int first_ddr_idx = 461 iscsi_edram_size >> PPOD_SIZE_SHIFT; 462 463 ppm->max_index_in_edram = first_ddr_idx - 1; 464 bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1); 465 pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx); 466 } 467 468 spin_lock_init(&ppm->map_lock); 469 kref_init(&ppm->refcnt); 470 471 memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format)); 472 473 ppm->ppm_pp = ppm_pp; 474 ppm->ndev = ndev; 475 ppm->pdev = pdev; 476 ppm->lldev = lldev; 477 ppm->ppmax = ppmax; 478 ppm->next = 0; 479 ppm->llimit = llimit; 480 ppm->base_idx = start > llimit ? 481 (start - llimit + 1) >> PPOD_SIZE_SHIFT : 0; 482 ppm->bmap_index_max = ppmax - ppmax_pool; 483 484 ppm->pool = pool; 485 ppm->pool_rsvd = ppmax_pool; 486 ppm->pool_index_max = pool_index_max; 487 488 /* check one more time */ 489 if (*ppm_pp) { 490 ppm_free(ppm); 491 ppm = (struct cxgbi_ppm *)(*ppm_pp); 492 493 pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", 494 ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax); 495 496 kref_get(&ppm->refcnt); 497 return 1; 498 } 499 *ppm_pp = ppm; 500 501 ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE); 502 503 pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n", 504 ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE, 505 ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd, 506 ppm->pool_index_max); 507 508 return 0; 509 510release_ppm_pool: 511 free_percpu(pool); 512 return -ENOMEM; 513} 514EXPORT_SYMBOL(cxgbi_ppm_init); 515 516unsigned int cxgbi_tagmask_set(unsigned int ppmax) 517{ 518 unsigned int bits = fls(ppmax); 519 520 if (bits > PPOD_IDX_MAX_SIZE) 521 bits = PPOD_IDX_MAX_SIZE; 522 523 pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n", 524 ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT)); 525 526 return 1 << (bits + PPOD_IDX_SHIFT); 527} 528EXPORT_SYMBOL(cxgbi_tagmask_set); 529 530MODULE_AUTHOR("Chelsio Communications"); 531MODULE_DESCRIPTION("Chelsio common library"); 532MODULE_LICENSE("Dual BSD/GPL");