nxu.h (18722B)
1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2/* 3 * Hardware interface of the NX-GZIP compression accelerator 4 * 5 * Copyright (C) IBM Corporation, 2020 6 * 7 * Author: Bulent Abali <abali@us.ibm.com> 8 * 9 */ 10 11#ifndef _NXU_H 12#define _NXU_H 13 14#include <stdint.h> 15#include <endian.h> 16#include "nx.h" 17 18/* deflate */ 19#define LLSZ 286 20#define DSZ 30 21 22/* nx */ 23#define DHTSZ 18 24#define DHT_MAXSZ 288 25#define MAX_DDE_COUNT 256 26 27/* util */ 28#ifdef NXDBG 29#define NXPRT(X) X 30#else 31#define NXPRT(X) 32#endif 33 34#ifdef NXTIMER 35#include <sys/platform/ppc.h> 36#define NX_CLK(X) X 37#define nx_get_time() __ppc_get_timebase() 38#define nx_get_freq() __ppc_get_timebase_freq() 39#else 40#define NX_CLK(X) 41#define nx_get_time() (-1) 42#define nx_get_freq() (-1) 43#endif 44 45#define NX_MAX_FAULTS 500 46 47/* 48 * Definitions of acronyms used here. See 49 * P9 NX Gzip Accelerator User's Manual for details: 50 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf 51 * 52 * adler/crc: 32 bit checksums appended to stream tail 53 * ce: completion extension 54 * cpb: coprocessor parameter block (metadata) 55 * crb: coprocessor request block (command) 56 * csb: coprocessor status block (status) 57 * dht: dynamic huffman table 58 * dde: data descriptor element (address, length) 59 * ddl: list of ddes 60 * dh/fh: dynamic and fixed huffman types 61 * fc: coprocessor function code 62 * histlen: history/dictionary length 63 * history: sliding window of up to 32KB of data 64 * lzcount: Deflate LZ symbol counts 65 * rembytecnt: remaining byte count 66 * sfbt: source final block type; last block's type during decomp 67 * spbc: source processed byte count 68 * subc: source unprocessed bit count 69 * tebc: target ending bit count; valid bits in the last byte 70 * tpbc: target processed byte count 71 * vas: virtual accelerator switch; the user mode interface 72 */ 73 74union nx_qw_t { 75 uint32_t word[4]; 76 uint64_t dword[2]; 77} __aligned(16); 78 79/* 80 * Note: NX registers with fewer than 32 bits are declared by 81 * convention as uint32_t variables in unions. If *_offset and *_mask 82 * are defined for a variable, then use get_ put_ macros to 83 * conveniently access the register fields for endian conversions. 84 */ 85 86struct nx_dde_t { 87 /* Data Descriptor Element, Section 6.4 */ 88 union { 89 uint32_t dde_count; 90 /* When dde_count == 0 ddead is a pointer to a data buffer; 91 * ddebc is the buffer length bytes. 92 * When dde_count > 0 dde is an indirect dde; ddead is a 93 * pointer to a contiguous list of direct ddes; ddebc is the 94 * total length of all data pointed to by the list of direct 95 * ddes. Note that only one level of indirection is permitted. 96 * See Section 6.4 of the user manual for additional details. 97 */ 98 }; 99 uint32_t ddebc; /* dde byte count */ 100 uint64_t ddead; /* dde address */ 101} __aligned(16); 102 103struct nx_csb_t { 104 /* Coprocessor Status Block, Section 6.6 */ 105 union { 106 uint32_t csb_v; 107 /* Valid bit. v must be set to 0 by the program 108 * before submitting the coprocessor command. 109 * Software can poll for the v bit 110 */ 111 112 uint32_t csb_f; 113 /* 16B CSB size. Written to 0 by DMA when it writes the CPB */ 114 115 uint32_t csb_cs; 116 /* cs completion sequence; unused */ 117 118 uint32_t csb_cc; 119 /* cc completion code; cc != 0 exception occurred */ 120 121 uint32_t csb_ce; 122 /* ce completion extension */ 123 124 }; 125 uint32_t tpbc; 126 /* target processed byte count TPBC */ 127 128 uint64_t fsaddr; 129 /* Section 6.12.1 CSB NonZero error summary. FSA Failing storage 130 * address. Address where error occurred. When available, written 131 * to A field of CSB 132 */ 133} __aligned(16); 134 135struct nx_ccb_t { 136 /* Coprocessor Completion Block, Section 6.7 */ 137 138 uint32_t reserved[3]; 139 union { 140 /* When crb.c==0 (no ccb defined) it is reserved; 141 * When crb.c==1 (ccb defined) it is cm 142 */ 143 144 uint32_t ccb_cm; 145 /* Signal interrupt of crb.c==1 and cm==1 */ 146 147 uint32_t word; 148 /* generic access to the 32bit word */ 149 }; 150} __aligned(16); 151 152struct vas_stamped_crb_t { 153 /* 154 * CRB operand of the paste coprocessor instruction is stamped 155 * in quadword 4 with the information shown here as its written 156 * in to the receive FIFO of the coprocessor 157 */ 158 159 union { 160 uint32_t vas_buf_num; 161 /* Verification only vas buffer number which correlates to 162 * the low order bits of the atag in the paste command 163 */ 164 165 uint32_t send_wc_id; 166 /* Pointer to Send Window Context that provides for NX address 167 * translation information, such as MSR and LPCR bits, job 168 * completion interrupt RA, PSWID, and job utilization counter. 169 */ 170 171 }; 172 union { 173 uint32_t recv_wc_id; 174 /* Pointer to Receive Window Context. NX uses this to return 175 * credits to a Receive FIFO as entries are dequeued. 176 */ 177 178 }; 179 uint32_t reserved2; 180 union { 181 uint32_t vas_invalid; 182 /* Invalid bit. If this bit is 1 the CRB is discarded by 183 * NX upon fetching from the receive FIFO. If this bit is 0 184 * the CRB is processed normally. The bit is stamped to 0 185 * by VAS and may be written to 1 by hypervisor while 186 * the CRB is in the receive FIFO (in memory). 187 */ 188 189 }; 190}; 191 192struct nx_stamped_fault_crb_t { 193 /* 194 * A CRB that has a translation fault is stamped by NX in quadword 4 195 * and pasted to the Fault Send Window in VAS. 196 */ 197 uint64_t fsa; 198 union { 199 uint32_t nxsf_t; 200 uint32_t nxsf_fs; 201 }; 202 uint32_t pswid; 203}; 204 205union stamped_crb_t { 206 struct vas_stamped_crb_t vas; 207 struct nx_stamped_fault_crb_t nx; 208}; 209 210struct nx_gzip_cpb_t { 211 /* 212 * Coprocessor Parameter Block In/Out are used to pass metadata 213 * to/from accelerator. Tables 6.5 and 6.6 of the user manual. 214 */ 215 216 /* CPBInput */ 217 218 struct { 219 union { 220 union nx_qw_t qw0; 221 struct { 222 uint32_t in_adler; /* bits 0:31 */ 223 uint32_t in_crc; /* bits 32:63 */ 224 union { 225 uint32_t in_histlen; /* bits 64:75 */ 226 uint32_t in_subc; /* bits 93:95 */ 227 }; 228 union { 229 /* bits 108:111 */ 230 uint32_t in_sfbt; 231 /* bits 112:127 */ 232 uint32_t in_rembytecnt; 233 /* bits 116:127 */ 234 uint32_t in_dhtlen; 235 }; 236 }; 237 }; 238 union { 239 union nx_qw_t in_dht[DHTSZ]; /* qw[1:18] */ 240 char in_dht_char[DHT_MAXSZ]; /* byte access */ 241 }; 242 union nx_qw_t reserved[5]; /* qw[19:23] */ 243 }; 244 245 /* CPBOutput */ 246 247 volatile struct { 248 union { 249 union nx_qw_t qw24; 250 struct { 251 uint32_t out_adler; /* bits 0:31 qw[24] */ 252 uint32_t out_crc; /* bits 32:63 qw[24] */ 253 union { 254 /* bits 77:79 qw[24] */ 255 uint32_t out_tebc; 256 /* bits 80:95 qw[24] */ 257 uint32_t out_subc; 258 }; 259 union { 260 /* bits 108:111 qw[24] */ 261 uint32_t out_sfbt; 262 /* bits 112:127 qw[24] */ 263 uint32_t out_rembytecnt; 264 /* bits 116:127 qw[24] */ 265 uint32_t out_dhtlen; 266 }; 267 }; 268 }; 269 union { 270 union nx_qw_t qw25[79]; /* qw[25:103] */ 271 /* qw[25] compress no lzcounts or wrap */ 272 uint32_t out_spbc_comp_wrap; 273 uint32_t out_spbc_wrap; /* qw[25] wrap */ 274 /* qw[25] compress no lzcounts */ 275 uint32_t out_spbc_comp; 276 /* 286 LL and 30 D symbol counts */ 277 uint32_t out_lzcount[LLSZ+DSZ]; 278 struct { 279 union nx_qw_t out_dht[DHTSZ]; /* qw[25:42] */ 280 /* qw[43] decompress */ 281 uint32_t out_spbc_decomp; 282 }; 283 }; 284 /* qw[104] compress with lzcounts */ 285 uint32_t out_spbc_comp_with_count; 286 }; 287} __aligned(128); 288 289struct nx_gzip_crb_t { 290 union { /* byte[0:3] */ 291 uint32_t gzip_fc; /* bits[24-31] */ 292 }; 293 uint32_t reserved1; /* byte[4:7] */ 294 union { 295 uint64_t csb_address; /* byte[8:15] */ 296 struct { 297 uint32_t reserved2; 298 union { 299 uint32_t crb_c; 300 /* c==0 no ccb defined */ 301 302 uint32_t crb_at; 303 /* at==0 address type is ignored; 304 * all addrs effective assumed. 305 */ 306 307 }; 308 }; 309 }; 310 struct nx_dde_t source_dde; /* byte[16:31] */ 311 struct nx_dde_t target_dde; /* byte[32:47] */ 312 volatile struct nx_ccb_t ccb; /* byte[48:63] */ 313 volatile union { 314 /* byte[64:239] shift csb by 128 bytes out of the crb; csb was 315 * in crb earlier; JReilly says csb written with partial inject 316 */ 317 union nx_qw_t reserved64[11]; 318 union stamped_crb_t stamp; /* byte[64:79] */ 319 }; 320 volatile struct nx_csb_t csb; 321} __aligned(128); 322 323struct nx_gzip_crb_cpb_t { 324 struct nx_gzip_crb_t crb; 325 struct nx_gzip_cpb_t cpb; 326} __aligned(2048); 327 328 329/* 330 * NX hardware convention has the msb bit on the left numbered 0. 331 * The defines below has *_offset defined as the right most bit 332 * position of a field. x of size_mask(x) is the field width in bits. 333 */ 334 335#define size_mask(x) ((1U<<(x))-1) 336 337/* 338 * Offsets and Widths within the containing 32 bits of the various NX 339 * gzip hardware registers. Use the getnn/putnn macros to access 340 * these regs 341 */ 342 343#define dde_count_mask size_mask(8) 344#define dde_count_offset 23 345 346/* CSB */ 347 348#define csb_v_mask size_mask(1) 349#define csb_v_offset 0 350#define csb_f_mask size_mask(1) 351#define csb_f_offset 6 352#define csb_cs_mask size_mask(8) 353#define csb_cs_offset 15 354#define csb_cc_mask size_mask(8) 355#define csb_cc_offset 23 356#define csb_ce_mask size_mask(8) 357#define csb_ce_offset 31 358 359/* CCB */ 360 361#define ccb_cm_mask size_mask(3) 362#define ccb_cm_offset 31 363 364/* VAS stamped CRB fields */ 365 366#define vas_buf_num_mask size_mask(6) 367#define vas_buf_num_offset 5 368#define send_wc_id_mask size_mask(16) 369#define send_wc_id_offset 31 370#define recv_wc_id_mask size_mask(16) 371#define recv_wc_id_offset 31 372#define vas_invalid_mask size_mask(1) 373#define vas_invalid_offset 31 374 375/* NX stamped fault CRB fields */ 376 377#define nxsf_t_mask size_mask(1) 378#define nxsf_t_offset 23 379#define nxsf_fs_mask size_mask(8) 380#define nxsf_fs_offset 31 381 382/* CPB input */ 383 384#define in_histlen_mask size_mask(12) 385#define in_histlen_offset 11 386#define in_dhtlen_mask size_mask(12) 387#define in_dhtlen_offset 31 388#define in_subc_mask size_mask(3) 389#define in_subc_offset 31 390#define in_sfbt_mask size_mask(4) 391#define in_sfbt_offset 15 392#define in_rembytecnt_mask size_mask(16) 393#define in_rembytecnt_offset 31 394 395/* CPB output */ 396 397#define out_tebc_mask size_mask(3) 398#define out_tebc_offset 15 399#define out_subc_mask size_mask(16) 400#define out_subc_offset 31 401#define out_sfbt_mask size_mask(4) 402#define out_sfbt_offset 15 403#define out_rembytecnt_mask size_mask(16) 404#define out_rembytecnt_offset 31 405#define out_dhtlen_mask size_mask(12) 406#define out_dhtlen_offset 31 407 408/* CRB */ 409 410#define gzip_fc_mask size_mask(8) 411#define gzip_fc_offset 31 412#define crb_c_mask size_mask(1) 413#define crb_c_offset 28 414#define crb_at_mask size_mask(1) 415#define crb_at_offset 30 416#define csb_address_mask ~(15UL) /* mask off bottom 4b */ 417 418/* 419 * Access macros for the registers. Do not access registers directly 420 * because of the endian conversion. P9 processor may run either as 421 * Little or Big endian. However the NX coprocessor regs are always 422 * big endian. 423 * Use the 32 and 64b macros to access respective 424 * register sizes. 425 * Use nn forms for the register fields shorter than 32 bits. 426 */ 427 428#define getnn(ST, REG) ((be32toh(ST.REG) >> (31-REG##_offset)) \ 429 & REG##_mask) 430#define getpnn(ST, REG) ((be32toh((ST)->REG) >> (31-REG##_offset)) \ 431 & REG##_mask) 432#define get32(ST, REG) (be32toh(ST.REG)) 433#define getp32(ST, REG) (be32toh((ST)->REG)) 434#define get64(ST, REG) (be64toh(ST.REG)) 435#define getp64(ST, REG) (be64toh((ST)->REG)) 436 437#define unget32(ST, REG) (get32(ST, REG) & ~((REG##_mask) \ 438 << (31-REG##_offset))) 439/* get 32bits less the REG field */ 440 441#define ungetp32(ST, REG) (getp32(ST, REG) & ~((REG##_mask) \ 442 << (31-REG##_offset))) 443/* get 32bits less the REG field */ 444 445#define clear_regs(ST) memset((void *)(&(ST)), 0, sizeof(ST)) 446#define clear_dde(ST) do { ST.dde_count = ST.ddebc = 0; ST.ddead = 0; \ 447 } while (0) 448#define clearp_dde(ST) do { (ST)->dde_count = (ST)->ddebc = 0; \ 449 (ST)->ddead = 0; \ 450 } while (0) 451#define clear_struct(ST) memset((void *)(&(ST)), 0, sizeof(ST)) 452#define putnn(ST, REG, X) (ST.REG = htobe32(unget32(ST, REG) | (((X) \ 453 & REG##_mask) << (31-REG##_offset)))) 454#define putpnn(ST, REG, X) ((ST)->REG = htobe32(ungetp32(ST, REG) \ 455 | (((X) & REG##_mask) << (31-REG##_offset)))) 456 457#define put32(ST, REG, X) (ST.REG = htobe32(X)) 458#define putp32(ST, REG, X) ((ST)->REG = htobe32(X)) 459#define put64(ST, REG, X) (ST.REG = htobe64(X)) 460#define putp64(ST, REG, X) ((ST)->REG = htobe64(X)) 461 462/* 463 * Completion extension ce(0) ce(1) ce(2). Bits ce(3-7) 464 * unused. Section 6.6 Figure 6.7. 465 */ 466 467#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce)) 468#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5) 469#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5)) 470 471#define CSB_CE_PARTIAL 0x4 472#define CSB_CE_TERMINATE 0x2 473#define CSB_CE_TPBC_VALID 0x1 474 475#define csb_ce_termination(X) (!!((X) & CSB_CE_TERMINATE)) 476/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */ 477 478#define csb_ce_check_completion(X) (!csb_ce_termination(X)) 479/* if not terminated then check full or partial completion */ 480 481#define csb_ce_partial_completion(X) (!!((X) & CSB_CE_PARTIAL)) 482#define csb_ce_full_completion(X) (!csb_ce_partial_completion(X)) 483#define csb_ce_tpbc_valid(X) (!!((X) & CSB_CE_TPBC_VALID)) 484/* TPBC indicates successfully stored data count */ 485 486#define csb_ce_default_err(X) csb_ce_termination(X) 487/* most error CEs have CE(0)=0 and CE(1)=1 */ 488 489#define csb_ce_cc3_partial(X) csb_ce_partial_completion(X) 490/* some CC=3 are partially completed, Table 6-8 */ 491 492#define csb_ce_cc64(X) ((X)&(CSB_CE_PARTIAL \ 493 | CSB_CE_TERMINATE) == 0) 494/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't 495 * compress smaller than source. 496 */ 497 498/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */ 499 500#define SFBT_BFINAL 0x1 501#define SFBT_LIT 0x4 502#define SFBT_FHT 0x5 503#define SFBT_DHT 0x6 504#define SFBT_HDR 0x7 505 506/* 507 * NX gzip function codes. Table 6.2. 508 * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to 509 * select one of the two Byte Count Limits. 510 */ 511 512#define GZIP_FC_LIMIT_MASK 0x01 513#define GZIP_FC_COMPRESS_FHT 0x00 514#define GZIP_FC_COMPRESS_DHT 0x02 515#define GZIP_FC_COMPRESS_FHT_COUNT 0x04 516#define GZIP_FC_COMPRESS_DHT_COUNT 0x06 517#define GZIP_FC_COMPRESS_RESUME_FHT 0x08 518#define GZIP_FC_COMPRESS_RESUME_DHT 0x0a 519#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT 0x0c 520#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT 0x0e 521#define GZIP_FC_DECOMPRESS 0x10 522#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND 0x12 523#define GZIP_FC_DECOMPRESS_RESUME 0x14 524#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND 0x16 525#define GZIP_FC_WRAP 0x1e 526 527#define fc_is_compress(fc) (((fc) & 0x10) == 0) 528#define fc_has_count(fc) (fc_is_compress(fc) && (((fc) & 0x4) != 0)) 529 530/* CSB.CC Error codes */ 531 532#define ERR_NX_OK 0 533#define ERR_NX_ALIGNMENT 1 534#define ERR_NX_OPOVERLAP 2 535#define ERR_NX_DATA_LENGTH 3 536#define ERR_NX_TRANSLATION 5 537#define ERR_NX_PROTECTION 6 538#define ERR_NX_EXTERNAL_UE7 7 539#define ERR_NX_INVALID_OP 8 540#define ERR_NX_PRIVILEGE 9 541#define ERR_NX_INTERNAL_UE 10 542#define ERR_NX_EXTERN_UE_WR 12 543#define ERR_NX_TARGET_SPACE 13 544#define ERR_NX_EXCESSIVE_DDE 14 545#define ERR_NX_TRANSL_WR 15 546#define ERR_NX_PROTECT_WR 16 547#define ERR_NX_SUBFUNCTION 17 548#define ERR_NX_FUNC_ABORT 18 549#define ERR_NX_BYTE_MAX 19 550#define ERR_NX_CORRUPT_CRB 20 551#define ERR_NX_INVALID_CRB 21 552#define ERR_NX_INVALID_DDE 30 553#define ERR_NX_SEGMENTED_DDL 31 554#define ERR_NX_DDE_OVERFLOW 33 555#define ERR_NX_TPBC_GT_SPBC 64 556#define ERR_NX_MISSING_CODE 66 557#define ERR_NX_INVALID_DIST 67 558#define ERR_NX_INVALID_DHT 68 559#define ERR_NX_EXTERNAL_UE90 90 560#define ERR_NX_WDOG_TIMER 224 561#define ERR_NX_AT_FAULT 250 562#define ERR_NX_INTR_SERVER 252 563#define ERR_NX_UE253 253 564#define ERR_NX_NO_HW 254 565#define ERR_NX_HUNG_OP 255 566#define ERR_NX_END 256 567 568/* initial values for non-resume operations */ 569#define INIT_CRC 0 /* crc32(0L, Z_NULL, 0) */ 570#define INIT_ADLER 1 /* adler32(0L, Z_NULL, 0) adler is initialized to 1 */ 571 572/* prototypes */ 573int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle); 574 575extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx); 576extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr); 577 578/* caller supplies a print buffer 4*sizeof(crb) */ 579 580char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf); 581char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf); 582char *nx_prt_hex(void *cp, int sz, char *prbuf); 583char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf); 584char *nx_strerror(int e); 585 586#ifdef NX_SIM 587#include <stdio.h> 588int nx_sim_init(void *ctx); 589int nx_sim_end(void *ctx); 590int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx); 591#endif /* NX_SIM */ 592 593/* Deflate stream manipulation */ 594 595#define set_final_bit(x) (x |= (unsigned char)1) 596#define clr_final_bit(x) (x &= ~(unsigned char)1) 597 598#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \ 599 } while (0) 600/* append 10 bits 0000001b 00...... ; 601 * assumes appending starts on a byte boundary; b is the final bit. 602 */ 603 604 605#ifdef NX_842 606 607/* 842 Engine */ 608 609struct nx_eft_crb_t { 610 union { /* byte[0:3] */ 611 uint32_t eft_fc; /* bits[29-31] */ 612 }; 613 uint32_t reserved1; /* byte[4:7] */ 614 union { 615 uint64_t csb_address; /* byte[8:15] */ 616 struct { 617 uint32_t reserved2; 618 union { 619 uint32_t crb_c; 620 /* c==0 no ccb defined */ 621 622 uint32_t crb_at; 623 /* at==0 address type is ignored; 624 * all addrs effective assumed. 625 */ 626 627 }; 628 }; 629 }; 630 struct nx_dde_t source_dde; /* byte[16:31] */ 631 struct nx_dde_t target_dde; /* byte[32:47] */ 632 struct nx_ccb_t ccb; /* byte[48:63] */ 633 union { 634 union nx_qw_t reserved64[3]; /* byte[64:96] */ 635 }; 636 struct nx_csb_t csb; 637} __aligned(128); 638 639/* 842 CRB */ 640 641#define EFT_FC_MASK size_mask(3) 642#define EFT_FC_OFFSET 31 643#define EFT_FC_COMPRESS 0x0 644#define EFT_FC_COMPRESS_WITH_CRC 0x1 645#define EFT_FC_DECOMPRESS 0x2 646#define EFT_FC_DECOMPRESS_WITH_CRC 0x3 647#define EFT_FC_BLK_DATA_MOVE 0x4 648#endif /* NX_842 */ 649 650#endif /* _NXU_H */