circ.c (16251B)
/*
 *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>

/*
 * Debug tracing.  The printf call is always compiled (so its arguments stay
 * type-checked) but only executes when DEBUG is nonzero.
 */
#define DEBUG          0
#define DEBUG_PRINTF(...) \
    do { \
        if (DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)


/* Every test buffer is NBYTES (256) bytes; N<type>S is its element count. */
#define NBYTES         (1 << 8)
#define NHALFS         (NBYTES / sizeof(short))
#define NWORDS         (NBYTES / sizeof(int))
#define NDOBLS         (NBYTES / sizeof(long long))

/* One buffer per access width, each aligned to 4096 (1 << 12) bytes. */
long long     dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0};
int           wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0};
short         hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0};
unsigned char bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0};

/*
 * We use the C preprocessor to deal with the combinations of types
 */

/*
 * INIT(BUF, N) defines init_<BUF>(), which sets BUF[i] = i for the first
 * N elements.  The load tests rely on this pattern to predict the loaded
 * value, and the store tests rely on it to detect out-of-range writes.
 */
#define INIT(BUF, N) \
    void init_##BUF(void) \
    { \
        int i; \
        for (i = 0; i < N; i++) { \
            BUF[i] = i; \
        } \
    }

INIT(bbuf, NBYTES)
INIT(hbuf, NHALFS)
INIT(wbuf, NWORDS)
INIT(dbuf, NDOBLS)

/*
 * Macros for performing circular load
 *     RES         result
 *     ADDR        address
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * CIRC_LOAD_IMM copies LEN into m0 (through the scratch register r4) and
 * START into cs0, then issues mem<SIZE> with a "++#INC:circ(M0)"
 * post-increment.  ADDR is a read/write operand, so the caller's pointer is
 * updated by the instruction.  INC is stringified into the instruction text
 * and therefore must expand to something the assembler accepts as an
 * immediate expression.
 */
#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %2\n\t" \
        "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(START), "r"(LEN) \
        : "r4", "m0", "cs0")
#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC)

/*
 * The mreg has the following pieces
 *     mreg[31:28]              increment[10:7]
 *     mreg[27:24]              K value (used in Hexagon v3 and earlier)
 *     mreg[23:17]              increment[6:0]
 *     mreg[16:0]               circular buffer length
 *
 * The fields are assembled in unsigned arithmetic: with a negative increment
 * the top field sets bit 31, and shifting a 1 into the sign bit of a signed
 * int is undefined behaviour.  The final conversion back to int keeps the
 * same bit pattern on GCC and Clang (conversion modulo 2^32).
 */
static int build_mreg(int inc, int K, int len)
{
    unsigned int mreg = (((unsigned int)inc & 0x780u) << 21) |
                        (((unsigned int)K & 0xfu) << 24) |
                        (((unsigned int)inc & 0x7fu) << 17) |
                        ((unsigned int)len & 0x1ffffu);

    return (int)mreg;
}

/*
 * Register-increment flavour of the circular load.  The modifier word built
 * by build_mreg(INC, 0, LEN) is copied into m1 (through r4), START goes into
 * cs1, and the access uses "++I:circ(M1)".  ADDR is updated in place.
 */
#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %2\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %3\n\t" \
        "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START) \
        : "r4", "m1", "cs1")
#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC)

/*
 * Macros for performing circular store
 *     VAL         value to store
 *     ADDR        address
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * PART is appended to the source register in the instruction text: "" for
 * the plain stores and ".H" for the _f variants (whose callers shift the
 * value left by 16).  The "memory" clobber tells the compiler that the asm
 * writes to memory it cannot see through the operands.
 */
#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %1\n\t" \
        "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \
        : "+r"(ADDR) \
        : "r"(START), "r"(VAL), "r"(LEN) \
        : "r4", "m0", "cs0", "memory")
#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC)

/*
 * "New-value" immediate store: VAL is first copied into r5 inside the same
 * { ... } packet as the store, and the store consumes it as r5.new.
 */
#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %1\n\t" \
        "{\n\t" \
        " r5 = %2\n\t" \
        " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \
        "}\n\t" \
        : "+r"(ADDR) \
        : "r"(START), "r"(VAL), "r"(LEN) \
        : "r4", "r5", "m0", "cs0", "memory")
#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC)

/*
 * Register-increment flavour of the circular store: m1 receives
 * build_mreg(INC, 0, LEN), cs1 receives START, and the access uses
 * "++I:circ(M1)".
 */
#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %1\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %2\n\t" \
        "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \
        : "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START), \
          "r"(VAL) \
        : "r4", "m1", "cs1", "memory")
#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC)

/* "New-value" register-increment store; see CIRC_STORE_NEW_IMM for r5.new. */
#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %1\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %2\n\t" \
        "{\n\t" \
        " r5 = %3\n\t" \
        " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \
        "}\n\t" \
        : "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START), \
          "r"(VAL) \
        : "r4", "r5", "m1", "cs1", "memory")
#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC)


/* Number of mismatches detected so far; main() reports FAIL if nonzero. */
int err;

/*
 * Check the value returned by the i-th circular load.
 *     i           iteration number (0-based)
 *     result      value that was loaded
 *     inc         increment in elements
 *     size        circular buffer length in elements
 *
 * Because the buffers hold BUF[k] == k, the expected value is i * inc
 * wrapped into the range [0, size).  A mismatch is printed and counted in
 * err.
 */
/* We'll test increments +1 and -1 */
void check_load(int i, long long result, int inc, int size)
{
    int expect = (i * inc);
    while (expect >= size) {
        expect -= size;
    }
    while (expect < 0) {
        expect += size;
    }
    if (result != expect) {
        printf("ERROR(%d): %lld != %d\n", i, result, expect);
        err++;
    }
}

/*
 * TEST_LOAD_IMM defines circ_test_load_imm_<SZ>(): BUFSIZE circular loads
 * over a 10-element window with increment +INC bytes, then BUFSIZE more
 * from the buffer start with increment -INC bytes.  Each result is checked
 * by check_load with the increment converted to elements.
 */
#define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \
void circ_test_load_imm_##SZ(void) \
{ \
    TYPE *p = (TYPE *)BUF; \
    int size = 10; \
    int i; \
    for (i = 0; i < BUFSIZE; i++) { \
        TYPE element; \
        CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \
        DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
                     i, p, element); \
        check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \
    } \
    p = (TYPE *)BUF; \
    for (i = 0; i < BUFSIZE; i++) { \
        TYPE element; \
        CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \
        DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
                     i, p, element); \
        check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \
    } \
}

TEST_LOAD_IMM(b,  char,           bbuf, NBYTES, 1, d)
TEST_LOAD_IMM(ub, unsigned char,  bbuf, NBYTES, 1, d)
TEST_LOAD_IMM(h,  short,          hbuf, NHALFS, 2, d)
TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d)
TEST_LOAD_IMM(w,  int,            wbuf, NWORDS, 4, d)
TEST_LOAD_IMM(d,  long long,      dbuf, NDOBLS, 8, lld)

/*
 * TEST_LOAD_REG defines circ_test_load_reg_<SZ>(): the same two passes as
 * TEST_LOAD_IMM, but over a 13-element window and with the increment
 * (+1, then -1) supplied through the modifier register.
 */
#define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \
void circ_test_load_reg_##SZ(void) \
{ \
    TYPE *p = (TYPE *)BUF; \
    int size = 13; \
    int i; \
    for (i = 0; i < BUFSIZE; i++) { \
        TYPE element; \
        CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \
        DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
                     i, p, element); \
        check_load(i, element, 1, size); \
    } \
    p = (TYPE *)BUF; \
    for (i = 0; i < BUFSIZE; i++) { \
        TYPE element; \
        CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \
        DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
                     i, p, element); \
        check_load(i, element, -1, size); \
    } \
}

TEST_LOAD_REG(b,  char,           bbuf, NBYTES, d)
TEST_LOAD_REG(ub, unsigned char,  bbuf, NBYTES, d)
TEST_LOAD_REG(h,  short,          hbuf, NHALFS, d)
TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d)
TEST_LOAD_REG(w,  int,            wbuf, NWORDS, d)
TEST_LOAD_REG(d,  long long,      dbuf, NDOBLS, lld)

/*
 * The circular stores will wrap around somewhere inside the buffer
 *
 * CIRC_VAL defines circ_val_<SZ>(i, inc, size), the value check_store_<SZ>
 * expects to find in BUF[i] (0 <= i < size) after the store tests have
 * written BUFSIZE consecutive values 0, 1, 2, ... through a size-element
 * circular window with increment inc.
 */
#define CIRC_VAL(SZ, TYPE, BUFSIZE) \
TYPE circ_val_##SZ(int i, int inc, int size) \
{ \
    int mod = BUFSIZE % size; \
    int elem = i * inc; \
    if (elem < 0) { \
        if (-elem <= size - mod) { \
            return (elem + BUFSIZE - mod); \
        } else { \
            return (elem + BUFSIZE + size - mod); \
        } \
    } else if (elem < mod) {\
        return (elem + BUFSIZE - mod); \
    } else { \
        return (elem + BUFSIZE - size - mod); \
    } \
}

CIRC_VAL(b, unsigned char, NBYTES)
CIRC_VAL(h, short,         NHALFS)
CIRC_VAL(w, int,           NWORDS)
CIRC_VAL(d, long long,     NDOBLS)

/*
 * Circular stores should only write to the first "size" elements of the buffer
 * the remainder of the elements should have BUF[i] == i
 *
 * CHECK_STORE defines check_store_<SZ>(inc, size), which verifies both
 * conditions and counts every mismatch in err.
 */
#define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \
void check_store_##SZ(int inc, int size) \
{ \
    int i; \
    for (i = 0; i < size; i++) { \
        DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \
                     i, BUF[i], circ_val_##SZ(i, inc, size)); \
        if (BUF[i] != circ_val_##SZ(i, inc, size)) { \
            printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \
                   i, BUF[i], circ_val_##SZ(i, inc, size)); \
            err++; \
        } \
    } \
    for (i = size; i < BUFSIZE; i++) { \
        if (BUF[i] != i) { \
            printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \
            err++; \
        } \
    } \
}

CHECK_STORE(b, bbuf, NBYTES, x)
CHECK_STORE(h, hbuf, NHALFS, x)
CHECK_STORE(w, wbuf, NWORDS, x)
CHECK_STORE(d, dbuf, NDOBLS, llx)

/*
 * CIRC_TEST_STORE_IMM defines circ_test_store_imm_<SZ>(): re-initialise BUF,
 * store the values 0, 1, 2, ... (shifted left by SHIFT) BUFSIZE times
 * through a 27-element circular window with increment +INC bytes, verify
 * with check_store_<CHK>, then repeat with increment -INC.
 */
#define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \
void circ_test_store_imm_##SZ(void) \
{ \
    unsigned int size = 27; \
    TYPE *p = BUF; \
    TYPE val = 0; \
    int i; \
    init_##BUF(); \
    for (i = 0; i < BUFSIZE; i++) { \
        CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \
        val++; \
    } \
    check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \
    p = BUF; \
    val = 0; \
    init_##BUF(); \
    for (i = 0; i < BUFSIZE; i++) { \
        CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \
                            -(INC)); \
        val++; \
    } \
    check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \
}

CIRC_TEST_STORE_IMM(b,    b, unsigned char, bbuf, NBYTES, 0,  1)
CIRC_TEST_STORE_IMM(h,    h, short,         hbuf, NHALFS, 0,  2)
CIRC_TEST_STORE_IMM(f,    h, short,         hbuf, NHALFS, 16, 2)
CIRC_TEST_STORE_IMM(w,    w, int,           wbuf, NWORDS, 0,  4)
CIRC_TEST_STORE_IMM(d,    d, long long,     dbuf, NDOBLS, 0,  8)
CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0,  1)
CIRC_TEST_STORE_IMM(hnew, h, short,         hbuf, NHALFS, 0,  2)
CIRC_TEST_STORE_IMM(wnew, w, int,           wbuf, NWORDS, 0,  4)

/*
 * CIRC_TEST_STORE_REG defines circ_test_store_reg_<SZ>(): same procedure as
 * CIRC_TEST_STORE_IMM, but over a 19-element window and with the increment
 * (+1, then -1) supplied through the modifier register.
 */
#define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \
void circ_test_store_reg_##SZ(void) \
{ \
    TYPE *p = BUF; \
    unsigned int size = 19; \
    TYPE val = 0; \
    int i; \
    init_##BUF(); \
    for (i = 0; i < BUFSIZE; i++) { \
        CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \
        val++; \
    } \
    check_store_##CHK(1, size); \
    p = BUF; \
    val = 0; \
    init_##BUF(); \
    for (i = 0; i < BUFSIZE; i++) { \
        CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \
        val++; \
    } \
    check_store_##CHK(-1, size); \
}

CIRC_TEST_STORE_REG(b,    b, unsigned char, bbuf, NBYTES, 0)
CIRC_TEST_STORE_REG(h,    h, short,         hbuf, NHALFS, 0)
CIRC_TEST_STORE_REG(f,    h, short,         hbuf, NHALFS, 16)
CIRC_TEST_STORE_REG(w,    w, int,           wbuf, NWORDS, 0)
CIRC_TEST_STORE_REG(d,    d, long long,     dbuf, NDOBLS, 0)
CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0)
CIRC_TEST_STORE_REG(hnew, h, short,         hbuf, NHALFS, 0)
CIRC_TEST_STORE_REG(wnew, w, int,           wbuf, NWORDS, 0)

/*
 * Test the old scheme used in Hexagon V3
 *
 * Unlike the other load tests, this one passes a nonzero K field to
 * build_mreg and does not write a start address to cs1.
 */
static void circ_test_v3(void)
{
    int *p = wbuf;
    int size = 15;
    int K = 4;      /* 64 bytes */
    int element;
    int i;

    init_wbuf();

    for (i = 0; i < NWORDS; i++) {
        __asm__(
            "r4 = %2\n\t"
            "m1 = r4\n\t"
            "%0 = memw(%1++I:circ(M1))\n\t"
            : "=r"(element), "+r"(p)
            : "r"(build_mreg(1, K, size * sizeof(int)))
            : "r4", "m1");
        DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element);
        check_load(i, element, 1, size);
    }
}

/*
 * Run every circular load/store test.  Prints "PASS" and returns 0 when no
 * mismatch was counted in err; otherwise prints "FAIL" and returns 1.
 */
int main(void)
{
    init_bbuf();
    init_hbuf();
    init_wbuf();
    init_dbuf();

    DEBUG_PRINTF("NBYTES = %d\n", NBYTES);
    DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf);
    DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf);
    DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf);
    DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf);

    circ_test_load_imm_b();
    circ_test_load_imm_ub();
    circ_test_load_imm_h();
    circ_test_load_imm_uh();
    circ_test_load_imm_w();
    circ_test_load_imm_d();

    circ_test_load_reg_b();
    circ_test_load_reg_ub();
    circ_test_load_reg_h();
    circ_test_load_reg_uh();
    circ_test_load_reg_w();
    circ_test_load_reg_d();

    circ_test_store_imm_b();
    circ_test_store_imm_h();
    circ_test_store_imm_f();
    circ_test_store_imm_w();
    circ_test_store_imm_d();
    circ_test_store_imm_bnew();
    circ_test_store_imm_hnew();
    circ_test_store_imm_wnew();

    circ_test_store_reg_b();
    circ_test_store_reg_h();
    circ_test_store_reg_f();
    circ_test_store_reg_w();
    circ_test_store_reg_d();
    circ_test_store_reg_bnew();
    circ_test_store_reg_hnew();
    circ_test_store_reg_wnew();

    circ_test_v3();

    puts(err ? "FAIL" : "PASS");
    return err ? 1 : 0;
}