mem_noshuf.c (10075B)
1/* 2 * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18#include <stdio.h> 19 20/* 21 * Make sure that the :mem_noshuf packet attribute is honored. 22 * This is important when the addresses overlap. 23 * The store instruction in slot 1 effectively executes first, 24 * followed by the load instruction in slot 0. 25 */ 26 27#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \ 28static inline unsigned int NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \ 29{ \ 30 unsigned int ret; \ 31 asm volatile("{\n\t" \ 32 " " #ST_OP "(%1) = %3\n\t" \ 33 " %0 = " #LD_OP "(%2)\n\t" \ 34 "}:mem_noshuf\n" \ 35 : "=r"(ret) \ 36 : "r"(p), "r"(q), "r"(x) \ 37 : "memory"); \ 38 return ret; \ 39} 40 41#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \ 42static inline unsigned long long NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \ 43{ \ 44 unsigned long long ret; \ 45 asm volatile("{\n\t" \ 46 " " #ST_OP "(%1) = %3\n\t" \ 47 " %0 = " #LD_OP "(%2)\n\t" \ 48 "}:mem_noshuf\n" \ 49 : "=r"(ret) \ 50 : "r"(p), "r"(q), "r"(x) \ 51 : "memory"); \ 52 return ret; \ 53} 54 55/* Store byte combinations */ 56MEM_NOSHUF32(mem_noshuf_sb_lb, signed char, signed char, memb, memb) 57MEM_NOSHUF32(mem_noshuf_sb_lub, signed char, unsigned char, memb, memub) 58MEM_NOSHUF32(mem_noshuf_sb_lh, signed char, signed short, memb, memh) 59MEM_NOSHUF32(mem_noshuf_sb_luh, signed char, unsigned short, memb, memuh) 60MEM_NOSHUF32(mem_noshuf_sb_lw, signed char, signed int, memb, memw) 61MEM_NOSHUF64(mem_noshuf_sb_ld, signed char, signed long long, memb, memd) 62 63/* Store half combinations */ 64MEM_NOSHUF32(mem_noshuf_sh_lb, signed short, signed char, memh, memb) 65MEM_NOSHUF32(mem_noshuf_sh_lub, signed short, unsigned char, memh, memub) 66MEM_NOSHUF32(mem_noshuf_sh_lh, signed short, signed short, memh, memh) 67MEM_NOSHUF32(mem_noshuf_sh_luh, signed short, unsigned short, memh, memuh) 68MEM_NOSHUF32(mem_noshuf_sh_lw, signed short, signed int, memh, memw) 69MEM_NOSHUF64(mem_noshuf_sh_ld, signed short, signed long long, memh, memd) 70 71/* Store word combinations */ 72MEM_NOSHUF32(mem_noshuf_sw_lb, signed int, signed char, memw, memb) 73MEM_NOSHUF32(mem_noshuf_sw_lub, signed int, unsigned char, memw, memub) 74MEM_NOSHUF32(mem_noshuf_sw_lh, signed int, signed short, memw, memh) 75MEM_NOSHUF32(mem_noshuf_sw_luh, signed int, unsigned short, memw, memuh) 76MEM_NOSHUF32(mem_noshuf_sw_lw, signed int, signed int, memw, memw) 77MEM_NOSHUF64(mem_noshuf_sw_ld, signed int, signed long long, memw, memd) 78 79/* Store double combinations */ 80MEM_NOSHUF32(mem_noshuf_sd_lb, long long, signed char, memd, memb) 81MEM_NOSHUF32(mem_noshuf_sd_lub, long long, unsigned char, memd, memub) 82MEM_NOSHUF32(mem_noshuf_sd_lh, long long, signed short, memd, memh) 83MEM_NOSHUF32(mem_noshuf_sd_luh, long long, unsigned short, memd, memuh) 84MEM_NOSHUF32(mem_noshuf_sd_lw, long long, signed int, memd, memw) 85MEM_NOSHUF64(mem_noshuf_sd_ld, long long, signed long long, memd, memd) 86 87static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x) 88{ 89 unsigned int ret; 90 asm volatile("p0 = cmp.eq(%4, #0)\n\t" 91 "{\n\t" 92 " if (!p0) memw(%1) = %3\n\t" 93 " %0 = memb(%2)\n\t" 94 "}:mem_noshuf\n" 95 : "=r"(ret) 96 : "r"(p), "r"(q), "r"(x), "r"(pred) 97 : "p0", "memory"); 98 return ret; 99} 100 101static inline 102unsigned long long cancel_sw_ld(int pred, int *p, long long *q, int x) 103{ 104 long long ret; 105 asm volatile("p0 = cmp.eq(%4, #0)\n\t" 106 "{\n\t" 107 " if (!p0) memw(%1) = %3\n\t" 108 " %0 = memd(%2)\n\t" 109 "}:mem_noshuf\n" 110 : "=r"(ret) 111 : "r"(p), "r"(q), "r"(x), "r"(pred) 112 : "p0", "memory"); 113 return ret; 114} 115 116typedef union { 117 signed long long d[2]; 118 unsigned long long ud[2]; 119 signed int w[4]; 120 unsigned int uw[4]; 121 signed short h[8]; 122 unsigned short uh[8]; 123 signed char b[16]; 124 unsigned char ub[16]; 125} Memory; 126 127int err; 128 129static void check32(int n, int expect) 130{ 131 if (n != expect) { 132 printf("ERROR: 0x%08x != 0x%08x\n", n, expect); 133 err++; 134 } 135} 136 137static void check64(long long n, long long expect) 138{ 139 if (n != expect) { 140 printf("ERROR: 0x%08llx != 0x%08llx\n", n, expect); 141 err++; 142 } 143} 144 145int main() 146{ 147 Memory n; 148 unsigned int res32; 149 unsigned long long res64; 150 151 /* 152 * Store byte combinations 153 */ 154 n.w[0] = ~0; 155 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87); 156 check32(res32, 0xffffff87); 157 158 n.w[0] = ~0; 159 res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87); 160 check32(res32, 0x00000087); 161 162 n.w[0] = ~0; 163 res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87); 164 check32(res32, 0xffffff87); 165 166 n.w[0] = ~0; 167 res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87); 168 check32(res32, 0x0000ff87); 169 170 n.w[0] = ~0; 171 res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87); 172 check32(res32, 0xffffff87); 173 174 n.d[0] = ~0LL; 175 res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87); 176 check64(res64, 0xffffffffffffff87LL); 177 178 /* 179 * Store half combinations 180 */ 181 n.w[0] = ~0; 182 res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787); 183 check32(res32, 0xffffff87); 184 185 n.w[0] = ~0; 186 res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87); 187 check32(res32, 0x0000008f); 188 189 n.w[0] = ~0; 190 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87); 191 check32(res32, 0xffff8a87); 192 193 n.w[0] = ~0; 194 res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87); 195 check32(res32, 0x8a87); 196 197 n.w[0] = ~0; 198 res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87); 199 check32(res32, 0x8a87ffff); 200 201 n.w[0] = ~0; 202 res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87); 203 check64(res64, 0xffffffff8a87ffffLL); 204 205 /* 206 * Store word combinations 207 */ 208 n.w[0] = ~0; 209 res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687); 210 check32(res32, 0xffffff87); 211 212 n.w[0] = ~0; 213 res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687); 214 check32(res32, 0x00000087); 215 216 n.w[0] = ~0; 217 res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678); 218 check32(res32, 0xfffff678); 219 220 n.w[0] = ~0; 221 res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678); 222 check32(res32, 0x00005678); 223 224 n.w[0] = ~0; 225 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678); 226 check32(res32, 0x12345678); 227 228 n.d[0] = ~0LL; 229 res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678); 230 check64(res64, 0xffffffff12345678LL); 231 232 /* 233 * Store double combinations 234 */ 235 n.d[0] = ~0LL; 236 res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0); 237 check32(res32, 0xffffffde); 238 239 n.d[0] = ~0LL; 240 res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0); 241 check32(res32, 0x000000de); 242 243 n.d[0] = ~0LL; 244 res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0); 245 check32(res32, 0xffff9abc); 246 247 n.d[0] = ~0LL; 248 res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0); 249 check32(res32, 0x00009abc); 250 251 n.d[0] = ~0LL; 252 res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0); 253 check32(res32, 0x12345678); 254 255 n.d[0] = ~0LL; 256 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0); 257 check64(res64, 0x123456789abcdef0LL); 258 259 /* 260 * Predicated word stores 261 */ 262 n.w[0] = ~0; 263 res32 = cancel_sw_lb(0, &n.w[0], &n.b[0], 0x12345678); 264 check32(res32, 0xffffffff); 265 266 n.w[0] = ~0; 267 res32 = cancel_sw_lb(1, &n.w[0], &n.b[0], 0x12345687); 268 check32(res32, 0xffffff87); 269 270 /* 271 * Predicated double stores 272 */ 273 n.d[0] = ~0LL; 274 res64 = cancel_sw_ld(0, &n.w[0], &n.d[0], 0x12345678); 275 check64(res64, 0xffffffffffffffffLL); 276 277 n.d[0] = ~0LL; 278 res64 = cancel_sw_ld(1, &n.w[0], &n.d[0], 0x12345678); 279 check64(res64, 0xffffffff12345678LL); 280 281 n.d[0] = ~0LL; 282 res64 = cancel_sw_ld(0, &n.w[1], &n.d[0], 0x12345678); 283 check64(res64, 0xffffffffffffffffLL); 284 285 n.d[0] = ~0LL; 286 res64 = cancel_sw_ld(1, &n.w[1], &n.d[0], 0x12345678); 287 check64(res64, 0x12345678ffffffffLL); 288 289 /* 290 * No overlap tests 291 */ 292 n.w[0] = ~0; 293 res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87); 294 check32(res32, 0xffffffff); 295 296 n.w[0] = ~0; 297 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87); 298 check32(res32, 0xffffffff); 299 300 n.w[0] = ~0; 301 res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787); 302 check32(res32, 0xffffffff); 303 304 n.w[0] = ~0; 305 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787); 306 check32(res32, 0xffffffff); 307 308 n.d[0] = ~0LL; 309 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678); 310 check32(res32, 0xffffffff); 311 312 n.d[0] = ~0LL; 313 res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678); 314 check32(res32, 0xffffffff); 315 316 n.d[0] = ~0LL; 317 n.d[1] = ~0LL; 318 res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL); 319 check64(res64, 0xffffffffffffffffLL); 320 321 n.d[0] = ~0LL; 322 n.d[1] = ~0LL; 323 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL); 324 check64(res64, 0xffffffffffffffffLL); 325 326 puts(err ? "FAIL" : "PASS"); 327 return err; 328}