/* vec_string_helper.c (17383B) */
1/* 2 * QEMU TCG support -- s390x vector string instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12#include "qemu/osdep.h" 13#include "qemu-common.h" 14#include "cpu.h" 15#include "s390x-internal.h" 16#include "vec.h" 17#include "tcg/tcg.h" 18#include "tcg/tcg-gvec-desc.h" 19#include "exec/helper-proto.h" 20 21/* 22 * Returns a bit set in the MSB of each element that is zero, 23 * as defined by the mask. 24 */ 25static inline uint64_t zero_search(uint64_t a, uint64_t mask) 26{ 27 return ~(((a & mask) + mask) | a | mask); 28} 29 30/* 31 * Returns a bit set in the MSB of each element that is not zero, 32 * as defined by the mask. 33 */ 34static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) 35{ 36 return (((a & mask) + mask) | a) & ~mask; 37} 38 39/* 40 * Returns the byte offset for the first match, or 16 for no match. 41 */ 42static inline int match_index(uint64_t c0, uint64_t c1) 43{ 44 return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; 45} 46 47/* 48 * Returns the number of bits composing one element. 49 */ 50static uint8_t get_element_bits(uint8_t es) 51{ 52 return (1 << es) * BITS_PER_BYTE; 53} 54 55/* 56 * Returns the bitmask for a single element. 57 */ 58static uint64_t get_single_element_mask(uint8_t es) 59{ 60 return -1ull >> (64 - get_element_bits(es)); 61} 62 63/* 64 * Returns the bitmask for a single element (excluding the MSB). 65 */ 66static uint64_t get_single_element_lsbs_mask(uint8_t es) 67{ 68 return -1ull >> (65 - get_element_bits(es)); 69} 70 71/* 72 * Returns the bitmasks for multiple elements (excluding the MSBs). 
73 */ 74static uint64_t get_element_lsbs_mask(uint8_t es) 75{ 76 return dup_const(es, get_single_element_lsbs_mask(es)); 77} 78 79static int vfae(void *v1, const void *v2, const void *v3, bool in, 80 bool rt, bool zs, uint8_t es) 81{ 82 const uint64_t mask = get_element_lsbs_mask(es); 83 const int bits = get_element_bits(es); 84 uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; 85 uint64_t first_zero = 16; 86 uint64_t first_equal; 87 int i; 88 89 a0 = s390_vec_read_element64(v2, 0); 90 a1 = s390_vec_read_element64(v2, 1); 91 b0 = s390_vec_read_element64(v3, 0); 92 b1 = s390_vec_read_element64(v3, 1); 93 e0 = 0; 94 e1 = 0; 95 /* compare against equality with every other element */ 96 for (i = 0; i < 64; i += bits) { 97 t0 = rol64(b0, i); 98 t1 = rol64(b1, i); 99 e0 |= zero_search(a0 ^ t0, mask); 100 e0 |= zero_search(a0 ^ t1, mask); 101 e1 |= zero_search(a1 ^ t0, mask); 102 e1 |= zero_search(a1 ^ t1, mask); 103 } 104 /* invert the result if requested - invert only the MSBs */ 105 if (in) { 106 e0 = ~e0 & ~mask; 107 e1 = ~e1 & ~mask; 108 } 109 first_equal = match_index(e0, e1); 110 111 if (zs) { 112 z0 = zero_search(a0, mask); 113 z1 = zero_search(a1, mask); 114 first_zero = match_index(z0, z1); 115 } 116 117 if (rt) { 118 e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); 119 e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); 120 s390_vec_write_element64(v1, 0, e0); 121 s390_vec_write_element64(v1, 1, e1); 122 } else { 123 s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); 124 s390_vec_write_element64(v1, 1, 0); 125 } 126 127 if (first_zero == 16 && first_equal == 16) { 128 return 3; /* no match */ 129 } else if (first_zero == 16) { 130 return 1; /* matching elements, no match for zero */ 131 } else if (first_equal < first_zero) { 132 return 2; /* matching elements before match for zero */ 133 } 134 return 0; /* match for zero */ 135} 136 137#define DEF_VFAE_HELPER(BITS) \ 138void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ 
139 uint32_t desc) \ 140{ \ 141 const bool in = extract32(simd_data(desc), 3, 1); \ 142 const bool rt = extract32(simd_data(desc), 2, 1); \ 143 const bool zs = extract32(simd_data(desc), 1, 1); \ 144 \ 145 vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ 146} 147DEF_VFAE_HELPER(8) 148DEF_VFAE_HELPER(16) 149DEF_VFAE_HELPER(32) 150 151#define DEF_VFAE_CC_HELPER(BITS) \ 152void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ 153 CPUS390XState *env, uint32_t desc) \ 154{ \ 155 const bool in = extract32(simd_data(desc), 3, 1); \ 156 const bool rt = extract32(simd_data(desc), 2, 1); \ 157 const bool zs = extract32(simd_data(desc), 1, 1); \ 158 \ 159 env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ 160} 161DEF_VFAE_CC_HELPER(8) 162DEF_VFAE_CC_HELPER(16) 163DEF_VFAE_CC_HELPER(32) 164 165static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) 166{ 167 const uint64_t mask = get_element_lsbs_mask(es); 168 uint64_t a0, a1, b0, b1, e0, e1, z0, z1; 169 uint64_t first_zero = 16; 170 uint64_t first_equal; 171 172 a0 = s390_vec_read_element64(v2, 0); 173 a1 = s390_vec_read_element64(v2, 1); 174 b0 = s390_vec_read_element64(v3, 0); 175 b1 = s390_vec_read_element64(v3, 1); 176 e0 = zero_search(a0 ^ b0, mask); 177 e1 = zero_search(a1 ^ b1, mask); 178 first_equal = match_index(e0, e1); 179 180 if (zs) { 181 z0 = zero_search(a0, mask); 182 z1 = zero_search(a1, mask); 183 first_zero = match_index(z0, z1); 184 } 185 186 s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); 187 s390_vec_write_element64(v1, 1, 0); 188 if (first_zero == 16 && first_equal == 16) { 189 return 3; /* no match */ 190 } else if (first_zero == 16) { 191 return 1; /* matching elements, no match for zero */ 192 } else if (first_equal < first_zero) { 193 return 2; /* matching elements before match for zero */ 194 } 195 return 0; /* match for zero */ 196} 197 198#define DEF_VFEE_HELPER(BITS) \ 199void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void 
*v3, \ 200 uint32_t desc) \ 201{ \ 202 const bool zs = extract32(simd_data(desc), 1, 1); \ 203 \ 204 vfee(v1, v2, v3, zs, MO_##BITS); \ 205} 206DEF_VFEE_HELPER(8) 207DEF_VFEE_HELPER(16) 208DEF_VFEE_HELPER(32) 209 210#define DEF_VFEE_CC_HELPER(BITS) \ 211void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ 212 CPUS390XState *env, uint32_t desc) \ 213{ \ 214 const bool zs = extract32(simd_data(desc), 1, 1); \ 215 \ 216 env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ 217} 218DEF_VFEE_CC_HELPER(8) 219DEF_VFEE_CC_HELPER(16) 220DEF_VFEE_CC_HELPER(32) 221 222static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) 223{ 224 const uint64_t mask = get_element_lsbs_mask(es); 225 uint64_t a0, a1, b0, b1, e0, e1, z0, z1; 226 uint64_t first_zero = 16; 227 uint64_t first_inequal; 228 bool smaller = false; 229 230 a0 = s390_vec_read_element64(v2, 0); 231 a1 = s390_vec_read_element64(v2, 1); 232 b0 = s390_vec_read_element64(v3, 0); 233 b1 = s390_vec_read_element64(v3, 1); 234 e0 = nonzero_search(a0 ^ b0, mask); 235 e1 = nonzero_search(a1 ^ b1, mask); 236 first_inequal = match_index(e0, e1); 237 238 /* identify the smaller element */ 239 if (first_inequal < 16) { 240 uint8_t enr = first_inequal / (1 << es); 241 uint32_t a = s390_vec_read_element(v2, enr, es); 242 uint32_t b = s390_vec_read_element(v3, enr, es); 243 244 smaller = a < b; 245 } 246 247 if (zs) { 248 z0 = zero_search(a0, mask); 249 z1 = zero_search(a1, mask); 250 first_zero = match_index(z0, z1); 251 } 252 253 s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); 254 s390_vec_write_element64(v1, 1, 0); 255 if (first_zero == 16 && first_inequal == 16) { 256 return 3; 257 } else if (first_zero < first_inequal) { 258 return 0; 259 } 260 return smaller ? 
1 : 2; 261} 262 263#define DEF_VFENE_HELPER(BITS) \ 264void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ 265 uint32_t desc) \ 266{ \ 267 const bool zs = extract32(simd_data(desc), 1, 1); \ 268 \ 269 vfene(v1, v2, v3, zs, MO_##BITS); \ 270} 271DEF_VFENE_HELPER(8) 272DEF_VFENE_HELPER(16) 273DEF_VFENE_HELPER(32) 274 275#define DEF_VFENE_CC_HELPER(BITS) \ 276void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ 277 CPUS390XState *env, uint32_t desc) \ 278{ \ 279 const bool zs = extract32(simd_data(desc), 1, 1); \ 280 \ 281 env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ 282} 283DEF_VFENE_CC_HELPER(8) 284DEF_VFENE_CC_HELPER(16) 285DEF_VFENE_CC_HELPER(32) 286 287static int vistr(void *v1, const void *v2, uint8_t es) 288{ 289 const uint64_t mask = get_element_lsbs_mask(es); 290 uint64_t a0 = s390_vec_read_element64(v2, 0); 291 uint64_t a1 = s390_vec_read_element64(v2, 1); 292 uint64_t z; 293 int cc = 3; 294 295 z = zero_search(a0, mask); 296 if (z) { 297 a0 &= ~(-1ull >> clz64(z)); 298 a1 = 0; 299 cc = 0; 300 } else { 301 z = zero_search(a1, mask); 302 if (z) { 303 a1 &= ~(-1ull >> clz64(z)); 304 cc = 0; 305 } 306 } 307 308 s390_vec_write_element64(v1, 0, a0); 309 s390_vec_write_element64(v1, 1, a1); 310 return cc; 311} 312 313#define DEF_VISTR_HELPER(BITS) \ 314void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \ 315{ \ 316 vistr(v1, v2, MO_##BITS); \ 317} 318DEF_VISTR_HELPER(8) 319DEF_VISTR_HELPER(16) 320DEF_VISTR_HELPER(32) 321 322#define DEF_VISTR_CC_HELPER(BITS) \ 323void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \ 324 uint32_t desc) \ 325{ \ 326 env->cc_op = vistr(v1, v2, MO_##BITS); \ 327} 328DEF_VISTR_CC_HELPER(8) 329DEF_VISTR_CC_HELPER(16) 330DEF_VISTR_CC_HELPER(32) 331 332static bool element_compare(uint32_t data, uint32_t l, uint8_t c) 333{ 334 const bool equal = extract32(c, 7, 1); 335 const bool lower = extract32(c, 6, 1); 336 const bool higher = extract32(c, 
5, 1); 337 338 if (data < l) { 339 return lower; 340 } else if (data > l) { 341 return higher; 342 } 343 return equal; 344} 345 346static int vstrc(void *v1, const void *v2, const void *v3, const void *v4, 347 bool in, bool rt, bool zs, uint8_t es) 348{ 349 const uint64_t mask = get_element_lsbs_mask(es); 350 uint64_t a0 = s390_vec_read_element64(v2, 0); 351 uint64_t a1 = s390_vec_read_element64(v2, 1); 352 int first_zero = 16, first_match = 16; 353 S390Vector rt_result = {}; 354 uint64_t z0, z1; 355 int i, j; 356 357 if (zs) { 358 z0 = zero_search(a0, mask); 359 z1 = zero_search(a1, mask); 360 first_zero = match_index(z0, z1); 361 } 362 363 for (i = 0; i < 16 / (1 << es); i++) { 364 const uint32_t data = s390_vec_read_element(v2, i, es); 365 const int cur_byte = i * (1 << es); 366 bool any_match = false; 367 368 /* if we don't need a bit vector, we can stop early */ 369 if (cur_byte == first_zero && !rt) { 370 break; 371 } 372 373 for (j = 0; j < 16 / (1 << es); j += 2) { 374 const uint32_t l1 = s390_vec_read_element(v3, j, es); 375 const uint32_t l2 = s390_vec_read_element(v3, j + 1, es); 376 /* we are only interested in the highest byte of each element */ 377 const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es)); 378 const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es)); 379 380 if (element_compare(data, l1, c1) && 381 element_compare(data, l2, c2)) { 382 any_match = true; 383 break; 384 } 385 } 386 /* invert the result if requested */ 387 any_match = in ^ any_match; 388 389 if (any_match) { 390 /* indicate bit vector if requested */ 391 if (rt) { 392 const uint64_t val = -1ull; 393 394 first_match = MIN(cur_byte, first_match); 395 s390_vec_write_element(&rt_result, i, es, val); 396 } else { 397 /* stop on the first match */ 398 first_match = cur_byte; 399 break; 400 } 401 } 402 } 403 404 if (rt) { 405 *(S390Vector *)v1 = rt_result; 406 } else { 407 s390_vec_write_element64(v1, 0, MIN(first_match, first_zero)); 408 
s390_vec_write_element64(v1, 1, 0); 409 } 410 411 if (first_zero == 16 && first_match == 16) { 412 return 3; /* no match */ 413 } else if (first_zero == 16) { 414 return 1; /* matching elements, no match for zero */ 415 } else if (first_match < first_zero) { 416 return 2; /* matching elements before match for zero */ 417 } 418 return 0; /* match for zero */ 419} 420 421#define DEF_VSTRC_HELPER(BITS) \ 422void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \ 423 const void *v4, uint32_t desc) \ 424{ \ 425 const bool in = extract32(simd_data(desc), 3, 1); \ 426 const bool zs = extract32(simd_data(desc), 1, 1); \ 427 \ 428 vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ 429} 430DEF_VSTRC_HELPER(8) 431DEF_VSTRC_HELPER(16) 432DEF_VSTRC_HELPER(32) 433 434#define DEF_VSTRC_RT_HELPER(BITS) \ 435void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \ 436 const void *v4, uint32_t desc) \ 437{ \ 438 const bool in = extract32(simd_data(desc), 3, 1); \ 439 const bool zs = extract32(simd_data(desc), 1, 1); \ 440 \ 441 vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ 442} 443DEF_VSTRC_RT_HELPER(8) 444DEF_VSTRC_RT_HELPER(16) 445DEF_VSTRC_RT_HELPER(32) 446 447#define DEF_VSTRC_CC_HELPER(BITS) \ 448void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \ 449 const void *v4, CPUS390XState *env, \ 450 uint32_t desc) \ 451{ \ 452 const bool in = extract32(simd_data(desc), 3, 1); \ 453 const bool zs = extract32(simd_data(desc), 1, 1); \ 454 \ 455 env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ 456} 457DEF_VSTRC_CC_HELPER(8) 458DEF_VSTRC_CC_HELPER(16) 459DEF_VSTRC_CC_HELPER(32) 460 461#define DEF_VSTRC_CC_RT_HELPER(BITS) \ 462void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \ 463 const void *v4, CPUS390XState *env, \ 464 uint32_t desc) \ 465{ \ 466 const bool in = extract32(simd_data(desc), 3, 1); \ 467 const bool zs = extract32(simd_data(desc), 1, 1); \ 468 \ 469 env->cc_op = vstrc(v1, v2, 
v3, v4, in, 1, zs, MO_##BITS); \ 470} 471DEF_VSTRC_CC_RT_HELPER(8) 472DEF_VSTRC_CC_RT_HELPER(16) 473DEF_VSTRC_CC_RT_HELPER(32)