insn.c (17946B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * x86 instruction analysis 4 * 5 * Copyright (C) IBM Corporation, 2002, 2004, 2009 6 */ 7 8#include <linux/kernel.h> 9#ifdef __KERNEL__ 10#include <linux/string.h> 11#else 12#include <string.h> 13#endif 14#include "../include/asm/inat.h" /* __ignore_sync_check__ */ 15#include "../include/asm/insn.h" /* __ignore_sync_check__ */ 16#include "../include/asm-generic/unaligned.h" /* __ignore_sync_check__ */ 17 18#include <linux/errno.h> 19#include <linux/kconfig.h> 20 21#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */ 22 23#define leXX_to_cpu(t, r) \ 24({ \ 25 __typeof__(t) v; \ 26 switch (sizeof(t)) { \ 27 case 4: v = le32_to_cpu(r); break; \ 28 case 2: v = le16_to_cpu(r); break; \ 29 case 1: v = r; break; \ 30 default: \ 31 BUILD_BUG(); break; \ 32 } \ 33 v; \ 34}) 35 36/* Verify next sizeof(t) bytes can be on the same instruction */ 37#define validate_next(t, insn, n) \ 38 ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) 39 40#define __get_next(t, insn) \ 41 ({ t r = get_unaligned((t *)(insn)->next_byte); (insn)->next_byte += sizeof(t); leXX_to_cpu(t, r); }) 42 43#define __peek_nbyte_next(t, insn, n) \ 44 ({ t r = get_unaligned((t *)(insn)->next_byte + n); leXX_to_cpu(t, r); }) 45 46#define get_next(t, insn) \ 47 ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) 48 49#define peek_nbyte_next(t, insn, n) \ 50 ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) 51 52#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) 53 54/** 55 * insn_init() - initialize struct insn 56 * @insn: &struct insn to be initialized 57 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 58 * @buf_len: length of the insn buffer at @kaddr 59 * @x86_64: !0 for 64-bit kernel or 64-bit app 60 */ 61void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) 62{ 63 /* 64 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid 65 * even if the input buffer is long enough to hold them. 66 */ 67 if (buf_len > MAX_INSN_SIZE) 68 buf_len = MAX_INSN_SIZE; 69 70 memset(insn, 0, sizeof(*insn)); 71 insn->kaddr = kaddr; 72 insn->end_kaddr = kaddr + buf_len; 73 insn->next_byte = kaddr; 74 insn->x86_64 = x86_64 ? 1 : 0; 75 insn->opnd_bytes = 4; 76 if (x86_64) 77 insn->addr_bytes = 8; 78 else 79 insn->addr_bytes = 4; 80} 81 82static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX }; 83static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX }; 84 85static int __insn_get_emulate_prefix(struct insn *insn, 86 const insn_byte_t *prefix, size_t len) 87{ 88 size_t i; 89 90 for (i = 0; i < len; i++) { 91 if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i]) 92 goto err_out; 93 } 94 95 insn->emulate_prefix_size = len; 96 insn->next_byte += len; 97 98 return 1; 99 100err_out: 101 return 0; 102} 103 104static void insn_get_emulate_prefix(struct insn *insn) 105{ 106 if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix))) 107 return; 108 109 __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix)); 110} 111 112/** 113 * insn_get_prefixes - scan x86 instruction prefix bytes 114 * @insn: &struct insn containing instruction 115 * 116 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte 117 * to point to the (first) opcode. No effect if @insn->prefixes.got 118 * is already set. 119 * 120 * * Returns: 121 * 0: on success 122 * < 0: on error 123 */ 124int insn_get_prefixes(struct insn *insn) 125{ 126 struct insn_field *prefixes = &insn->prefixes; 127 insn_attr_t attr; 128 insn_byte_t b, lb; 129 int i, nb; 130 131 if (prefixes->got) 132 return 0; 133 134 insn_get_emulate_prefix(insn); 135 136 nb = 0; 137 lb = 0; 138 b = peek_next(insn_byte_t, insn); 139 attr = inat_get_opcode_attribute(b); 140 while (inat_is_legacy_prefix(attr)) { 141 /* Skip if same prefix */ 142 for (i = 0; i < nb; i++) 143 if (prefixes->bytes[i] == b) 144 goto found; 145 if (nb == 4) 146 /* Invalid instruction */ 147 break; 148 prefixes->bytes[nb++] = b; 149 if (inat_is_address_size_prefix(attr)) { 150 /* address size switches 2/4 or 4/8 */ 151 if (insn->x86_64) 152 insn->addr_bytes ^= 12; 153 else 154 insn->addr_bytes ^= 6; 155 } else if (inat_is_operand_size_prefix(attr)) { 156 /* oprand size switches 2/4 */ 157 insn->opnd_bytes ^= 6; 158 } 159found: 160 prefixes->nbytes++; 161 insn->next_byte++; 162 lb = b; 163 b = peek_next(insn_byte_t, insn); 164 attr = inat_get_opcode_attribute(b); 165 } 166 /* Set the last prefix */ 167 if (lb && lb != insn->prefixes.bytes[3]) { 168 if (unlikely(insn->prefixes.bytes[3])) { 169 /* Swap the last prefix */ 170 b = insn->prefixes.bytes[3]; 171 for (i = 0; i < nb; i++) 172 if (prefixes->bytes[i] == lb) 173 insn_set_byte(prefixes, i, b); 174 } 175 insn_set_byte(&insn->prefixes, 3, lb); 176 } 177 178 /* Decode REX prefix */ 179 if (insn->x86_64) { 180 b = peek_next(insn_byte_t, insn); 181 attr = inat_get_opcode_attribute(b); 182 if (inat_is_rex_prefix(attr)) { 183 insn_field_set(&insn->rex_prefix, b, 1); 184 insn->next_byte++; 185 if (X86_REX_W(b)) 186 /* REX.W overrides opnd_size */ 187 insn->opnd_bytes = 8; 188 } 189 } 190 insn->rex_prefix.got = 1; 191 192 /* Decode VEX prefix */ 193 b = peek_next(insn_byte_t, insn); 194 attr = inat_get_opcode_attribute(b); 195 if (inat_is_vex_prefix(attr)) { 196 insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); 197 if (!insn->x86_64) { 198 /* 199 * In 32-bits mode, if the [7:6] bits (mod bits of 200 * ModRM) on the second byte are not 11b, it is 201 * LDS or LES or BOUND. 202 */ 203 if (X86_MODRM_MOD(b2) != 3) 204 goto vex_end; 205 } 206 insn_set_byte(&insn->vex_prefix, 0, b); 207 insn_set_byte(&insn->vex_prefix, 1, b2); 208 if (inat_is_evex_prefix(attr)) { 209 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 210 insn_set_byte(&insn->vex_prefix, 2, b2); 211 b2 = peek_nbyte_next(insn_byte_t, insn, 3); 212 insn_set_byte(&insn->vex_prefix, 3, b2); 213 insn->vex_prefix.nbytes = 4; 214 insn->next_byte += 4; 215 if (insn->x86_64 && X86_VEX_W(b2)) 216 /* VEX.W overrides opnd_size */ 217 insn->opnd_bytes = 8; 218 } else if (inat_is_vex3_prefix(attr)) { 219 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 220 insn_set_byte(&insn->vex_prefix, 2, b2); 221 insn->vex_prefix.nbytes = 3; 222 insn->next_byte += 3; 223 if (insn->x86_64 && X86_VEX_W(b2)) 224 /* VEX.W overrides opnd_size */ 225 insn->opnd_bytes = 8; 226 } else { 227 /* 228 * For VEX2, fake VEX3-like byte#2. 229 * Makes it easier to decode vex.W, vex.vvvv, 230 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. 231 */ 232 insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f); 233 insn->vex_prefix.nbytes = 2; 234 insn->next_byte += 2; 235 } 236 } 237vex_end: 238 insn->vex_prefix.got = 1; 239 240 prefixes->got = 1; 241 242 return 0; 243 244err_out: 245 return -ENODATA; 246} 247 248/** 249 * insn_get_opcode - collect opcode(s) 250 * @insn: &struct insn containing instruction 251 * 252 * Populates @insn->opcode, updates @insn->next_byte to point past the 253 * opcode byte(s), and set @insn->attr (except for groups). 254 * If necessary, first collects any preceding (prefix) bytes. 255 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got 256 * is already 1. 257 * 258 * Returns: 259 * 0: on success 260 * < 0: on error 261 */ 262int insn_get_opcode(struct insn *insn) 263{ 264 struct insn_field *opcode = &insn->opcode; 265 int pfx_id, ret; 266 insn_byte_t op; 267 268 if (opcode->got) 269 return 0; 270 271 if (!insn->prefixes.got) { 272 ret = insn_get_prefixes(insn); 273 if (ret) 274 return ret; 275 } 276 277 /* Get first opcode */ 278 op = get_next(insn_byte_t, insn); 279 insn_set_byte(opcode, 0, op); 280 opcode->nbytes = 1; 281 282 /* Check if there is VEX prefix or not */ 283 if (insn_is_avx(insn)) { 284 insn_byte_t m, p; 285 m = insn_vex_m_bits(insn); 286 p = insn_vex_p_bits(insn); 287 insn->attr = inat_get_avx_attribute(op, m, p); 288 if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || 289 (!inat_accept_vex(insn->attr) && 290 !inat_is_group(insn->attr))) { 291 /* This instruction is bad */ 292 insn->attr = 0; 293 return -EINVAL; 294 } 295 /* VEX has only 1 byte for opcode */ 296 goto end; 297 } 298 299 insn->attr = inat_get_opcode_attribute(op); 300 while (inat_is_escape(insn->attr)) { 301 /* Get escaped opcode */ 302 op = get_next(insn_byte_t, insn); 303 opcode->bytes[opcode->nbytes++] = op; 304 pfx_id = insn_last_prefix_id(insn); 305 insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); 306 } 307 308 if (inat_must_vex(insn->attr)) { 309 /* This instruction is bad */ 310 insn->attr = 0; 311 return -EINVAL; 312 } 313end: 314 opcode->got = 1; 315 return 0; 316 317err_out: 318 return -ENODATA; 319} 320 321/** 322 * insn_get_modrm - collect ModRM byte, if any 323 * @insn: &struct insn containing instruction 324 * 325 * Populates @insn->modrm and updates @insn->next_byte to point past the 326 * ModRM byte, if any. If necessary, first collects the preceding bytes 327 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 328 * 329 * Returns: 330 * 0: on success 331 * < 0: on error 332 */ 333int insn_get_modrm(struct insn *insn) 334{ 335 struct insn_field *modrm = &insn->modrm; 336 insn_byte_t pfx_id, mod; 337 int ret; 338 339 if (modrm->got) 340 return 0; 341 342 if (!insn->opcode.got) { 343 ret = insn_get_opcode(insn); 344 if (ret) 345 return ret; 346 } 347 348 if (inat_has_modrm(insn->attr)) { 349 mod = get_next(insn_byte_t, insn); 350 insn_field_set(modrm, mod, 1); 351 if (inat_is_group(insn->attr)) { 352 pfx_id = insn_last_prefix_id(insn); 353 insn->attr = inat_get_group_attribute(mod, pfx_id, 354 insn->attr); 355 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { 356 /* Bad insn */ 357 insn->attr = 0; 358 return -EINVAL; 359 } 360 } 361 } 362 363 if (insn->x86_64 && inat_is_force64(insn->attr)) 364 insn->opnd_bytes = 8; 365 366 modrm->got = 1; 367 return 0; 368 369err_out: 370 return -ENODATA; 371} 372 373 374/** 375 * insn_rip_relative() - Does instruction use RIP-relative addressing mode? 376 * @insn: &struct insn containing instruction 377 * 378 * If necessary, first collects the instruction up to and including the 379 * ModRM byte. No effect if @insn->x86_64 is 0. 380 */ 381int insn_rip_relative(struct insn *insn) 382{ 383 struct insn_field *modrm = &insn->modrm; 384 int ret; 385 386 if (!insn->x86_64) 387 return 0; 388 389 if (!modrm->got) { 390 ret = insn_get_modrm(insn); 391 if (ret) 392 return 0; 393 } 394 /* 395 * For rip-relative instructions, the mod field (top 2 bits) 396 * is zero and the r/m field (bottom 3 bits) is 0x5. 397 */ 398 return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5); 399} 400 401/** 402 * insn_get_sib() - Get the SIB byte of instruction 403 * @insn: &struct insn containing instruction 404 * 405 * If necessary, first collects the instruction up to and including the 406 * ModRM byte. 407 * 408 * Returns: 409 * 0: if decoding succeeded 410 * < 0: otherwise. 411 */ 412int insn_get_sib(struct insn *insn) 413{ 414 insn_byte_t modrm; 415 int ret; 416 417 if (insn->sib.got) 418 return 0; 419 420 if (!insn->modrm.got) { 421 ret = insn_get_modrm(insn); 422 if (ret) 423 return ret; 424 } 425 426 if (insn->modrm.nbytes) { 427 modrm = insn->modrm.bytes[0]; 428 if (insn->addr_bytes != 2 && 429 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { 430 insn_field_set(&insn->sib, 431 get_next(insn_byte_t, insn), 1); 432 } 433 } 434 insn->sib.got = 1; 435 436 return 0; 437 438err_out: 439 return -ENODATA; 440} 441 442 443/** 444 * insn_get_displacement() - Get the displacement of instruction 445 * @insn: &struct insn containing instruction 446 * 447 * If necessary, first collects the instruction up to and including the 448 * SIB byte. 449 * Displacement value is sign-expanded. 450 * 451 * * Returns: 452 * 0: if decoding succeeded 453 * < 0: otherwise. 454 */ 455int insn_get_displacement(struct insn *insn) 456{ 457 insn_byte_t mod, rm, base; 458 int ret; 459 460 if (insn->displacement.got) 461 return 0; 462 463 if (!insn->sib.got) { 464 ret = insn_get_sib(insn); 465 if (ret) 466 return ret; 467 } 468 469 if (insn->modrm.nbytes) { 470 /* 471 * Interpreting the modrm byte: 472 * mod = 00 - no displacement fields (exceptions below) 473 * mod = 01 - 1-byte displacement field 474 * mod = 10 - displacement field is 4 bytes, or 2 bytes if 475 * address size = 2 (0x67 prefix in 32-bit mode) 476 * mod = 11 - no memory operand 477 * 478 * If address size = 2... 479 * mod = 00, r/m = 110 - displacement field is 2 bytes 480 * 481 * If address size != 2... 482 * mod != 11, r/m = 100 - SIB byte exists 483 * mod = 00, SIB base = 101 - displacement field is 4 bytes 484 * mod = 00, r/m = 101 - rip-relative addressing, displacement 485 * field is 4 bytes 486 */ 487 mod = X86_MODRM_MOD(insn->modrm.value); 488 rm = X86_MODRM_RM(insn->modrm.value); 489 base = X86_SIB_BASE(insn->sib.value); 490 if (mod == 3) 491 goto out; 492 if (mod == 1) { 493 insn_field_set(&insn->displacement, 494 get_next(signed char, insn), 1); 495 } else if (insn->addr_bytes == 2) { 496 if ((mod == 0 && rm == 6) || mod == 2) { 497 insn_field_set(&insn->displacement, 498 get_next(short, insn), 2); 499 } 500 } else { 501 if ((mod == 0 && rm == 5) || mod == 2 || 502 (mod == 0 && base == 5)) { 503 insn_field_set(&insn->displacement, 504 get_next(int, insn), 4); 505 } 506 } 507 } 508out: 509 insn->displacement.got = 1; 510 return 0; 511 512err_out: 513 return -ENODATA; 514} 515 516/* Decode moffset16/32/64. Return 0 if failed */ 517static int __get_moffset(struct insn *insn) 518{ 519 switch (insn->addr_bytes) { 520 case 2: 521 insn_field_set(&insn->moffset1, get_next(short, insn), 2); 522 break; 523 case 4: 524 insn_field_set(&insn->moffset1, get_next(int, insn), 4); 525 break; 526 case 8: 527 insn_field_set(&insn->moffset1, get_next(int, insn), 4); 528 insn_field_set(&insn->moffset2, get_next(int, insn), 4); 529 break; 530 default: /* opnd_bytes must be modified manually */ 531 goto err_out; 532 } 533 insn->moffset1.got = insn->moffset2.got = 1; 534 535 return 1; 536 537err_out: 538 return 0; 539} 540 541/* Decode imm v32(Iz). Return 0 if failed */ 542static int __get_immv32(struct insn *insn) 543{ 544 switch (insn->opnd_bytes) { 545 case 2: 546 insn_field_set(&insn->immediate, get_next(short, insn), 2); 547 break; 548 case 4: 549 case 8: 550 insn_field_set(&insn->immediate, get_next(int, insn), 4); 551 break; 552 default: /* opnd_bytes must be modified manually */ 553 goto err_out; 554 } 555 556 return 1; 557 558err_out: 559 return 0; 560} 561 562/* Decode imm v64(Iv/Ov), Return 0 if failed */ 563static int __get_immv(struct insn *insn) 564{ 565 switch (insn->opnd_bytes) { 566 case 2: 567 insn_field_set(&insn->immediate1, get_next(short, insn), 2); 568 break; 569 case 4: 570 insn_field_set(&insn->immediate1, get_next(int, insn), 4); 571 insn->immediate1.nbytes = 4; 572 break; 573 case 8: 574 insn_field_set(&insn->immediate1, get_next(int, insn), 4); 575 insn_field_set(&insn->immediate2, get_next(int, insn), 4); 576 break; 577 default: /* opnd_bytes must be modified manually */ 578 goto err_out; 579 } 580 insn->immediate1.got = insn->immediate2.got = 1; 581 582 return 1; 583err_out: 584 return 0; 585} 586 587/* Decode ptr16:16/32(Ap) */ 588static int __get_immptr(struct insn *insn) 589{ 590 switch (insn->opnd_bytes) { 591 case 2: 592 insn_field_set(&insn->immediate1, get_next(short, insn), 2); 593 break; 594 case 4: 595 insn_field_set(&insn->immediate1, get_next(int, insn), 4); 596 break; 597 case 8: 598 /* ptr16:64 is not exist (no segment) */ 599 return 0; 600 default: /* opnd_bytes must be modified manually */ 601 goto err_out; 602 } 603 insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2); 604 insn->immediate1.got = insn->immediate2.got = 1; 605 606 return 1; 607err_out: 608 return 0; 609} 610 611/** 612 * insn_get_immediate() - Get the immediate in an instruction 613 * @insn: &struct insn containing instruction 614 * 615 * If necessary, first collects the instruction up to and including the 616 * displacement bytes. 617 * Basically, most of immediates are sign-expanded. Unsigned-value can be 618 * computed by bit masking with ((1 << (nbytes * 8)) - 1) 619 * 620 * Returns: 621 * 0: on success 622 * < 0: on error 623 */ 624int insn_get_immediate(struct insn *insn) 625{ 626 int ret; 627 628 if (insn->immediate.got) 629 return 0; 630 631 if (!insn->displacement.got) { 632 ret = insn_get_displacement(insn); 633 if (ret) 634 return ret; 635 } 636 637 if (inat_has_moffset(insn->attr)) { 638 if (!__get_moffset(insn)) 639 goto err_out; 640 goto done; 641 } 642 643 if (!inat_has_immediate(insn->attr)) 644 /* no immediates */ 645 goto done; 646 647 switch (inat_immediate_size(insn->attr)) { 648 case INAT_IMM_BYTE: 649 insn_field_set(&insn->immediate, get_next(signed char, insn), 1); 650 break; 651 case INAT_IMM_WORD: 652 insn_field_set(&insn->immediate, get_next(short, insn), 2); 653 break; 654 case INAT_IMM_DWORD: 655 insn_field_set(&insn->immediate, get_next(int, insn), 4); 656 break; 657 case INAT_IMM_QWORD: 658 insn_field_set(&insn->immediate1, get_next(int, insn), 4); 659 insn_field_set(&insn->immediate2, get_next(int, insn), 4); 660 break; 661 case INAT_IMM_PTR: 662 if (!__get_immptr(insn)) 663 goto err_out; 664 break; 665 case INAT_IMM_VWORD32: 666 if (!__get_immv32(insn)) 667 goto err_out; 668 break; 669 case INAT_IMM_VWORD: 670 if (!__get_immv(insn)) 671 goto err_out; 672 break; 673 default: 674 /* Here, insn must have an immediate, but failed */ 675 goto err_out; 676 } 677 if (inat_has_second_immediate(insn->attr)) { 678 insn_field_set(&insn->immediate2, get_next(signed char, insn), 1); 679 } 680done: 681 insn->immediate.got = 1; 682 return 0; 683 684err_out: 685 return -ENODATA; 686} 687 688/** 689 * insn_get_length() - Get the length of instruction 690 * @insn: &struct insn containing instruction 691 * 692 * If necessary, first collects the instruction up to and including the 693 * immediates bytes. 694 * 695 * Returns: 696 * - 0 on success 697 * - < 0 on error 698*/ 699int insn_get_length(struct insn *insn) 700{ 701 int ret; 702 703 if (insn->length) 704 return 0; 705 706 if (!insn->immediate.got) { 707 ret = insn_get_immediate(insn); 708 if (ret) 709 return ret; 710 } 711 712 insn->length = (unsigned char)((unsigned long)insn->next_byte 713 - (unsigned long)insn->kaddr); 714 715 return 0; 716} 717 718/* Ensure this instruction is decoded completely */ 719static inline int insn_complete(struct insn *insn) 720{ 721 return insn->opcode.got && insn->modrm.got && insn->sib.got && 722 insn->displacement.got && insn->immediate.got; 723} 724 725/** 726 * insn_decode() - Decode an x86 instruction 727 * @insn: &struct insn to be initialized 728 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 729 * @buf_len: length of the insn buffer at @kaddr 730 * @m: insn mode, see enum insn_mode 731 * 732 * Returns: 733 * 0: if decoding succeeded 734 * < 0: otherwise. 735 */ 736int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) 737{ 738 int ret; 739 740#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */ 741 742 if (m == INSN_MODE_KERN) 743 insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); 744 else 745 insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); 746 747 ret = insn_get_length(insn); 748 if (ret) 749 return ret; 750 751 if (insn_complete(insn)) 752 return 0; 753 754 return -EINVAL; 755}