assembler-a64.h
// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_A64_ASSEMBLER_A64_H_
#define VIXL_A64_ASSEMBLER_A64_H_


#include "vixl/globals.h"
#include "vixl/invalset.h"
#include "vixl/utils.h"
#include "vixl/code-buffer.h"
#include "vixl/a64/instructions-a64.h"

namespace vixl {

typedef uint64_t RegList;
static const int kRegListSizeInBits = sizeof(RegList) * 8;


// Registers.

// Some CPURegister methods can return Register or VRegister types, so we need
// to declare them in advance.
class Register;
class VRegister;

class CPURegister {
 public:
  enum RegisterType {
    // The kInvalid value is used to detect uninitialized static instances,
    // which are always zero-initialized before any constructors are called.
    kInvalid = 0,
    kRegister,
    kVRegister,
    kFPRegister = kVRegister,
    kNoRegister
  };

  CPURegister() : code_(0), size_(0), type_(kNoRegister) {
    VIXL_ASSERT(!IsValid());
    VIXL_ASSERT(IsNone());
  }

  CPURegister(unsigned code, unsigned size, RegisterType type)
      : code_(code), size_(size), type_(type) {
    VIXL_ASSERT(IsValidOrNone());
  }

  unsigned code() const {
    VIXL_ASSERT(IsValid());
    return code_;
  }

  RegisterType type() const {
    VIXL_ASSERT(IsValidOrNone());
    return type_;
  }

  RegList Bit() const {
    VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
    return IsValid() ?
        (static_cast<RegList>(1) << code_) : 0;
  }

  unsigned size() const {
    VIXL_ASSERT(IsValid());
    return size_;
  }

  int SizeInBytes() const {
    VIXL_ASSERT(IsValid());
    VIXL_ASSERT(size() % 8 == 0);
    return size_ / 8;
  }

  int SizeInBits() const {
    VIXL_ASSERT(IsValid());
    return size_;
  }

  bool Is8Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 8;
  }

  bool Is16Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 16;
  }

  bool Is32Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 32;
  }

  bool Is64Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 64;
  }

  bool Is128Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 128;
  }

  bool IsValid() const {
    if (IsValidRegister() || IsValidVRegister()) {
      VIXL_ASSERT(!IsNone());
      return true;
    } else {
      // This assert is hit when the register has not been properly
      // initialized. One cause for this can be an initialisation order
      // fiasco. See https://isocpp.org/wiki/faq/ctors#static-init-order for
      // some details.
      VIXL_ASSERT(IsNone());
      return false;
    }
  }

  bool IsValidRegister() const {
    return IsRegister() &&
           ((size_ == kWRegSize) || (size_ == kXRegSize)) &&
           ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode));
  }

  bool IsValidVRegister() const {
    return IsVRegister() &&
           ((size_ == kBRegSize) || (size_ == kHRegSize) ||
            (size_ == kSRegSize) || (size_ == kDRegSize) ||
            (size_ == kQRegSize)) &&
           (code_ < kNumberOfVRegisters);
  }

  bool IsValidFPRegister() const {
    return IsFPRegister() && (code_ < kNumberOfVRegisters);
  }

  bool IsNone() const {
    // kNoRegister types should always have size 0 and code 0.
    VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
    VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));

    return type_ == kNoRegister;
  }

  bool Aliases(const CPURegister& other) const {
    VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
    return (code_ == other.code_) && (type_ == other.type_);
  }

  bool Is(const CPURegister& other) const {
    VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
    return Aliases(other) && (size_ == other.size_);
  }

  bool IsZero() const {
    VIXL_ASSERT(IsValid());
    return IsRegister() && (code_ == kZeroRegCode);
  }

  bool IsSP() const {
    VIXL_ASSERT(IsValid());
    return IsRegister() && (code_ == kSPRegInternalCode);
  }

  bool IsRegister() const {
    return type_ == kRegister;
  }

  bool IsVRegister() const {
    return type_ == kVRegister;
  }

  bool IsFPRegister() const {
    return IsS() || IsD();
  }

  bool IsW() const { return IsValidRegister() && Is32Bits(); }
  bool IsX() const { return IsValidRegister() && Is64Bits(); }

  // These assertions ensure that the size and type of the register are as
  // described. They do not consider the number of lanes that make up a
  // vector. So, for example, Is8B() implies IsD(), and Is1D() implies IsD(),
  // but IsD() does not imply Is1D() or Is8B().
  // Check the number of lanes, ie. the format of the vector, using methods
  // such as Is8B(), Is1D(), etc. in the VRegister class.
  bool IsV() const { return IsVRegister(); }
  bool IsB() const { return IsV() && Is8Bits(); }
  bool IsH() const { return IsV() && Is16Bits(); }
  bool IsS() const { return IsV() && Is32Bits(); }
  bool IsD() const { return IsV() && Is64Bits(); }
  bool IsQ() const { return IsV() && Is128Bits(); }

  const Register& W() const;
  const Register& X() const;
  const VRegister& V() const;
  const VRegister& B() const;
  const VRegister& H() const;
  const VRegister& S() const;
  const VRegister& D() const;
  const VRegister& Q() const;

  bool IsSameSizeAndType(const CPURegister& other) const {
    return (size_ == other.size_) && (type_ == other.type_);
  }

 protected:
  unsigned code_;
  unsigned size_;
  RegisterType type_;

 private:
  bool IsValidOrNone() const {
    return IsValid() || IsNone();
  }
};


class Register : public CPURegister {
 public:
  Register() : CPURegister() {}
  explicit Register(const CPURegister& other)
      : CPURegister(other.code(), other.size(), other.type()) {
    VIXL_ASSERT(IsValidRegister());
  }
  Register(unsigned code, unsigned size)
      : CPURegister(code, size, kRegister) {}

  bool IsValid() const {
    VIXL_ASSERT(IsRegister() || IsNone());
    return IsValidRegister();
  }

  static const Register& WRegFromCode(unsigned code);
  static const Register& XRegFromCode(unsigned code);

 private:
  static const Register wregisters[];
  static const Register xregisters[];
};


class VRegister : public CPURegister {
 public:
  VRegister() : CPURegister(), lanes_(1) {}
  explicit VRegister(const CPURegister& other)
      : CPURegister(other.code(), other.size(), other.type()), lanes_(1) {
    VIXL_ASSERT(IsValidVRegister());
    VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }
  VRegister(unsigned code, unsigned size, unsigned lanes = 1)
      : CPURegister(code, size, kVRegister), lanes_(lanes) {
    VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }
  VRegister(unsigned code, VectorFormat format)
      : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister),
        lanes_(IsVectorFormat(format) ?
            LaneCountFromFormat(format) : 1) {
    VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }

  bool IsValid() const {
    VIXL_ASSERT(IsVRegister() || IsNone());
    return IsValidVRegister();
  }

  static const VRegister& BRegFromCode(unsigned code);
  static const VRegister& HRegFromCode(unsigned code);
  static const VRegister& SRegFromCode(unsigned code);
  static const VRegister& DRegFromCode(unsigned code);
  static const VRegister& QRegFromCode(unsigned code);
  static const VRegister& VRegFromCode(unsigned code);

  VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
  VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
  VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
  VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
  VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
  VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
  VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
  VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }

  bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
  bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
  bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
  bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
  bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
  bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
  bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
  bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }

  // For consistency, we assert the number of lanes of these scalar registers,
  // even though there are no vectors of equivalent total size with which they
  // could alias.
  bool Is1B() const {
    VIXL_ASSERT(!(Is8Bits() && IsVector()));
    return Is8Bits();
  }
  bool Is1H() const {
    VIXL_ASSERT(!(Is16Bits() && IsVector()));
    return Is16Bits();
  }
  bool Is1S() const {
    VIXL_ASSERT(!(Is32Bits() && IsVector()));
    return Is32Bits();
  }

  bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; }
  bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; }
  bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; }
  bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; }

  int lanes() const {
    return lanes_;
  }

  bool IsScalar() const {
    return lanes_ == 1;
  }

  bool IsVector() const {
    return lanes_ > 1;
  }

  bool IsSameFormat(const VRegister& other) const {
    return (size_ == other.size_) && (lanes_ == other.lanes_);
  }

  unsigned LaneSizeInBytes() const {
    return SizeInBytes() / lanes_;
  }

  unsigned LaneSizeInBits() const {
    return LaneSizeInBytes() * 8;
  }

 private:
  static const VRegister bregisters[];
  static const VRegister hregisters[];
  static const VRegister sregisters[];
  static const VRegister dregisters[];
  static const VRegister qregisters[];
  static const VRegister vregisters[];
  int lanes_;
};


// Backward compatibility for FPRegisters.
typedef VRegister FPRegister;
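
// For example (an illustrative sketch): v0.V4S() yields a 128-bit view of v0
// with four 32-bit lanes, while v0.V8B() yields a 64-bit view with eight
// 8-bit lanes; the matching predicates Is4S() and Is8B() then hold for those
// views.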

// No*Reg is used to indicate an unused argument, or an error case. Note that
// these all compare equal (using the Is() method). The Register and VRegister
// variants are provided for convenience.
const Register NoReg;
const VRegister NoVReg;
const FPRegister NoFPReg;  // For backward compatibility.
const CPURegister NoCPUReg;


#define DEFINE_REGISTERS(N)        \
const Register w##N(N, kWRegSize); \
const Register x##N(N, kXRegSize);
REGISTER_CODE_LIST(DEFINE_REGISTERS)
#undef DEFINE_REGISTERS
const Register wsp(kSPRegInternalCode, kWRegSize);
const Register sp(kSPRegInternalCode, kXRegSize);


#define DEFINE_VREGISTERS(N)        \
const VRegister b##N(N, kBRegSize); \
const VRegister h##N(N, kHRegSize); \
const VRegister s##N(N, kSRegSize); \
const VRegister d##N(N, kDRegSize); \
const VRegister q##N(N, kQRegSize); \
const VRegister v##N(N, kQRegSize);
REGISTER_CODE_LIST(DEFINE_VREGISTERS)
#undef DEFINE_VREGISTERS


// Register aliases.
const Register ip0 = x16;
const Register ip1 = x17;
const Register lr = x30;
const Register xzr = x31;
const Register wzr = w31;


// AreAliased returns true if any of the named registers overlap. Arguments
// set to NoReg are ignored. The system stack pointer may be specified.
bool AreAliased(const CPURegister& reg1,
                const CPURegister& reg2,
                const CPURegister& reg3 = NoReg,
                const CPURegister& reg4 = NoReg,
                const CPURegister& reg5 = NoReg,
                const CPURegister& reg6 = NoReg,
                const CPURegister& reg7 = NoReg,
                const CPURegister& reg8 = NoReg);


// AreSameSizeAndType returns true if all of the specified registers have the
// same size, and are of the same type. The system stack pointer may be
// specified. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoCPUReg).
bool AreSameSizeAndType(const CPURegister& reg1,
                        const CPURegister& reg2,
                        const CPURegister& reg3 = NoCPUReg,
                        const CPURegister& reg4 = NoCPUReg,
                        const CPURegister& reg5 = NoCPUReg,
                        const CPURegister& reg6 = NoCPUReg,
                        const CPURegister& reg7 = NoCPUReg,
                        const CPURegister& reg8 = NoCPUReg);


// AreSameFormat returns true if all of the specified VRegisters have the same
// vector format. Arguments set to NoReg are ignored, as are any subsequent
// arguments. At least one argument (reg1) must be valid (not NoVReg).
bool AreSameFormat(const VRegister& reg1,
                   const VRegister& reg2,
                   const VRegister& reg3 = NoVReg,
                   const VRegister& reg4 = NoVReg);


// AreConsecutive returns true if all of the specified VRegisters are
// consecutive in the register file. Arguments set to NoReg are ignored, as
// are any subsequent arguments. At least one argument (reg1) must be valid
// (not NoVReg).
bool AreConsecutive(const VRegister& reg1,
                    const VRegister& reg2,
                    const VRegister& reg3 = NoVReg,
                    const VRegister& reg4 = NoVReg);
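
// For example, AreAliased(x1, w1) is true: w1 and x1 share register code 1
// and type kRegister, so they overlap even though their sizes differ.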

// Lists of registers.
class CPURegList {
 public:
  explicit CPURegList(CPURegister reg1,
                      CPURegister reg2 = NoCPUReg,
                      CPURegister reg3 = NoCPUReg,
                      CPURegister reg4 = NoCPUReg)
      : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
        size_(reg1.size()), type_(reg1.type()) {
    VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
    VIXL_ASSERT(IsValid());
  }

  CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
      : list_(list), size_(size), type_(type) {
    VIXL_ASSERT(IsValid());
  }

  CPURegList(CPURegister::RegisterType type, unsigned size,
             unsigned first_reg, unsigned last_reg)
      : size_(size), type_(type) {
    VIXL_ASSERT(((type == CPURegister::kRegister) &&
                 (last_reg < kNumberOfRegisters)) ||
                ((type == CPURegister::kVRegister) &&
                 (last_reg < kNumberOfVRegisters)));
    VIXL_ASSERT(last_reg >= first_reg);
    list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
    list_ &= ~((UINT64_C(1) << first_reg) - 1);
    VIXL_ASSERT(IsValid());
  }

  CPURegister::RegisterType type() const {
    VIXL_ASSERT(IsValid());
    return type_;
  }

  // Combine another CPURegList into this one. Registers that already exist in
  // this list are left unchanged. The type and size of the registers in the
  // 'other' list must match those in this list.
  void Combine(const CPURegList& other) {
    VIXL_ASSERT(IsValid());
    VIXL_ASSERT(other.type() == type_);
    VIXL_ASSERT(other.RegisterSizeInBits() == size_);
    list_ |= other.list();
  }

  // Remove every register in the other CPURegList from this one. Registers
  // that do not exist in this list are ignored. The type and size of the
  // registers in the 'other' list must match those in this list.
  void Remove(const CPURegList& other) {
    VIXL_ASSERT(IsValid());
    VIXL_ASSERT(other.type() == type_);
    VIXL_ASSERT(other.RegisterSizeInBits() == size_);
    list_ &= ~other.list();
  }

  // Variants of Combine and Remove which take a single register.
  void Combine(const CPURegister& other) {
    VIXL_ASSERT(other.type() == type_);
    VIXL_ASSERT(other.size() == size_);
    Combine(other.code());
  }

  void Remove(const CPURegister& other) {
    VIXL_ASSERT(other.type() == type_);
    VIXL_ASSERT(other.size() == size_);
    Remove(other.code());
  }

  // Variants of Combine and Remove which take a single register by its code;
  // the type and size of the register are inferred from this list.
  void Combine(int code) {
    VIXL_ASSERT(IsValid());
    VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
    list_ |= (UINT64_C(1) << code);
  }

  void Remove(int code) {
    VIXL_ASSERT(IsValid());
    VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
    list_ &= ~(UINT64_C(1) << code);
  }

  static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
    VIXL_ASSERT(list_1.type_ == list_2.type_);
    VIXL_ASSERT(list_1.size_ == list_2.size_);
    return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
  }
  static CPURegList Union(const CPURegList& list_1,
                          const CPURegList& list_2,
                          const CPURegList& list_3);
  static CPURegList Union(const CPURegList& list_1,
                          const CPURegList& list_2,
                          const CPURegList& list_3,
                          const CPURegList& list_4);

  static CPURegList Intersection(const CPURegList& list_1,
                                 const CPURegList& list_2) {
    VIXL_ASSERT(list_1.type_ == list_2.type_);
    VIXL_ASSERT(list_1.size_ == list_2.size_);
    return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
  }
  static CPURegList Intersection(const CPURegList& list_1,
                                 const CPURegList& list_2,
                                 const CPURegList& list_3);
  static CPURegList Intersection(const CPURegList& list_1,
                                 const CPURegList& list_2,
                                 const CPURegList& list_3,
                                 const CPURegList& list_4);

  bool Overlaps(const CPURegList& other) const {
    return (type_ == other.type_) && ((list_ & other.list_) != 0);
  }

  RegList list() const {
    VIXL_ASSERT(IsValid());
    return list_;
  }

  void set_list(RegList new_list) {
    VIXL_ASSERT(IsValid());
    list_ = new_list;
  }

  // Remove all callee-saved registers from the list. This can be useful when
  // preparing registers for an AAPCS64 function call, for example.
  void RemoveCalleeSaved();

  CPURegister PopLowestIndex();
  CPURegister PopHighestIndex();

  // AAPCS64 callee-saved registers.
  static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
  static CPURegList GetCalleeSavedV(unsigned size = kDRegSize);

  // AAPCS64 caller-saved registers. Note that this includes lr.
  // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
  // 64 bits being caller-saved.
  static CPURegList GetCallerSaved(unsigned size = kXRegSize);
  static CPURegList GetCallerSavedV(unsigned size = kDRegSize);

  bool IsEmpty() const {
    VIXL_ASSERT(IsValid());
    return list_ == 0;
  }

  bool IncludesAliasOf(const CPURegister& other) const {
    VIXL_ASSERT(IsValid());
    return (type_ == other.type()) && ((other.Bit() & list_) != 0);
  }

  bool IncludesAliasOf(int code) const {
    VIXL_ASSERT(IsValid());
    // Test the bit for `code`, rather than using `code` itself as a mask.
    return ((UINT64_C(1) << code) & list_) != 0;
  }

  int Count() const {
    VIXL_ASSERT(IsValid());
    return CountSetBits(list_);
  }

  unsigned RegisterSizeInBits() const {
    VIXL_ASSERT(IsValid());
    return size_;
  }

  unsigned RegisterSizeInBytes() const {
    int size_in_bits = RegisterSizeInBits();
    VIXL_ASSERT((size_in_bits % 8) == 0);
    return size_in_bits / 8;
  }

  unsigned TotalSizeInBytes() const {
    VIXL_ASSERT(IsValid());
    return RegisterSizeInBytes() * Count();
  }

 private:
  RegList list_;
  unsigned size_;
  CPURegister::RegisterType type_;

  bool IsValid() const;
};
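
// For example, a scratch list can be managed like this (an illustrative
// sketch; the names are arbitrary):
//   CPURegList scratch(x0, x1, x2);              // {x0, x1, x2}
//   scratch.Combine(x3);                         // {x0, x1, x2, x3}
//   scratch.Remove(x1);                          // {x0, x2, x3}
//   CPURegister tmp = scratch.PopLowestIndex();  // tmp is x0.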

// AAPCS64 callee-saved registers.
extern const CPURegList kCalleeSaved;
extern const CPURegList kCalleeSavedV;


// AAPCS64 caller-saved registers. Note that this includes lr.
extern const CPURegList kCallerSaved;
extern const CPURegList kCallerSavedV;


// Operand.
class Operand {
 public:
  // #<immediate>
  // where <immediate> is int64_t.
  // This is allowed to be an implicit constructor because Operand is
  // a wrapper class that doesn't normally perform any type conversion.
  Operand(int64_t immediate = 0);       // NOLINT(runtime/explicit)

  // rm, {<shift> #<shift_amount>}
  // where <shift> is one of {LSL, LSR, ASR, ROR}.
  //       <shift_amount> is uint6_t.
  // This is allowed to be an implicit constructor because Operand is
  // a wrapper class that doesn't normally perform any type conversion.
  Operand(Register reg,
          Shift shift = LSL,
          unsigned shift_amount = 0);   // NOLINT(runtime/explicit)

  // rm, {<extend> {#<shift_amount>}}
  // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
  //       <shift_amount> is uint2_t.
  explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0);

  bool IsImmediate() const;
  bool IsShiftedRegister() const;
  bool IsExtendedRegister() const;
  bool IsZero() const;

  // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
  // which helps in the encoding of instructions that use the stack pointer.
  Operand ToExtendedRegister() const;

  int64_t immediate() const {
    VIXL_ASSERT(IsImmediate());
    return immediate_;
  }

  Register reg() const {
    VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
    return reg_;
  }

  Shift shift() const {
    VIXL_ASSERT(IsShiftedRegister());
    return shift_;
  }

  Extend extend() const {
    VIXL_ASSERT(IsExtendedRegister());
    return extend_;
  }

  unsigned shift_amount() const {
    VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
    return shift_amount_;
  }

 private:
  int64_t immediate_;
  Register reg_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};


// MemOperand represents the addressing mode of a load or store instruction.
class MemOperand {
 public:
  explicit MemOperand(Register base,
                      int64_t offset = 0,
                      AddrMode addrmode = Offset);
  MemOperand(Register base,
             Register regoffset,
             Shift shift = LSL,
             unsigned shift_amount = 0);
  MemOperand(Register base,
             Register regoffset,
             Extend extend,
             unsigned shift_amount = 0);
  MemOperand(Register base,
             const Operand& offset,
             AddrMode addrmode = Offset);

  const Register& base() const { return base_; }
  const Register& regoffset() const { return regoffset_; }
  int64_t offset() const { return offset_; }
  AddrMode addrmode() const { return addrmode_; }
  Shift shift() const { return shift_; }
  Extend extend() const { return extend_; }
  unsigned shift_amount() const { return shift_amount_; }
  bool IsImmediateOffset() const;
  bool IsRegisterOffset() const;
  bool IsPreIndex() const;
  bool IsPostIndex() const;

  void AddOffset(int64_t offset);

 private:
  Register base_;
  Register regoffset_;
  int64_t offset_;
  AddrMode addrmode_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};
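
// For example, the common A64 addressing modes map to MemOperand like this
// (an illustrative sketch):
//   MemOperand(x0)                // [x0]
//   MemOperand(x0, 8)             // [x0, #8]
//   MemOperand(x0, 8, PreIndex)   // [x0, #8]!
//   MemOperand(x0, 8, PostIndex)  // [x0], #8
//   MemOperand(x0, x1, LSL, 3)    // [x0, x1, LSL #3]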

class LabelTestHelper;  // Forward declaration.


class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // If the label has been linked to, it needs to be bound to a target.
    VIXL_ASSERT(!IsLinked() || IsBound());
  }

  bool IsBound() const { return location_ >= 0; }
  bool IsLinked() const { return !links_.empty(); }

  ptrdiff_t location() const { return location_; }

  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor> LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) {
    links_.erase(instruction);
  }

  void ClearAllLinks() {
    links_.clear();
  }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a
  //   label are generated at an offset in the code generation buffer greater
  //   than any other branch to that same label already generated. As an
  //   example, this can be broken when an instruction is patched to become a
  //   branch. Note that the code will still work, but the complexity
  //   considerations below may locally not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  //   branching to the same unbound label going out of range, veneers are
  //   generated in growing order of the branch instruction offset from the
  //   start of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is
  // the same as for finding the element, ie. O(n), where n is the number of
  // links in the set.
  // This could be reduced to O(1) by using the same trick as used when
  // tracking branch information for veneers: split the container to use one
  // set per type of branch. With that setup, when a veneer is created and the
  // link needs to be deleted, if the two points above hold, it must be the
  // minimum element of the set for its type of branch, and that minimum
  // element will be accessible in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

  // It is not safe to copy labels, so disable the copy constructor and
  // operator by declaring them private (without an implementation).
  Label(const Label&);
  void operator=(const Label&);

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to
  // distant targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};


// Required InvalSet template specialisations.
#define INVAL_SET_TEMPLATE_PARAMETERS \
    ptrdiff_t,                        \
    Label::kNPreallocatedLinks,       \
    ptrdiff_t,                        \
    Label::kInvalidLinkKey,           \
    Label::kReclaimFrom,              \
    Label::kReclaimFactor
template<>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::Key(
    const ptrdiff_t& element) {
  return element;
}
template<>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(
    ptrdiff_t* element, ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS


class Assembler;
class LiteralPool;

// A literal is a 32-bit or 64-bit piece of data stored in the instruction
// stream and loaded through a pc relative load. The same literal can be
// referred to by multiple instructions but a literal can only reside at one
// place in memory. A literal can be used by a load before or after being
// placed in memory.
//
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//  1) the literal is placed: the offset (stored as offset + 1) is used to
//     resolve any subsequent load using the literal.
//  2) the literal is not placed: offset is the offset of the last load using
//     the literal (stored as -offset - 1). If multiple loads refer to this
//     literal then the last load holds the offset of the preceding load and
//     all loads form a chain. Once the literal is placed all the loads in the
//     chain are resolved and future loads fall back to possibility 1.
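//
// To make the encoding concrete (an illustrative walk-through): a literal
// that is neither used nor placed has offset_ == 0. If it is then placed at
// buffer offset 16, set_offset(16) stores offset_ == 17. If it is instead
// first used by a load at buffer offset 8, set_last_use(8) stores
// offset_ == -9; IsUsed() and IsPlaced() below decode the sign.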
class RawLiteral {
 public:
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they
  // are actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  size_t size() {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) ||
                (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  uint64_t raw_value128_low64() {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  uint64_t raw_value128_high64() {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  uint64_t raw_value64() {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  uint32_t raw_value32() {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(is_uint32(low64_) || is_int32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  bool IsUsed() { return offset_ < 0; }
  bool IsPlaced() { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const {
    return literal_pool_;
  }

  ptrdiff_t offset() {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }

 protected:
  void set_offset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  ptrdiff_t last_use() {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  void set_last_use(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};


template <typename T>
class Literal : public RawLiteral {
 public:
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  Literal(T high64, T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}
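
  // A typical use, as an illustrative sketch (`assm` is an arbitrary name):
  // create the literal, load from it with the literal-pool overload of
  // Assembler::ldr() declared further down, then emit it with
  // Assembler::place():
  //   Literal<uint64_t> forty_two(42);
  //   assm.ldr(x0, &forty_two);  // The load is linked to the literal.
  //   assm.place(&forty_two);    // The value is written; the load resolves.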
  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows
  // its offset from there. This also allows patching the value after the code
  // has been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (size()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + offset()) = raw_value32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + offset()) = raw_value64();
        break;
      default:
        VIXL_ASSERT(size() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + offset());
        *base_address = raw_value128_low64();
        *(base_address + 1) = raw_value128_high64();
    }
  }
};


// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};


// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};


// Assembler.
class Assembler {
 public:
  Assembler(size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode);
  Assembler(byte* buffer, size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode);
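
  // A minimal lifecycle, as an illustrative sketch (`assm` is an arbitrary
  // name): construct with a capacity, emit instructions, then finalize before
  // the generated code is copied or executed:
  //   Assembler assm(4096);
  //   assm.add(x0, x0, 1);
  //   assm.FinalizeCode();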
  // The destructor asserts that one of the following is true:
  //  * The Assembler object has not been used.
  //  * Nothing has been emitted since the last Reset() call.
  //  * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler();

  // System functions.

  // Start generating code from the beginning of the buffer, discarding any
  // code and data that has already been emitted into the buffer.
  void Reset();

  // Finalize a code buffer of generated instructions. This function must be
  // called before executing or copying code from the buffer.
  void FinalizeCode();

  // Label.
  // Bind a label to the current PC.
  void bind(Label* label);

  // Bind a label to a specified offset from the start of the buffer.
  void BindToOffset(Label* label, ptrdiff_t offset);

  // Place a literal at the current PC.
  void place(RawLiteral* literal);

  ptrdiff_t CursorOffset() const {
    return buffer_->CursorOffset();
  }

  ptrdiff_t BufferEndOffset() const {
    return static_cast<ptrdiff_t>(buffer_->capacity());
  }

  // Return the address of an offset in the buffer.
  template <typename T>
  T GetOffsetAddress(ptrdiff_t offset) const {
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return buffer_->GetOffsetAddress<T>(offset);
  }

  // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetOffsetAddress<T>(label->location());
  }

  // Return the address of the cursor.
  template <typename T>
  T GetCursorAddress() const {
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetOffsetAddress<T>(CursorOffset());
  }

  // Return the address of the start of the buffer.
  template <typename T>
  T GetStartAddress() const {
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetOffsetAddress<T>(0);
  }

  Instruction* InstructionAt(ptrdiff_t instruction_offset) {
    return GetOffsetAddress<Instruction*>(instruction_offset);
  }

  ptrdiff_t InstructionOffset(Instruction* instruction) {
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset = instruction - GetStartAddress<Instruction*>();
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(BufferCapacity())));
    return offset;
  }

  // Instruction set functions.

  // Branch / Jump instructions.
  // Branch to register.
  void br(const Register& xn);

  // Branch with link to register.
  void blr(const Register& xn);

  // Branch to register with return hint.
  void ret(const Register& xn = lr);

  // Unconditional branch to label.
  void b(Label* label);

  // Conditional branch to label.
  void b(Label* label, Condition cond);

  // Unconditional branch to PC offset.
  void b(int imm26);

  // Conditional branch to PC offset.
  void b(int imm19, Condition cond);

  // Branch with link to label.
  void bl(Label* label);

  // Branch with link to PC offset.
  void bl(int imm26);

  // Compare and branch to label if zero.
  void cbz(const Register& rt, Label* label);

  // Compare and branch to PC offset if zero.
  void cbz(const Register& rt, int imm19);

  // Compare and branch to label if not zero.
  void cbnz(const Register& rt, Label* label);

  // Compare and branch to PC offset if not zero.
  void cbnz(const Register& rt, int imm19);
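
  // A simple countdown loop, as an illustrative sketch: bind() defines the
  // label at the current cursor, and cbnz() then encodes a PC-relative
  // backward branch to it:
  //   Label loop;
  //   assm.bind(&loop);
  //   assm.sub(x0, x0, 1);
  //   assm.cbnz(x0, &loop);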
  // Table lookup from one register.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm);

  // Table lookup from two registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup from three registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup from four registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Table lookup extension from one register.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm);

  // Table lookup extension from two registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup extension from three registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup extension from four registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Test bit and branch to label if zero.
  void tbz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if zero.
  void tbz(const Register& rt, unsigned bit_pos, int imm14);

  // Test bit and branch to label if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, int imm14);

  // Address calculation instructions.
  // Calculate a PC-relative address. Unlike for branches the offset in adr is
  // unscaled (i.e. the result can be unaligned).

  // Calculate the address of a label.
  void adr(const Register& rd, Label* label);

  // Calculate the address of a PC offset.
  void adr(const Register& rd, int imm21);

  // Calculate the page address of a label.
  void adrp(const Register& rd, Label* label);

  // Calculate the page address of a PC offset.
  void adrp(const Register& rd, int imm21);

  // Data Processing instructions.
  // Add.
  void add(const Register& rd,
           const Register& rn,
           const Operand& operand);

  // Add and update status flags.
  void adds(const Register& rd,
            const Register& rn,
            const Operand& operand);

  // Compare negative.
  void cmn(const Register& rn, const Operand& operand);

  // Subtract.
  void sub(const Register& rd,
           const Register& rn,
           const Operand& operand);

  // Subtract and update status flags.
  void subs(const Register& rd,
            const Register& rn,
            const Operand& operand);

  // Compare.
  void cmp(const Register& rn, const Operand& operand);

  // Negate.
  void neg(const Register& rd,
           const Operand& operand);

  // Negate and update status flags.
  void negs(const Register& rd,
            const Operand& operand);

  // Add with carry bit.
  void adc(const Register& rd,
           const Register& rn,
           const Operand& operand);

  // Add with carry bit and update status flags.
  void adcs(const Register& rd,
            const Register& rn,
            const Operand& operand);

  // Subtract with carry bit.
  void sbc(const Register& rd,
           const Register& rn,
           const Operand& operand);

  // Subtract with carry bit and update status flags.
  void sbcs(const Register& rd,
            const Register& rn,
            const Operand& operand);

  // Negate with carry bit.
  void ngc(const Register& rd,
           const Operand& operand);

  // Negate with carry bit and update status flags.
  void ngcs(const Register& rd,
            const Operand& operand);

  // Logical instructions.
  // Bitwise and (A & B).
  void and_(const Register& rd,
            const Register& rn,
            const Operand& operand);

  // Bitwise and (A & B) and update status flags.
  void ands(const Register& rd,
            const Register& rn,
            const Operand& operand);

  // Bit test and set flags.
  void tst(const Register& rn, const Operand& operand);

  // Bit clear (A & ~B).
  void bic(const Register& rd,
           const Register& rn,
           const Operand& operand);

  // Bit clear (A & ~B) and update status flags.
  void bics(const Register& rd,
            const Register& rn,
            const Operand& operand);

  // Bitwise or (A | B).
  void orr(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise nor (A | ~B).
  void orn(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor/xor (A ^ B).
  void eor(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise enor/xnor (A ^ ~B).
  void eon(const Register& rd, const Register& rn, const Operand& operand);

  // Logical shift left by variable.
  void lslv(const Register& rd, const Register& rn, const Register& rm);

  // Logical shift right by variable.
  void lsrv(const Register& rd, const Register& rn, const Register& rm);

  // Arithmetic shift right by variable.
  void asrv(const Register& rd, const Register& rn, const Register& rm);

  // Rotate right by variable.
  void rorv(const Register& rd, const Register& rn, const Register& rm);

  // Bitfield instructions.
  // Bitfield move.
  void bfm(const Register& rd,
           const Register& rn,
           unsigned immr,
           unsigned imms);

  // Signed bitfield move.
  void sbfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Unsigned bitfield move.
  void ubfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Bfm aliases.
  // Bitfield insert.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= rn.size());
    bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
  }

  // Bitfield extract and insert low.
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= rn.size());
    bfm(rd, rn, lsb, lsb + width - 1);
  }

  // Sbfm aliases.
  // Arithmetic shift right.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < rd.size());
    sbfm(rd, rn, shift, rd.size() - 1);
  }
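
  // As a worked example of the alias arithmetic above: for 32-bit registers,
  // bfi(w0, w1, 8, 4) resolves to bfm(w0, w1, (32 - 8) & 31, 4 - 1), i.e.
  // bfm(w0, w1, 24, 3), which inserts bits <3:0> of w1 into bits <11:8> of
  // w0.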
  // Signed bitfield insert with zero at right.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= rn.size());
    sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
  }

  // Signed bitfield extract.
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= rn.size());
    sbfm(rd, rn, lsb, lsb + width - 1);
  }

  // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 7);
  }

  // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 15);
  }

  // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 31);
  }

  // Ubfm aliases.
  // Logical shift left.
  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    unsigned reg_size = rd.size();
    VIXL_ASSERT(shift < reg_size);
    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
  }

  // Logical shift right.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < rd.size());
    ubfm(rd, rn, shift, rd.size() - 1);
  }

  // Unsigned bitfield insert with zero at right.
  void ubfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= rn.size());
    ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
  }

  // Unsigned bitfield extract.
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= rn.size());
    ubfm(rd, rn, lsb, lsb + width - 1);
  }

  // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 7);
  }

  // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 15);
  }

  // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 31);
  }

  // Extract.
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);
  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);

  // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }

  // Conditional comparison.
  // Conditional compare negative.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& rd,
              const Register& rn,
              const Register& rm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& rd,
              const Register& rn,
              const Register& rm);

  // CRC-32 checksum from word.
  void crc32w(const Register& rd,
              const Register& rn,
              const Register& rm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& rd,
              const Register& rn,
              const Register& rm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& rd,
               const Register& rn,
               const Register& rm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& rd,
               const Register& rn,
               const Register& rm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& rd,
               const Register& rn,
               const Register& rm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& rd,
               const Register& rn,
               const Register& rm);

  // Multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& rd, const Register& rn, const Register& rm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& rd,
              const Register& rn,
              const Register& rm,
              const Register& ra);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& rd,
              const Register& rn,
              const Register& rm,
              const Register& ra);

  // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& rd,
             const Register& rn,
             const Register& rm) {
    umaddl(rd, rn, rm, xzr);
  }

  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd,
             const Register& xn,
             const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& rd,
              const Register& rn,
              const Register& rm,
              const Register& ra);
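
  // For instance, the long multiply-accumulate forms combine 32-bit sources
  // with a 64-bit accumulator: smaddl(x0, w1, w2, x3) computes
  // x0 = x3 + (sign-extended w1 * sign-extended w2).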
  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& rd,
              const Register& rn,
              const Register& rm,
              const Register& ra);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& rd, const Register& rn);

  // Reverse bytes.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits.
  void cls(const Register& rd, const Register& rn);

  // Memory instructions.
  // Load integer or FP register.
  void ldr(const CPURegister& rt, const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt, const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& rt, const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
  void ldrb(const Register& rt, const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store byte.
  void strb(const Register& rt, const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte with sign extension.
  void ldrsb(const Register& rt, const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word.
  void ldrh(const Register& rt, const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store half-word.
  void strh(const Register& rt, const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt, const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load integer or FP register (with unscaled offset).
  void ldur(const CPURegister& rt, const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store integer or FP register (with unscaled offset).
  void stur(const CPURegister& rt, const MemOperand& dst,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load word with sign extension.
  void ldursw(const Register& rt, const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte (with unscaled offset).
  void ldurb(const Register& rt, const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store byte (with unscaled offset).
  void sturb(const Register& rt, const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte with sign extension (and unscaled offset).
  void ldursb(const Register& rt, const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);
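
  // As a sketch of how the scaling options behave: ldr(w0, MemOperand(x1, 3))
  // cannot use a scaled immediate offset (3 is not a multiple of the 4-byte
  // access size), so with the default PreferScaledOffset an unscaled-offset
  // encoding is emitted instead; RequireScaledOffset would forbid that
  // fallback.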
1800 void ldurh(const Register& rt, const MemOperand& src, 1801 LoadStoreScalingOption option = PreferUnscaledOffset); 1802 1803 // Store half-word (with unscaled offset). 1804 void sturh(const Register& rt, const MemOperand& dst, 1805 LoadStoreScalingOption option = PreferUnscaledOffset); 1806 1807 // Load half-word with sign extension (and unscaled offset). 1808 void ldursh(const Register& rt, const MemOperand& src, 1809 LoadStoreScalingOption option = PreferUnscaledOffset); 1810 1811 // Load integer or FP register pair. 1812 void ldp(const CPURegister& rt, const CPURegister& rt2, 1813 const MemOperand& src); 1814 1815 // Store integer or FP register pair. 1816 void stp(const CPURegister& rt, const CPURegister& rt2, 1817 const MemOperand& dst); 1818 1819 // Load word pair with sign extension. 1820 void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src); 1821 1822 // Load integer or FP register pair, non-temporal. 1823 void ldnp(const CPURegister& rt, const CPURegister& rt2, 1824 const MemOperand& src); 1825 1826 // Store integer or FP register pair, non-temporal. 1827 void stnp(const CPURegister& rt, const CPURegister& rt2, 1828 const MemOperand& dst); 1829 1830 // Load integer or FP register from literal pool. 1831 void ldr(const CPURegister& rt, RawLiteral* literal); 1832 1833 // Load word with sign extension from literal pool. 1834 void ldrsw(const Register& rt, RawLiteral* literal); 1835 1836 // Load integer or FP register from pc + imm19 << 2. 1837 void ldr(const CPURegister& rt, int imm19); 1838 1839 // Load word with sign extension from pc + imm19 << 2. 1840 void ldrsw(const Register& rt, int imm19); 1841 1842 // Store exclusive byte. 1843 void stxrb(const Register& rs, const Register& rt, const MemOperand& dst); 1844 1845 // Store exclusive half-word. 1846 void stxrh(const Register& rs, const Register& rt, const MemOperand& dst); 1847 1848 // Store exclusive register. 1849 void stxr(const Register& rs, const Register& rt, const MemOperand& dst); 1850 1851 // Load exclusive byte. 1852 void ldxrb(const Register& rt, const MemOperand& src); 1853 1854 // Load exclusive half-word. 1855 void ldxrh(const Register& rt, const MemOperand& src); 1856 1857 // Load exclusive register. 1858 void ldxr(const Register& rt, const MemOperand& src); 1859 1860 // Store exclusive register pair. 1861 void stxp(const Register& rs, 1862 const Register& rt, 1863 const Register& rt2, 1864 const MemOperand& dst); 1865 1866 // Load exclusive register pair. 1867 void ldxp(const Register& rt, const Register& rt2, const MemOperand& src); 1868 1869 // Store-release exclusive byte. 1870 void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst); 1871 1872 // Store-release exclusive half-word. 1873 void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst); 1874 1875 // Store-release exclusive register. 1876 void stlxr(const Register& rs, const Register& rt, const MemOperand& dst); 1877 1878 // Load-acquire exclusive byte. 1879 void ldaxrb(const Register& rt, const MemOperand& src); 1880 1881 // Load-acquire exclusive half-word. 1882 void ldaxrh(const Register& rt, const MemOperand& src); 1883 1884 // Load-acquire exclusive register. 1885 void ldaxr(const Register& rt, const MemOperand& src); 1886 1887 // Store-release exclusive register pair. 1888 void stlxp(const Register& rs, 1889 const Register& rt, 1890 const Register& rt2, 1891 const MemOperand& dst); 1892 1893 // Load-acquire exclusive register pair. 
1894 void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1895
1896 // Store-release byte.
1897 void stlrb(const Register& rt, const MemOperand& dst);
1898
1899 // Store-release half-word.
1900 void stlrh(const Register& rt, const MemOperand& dst);
1901
1902 // Store-release register.
1903 void stlr(const Register& rt, const MemOperand& dst);
1904
1905 // Load-acquire byte.
1906 void ldarb(const Register& rt, const MemOperand& src);
1907
1908 // Load-acquire half-word.
1909 void ldarh(const Register& rt, const MemOperand& src);
1910
1911 // Load-acquire register.
1912 void ldar(const Register& rt, const MemOperand& src);
1913
1914 // Prefetch memory.
1915 void prfm(PrefetchOperation op, const MemOperand& addr,
1916 LoadStoreScalingOption option = PreferScaledOffset);
1917
1918 // Prefetch memory (with unscaled offset).
1919 void prfum(PrefetchOperation op, const MemOperand& addr,
1920 LoadStoreScalingOption option = PreferUnscaledOffset);
1921
1922 // Prefetch memory in the literal pool.
1923 void prfm(PrefetchOperation op, RawLiteral* literal);
1924
1925 // Prefetch from pc + imm19 << 2.
1926 void prfm(PrefetchOperation op, int imm19);
1927
1928 // Move instructions. The default shift of -1 indicates that the move
1929 // instruction will calculate an appropriate 16-bit immediate and left shift
1930 // such that the result equals the 64-bit immediate argument. If an explicit
1931 // left shift is specified (0, 16, 32 or 48), the immediate must be a 16-bit
1932 // value.
1933 //
1934 // For movk, an explicit shift can be used to indicate which half word should
1935 // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
1936 // half word with zero, whereas movk(x0, 0, 48) will overwrite the
// most-significant. (See the worked example following this section.)
1937
1938 // Move immediate and keep.
1939 void movk(const Register& rd, uint64_t imm, int shift = -1) {
1940 MoveWide(rd, imm, shift, MOVK);
1941 }
1942
1943 // Move inverted immediate.
1944 void movn(const Register& rd, uint64_t imm, int shift = -1) {
1945 MoveWide(rd, imm, shift, MOVN);
1946 }
1947
1948 // Move immediate.
1949 void movz(const Register& rd, uint64_t imm, int shift = -1) {
1950 MoveWide(rd, imm, shift, MOVZ);
1951 }
1952
1953 // Misc instructions.
1954 // Monitor debug-mode breakpoint.
1955 void brk(int code);
1956
1957 // Halting debug-mode breakpoint.
1958 void hlt(int code);
1959
1960 // Generate exception targeting EL1.
1961 void svc(int code);
1962
1963 // Move register to register.
1964 void mov(const Register& rd, const Register& rn);
1965
1966 // Move inverted operand to register.
1967 void mvn(const Register& rd, const Operand& operand);
1968
1969 // System instructions.
1970 // Move to register from system register.
1971 void mrs(const Register& rt, SystemRegister sysreg);
1972
1973 // Move from register to system register.
1974 void msr(SystemRegister sysreg, const Register& rt);
1975
1976 // System instruction.
1977 void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr);
1978
1979 // System instruction with pre-encoded op (op1:crn:crm:op2).
1980 void sys(int op, const Register& rt = xzr);
1981
1982 // System data cache operation.
1983 void dc(DataCacheOp op, const Register& rt);
1984
1985 // System instruction cache operation.
1986 void ic(InstructionCacheOp op, const Register& rt);
1987
1988 // System hint.
1989 void hint(SystemHint code);
1990
1991 // Clear exclusive monitor.
1992 void clrex(int imm4 = 0xf);
1993
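// Worked example for the move-wide instructions above (an illustrative
// sketch, not part of this API): a 64-bit constant can be composed 16 bits
// at a time, assuming an Assembler `assm`:
//
//   assm.movz(x0, 0xdead, 48);  // x0 = 0xdead000000000000
//   assm.movk(x0, 0xbeef, 32);  // x0 = 0xdeadbeef00000000
//   assm.movk(x0, 0xcafe, 16);  // x0 = 0xdeadbeefcafe0000
//   assm.movk(x0, 0xf00d, 0);   // x0 = 0xdeadbeefcafef00d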
1994 // Data memory barrier.
1995 void dmb(BarrierDomain domain, BarrierType type);
1996
1997 // Data synchronization barrier.
1998 void dsb(BarrierDomain domain, BarrierType type);
1999
2000 // Instruction synchronization barrier.
2001 void isb();
2002
2003 // Alias for system instructions.
2004 // No-op.
2005 void nop() {
2006 hint(NOP);
2007 }
2008
2009 // FP and NEON instructions.
2010 // Move double precision immediate to FP register.
2011 void fmov(const VRegister& vd, double imm);
2012
2013 // Move single precision immediate to FP register.
2014 void fmov(const VRegister& vd, float imm);
2015
2016 // Move FP register to register.
2017 void fmov(const Register& rd, const VRegister& vn);
2018
2019 // Move register to FP register.
2020 void fmov(const VRegister& vd, const Register& rn);
2021
2022 // Move FP register to FP register.
2023 void fmov(const VRegister& vd, const VRegister& vn);
2024
2025 // Move 64-bit register to top half of 128-bit FP register.
2026 void fmov(const VRegister& vd, int index, const Register& rn);
2027
2028 // Move top half of 128-bit FP register to 64-bit register.
2029 void fmov(const Register& rd, const VRegister& vn, int index);
2030
2031 // FP add.
2032 void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2033
2034 // FP subtract.
2035 void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2036
2037 // FP multiply.
2038 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2039
2040 // FP fused multiply-add.
2041 void fmadd(const VRegister& vd,
2042 const VRegister& vn,
2043 const VRegister& vm,
2044 const VRegister& va);
2045
2046 // FP fused multiply-subtract.
2047 void fmsub(const VRegister& vd,
2048 const VRegister& vn,
2049 const VRegister& vm,
2050 const VRegister& va);
2051
2052 // FP fused multiply-add and negate.
2053 void fnmadd(const VRegister& vd,
2054 const VRegister& vn,
2055 const VRegister& vm,
2056 const VRegister& va);
2057
2058 // FP fused multiply-subtract and negate.
2059 void fnmsub(const VRegister& vd,
2060 const VRegister& vn,
2061 const VRegister& vm,
2062 const VRegister& va);
2063
2064 // FP multiply-negate scalar.
2065 void fnmul(const VRegister& vd,
2066 const VRegister& vn,
2067 const VRegister& vm);
2068
2069 // FP reciprocal exponent scalar.
2070 void frecpx(const VRegister& vd,
2071 const VRegister& vn);
2072
2073 // FP divide.
2074 void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2075
2076 // FP maximum.
2077 void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2078
2079 // FP minimum.
2080 void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2081
2082 // FP maximum number.
2083 void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2084
2085 // FP minimum number.
2086 void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2087
2088 // FP absolute.
2089 void fabs(const VRegister& vd, const VRegister& vn);
2090
2091 // FP negate.
2092 void fneg(const VRegister& vd, const VRegister& vn);
2093
2094 // FP square root.
2095 void fsqrt(const VRegister& vd, const VRegister& vn);
2096
2097 // FP round to integer, nearest with ties to away.
2098 void frinta(const VRegister& vd, const VRegister& vn);
2099
2100 // FP round to integer, implicit rounding.
2101 void frinti(const VRegister& vd, const VRegister& vn);
2102
2103 // FP round to integer, toward minus infinity.
2104 void frintm(const VRegister& vd, const VRegister& vn);
2105
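// The frint* helpers above and below differ only in rounding mode; as a
// worked example (values shown as input -> result):
//   frinta: 2.5 -> 3.0, -2.5 -> -3.0 (ties away from zero)
//   frintm: 2.5 -> 2.0, -2.5 -> -3.0 (towards minus infinity)
//   frintn: 2.5 -> 2.0, -2.5 -> -2.0 (ties to even)
//   frintp: 2.5 -> 3.0, -2.5 -> -2.0 (towards plus infinity)
//   frintz: 2.5 -> 2.0, -2.5 -> -2.0 (towards zero)
// frinti and frintx use the rounding mode currently selected by FPCR;
// frintx additionally raises the Inexact exception when the result differs
// from the input.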
2106 // FP round to integer, nearest with ties to even.
2107 void frintn(const VRegister& vd, const VRegister& vn);
2108
2109 // FP round to integer, toward plus infinity.
2110 void frintp(const VRegister& vd, const VRegister& vn);
2111
2112 // FP round to integer, exact, implicit rounding.
2113 void frintx(const VRegister& vd, const VRegister& vn);
2114
2115 // FP round to integer, towards zero.
2116 void frintz(const VRegister& vd, const VRegister& vn);
2117
2118 void FPCompareMacro(const VRegister& vn,
2119 double value,
2120 FPTrapFlags trap);
2121
2122 void FPCompareMacro(const VRegister& vn,
2123 const VRegister& vm,
2124 FPTrapFlags trap);
2125
2126 // FP compare registers.
2127 void fcmp(const VRegister& vn, const VRegister& vm);
2128
2129 // FP compare immediate.
2130 void fcmp(const VRegister& vn, double value);
2131
2132 void FPCCompareMacro(const VRegister& vn,
2133 const VRegister& vm,
2134 StatusFlags nzcv,
2135 Condition cond,
2136 FPTrapFlags trap);
2137
2138 // FP conditional compare.
2139 void fccmp(const VRegister& vn,
2140 const VRegister& vm,
2141 StatusFlags nzcv,
2142 Condition cond);
2143
2144 // FP signaling compare registers.
2145 void fcmpe(const VRegister& vn, const VRegister& vm);
2146
2147 // FP signaling compare immediate.
2148 void fcmpe(const VRegister& vn, double value);
2149
2150 // FP conditional signaling compare.
2151 void fccmpe(const VRegister& vn,
2152 const VRegister& vm,
2153 StatusFlags nzcv,
2154 Condition cond);
2155
2156 // FP conditional select.
2157 void fcsel(const VRegister& vd,
2158 const VRegister& vn,
2159 const VRegister& vm,
2160 Condition cond);
2161
2162 // Common FP convert functions.
2163 void NEONFPConvertToInt(const Register& rd,
2164 const VRegister& vn,
2165 Instr op);
2166 void NEONFPConvertToInt(const VRegister& vd,
2167 const VRegister& vn,
2168 Instr op);
2169
2170 // FP convert between precisions.
2171 void fcvt(const VRegister& vd, const VRegister& vn);
2172
2173 // FP convert to higher precision.
2174 void fcvtl(const VRegister& vd, const VRegister& vn);
2175
2176 // FP convert to higher precision (second part).
2177 void fcvtl2(const VRegister& vd, const VRegister& vn);
2178
2179 // FP convert to lower precision.
2180 void fcvtn(const VRegister& vd, const VRegister& vn);
2181
2182 // FP convert to lower precision (second part).
2183 void fcvtn2(const VRegister& vd, const VRegister& vn);
2184
2185 // FP convert to lower precision, rounding to odd.
2186 void fcvtxn(const VRegister& vd, const VRegister& vn);
2187
2188 // FP convert to lower precision, rounding to odd (second part).
2189 void fcvtxn2(const VRegister& vd, const VRegister& vn);
2190
2191 // FP convert to signed integer, nearest with ties to away.
2192 void fcvtas(const Register& rd, const VRegister& vn);
2193
2194 // FP convert to unsigned integer, nearest with ties to away.
2195 void fcvtau(const Register& rd, const VRegister& vn);
2196
2197 // FP convert to signed integer, nearest with ties to away.
2198 void fcvtas(const VRegister& vd, const VRegister& vn);
2199
2200 // FP convert to unsigned integer, nearest with ties to away.
2201 void fcvtau(const VRegister& vd, const VRegister& vn);
2202
2203 // FP convert to signed integer, round towards -infinity.
2204 void fcvtms(const Register& rd, const VRegister& vn);
2205
2206 // FP convert to unsigned integer, round towards -infinity.
2207 void fcvtmu(const Register& rd, const VRegister& vn);
2208
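// Illustrative sketch (not part of this API): the fbits argument of the
// fixed-point conversions below (fcvtzs/fcvtzu and scvtf/ucvtf) gives the
// number of fractional bits in the fixed-point register value, i.e. the
// value is scaled by 2^fbits before the integer conversion. Assuming an
// Assembler `assm` and s0 = 1.75:
//
//   assm.fcvtzs(w0, s0);     // w0 = 1  (plain conversion, towards zero).
//   assm.fcvtzs(w0, s0, 4);  // w0 = 28 (1.75 * 2^4, fixed-point with four
//                            //          fractional bits).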
2209 // FP convert to signed integer, round towards -infinity.
2210 void fcvtms(const VRegister& vd, const VRegister& vn);
2211
2212 // FP convert to unsigned integer, round towards -infinity.
2213 void fcvtmu(const VRegister& vd, const VRegister& vn);
2214
2215 // FP convert to signed integer, nearest with ties to even.
2216 void fcvtns(const Register& rd, const VRegister& vn);
2217
2218 // FP convert to unsigned integer, nearest with ties to even.
2219 void fcvtnu(const Register& rd, const VRegister& vn);
2220
2221 // FP convert to signed integer, nearest with ties to even.
2222 void fcvtns(const VRegister& vd, const VRegister& vn);
2223
2224 // FP convert to unsigned integer, nearest with ties to even.
2225 void fcvtnu(const VRegister& vd, const VRegister& vn);
2226
2227 // FP convert to signed integer or fixed-point, round towards zero.
2228 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2229
2230 // FP convert to unsigned integer or fixed-point, round towards zero.
2231 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2232
2233 // FP convert to signed integer or fixed-point, round towards zero.
2234 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2235
2236 // FP convert to unsigned integer or fixed-point, round towards zero.
2237 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2238
2239 // FP convert to signed integer, round towards +infinity.
2240 void fcvtps(const Register& rd, const VRegister& vn);
2241
2242 // FP convert to unsigned integer, round towards +infinity.
2243 void fcvtpu(const Register& rd, const VRegister& vn);
2244
2245 // FP convert to signed integer, round towards +infinity.
2246 void fcvtps(const VRegister& vd, const VRegister& vn);
2247
2248 // FP convert to unsigned integer, round towards +infinity.
2249 void fcvtpu(const VRegister& vd, const VRegister& vn);
2250
2251 // Convert signed integer or fixed-point to FP.
2252 void scvtf(const VRegister& vd, const Register& rn, int fbits = 0);
2253
2254 // Convert unsigned integer or fixed-point to FP.
2255 void ucvtf(const VRegister& vd, const Register& rn, int fbits = 0);
2256
2257 // Convert signed integer or fixed-point to FP.
2258 void scvtf(const VRegister& vd, const VRegister& vn, int fbits = 0);
2259
2260 // Convert unsigned integer or fixed-point to FP.
2261 void ucvtf(const VRegister& vd, const VRegister& vn, int fbits = 0);
2262
2263 // Unsigned absolute difference.
2264 void uabd(const VRegister& vd,
2265 const VRegister& vn,
2266 const VRegister& vm);
2267
2268 // Signed absolute difference.
2269 void sabd(const VRegister& vd,
2270 const VRegister& vn,
2271 const VRegister& vm);
2272
2273 // Unsigned absolute difference and accumulate.
2274 void uaba(const VRegister& vd,
2275 const VRegister& vn,
2276 const VRegister& vm);
2277
2278 // Signed absolute difference and accumulate.
2279 void saba(const VRegister& vd,
2280 const VRegister& vn,
2281 const VRegister& vm);
2282
2283 // Add.
2284 void add(const VRegister& vd,
2285 const VRegister& vn,
2286 const VRegister& vm);
2287
2288 // Subtract.
2289 void sub(const VRegister& vd,
2290 const VRegister& vn,
2291 const VRegister& vm);
2292
2293 // Unsigned halving add.
2294 void uhadd(const VRegister& vd,
2295 const VRegister& vn,
2296 const VRegister& vm);
2297
2298 // Signed halving add.
2299 void shadd(const VRegister& vd,
2300 const VRegister& vn,
2301 const VRegister& vm);
2302
2303 // Unsigned rounding halving add.
2304 void urhadd(const VRegister& vd, 2305 const VRegister& vn, 2306 const VRegister& vm); 2307 2308 // Signed rounding halving add. 2309 void srhadd(const VRegister& vd, 2310 const VRegister& vn, 2311 const VRegister& vm); 2312 2313 // Unsigned halving sub. 2314 void uhsub(const VRegister& vd, 2315 const VRegister& vn, 2316 const VRegister& vm); 2317 2318 // Signed halving sub. 2319 void shsub(const VRegister& vd, 2320 const VRegister& vn, 2321 const VRegister& vm); 2322 2323 // Unsigned saturating add. 2324 void uqadd(const VRegister& vd, 2325 const VRegister& vn, 2326 const VRegister& vm); 2327 2328 // Signed saturating add. 2329 void sqadd(const VRegister& vd, 2330 const VRegister& vn, 2331 const VRegister& vm); 2332 2333 // Unsigned saturating subtract. 2334 void uqsub(const VRegister& vd, 2335 const VRegister& vn, 2336 const VRegister& vm); 2337 2338 // Signed saturating subtract. 2339 void sqsub(const VRegister& vd, 2340 const VRegister& vn, 2341 const VRegister& vm); 2342 2343 // Add pairwise. 2344 void addp(const VRegister& vd, 2345 const VRegister& vn, 2346 const VRegister& vm); 2347 2348 // Add pair of elements scalar. 2349 void addp(const VRegister& vd, 2350 const VRegister& vn); 2351 2352 // Multiply-add to accumulator. 2353 void mla(const VRegister& vd, 2354 const VRegister& vn, 2355 const VRegister& vm); 2356 2357 // Multiply-subtract to accumulator. 2358 void mls(const VRegister& vd, 2359 const VRegister& vn, 2360 const VRegister& vm); 2361 2362 // Multiply. 2363 void mul(const VRegister& vd, 2364 const VRegister& vn, 2365 const VRegister& vm); 2366 2367 // Multiply by scalar element. 2368 void mul(const VRegister& vd, 2369 const VRegister& vn, 2370 const VRegister& vm, 2371 int vm_index); 2372 2373 // Multiply-add by scalar element. 2374 void mla(const VRegister& vd, 2375 const VRegister& vn, 2376 const VRegister& vm, 2377 int vm_index); 2378 2379 // Multiply-subtract by scalar element. 2380 void mls(const VRegister& vd, 2381 const VRegister& vn, 2382 const VRegister& vm, 2383 int vm_index); 2384 2385 // Signed long multiply-add by scalar element. 2386 void smlal(const VRegister& vd, 2387 const VRegister& vn, 2388 const VRegister& vm, 2389 int vm_index); 2390 2391 // Signed long multiply-add by scalar element (second part). 2392 void smlal2(const VRegister& vd, 2393 const VRegister& vn, 2394 const VRegister& vm, 2395 int vm_index); 2396 2397 // Unsigned long multiply-add by scalar element. 2398 void umlal(const VRegister& vd, 2399 const VRegister& vn, 2400 const VRegister& vm, 2401 int vm_index); 2402 2403 // Unsigned long multiply-add by scalar element (second part). 2404 void umlal2(const VRegister& vd, 2405 const VRegister& vn, 2406 const VRegister& vm, 2407 int vm_index); 2408 2409 // Signed long multiply-sub by scalar element. 2410 void smlsl(const VRegister& vd, 2411 const VRegister& vn, 2412 const VRegister& vm, 2413 int vm_index); 2414 2415 // Signed long multiply-sub by scalar element (second part). 2416 void smlsl2(const VRegister& vd, 2417 const VRegister& vn, 2418 const VRegister& vm, 2419 int vm_index); 2420 2421 // Unsigned long multiply-sub by scalar element. 2422 void umlsl(const VRegister& vd, 2423 const VRegister& vn, 2424 const VRegister& vm, 2425 int vm_index); 2426 2427 // Unsigned long multiply-sub by scalar element (second part). 2428 void umlsl2(const VRegister& vd, 2429 const VRegister& vn, 2430 const VRegister& vm, 2431 int vm_index); 2432 2433 // Signed long multiply by scalar element. 
2434 void smull(const VRegister& vd,
2435 const VRegister& vn,
2436 const VRegister& vm,
2437 int vm_index);
2438
2439 // Signed long multiply by scalar element (second part).
2440 void smull2(const VRegister& vd,
2441 const VRegister& vn,
2442 const VRegister& vm,
2443 int vm_index);
2444
2445 // Unsigned long multiply by scalar element.
2446 void umull(const VRegister& vd,
2447 const VRegister& vn,
2448 const VRegister& vm,
2449 int vm_index);
2450
2451 // Unsigned long multiply by scalar element (second part).
2452 void umull2(const VRegister& vd,
2453 const VRegister& vn,
2454 const VRegister& vm,
2455 int vm_index);
2456
2457 // Signed saturating double long multiply by element.
2458 void sqdmull(const VRegister& vd,
2459 const VRegister& vn,
2460 const VRegister& vm,
2461 int vm_index);
2462
2463 // Signed saturating double long multiply by element (second part).
2464 void sqdmull2(const VRegister& vd,
2465 const VRegister& vn,
2466 const VRegister& vm,
2467 int vm_index);
2468
2469 // Signed saturating doubling long multiply-add by element.
2470 void sqdmlal(const VRegister& vd,
2471 const VRegister& vn,
2472 const VRegister& vm,
2473 int vm_index);
2474
2475 // Signed saturating doubling long multiply-add by element (second part).
2476 void sqdmlal2(const VRegister& vd,
2477 const VRegister& vn,
2478 const VRegister& vm,
2479 int vm_index);
2480
2481 // Signed saturating doubling long multiply-sub by element.
2482 void sqdmlsl(const VRegister& vd,
2483 const VRegister& vn,
2484 const VRegister& vm,
2485 int vm_index);
2486
2487 // Signed saturating doubling long multiply-sub by element (second part).
2488 void sqdmlsl2(const VRegister& vd,
2489 const VRegister& vn,
2490 const VRegister& vm,
2491 int vm_index);
2492
2493 // Compare equal.
2494 void cmeq(const VRegister& vd,
2495 const VRegister& vn,
2496 const VRegister& vm);
2497
2498 // Compare signed greater than or equal.
2499 void cmge(const VRegister& vd,
2500 const VRegister& vn,
2501 const VRegister& vm);
2502
2503 // Compare signed greater than.
2504 void cmgt(const VRegister& vd,
2505 const VRegister& vn,
2506 const VRegister& vm);
2507
2508 // Compare unsigned higher.
2509 void cmhi(const VRegister& vd,
2510 const VRegister& vn,
2511 const VRegister& vm);
2512
2513 // Compare unsigned higher or same.
2514 void cmhs(const VRegister& vd,
2515 const VRegister& vn,
2516 const VRegister& vm);
2517
2518 // Compare bitwise test bits nonzero.
2519 void cmtst(const VRegister& vd,
2520 const VRegister& vn,
2521 const VRegister& vm);
2522
2523 // Compare bitwise equal to zero.
2524 void cmeq(const VRegister& vd,
2525 const VRegister& vn,
2526 int value);
2527
2528 // Compare signed greater than or equal to zero.
2529 void cmge(const VRegister& vd,
2530 const VRegister& vn,
2531 int value);
2532
2533 // Compare signed greater than zero.
2534 void cmgt(const VRegister& vd,
2535 const VRegister& vn,
2536 int value);
2537
2538 // Compare signed less than or equal to zero.
2539 void cmle(const VRegister& vd,
2540 const VRegister& vn,
2541 int value);
2542
2543 // Compare signed less than zero.
2544 void cmlt(const VRegister& vd,
2545 const VRegister& vn,
2546 int value);
2547
2548 // Signed shift left by register.
2549 void sshl(const VRegister& vd,
2550 const VRegister& vn,
2551 const VRegister& vm);
2552
2553 // Unsigned shift left by register.
2554 void ushl(const VRegister& vd,
2555 const VRegister& vn,
2556 const VRegister& vm);
2557
2558 // Signed saturating shift left by register.
2559 void sqshl(const VRegister& vd, 2560 const VRegister& vn, 2561 const VRegister& vm); 2562 2563 // Unsigned saturating shift left by register. 2564 void uqshl(const VRegister& vd, 2565 const VRegister& vn, 2566 const VRegister& vm); 2567 2568 // Signed rounding shift left by register. 2569 void srshl(const VRegister& vd, 2570 const VRegister& vn, 2571 const VRegister& vm); 2572 2573 // Unsigned rounding shift left by register. 2574 void urshl(const VRegister& vd, 2575 const VRegister& vn, 2576 const VRegister& vm); 2577 2578 // Signed saturating rounding shift left by register. 2579 void sqrshl(const VRegister& vd, 2580 const VRegister& vn, 2581 const VRegister& vm); 2582 2583 // Unsigned saturating rounding shift left by register. 2584 void uqrshl(const VRegister& vd, 2585 const VRegister& vn, 2586 const VRegister& vm); 2587 2588 // Bitwise and. 2589 void and_(const VRegister& vd, 2590 const VRegister& vn, 2591 const VRegister& vm); 2592 2593 // Bitwise or. 2594 void orr(const VRegister& vd, 2595 const VRegister& vn, 2596 const VRegister& vm); 2597 2598 // Bitwise or immediate. 2599 void orr(const VRegister& vd, 2600 const int imm8, 2601 const int left_shift = 0); 2602 2603 // Move register to register. 2604 void mov(const VRegister& vd, 2605 const VRegister& vn); 2606 2607 // Bitwise orn. 2608 void orn(const VRegister& vd, 2609 const VRegister& vn, 2610 const VRegister& vm); 2611 2612 // Bitwise eor. 2613 void eor(const VRegister& vd, 2614 const VRegister& vn, 2615 const VRegister& vm); 2616 2617 // Bit clear immediate. 2618 void bic(const VRegister& vd, 2619 const int imm8, 2620 const int left_shift = 0); 2621 2622 // Bit clear. 2623 void bic(const VRegister& vd, 2624 const VRegister& vn, 2625 const VRegister& vm); 2626 2627 // Bitwise insert if false. 2628 void bif(const VRegister& vd, 2629 const VRegister& vn, 2630 const VRegister& vm); 2631 2632 // Bitwise insert if true. 2633 void bit(const VRegister& vd, 2634 const VRegister& vn, 2635 const VRegister& vm); 2636 2637 // Bitwise select. 2638 void bsl(const VRegister& vd, 2639 const VRegister& vn, 2640 const VRegister& vm); 2641 2642 // Polynomial multiply. 2643 void pmul(const VRegister& vd, 2644 const VRegister& vn, 2645 const VRegister& vm); 2646 2647 // Vector move immediate. 2648 void movi(const VRegister& vd, 2649 const uint64_t imm, 2650 Shift shift = LSL, 2651 const int shift_amount = 0); 2652 2653 // Bitwise not. 2654 void mvn(const VRegister& vd, 2655 const VRegister& vn); 2656 2657 // Vector move inverted immediate. 2658 void mvni(const VRegister& vd, 2659 const int imm8, 2660 Shift shift = LSL, 2661 const int shift_amount = 0); 2662 2663 // Signed saturating accumulate of unsigned value. 2664 void suqadd(const VRegister& vd, 2665 const VRegister& vn); 2666 2667 // Unsigned saturating accumulate of signed value. 2668 void usqadd(const VRegister& vd, 2669 const VRegister& vn); 2670 2671 // Absolute value. 2672 void abs(const VRegister& vd, 2673 const VRegister& vn); 2674 2675 // Signed saturating absolute value. 2676 void sqabs(const VRegister& vd, 2677 const VRegister& vn); 2678 2679 // Negate. 2680 void neg(const VRegister& vd, 2681 const VRegister& vn); 2682 2683 // Signed saturating negate. 2684 void sqneg(const VRegister& vd, 2685 const VRegister& vn); 2686 2687 // Bitwise not. 2688 void not_(const VRegister& vd, 2689 const VRegister& vn); 2690 2691 // Extract narrow. 2692 void xtn(const VRegister& vd, 2693 const VRegister& vn); 2694 2695 // Extract narrow (second part). 
2696 void xtn2(const VRegister& vd, 2697 const VRegister& vn); 2698 2699 // Signed saturating extract narrow. 2700 void sqxtn(const VRegister& vd, 2701 const VRegister& vn); 2702 2703 // Signed saturating extract narrow (second part). 2704 void sqxtn2(const VRegister& vd, 2705 const VRegister& vn); 2706 2707 // Unsigned saturating extract narrow. 2708 void uqxtn(const VRegister& vd, 2709 const VRegister& vn); 2710 2711 // Unsigned saturating extract narrow (second part). 2712 void uqxtn2(const VRegister& vd, 2713 const VRegister& vn); 2714 2715 // Signed saturating extract unsigned narrow. 2716 void sqxtun(const VRegister& vd, 2717 const VRegister& vn); 2718 2719 // Signed saturating extract unsigned narrow (second part). 2720 void sqxtun2(const VRegister& vd, 2721 const VRegister& vn); 2722 2723 // Extract vector from pair of vectors. 2724 void ext(const VRegister& vd, 2725 const VRegister& vn, 2726 const VRegister& vm, 2727 int index); 2728 2729 // Duplicate vector element to vector or scalar. 2730 void dup(const VRegister& vd, 2731 const VRegister& vn, 2732 int vn_index); 2733 2734 // Move vector element to scalar. 2735 void mov(const VRegister& vd, 2736 const VRegister& vn, 2737 int vn_index); 2738 2739 // Duplicate general-purpose register to vector. 2740 void dup(const VRegister& vd, 2741 const Register& rn); 2742 2743 // Insert vector element from another vector element. 2744 void ins(const VRegister& vd, 2745 int vd_index, 2746 const VRegister& vn, 2747 int vn_index); 2748 2749 // Move vector element to another vector element. 2750 void mov(const VRegister& vd, 2751 int vd_index, 2752 const VRegister& vn, 2753 int vn_index); 2754 2755 // Insert vector element from general-purpose register. 2756 void ins(const VRegister& vd, 2757 int vd_index, 2758 const Register& rn); 2759 2760 // Move general-purpose register to a vector element. 2761 void mov(const VRegister& vd, 2762 int vd_index, 2763 const Register& rn); 2764 2765 // Unsigned move vector element to general-purpose register. 2766 void umov(const Register& rd, 2767 const VRegister& vn, 2768 int vn_index); 2769 2770 // Move vector element to general-purpose register. 2771 void mov(const Register& rd, 2772 const VRegister& vn, 2773 int vn_index); 2774 2775 // Signed move vector element to general-purpose register. 2776 void smov(const Register& rd, 2777 const VRegister& vn, 2778 int vn_index); 2779 2780 // One-element structure load to one register. 2781 void ld1(const VRegister& vt, 2782 const MemOperand& src); 2783 2784 // One-element structure load to two registers. 2785 void ld1(const VRegister& vt, 2786 const VRegister& vt2, 2787 const MemOperand& src); 2788 2789 // One-element structure load to three registers. 2790 void ld1(const VRegister& vt, 2791 const VRegister& vt2, 2792 const VRegister& vt3, 2793 const MemOperand& src); 2794 2795 // One-element structure load to four registers. 2796 void ld1(const VRegister& vt, 2797 const VRegister& vt2, 2798 const VRegister& vt3, 2799 const VRegister& vt4, 2800 const MemOperand& src); 2801 2802 // One-element single structure load to one lane. 2803 void ld1(const VRegister& vt, 2804 int lane, 2805 const MemOperand& src); 2806 2807 // One-element single structure load to all lanes. 2808 void ld1r(const VRegister& vt, 2809 const MemOperand& src); 2810 2811 // Two-element structure load. 2812 void ld2(const VRegister& vt, 2813 const VRegister& vt2, 2814 const MemOperand& src); 2815 2816 // Two-element single structure load to one lane. 
2817 void ld2(const VRegister& vt, 2818 const VRegister& vt2, 2819 int lane, 2820 const MemOperand& src); 2821 2822 // Two-element single structure load to all lanes. 2823 void ld2r(const VRegister& vt, 2824 const VRegister& vt2, 2825 const MemOperand& src); 2826 2827 // Three-element structure load. 2828 void ld3(const VRegister& vt, 2829 const VRegister& vt2, 2830 const VRegister& vt3, 2831 const MemOperand& src); 2832 2833 // Three-element single structure load to one lane. 2834 void ld3(const VRegister& vt, 2835 const VRegister& vt2, 2836 const VRegister& vt3, 2837 int lane, 2838 const MemOperand& src); 2839 2840 // Three-element single structure load to all lanes. 2841 void ld3r(const VRegister& vt, 2842 const VRegister& vt2, 2843 const VRegister& vt3, 2844 const MemOperand& src); 2845 2846 // Four-element structure load. 2847 void ld4(const VRegister& vt, 2848 const VRegister& vt2, 2849 const VRegister& vt3, 2850 const VRegister& vt4, 2851 const MemOperand& src); 2852 2853 // Four-element single structure load to one lane. 2854 void ld4(const VRegister& vt, 2855 const VRegister& vt2, 2856 const VRegister& vt3, 2857 const VRegister& vt4, 2858 int lane, 2859 const MemOperand& src); 2860 2861 // Four-element single structure load to all lanes. 2862 void ld4r(const VRegister& vt, 2863 const VRegister& vt2, 2864 const VRegister& vt3, 2865 const VRegister& vt4, 2866 const MemOperand& src); 2867 2868 // Count leading sign bits. 2869 void cls(const VRegister& vd, 2870 const VRegister& vn); 2871 2872 // Count leading zero bits (vector). 2873 void clz(const VRegister& vd, 2874 const VRegister& vn); 2875 2876 // Population count per byte. 2877 void cnt(const VRegister& vd, 2878 const VRegister& vn); 2879 2880 // Reverse bit order. 2881 void rbit(const VRegister& vd, 2882 const VRegister& vn); 2883 2884 // Reverse elements in 16-bit halfwords. 2885 void rev16(const VRegister& vd, 2886 const VRegister& vn); 2887 2888 // Reverse elements in 32-bit words. 2889 void rev32(const VRegister& vd, 2890 const VRegister& vn); 2891 2892 // Reverse elements in 64-bit doublewords. 2893 void rev64(const VRegister& vd, 2894 const VRegister& vn); 2895 2896 // Unsigned reciprocal square root estimate. 2897 void ursqrte(const VRegister& vd, 2898 const VRegister& vn); 2899 2900 // Unsigned reciprocal estimate. 2901 void urecpe(const VRegister& vd, 2902 const VRegister& vn); 2903 2904 // Signed pairwise long add. 2905 void saddlp(const VRegister& vd, 2906 const VRegister& vn); 2907 2908 // Unsigned pairwise long add. 2909 void uaddlp(const VRegister& vd, 2910 const VRegister& vn); 2911 2912 // Signed pairwise long add and accumulate. 2913 void sadalp(const VRegister& vd, 2914 const VRegister& vn); 2915 2916 // Unsigned pairwise long add and accumulate. 2917 void uadalp(const VRegister& vd, 2918 const VRegister& vn); 2919 2920 // Shift left by immediate. 2921 void shl(const VRegister& vd, 2922 const VRegister& vn, 2923 int shift); 2924 2925 // Signed saturating shift left by immediate. 2926 void sqshl(const VRegister& vd, 2927 const VRegister& vn, 2928 int shift); 2929 2930 // Signed saturating shift left unsigned by immediate. 2931 void sqshlu(const VRegister& vd, 2932 const VRegister& vn, 2933 int shift); 2934 2935 // Unsigned saturating shift left by immediate. 2936 void uqshl(const VRegister& vd, 2937 const VRegister& vn, 2938 int shift); 2939 2940 // Signed shift left long by immediate. 
2941 void sshll(const VRegister& vd,
2942 const VRegister& vn,
2943 int shift);
2944
2945 // Signed shift left long by immediate (second part).
2946 void sshll2(const VRegister& vd,
2947 const VRegister& vn,
2948 int shift);
2949
2950 // Signed extend long.
2951 void sxtl(const VRegister& vd,
2952 const VRegister& vn);
2953
2954 // Signed extend long (second part).
2955 void sxtl2(const VRegister& vd,
2956 const VRegister& vn);
2957
2958 // Unsigned shift left long by immediate.
2959 void ushll(const VRegister& vd,
2960 const VRegister& vn,
2961 int shift);
2962
2963 // Unsigned shift left long by immediate (second part).
2964 void ushll2(const VRegister& vd,
2965 const VRegister& vn,
2966 int shift);
2967
2968 // Shift left long by element size.
2969 void shll(const VRegister& vd,
2970 const VRegister& vn,
2971 int shift);
2972
2973 // Shift left long by element size (second part).
2974 void shll2(const VRegister& vd,
2975 const VRegister& vn,
2976 int shift);
2977
2978 // Unsigned extend long.
2979 void uxtl(const VRegister& vd,
2980 const VRegister& vn);
2981
2982 // Unsigned extend long (second part).
2983 void uxtl2(const VRegister& vd,
2984 const VRegister& vn);
2985
2986 // Shift left by immediate and insert.
2987 void sli(const VRegister& vd,
2988 const VRegister& vn,
2989 int shift);
2990
2991 // Shift right by immediate and insert.
2992 void sri(const VRegister& vd,
2993 const VRegister& vn,
2994 int shift);
2995
2996 // Signed maximum.
2997 void smax(const VRegister& vd,
2998 const VRegister& vn,
2999 const VRegister& vm);
3000
3001 // Signed pairwise maximum.
3002 void smaxp(const VRegister& vd,
3003 const VRegister& vn,
3004 const VRegister& vm);
3005
3006 // Add across vector.
3007 void addv(const VRegister& vd,
3008 const VRegister& vn);
3009
3010 // Signed add long across vector.
3011 void saddlv(const VRegister& vd,
3012 const VRegister& vn);
3013
3014 // Unsigned add long across vector.
3015 void uaddlv(const VRegister& vd,
3016 const VRegister& vn);
3017
3018 // FP maximum number across vector.
3019 void fmaxnmv(const VRegister& vd,
3020 const VRegister& vn);
3021
3022 // FP maximum across vector.
3023 void fmaxv(const VRegister& vd,
3024 const VRegister& vn);
3025
3026 // FP minimum number across vector.
3027 void fminnmv(const VRegister& vd,
3028 const VRegister& vn);
3029
3030 // FP minimum across vector.
3031 void fminv(const VRegister& vd,
3032 const VRegister& vn);
3033
3034 // Signed maximum across vector.
3035 void smaxv(const VRegister& vd,
3036 const VRegister& vn);
3037
3038 // Signed minimum.
3039 void smin(const VRegister& vd,
3040 const VRegister& vn,
3041 const VRegister& vm);
3042
3043 // Signed pairwise minimum.
3044 void sminp(const VRegister& vd,
3045 const VRegister& vn,
3046 const VRegister& vm);
3047
3048 // Signed minimum across vector.
3049 void sminv(const VRegister& vd,
3050 const VRegister& vn);
3051
3052 // One-element structure store from one register.
3053 void st1(const VRegister& vt,
3054 const MemOperand& dst);
3055
3056 // One-element structure store from two registers.
3057 void st1(const VRegister& vt,
3058 const VRegister& vt2,
3059 const MemOperand& dst);
3060
3061 // One-element structure store from three registers.
3062 void st1(const VRegister& vt,
3063 const VRegister& vt2,
3064 const VRegister& vt3,
3065 const MemOperand& dst);
3066
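// Illustrative sketch (not part of this API): the structure loads above and
// stores below interleave their elements. Assuming an Assembler `assm`,
//
//   assm.ld2(v0.V16B(), v1.V16B(), MemOperand(x0));
//
// loads 32 bytes from x0, de-interleaving them so that even-indexed bytes
// land in v0 and odd-indexed bytes in v1; st2 performs the inverse,
// re-interleaving store. (The V16B() lane-format accessors are assumed to
// be available on VRegister, as elsewhere in VIXL.)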
3067 // One-element structure store from four registers.
3068 void st1(const VRegister& vt,
3069 const VRegister& vt2,
3070 const VRegister& vt3,
3071 const VRegister& vt4,
3072 const MemOperand& dst);
3073
3074 // One-element single structure store from one lane.
3075 void st1(const VRegister& vt,
3076 int lane,
3077 const MemOperand& dst);
3078
3079 // Two-element structure store from two registers.
3080 void st2(const VRegister& vt,
3081 const VRegister& vt2,
3082 const MemOperand& dst);
3083
3084 // Two-element single structure store from two lanes.
3085 void st2(const VRegister& vt,
3086 const VRegister& vt2,
3087 int lane,
3088 const MemOperand& dst);
3089
3090 // Three-element structure store from three registers.
3091 void st3(const VRegister& vt,
3092 const VRegister& vt2,
3093 const VRegister& vt3,
3094 const MemOperand& dst);
3095
3096 // Three-element single structure store from three lanes.
3097 void st3(const VRegister& vt,
3098 const VRegister& vt2,
3099 const VRegister& vt3,
3100 int lane,
3101 const MemOperand& dst);
3102
3103 // Four-element structure store from four registers.
3104 void st4(const VRegister& vt,
3105 const VRegister& vt2,
3106 const VRegister& vt3,
3107 const VRegister& vt4,
3108 const MemOperand& dst);
3109
3110 // Four-element single structure store from four lanes.
3111 void st4(const VRegister& vt,
3112 const VRegister& vt2,
3113 const VRegister& vt3,
3114 const VRegister& vt4,
3115 int lane,
3116 const MemOperand& dst);
3117
3118 // Unsigned add long.
3119 void uaddl(const VRegister& vd,
3120 const VRegister& vn,
3121 const VRegister& vm);
3122
3123 // Unsigned add long (second part).
3124 void uaddl2(const VRegister& vd,
3125 const VRegister& vn,
3126 const VRegister& vm);
3127
3128 // Unsigned add wide.
3129 void uaddw(const VRegister& vd,
3130 const VRegister& vn,
3131 const VRegister& vm);
3132
3133 // Unsigned add wide (second part).
3134 void uaddw2(const VRegister& vd,
3135 const VRegister& vn,
3136 const VRegister& vm);
3137
3138 // Signed add long.
3139 void saddl(const VRegister& vd,
3140 const VRegister& vn,
3141 const VRegister& vm);
3142
3143 // Signed add long (second part).
3144 void saddl2(const VRegister& vd,
3145 const VRegister& vn,
3146 const VRegister& vm);
3147
3148 // Signed add wide.
3149 void saddw(const VRegister& vd,
3150 const VRegister& vn,
3151 const VRegister& vm);
3152
3153 // Signed add wide (second part).
3154 void saddw2(const VRegister& vd,
3155 const VRegister& vn,
3156 const VRegister& vm);
3157
3158 // Unsigned subtract long.
3159 void usubl(const VRegister& vd,
3160 const VRegister& vn,
3161 const VRegister& vm);
3162
3163 // Unsigned subtract long (second part).
3164 void usubl2(const VRegister& vd,
3165 const VRegister& vn,
3166 const VRegister& vm);
3167
3168 // Unsigned subtract wide.
3169 void usubw(const VRegister& vd,
3170 const VRegister& vn,
3171 const VRegister& vm);
3172
3173 // Unsigned subtract wide (second part).
3174 void usubw2(const VRegister& vd,
3175 const VRegister& vn,
3176 const VRegister& vm);
3177
3178 // Signed subtract long.
3179 void ssubl(const VRegister& vd,
3180 const VRegister& vn,
3181 const VRegister& vm);
3182
3183 // Signed subtract long (second part).
3184 void ssubl2(const VRegister& vd,
3185 const VRegister& vn,
3186 const VRegister& vm);
3187
3188 // Signed integer subtract wide.
3189 void ssubw(const VRegister& vd,
3190 const VRegister& vn,
3191 const VRegister& vm);
3192
3193 // Signed integer subtract wide (second part).
3194 void ssubw2(const VRegister& vd, 3195 const VRegister& vn, 3196 const VRegister& vm); 3197 3198 // Unsigned maximum. 3199 void umax(const VRegister& vd, 3200 const VRegister& vn, 3201 const VRegister& vm); 3202 3203 // Unsigned pairwise maximum. 3204 void umaxp(const VRegister& vd, 3205 const VRegister& vn, 3206 const VRegister& vm); 3207 3208 // Unsigned maximum across vector. 3209 void umaxv(const VRegister& vd, 3210 const VRegister& vn); 3211 3212 // Unsigned minimum. 3213 void umin(const VRegister& vd, 3214 const VRegister& vn, 3215 const VRegister& vm); 3216 3217 // Unsigned pairwise minimum. 3218 void uminp(const VRegister& vd, 3219 const VRegister& vn, 3220 const VRegister& vm); 3221 3222 // Unsigned minimum across vector. 3223 void uminv(const VRegister& vd, 3224 const VRegister& vn); 3225 3226 // Transpose vectors (primary). 3227 void trn1(const VRegister& vd, 3228 const VRegister& vn, 3229 const VRegister& vm); 3230 3231 // Transpose vectors (secondary). 3232 void trn2(const VRegister& vd, 3233 const VRegister& vn, 3234 const VRegister& vm); 3235 3236 // Unzip vectors (primary). 3237 void uzp1(const VRegister& vd, 3238 const VRegister& vn, 3239 const VRegister& vm); 3240 3241 // Unzip vectors (secondary). 3242 void uzp2(const VRegister& vd, 3243 const VRegister& vn, 3244 const VRegister& vm); 3245 3246 // Zip vectors (primary). 3247 void zip1(const VRegister& vd, 3248 const VRegister& vn, 3249 const VRegister& vm); 3250 3251 // Zip vectors (secondary). 3252 void zip2(const VRegister& vd, 3253 const VRegister& vn, 3254 const VRegister& vm); 3255 3256 // Signed shift right by immediate. 3257 void sshr(const VRegister& vd, 3258 const VRegister& vn, 3259 int shift); 3260 3261 // Unsigned shift right by immediate. 3262 void ushr(const VRegister& vd, 3263 const VRegister& vn, 3264 int shift); 3265 3266 // Signed rounding shift right by immediate. 3267 void srshr(const VRegister& vd, 3268 const VRegister& vn, 3269 int shift); 3270 3271 // Unsigned rounding shift right by immediate. 3272 void urshr(const VRegister& vd, 3273 const VRegister& vn, 3274 int shift); 3275 3276 // Signed shift right by immediate and accumulate. 3277 void ssra(const VRegister& vd, 3278 const VRegister& vn, 3279 int shift); 3280 3281 // Unsigned shift right by immediate and accumulate. 3282 void usra(const VRegister& vd, 3283 const VRegister& vn, 3284 int shift); 3285 3286 // Signed rounding shift right by immediate and accumulate. 3287 void srsra(const VRegister& vd, 3288 const VRegister& vn, 3289 int shift); 3290 3291 // Unsigned rounding shift right by immediate and accumulate. 3292 void ursra(const VRegister& vd, 3293 const VRegister& vn, 3294 int shift); 3295 3296 // Shift right narrow by immediate. 3297 void shrn(const VRegister& vd, 3298 const VRegister& vn, 3299 int shift); 3300 3301 // Shift right narrow by immediate (second part). 3302 void shrn2(const VRegister& vd, 3303 const VRegister& vn, 3304 int shift); 3305 3306 // Rounding shift right narrow by immediate. 3307 void rshrn(const VRegister& vd, 3308 const VRegister& vn, 3309 int shift); 3310 3311 // Rounding shift right narrow by immediate (second part). 3312 void rshrn2(const VRegister& vd, 3313 const VRegister& vn, 3314 int shift); 3315 3316 // Unsigned saturating shift right narrow by immediate. 3317 void uqshrn(const VRegister& vd, 3318 const VRegister& vn, 3319 int shift); 3320 3321 // Unsigned saturating shift right narrow by immediate (second part). 
3322 void uqshrn2(const VRegister& vd,
3323 const VRegister& vn,
3324 int shift);
3325
3326 // Unsigned saturating rounding shift right narrow by immediate.
3327 void uqrshrn(const VRegister& vd,
3328 const VRegister& vn,
3329 int shift);
3330
3331 // Unsigned saturating rounding shift right narrow by immediate (second part).
3332 void uqrshrn2(const VRegister& vd,
3333 const VRegister& vn,
3334 int shift);
3335
3336 // Signed saturating shift right narrow by immediate.
3337 void sqshrn(const VRegister& vd,
3338 const VRegister& vn,
3339 int shift);
3340
3341 // Signed saturating shift right narrow by immediate (second part).
3342 void sqshrn2(const VRegister& vd,
3343 const VRegister& vn,
3344 int shift);
3345
3346 // Signed saturating rounded shift right narrow by immediate.
3347 void sqrshrn(const VRegister& vd,
3348 const VRegister& vn,
3349 int shift);
3350
3351 // Signed saturating rounded shift right narrow by immediate (second part).
3352 void sqrshrn2(const VRegister& vd,
3353 const VRegister& vn,
3354 int shift);
3355
3356 // Signed saturating shift right unsigned narrow by immediate.
3357 void sqshrun(const VRegister& vd,
3358 const VRegister& vn,
3359 int shift);
3360
3361 // Signed saturating shift right unsigned narrow by immediate (second part).
3362 void sqshrun2(const VRegister& vd,
3363 const VRegister& vn,
3364 int shift);
3365
3366 // Signed saturating rounded shift right unsigned narrow by immediate.
3367 void sqrshrun(const VRegister& vd,
3368 const VRegister& vn,
3369 int shift);
3370
3371 // Signed saturating rounded shift right unsigned narrow by immediate (second part).
3372 void sqrshrun2(const VRegister& vd,
3373 const VRegister& vn,
3374 int shift);
3375
3376 // FP reciprocal step.
3377 void frecps(const VRegister& vd,
3378 const VRegister& vn,
3379 const VRegister& vm);
3380
3381 // FP reciprocal estimate.
3382 void frecpe(const VRegister& vd,
3383 const VRegister& vn);
3384
3385 // FP reciprocal square root estimate.
3386 void frsqrte(const VRegister& vd,
3387 const VRegister& vn);
3388
3389 // FP reciprocal square root step.
3390 void frsqrts(const VRegister& vd,
3391 const VRegister& vn,
3392 const VRegister& vm);
3393
3394 // Signed absolute difference and accumulate long.
3395 void sabal(const VRegister& vd,
3396 const VRegister& vn,
3397 const VRegister& vm);
3398
3399 // Signed absolute difference and accumulate long (second part).
3400 void sabal2(const VRegister& vd,
3401 const VRegister& vn,
3402 const VRegister& vm);
3403
3404 // Unsigned absolute difference and accumulate long.
3405 void uabal(const VRegister& vd,
3406 const VRegister& vn,
3407 const VRegister& vm);
3408
3409 // Unsigned absolute difference and accumulate long (second part).
3410 void uabal2(const VRegister& vd,
3411 const VRegister& vn,
3412 const VRegister& vm);
3413
3414 // Signed absolute difference long.
3415 void sabdl(const VRegister& vd,
3416 const VRegister& vn,
3417 const VRegister& vm);
3418
3419 // Signed absolute difference long (second part).
3420 void sabdl2(const VRegister& vd,
3421 const VRegister& vn,
3422 const VRegister& vm);
3423
3424 // Unsigned absolute difference long.
3425 void uabdl(const VRegister& vd,
3426 const VRegister& vn,
3427 const VRegister& vm);
3428
3429 // Unsigned absolute difference long (second part).
3430 void uabdl2(const VRegister& vd,
3431 const VRegister& vn,
3432 const VRegister& vm);
3433
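// Note (illustrative): pmull/pmull2 below perform carry-less (polynomial)
// multiplication over GF(2): partial products are combined with XOR rather
// than addition, so, for example, 0b11 * 0b11 = 0b101 (0b11 ^ 0b110),
// whereas ordinary multiplication would give 0b1001. This is the primitive
// underlying CRC and AES-GCM (GHASH) kernels.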
3434 // Polynomial multiply long.
3435 void pmull(const VRegister& vd,
3436 const VRegister& vn,
3437 const VRegister& vm);
3438
3439 // Polynomial multiply long (second part).
3440 void pmull2(const VRegister& vd,
3441 const VRegister& vn,
3442 const VRegister& vm);
3443
3444 // Signed long multiply-add.
3445 void smlal(const VRegister& vd,
3446 const VRegister& vn,
3447 const VRegister& vm);
3448
3449 // Signed long multiply-add (second part).
3450 void smlal2(const VRegister& vd,
3451 const VRegister& vn,
3452 const VRegister& vm);
3453
3454 // Unsigned long multiply-add.
3455 void umlal(const VRegister& vd,
3456 const VRegister& vn,
3457 const VRegister& vm);
3458
3459 // Unsigned long multiply-add (second part).
3460 void umlal2(const VRegister& vd,
3461 const VRegister& vn,
3462 const VRegister& vm);
3463
3464 // Signed long multiply-sub.
3465 void smlsl(const VRegister& vd,
3466 const VRegister& vn,
3467 const VRegister& vm);
3468
3469 // Signed long multiply-sub (second part).
3470 void smlsl2(const VRegister& vd,
3471 const VRegister& vn,
3472 const VRegister& vm);
3473
3474 // Unsigned long multiply-sub.
3475 void umlsl(const VRegister& vd,
3476 const VRegister& vn,
3477 const VRegister& vm);
3478
3479 // Unsigned long multiply-sub (second part).
3480 void umlsl2(const VRegister& vd,
3481 const VRegister& vn,
3482 const VRegister& vm);
3483
3484 // Signed long multiply.
3485 void smull(const VRegister& vd,
3486 const VRegister& vn,
3487 const VRegister& vm);
3488
3489 // Signed long multiply (second part).
3490 void smull2(const VRegister& vd,
3491 const VRegister& vn,
3492 const VRegister& vm);
3493
3494 // Signed saturating doubling long multiply-add.
3495 void sqdmlal(const VRegister& vd,
3496 const VRegister& vn,
3497 const VRegister& vm);
3498
3499 // Signed saturating doubling long multiply-add (second part).
3500 void sqdmlal2(const VRegister& vd,
3501 const VRegister& vn,
3502 const VRegister& vm);
3503
3504 // Signed saturating doubling long multiply-subtract.
3505 void sqdmlsl(const VRegister& vd,
3506 const VRegister& vn,
3507 const VRegister& vm);
3508
3509 // Signed saturating doubling long multiply-subtract (second part).
3510 void sqdmlsl2(const VRegister& vd,
3511 const VRegister& vn,
3512 const VRegister& vm);
3513
3514 // Signed saturating doubling long multiply.
3515 void sqdmull(const VRegister& vd,
3516 const VRegister& vn,
3517 const VRegister& vm);
3518
3519 // Signed saturating doubling long multiply (second part).
3520 void sqdmull2(const VRegister& vd,
3521 const VRegister& vn,
3522 const VRegister& vm);
3523
3524 // Signed saturating doubling multiply returning high half.
3525 void sqdmulh(const VRegister& vd,
3526 const VRegister& vn,
3527 const VRegister& vm);
3528
3529 // Signed saturating rounding doubling multiply returning high half.
3530 void sqrdmulh(const VRegister& vd,
3531 const VRegister& vn,
3532 const VRegister& vm);
3533
3534 // Signed saturating doubling multiply element returning high half.
3535 void sqdmulh(const VRegister& vd,
3536 const VRegister& vn,
3537 const VRegister& vm,
3538 int vm_index);
3539
3540 // Signed saturating rounding doubling multiply element returning high half.
3541 void sqrdmulh(const VRegister& vd,
3542 const VRegister& vn,
3543 const VRegister& vm,
3544 int vm_index);
3545
3546 // Unsigned long multiply.
3547 void umull(const VRegister& vd,
3548 const VRegister& vn,
3549 const VRegister& vm);
3550
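// Note (illustrative): vector instructions with a "2" suffix ("second
// part"), such as umull2 below, read their narrow source elements from the
// upper half of the 128-bit source registers, so a long operation over a
// full 128-bit vector is written as an op/op2 pair.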
3551 // Unsigned long multiply (second part).
3552 void umull2(const VRegister& vd,
3553 const VRegister& vn,
3554 const VRegister& vm);
3555
3556 // Add narrow returning high half.
3557 void addhn(const VRegister& vd,
3558 const VRegister& vn,
3559 const VRegister& vm);
3560
3561 // Add narrow returning high half (second part).
3562 void addhn2(const VRegister& vd,
3563 const VRegister& vn,
3564 const VRegister& vm);
3565
3566 // Rounding add narrow returning high half.
3567 void raddhn(const VRegister& vd,
3568 const VRegister& vn,
3569 const VRegister& vm);
3570
3571 // Rounding add narrow returning high half (second part).
3572 void raddhn2(const VRegister& vd,
3573 const VRegister& vn,
3574 const VRegister& vm);
3575
3576 // Subtract narrow returning high half.
3577 void subhn(const VRegister& vd,
3578 const VRegister& vn,
3579 const VRegister& vm);
3580
3581 // Subtract narrow returning high half (second part).
3582 void subhn2(const VRegister& vd,
3583 const VRegister& vn,
3584 const VRegister& vm);
3585
3586 // Rounding subtract narrow returning high half.
3587 void rsubhn(const VRegister& vd,
3588 const VRegister& vn,
3589 const VRegister& vm);
3590
3591 // Rounding subtract narrow returning high half (second part).
3592 void rsubhn2(const VRegister& vd,
3593 const VRegister& vn,
3594 const VRegister& vm);
3595
3596 // FP vector multiply accumulate.
3597 void fmla(const VRegister& vd,
3598 const VRegister& vn,
3599 const VRegister& vm);
3600
3601 // FP vector multiply subtract.
3602 void fmls(const VRegister& vd,
3603 const VRegister& vn,
3604 const VRegister& vm);
3605
3606 // FP vector multiply extended.
3607 void fmulx(const VRegister& vd,
3608 const VRegister& vn,
3609 const VRegister& vm);
3610
3611 // FP absolute greater than or equal.
3612 void facge(const VRegister& vd,
3613 const VRegister& vn,
3614 const VRegister& vm);
3615
3616 // FP absolute greater than.
3617 void facgt(const VRegister& vd,
3618 const VRegister& vn,
3619 const VRegister& vm);
3620
3621 // FP multiply by element.
3622 void fmul(const VRegister& vd,
3623 const VRegister& vn,
3624 const VRegister& vm,
3625 int vm_index);
3626
3627 // FP fused multiply-add to accumulator by element.
3628 void fmla(const VRegister& vd,
3629 const VRegister& vn,
3630 const VRegister& vm,
3631 int vm_index);
3632
3633 // FP fused multiply-sub from accumulator by element.
3634 void fmls(const VRegister& vd,
3635 const VRegister& vn,
3636 const VRegister& vm,
3637 int vm_index);
3638
3639 // FP multiply extended by element.
3640 void fmulx(const VRegister& vd,
3641 const VRegister& vn,
3642 const VRegister& vm,
3643 int vm_index);
3644
3645 // FP compare equal.
3646 void fcmeq(const VRegister& vd,
3647 const VRegister& vn,
3648 const VRegister& vm);
3649
3650 // FP compare greater than.
3651 void fcmgt(const VRegister& vd,
3652 const VRegister& vn,
3653 const VRegister& vm);
3654
3655 // FP compare greater than or equal.
3656 void fcmge(const VRegister& vd,
3657 const VRegister& vn,
3658 const VRegister& vm);
3659
3660 // FP compare equal to zero.
3661 void fcmeq(const VRegister& vd,
3662 const VRegister& vn,
3663 double imm);
3664
3665 // FP compare greater than zero.
3666 void fcmgt(const VRegister& vd,
3667 const VRegister& vn,
3668 double imm);
3669
3670 // FP compare greater than or equal to zero.
3671 void fcmge(const VRegister& vd,
3672 const VRegister& vn,
3673 double imm);
3674
3675 // FP compare less than or equal to zero.
3676 void fcmle(const VRegister& vd,
3677 const VRegister& vn,
3678 double imm);
3679
3680 // FP compare less than zero.
3681 void fcmlt(const VRegister& vd, 3682 const VRegister& vn, 3683 double imm); 3684 3685 // FP absolute difference. 3686 void fabd(const VRegister& vd, 3687 const VRegister& vn, 3688 const VRegister& vm); 3689 3690 // FP pairwise add vector. 3691 void faddp(const VRegister& vd, 3692 const VRegister& vn, 3693 const VRegister& vm); 3694 3695 // FP pairwise add scalar. 3696 void faddp(const VRegister& vd, 3697 const VRegister& vn); 3698 3699 // FP pairwise maximum vector. 3700 void fmaxp(const VRegister& vd, 3701 const VRegister& vn, 3702 const VRegister& vm); 3703 3704 // FP pairwise maximum scalar. 3705 void fmaxp(const VRegister& vd, 3706 const VRegister& vn); 3707 3708 // FP pairwise minimum vector. 3709 void fminp(const VRegister& vd, 3710 const VRegister& vn, 3711 const VRegister& vm); 3712 3713 // FP pairwise minimum scalar. 3714 void fminp(const VRegister& vd, 3715 const VRegister& vn); 3716 3717 // FP pairwise maximum number vector. 3718 void fmaxnmp(const VRegister& vd, 3719 const VRegister& vn, 3720 const VRegister& vm); 3721 3722 // FP pairwise maximum number scalar. 3723 void fmaxnmp(const VRegister& vd, 3724 const VRegister& vn); 3725 3726 // FP pairwise minimum number vector. 3727 void fminnmp(const VRegister& vd, 3728 const VRegister& vn, 3729 const VRegister& vm); 3730 3731 // FP pairwise minimum number scalar. 3732 void fminnmp(const VRegister& vd, 3733 const VRegister& vn); 3734 3735 // Emit generic instructions. 3736 // Emit raw instructions into the instruction stream. 3737 void dci(Instr raw_inst) { Emit(raw_inst); } 3738 3739 // Emit 32 bits of data into the instruction stream. 3740 void dc32(uint32_t data) { 3741 VIXL_ASSERT(buffer_monitor_ > 0); 3742 buffer_->Emit32(data); 3743 } 3744 3745 // Emit 64 bits of data into the instruction stream. 3746 void dc64(uint64_t data) { 3747 VIXL_ASSERT(buffer_monitor_ > 0); 3748 buffer_->Emit64(data); 3749 } 3750 3751 // Copy a string into the instruction stream, including the terminating NULL 3752 // character. The instruction pointer is then aligned correctly for 3753 // subsequent instructions. 3754 void EmitString(const char * string) { 3755 VIXL_ASSERT(string != NULL); 3756 VIXL_ASSERT(buffer_monitor_ > 0); 3757 3758 buffer_->EmitString(string); 3759 buffer_->Align(); 3760 } 3761 3762 // Code generation helpers. 3763 3764 // Register encoding. 
3765 static Instr Rd(CPURegister rd) { 3766 VIXL_ASSERT(rd.code() != kSPRegInternalCode); 3767 return rd.code() << Rd_offset; 3768 } 3769 3770 static Instr Rn(CPURegister rn) { 3771 VIXL_ASSERT(rn.code() != kSPRegInternalCode); 3772 return rn.code() << Rn_offset; 3773 } 3774 3775 static Instr Rm(CPURegister rm) { 3776 VIXL_ASSERT(rm.code() != kSPRegInternalCode); 3777 return rm.code() << Rm_offset; 3778 } 3779 3780 static Instr RmNot31(CPURegister rm) { 3781 VIXL_ASSERT(rm.code() != kSPRegInternalCode); 3782 VIXL_ASSERT(!rm.IsZero()); 3783 return Rm(rm); 3784 } 3785 3786 static Instr Ra(CPURegister ra) { 3787 VIXL_ASSERT(ra.code() != kSPRegInternalCode); 3788 return ra.code() << Ra_offset; 3789 } 3790 3791 static Instr Rt(CPURegister rt) { 3792 VIXL_ASSERT(rt.code() != kSPRegInternalCode); 3793 return rt.code() << Rt_offset; 3794 } 3795 3796 static Instr Rt2(CPURegister rt2) { 3797 VIXL_ASSERT(rt2.code() != kSPRegInternalCode); 3798 return rt2.code() << Rt2_offset; 3799 } 3800 3801 static Instr Rs(CPURegister rs) { 3802 VIXL_ASSERT(rs.code() != kSPRegInternalCode); 3803 return rs.code() << Rs_offset; 3804 } 3805 3806 // These encoding functions allow the stack pointer to be encoded, and 3807 // disallow the zero register. 3808 static Instr RdSP(Register rd) { 3809 VIXL_ASSERT(!rd.IsZero()); 3810 return (rd.code() & kRegCodeMask) << Rd_offset; 3811 } 3812 3813 static Instr RnSP(Register rn) { 3814 VIXL_ASSERT(!rn.IsZero()); 3815 return (rn.code() & kRegCodeMask) << Rn_offset; 3816 } 3817 3818 // Flags encoding. 3819 static Instr Flags(FlagsUpdate S) { 3820 if (S == SetFlags) { 3821 return 1 << FlagsUpdate_offset; 3822 } else if (S == LeaveFlags) { 3823 return 0 << FlagsUpdate_offset; 3824 } 3825 VIXL_UNREACHABLE(); 3826 return 0; 3827 } 3828 3829 static Instr Cond(Condition cond) { 3830 return cond << Condition_offset; 3831 } 3832 3833 // PC-relative address encoding. 3834 static Instr ImmPCRelAddress(int imm21) { 3835 VIXL_ASSERT(is_int21(imm21)); 3836 Instr imm = static_cast<Instr>(truncate_to_int21(imm21)); 3837 Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset; 3838 Instr immlo = imm << ImmPCRelLo_offset; 3839 return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask); 3840 } 3841 3842 // Branch encoding. 3843 static Instr ImmUncondBranch(int imm26) { 3844 VIXL_ASSERT(is_int26(imm26)); 3845 return truncate_to_int26(imm26) << ImmUncondBranch_offset; 3846 } 3847 3848 static Instr ImmCondBranch(int imm19) { 3849 VIXL_ASSERT(is_int19(imm19)); 3850 return truncate_to_int19(imm19) << ImmCondBranch_offset; 3851 } 3852 3853 static Instr ImmCmpBranch(int imm19) { 3854 VIXL_ASSERT(is_int19(imm19)); 3855 return truncate_to_int19(imm19) << ImmCmpBranch_offset; 3856 } 3857 3858 static Instr ImmTestBranch(int imm14) { 3859 VIXL_ASSERT(is_int14(imm14)); 3860 return truncate_to_int14(imm14) << ImmTestBranch_offset; 3861 } 3862 3863 static Instr ImmTestBranchBit(unsigned bit_pos) { 3864 VIXL_ASSERT(is_uint6(bit_pos)); 3865 // Subtract five from the shift offset, as we need bit 5 from bit_pos. 3866 unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5); 3867 unsigned b40 = bit_pos << ImmTestBranchBit40_offset; 3868 b5 &= ImmTestBranchBit5_mask; 3869 b40 &= ImmTestBranchBit40_mask; 3870 return b5 | b40; 3871 } 3872 3873 // Data Processing encoding. 3874 static Instr SF(Register rd) { 3875 return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits; 3876 } 3877 3878 static Instr ImmAddSub(int imm) { 3879 VIXL_ASSERT(IsImmAddSub(imm)); 3880 if (is_uint12(imm)) { // No shift required. 
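    // A plain 12-bit immediate: it moves directly into the field at
    // ImmAddSub_offset, leaving the shift bit at ShiftAddSub_offset clear.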
3881 imm <<= ImmAddSub_offset; 3882 } else { 3883 imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset); 3884 } 3885 return imm; 3886 } 3887 3888 static Instr ImmS(unsigned imms, unsigned reg_size) { 3889 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(imms)) || 3890 ((reg_size == kWRegSize) && is_uint5(imms))); 3891 USE(reg_size); 3892 return imms << ImmS_offset; 3893 } 3894 3895 static Instr ImmR(unsigned immr, unsigned reg_size) { 3896 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) || 3897 ((reg_size == kWRegSize) && is_uint5(immr))); 3898 USE(reg_size); 3899 VIXL_ASSERT(is_uint6(immr)); 3900 return immr << ImmR_offset; 3901 } 3902 3903 static Instr ImmSetBits(unsigned imms, unsigned reg_size) { 3904 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 3905 VIXL_ASSERT(is_uint6(imms)); 3906 VIXL_ASSERT((reg_size == kXRegSize) || is_uint6(imms + 3)); 3907 USE(reg_size); 3908 return imms << ImmSetBits_offset; 3909 } 3910 3911 static Instr ImmRotate(unsigned immr, unsigned reg_size) { 3912 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 3913 VIXL_ASSERT(((reg_size == kXRegSize) && is_uint6(immr)) || 3914 ((reg_size == kWRegSize) && is_uint5(immr))); 3915 USE(reg_size); 3916 return immr << ImmRotate_offset; 3917 } 3918 3919 static Instr ImmLLiteral(int imm19) { 3920 VIXL_ASSERT(is_int19(imm19)); 3921 return truncate_to_int19(imm19) << ImmLLiteral_offset; 3922 } 3923 3924 static Instr BitN(unsigned bitn, unsigned reg_size) { 3925 VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); 3926 VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0)); 3927 USE(reg_size); 3928 return bitn << BitN_offset; 3929 } 3930 3931 static Instr ShiftDP(Shift shift) { 3932 VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR); 3933 return shift << ShiftDP_offset; 3934 } 3935 3936 static Instr ImmDPShift(unsigned amount) { 3937 VIXL_ASSERT(is_uint6(amount)); 3938 return amount << ImmDPShift_offset; 3939 } 3940 3941 static Instr ExtendMode(Extend extend) { 3942 return extend << ExtendMode_offset; 3943 } 3944 3945 static Instr ImmExtendShift(unsigned left_shift) { 3946 VIXL_ASSERT(left_shift <= 4); 3947 return left_shift << ImmExtendShift_offset; 3948 } 3949 3950 static Instr ImmCondCmp(unsigned imm) { 3951 VIXL_ASSERT(is_uint5(imm)); 3952 return imm << ImmCondCmp_offset; 3953 } 3954 3955 static Instr Nzcv(StatusFlags nzcv) { 3956 return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset; 3957 } 3958 3959 // MemOperand offset encoding. 
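  // Pair accesses scale the byte offset by the access size before encoding
  // it. A worked example (a sketch based on ImmLSPair() below): for a 64-bit
  // LDP, access_size is log2(8) == 3, so ImmLSPair(264, 3) stores the scaled
  // 7-bit immediate 264 >> 3 == 33, while a byte offset that is not a
  // multiple of 8 fails the assertion and has to be expressed through
  // another addressing form.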
3960 static Instr ImmLSUnsigned(int imm12) { 3961 VIXL_ASSERT(is_uint12(imm12)); 3962 return imm12 << ImmLSUnsigned_offset; 3963 } 3964 3965 static Instr ImmLS(int imm9) { 3966 VIXL_ASSERT(is_int9(imm9)); 3967 return truncate_to_int9(imm9) << ImmLS_offset; 3968 } 3969 3970 static Instr ImmLSPair(int imm7, unsigned access_size) { 3971 VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7); 3972 int scaled_imm7 = imm7 >> access_size; 3973 VIXL_ASSERT(is_int7(scaled_imm7)); 3974 return truncate_to_int7(scaled_imm7) << ImmLSPair_offset; 3975 } 3976 3977 static Instr ImmShiftLS(unsigned shift_amount) { 3978 VIXL_ASSERT(is_uint1(shift_amount)); 3979 return shift_amount << ImmShiftLS_offset; 3980 } 3981 3982 static Instr ImmPrefetchOperation(int imm5) { 3983 VIXL_ASSERT(is_uint5(imm5)); 3984 return imm5 << ImmPrefetchOperation_offset; 3985 } 3986 3987 static Instr ImmException(int imm16) { 3988 VIXL_ASSERT(is_uint16(imm16)); 3989 return imm16 << ImmException_offset; 3990 } 3991 3992 static Instr ImmSystemRegister(int imm15) { 3993 VIXL_ASSERT(is_uint15(imm15)); 3994 return imm15 << ImmSystemRegister_offset; 3995 } 3996 3997 static Instr ImmHint(int imm7) { 3998 VIXL_ASSERT(is_uint7(imm7)); 3999 return imm7 << ImmHint_offset; 4000 } 4001 4002 static Instr CRm(int imm4) { 4003 VIXL_ASSERT(is_uint4(imm4)); 4004 return imm4 << CRm_offset; 4005 } 4006 4007 static Instr CRn(int imm4) { 4008 VIXL_ASSERT(is_uint4(imm4)); 4009 return imm4 << CRn_offset; 4010 } 4011 4012 static Instr SysOp(int imm14) { 4013 VIXL_ASSERT(is_uint14(imm14)); 4014 return imm14 << SysOp_offset; 4015 } 4016 4017 static Instr ImmSysOp1(int imm3) { 4018 VIXL_ASSERT(is_uint3(imm3)); 4019 return imm3 << SysOp1_offset; 4020 } 4021 4022 static Instr ImmSysOp2(int imm3) { 4023 VIXL_ASSERT(is_uint3(imm3)); 4024 return imm3 << SysOp2_offset; 4025 } 4026 4027 static Instr ImmBarrierDomain(int imm2) { 4028 VIXL_ASSERT(is_uint2(imm2)); 4029 return imm2 << ImmBarrierDomain_offset; 4030 } 4031 4032 static Instr ImmBarrierType(int imm2) { 4033 VIXL_ASSERT(is_uint2(imm2)); 4034 return imm2 << ImmBarrierType_offset; 4035 } 4036 4037 // Move immediates encoding. 4038 static Instr ImmMoveWide(uint64_t imm) { 4039 VIXL_ASSERT(is_uint16(imm)); 4040 return static_cast<Instr>(imm << ImmMoveWide_offset); 4041 } 4042 4043 static Instr ShiftMoveWide(int64_t shift) { 4044 VIXL_ASSERT(is_uint2(shift)); 4045 return static_cast<Instr>(shift << ShiftMoveWide_offset); 4046 } 4047 4048 // FP Immediates. 4049 static Instr ImmFP32(float imm); 4050 static Instr ImmFP64(double imm); 4051 4052 // FP register type. 4053 static Instr FPType(FPRegister fd) { 4054 return fd.Is64Bits() ? FP64 : FP32; 4055 } 4056 4057 static Instr FPScale(unsigned scale) { 4058 VIXL_ASSERT(is_uint6(scale)); 4059 return scale << FPScale_offset; 4060 } 4061 4062 // Immediate field checking helpers. 
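  // These let callers (notably the MacroAssembler) test whether an immediate
  // is directly encodable before committing to an instruction form. For
  // example, mirroring ImmAddSub() above, IsImmAddSub() accepts a 12-bit
  // unsigned value, optionally shifted left by 12 bits:
  //
  //   IsImmAddSub(0xfff);   // Encodable: fits in 12 bits.
  //   IsImmAddSub(0x1000);  // Encodable: 0x1 with the 12-bit shift.
  //   IsImmAddSub(0x1001);  // Not encodable as a single ADD/SUB immediate.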
4063 static bool IsImmAddSub(int64_t immediate); 4064 static bool IsImmConditionalCompare(int64_t immediate); 4065 static bool IsImmFP32(float imm); 4066 static bool IsImmFP64(double imm); 4067 static bool IsImmLogical(uint64_t value, 4068 unsigned width, 4069 unsigned* n = NULL, 4070 unsigned* imm_s = NULL, 4071 unsigned* imm_r = NULL); 4072 static bool IsImmLSPair(int64_t offset, unsigned access_size); 4073 static bool IsImmLSScaled(int64_t offset, unsigned access_size); 4074 static bool IsImmLSUnscaled(int64_t offset); 4075 static bool IsImmMovn(uint64_t imm, unsigned reg_size); 4076 static bool IsImmMovz(uint64_t imm, unsigned reg_size); 4077 4078 // Instruction bits for vector format in data processing operations. 4079 static Instr VFormat(VRegister vd) { 4080 if (vd.Is64Bits()) { 4081 switch (vd.lanes()) { 4082 case 2: return NEON_2S; 4083 case 4: return NEON_4H; 4084 case 8: return NEON_8B; 4085 default: return 0xffffffff; 4086 } 4087 } else { 4088 VIXL_ASSERT(vd.Is128Bits()); 4089 switch (vd.lanes()) { 4090 case 2: return NEON_2D; 4091 case 4: return NEON_4S; 4092 case 8: return NEON_8H; 4093 case 16: return NEON_16B; 4094 default: return 0xffffffff; 4095 } 4096 } 4097 } 4098 4099 // Instruction bits for vector format in floating point data processing 4100 // operations. 4101 static Instr FPFormat(VRegister vd) { 4102 if (vd.lanes() == 1) { 4103 // Floating point scalar formats. 4104 VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits()); 4105 return vd.Is64Bits() ? FP64 : FP32; 4106 } 4107 4108 // Two lane floating point vector formats. 4109 if (vd.lanes() == 2) { 4110 VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits()); 4111 return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S; 4112 } 4113 4114 // Four lane floating point vector format. 4115 VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits()); 4116 return NEON_FP_4S; 4117 } 4118 4119 // Instruction bits for vector format in load and store operations. 4120 static Instr LSVFormat(VRegister vd) { 4121 if (vd.Is64Bits()) { 4122 switch (vd.lanes()) { 4123 case 1: return LS_NEON_1D; 4124 case 2: return LS_NEON_2S; 4125 case 4: return LS_NEON_4H; 4126 case 8: return LS_NEON_8B; 4127 default: return 0xffffffff; 4128 } 4129 } else { 4130 VIXL_ASSERT(vd.Is128Bits()); 4131 switch (vd.lanes()) { 4132 case 2: return LS_NEON_2D; 4133 case 4: return LS_NEON_4S; 4134 case 8: return LS_NEON_8H; 4135 case 16: return LS_NEON_16B; 4136 default: return 0xffffffff; 4137 } 4138 } 4139 } 4140 4141 // Instruction bits for scalar format in data processing operations. 
4142 static Instr SFormat(VRegister vd) { 4143 VIXL_ASSERT(vd.lanes() == 1); 4144 switch (vd.SizeInBytes()) { 4145 case 1: return NEON_B; 4146 case 2: return NEON_H; 4147 case 4: return NEON_S; 4148 case 8: return NEON_D; 4149 default: return 0xffffffff; 4150 } 4151 } 4152 4153 static Instr ImmNEONHLM(int index, int num_bits) { 4154 int h, l, m; 4155 if (num_bits == 3) { 4156 VIXL_ASSERT(is_uint3(index)); 4157 h = (index >> 2) & 1; 4158 l = (index >> 1) & 1; 4159 m = (index >> 0) & 1; 4160 } else if (num_bits == 2) { 4161 VIXL_ASSERT(is_uint2(index)); 4162 h = (index >> 1) & 1; 4163 l = (index >> 0) & 1; 4164 m = 0; 4165 } else { 4166 VIXL_ASSERT(is_uint1(index) && (num_bits == 1)); 4167 h = (index >> 0) & 1; 4168 l = 0; 4169 m = 0; 4170 } 4171 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); 4172 } 4173 4174 static Instr ImmNEONExt(int imm4) { 4175 VIXL_ASSERT(is_uint4(imm4)); 4176 return imm4 << ImmNEONExt_offset; 4177 } 4178 4179 static Instr ImmNEON5(Instr format, int index) { 4180 VIXL_ASSERT(is_uint4(index)); 4181 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 4182 int imm5 = (index << (s + 1)) | (1 << s); 4183 return imm5 << ImmNEON5_offset; 4184 } 4185 4186 static Instr ImmNEON4(Instr format, int index) { 4187 VIXL_ASSERT(is_uint4(index)); 4188 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); 4189 int imm4 = index << s; 4190 return imm4 << ImmNEON4_offset; 4191 } 4192 4193 static Instr ImmNEONabcdefgh(int imm8) { 4194 VIXL_ASSERT(is_uint8(imm8)); 4195 Instr instr; 4196 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; 4197 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; 4198 return instr; 4199 } 4200 4201 static Instr NEONCmode(int cmode) { 4202 VIXL_ASSERT(is_uint4(cmode)); 4203 return cmode << NEONCmode_offset; 4204 } 4205 4206 static Instr NEONModImmOp(int op) { 4207 VIXL_ASSERT(is_uint1(op)); 4208 return op << NEONModImmOp_offset; 4209 } 4210 4211 // Size of the code generated from the label's location to the current position. 4212 size_t SizeOfCodeGeneratedSince(Label* label) const { 4213 VIXL_ASSERT(label->IsBound()); 4214 return buffer_->OffsetFrom(label->location()); 4215 } 4216 4217 size_t SizeOfCodeGenerated() const { 4218 return buffer_->CursorOffset(); 4219 } 4220 4221 size_t BufferCapacity() const { return buffer_->capacity(); } 4222 4223 size_t RemainingBufferSpace() const { return buffer_->RemainingBytes(); } 4224 4225 void EnsureSpaceFor(size_t amount) { 4226 if (buffer_->RemainingBytes() < amount) { 4227 size_t capacity = buffer_->capacity(); 4228 size_t size = buffer_->CursorOffset(); 4229 do { 4230 // TODO(all): refine. 4231 capacity *= 2; 4232 } while ((capacity - size) < amount); 4233 buffer_->Grow(capacity); 4234 } 4235 } 4236 4237#ifdef VIXL_DEBUG 4238 void AcquireBuffer() { 4239 VIXL_ASSERT(buffer_monitor_ >= 0); 4240 buffer_monitor_++; 4241 } 4242 4243 void ReleaseBuffer() { 4244 buffer_monitor_--; 4245 VIXL_ASSERT(buffer_monitor_ >= 0); 4246 } 4247#endif 4248 4249 PositionIndependentCodeOption pic() const { 4250 return pic_; 4251 } 4252 4253 bool AllowPageOffsetDependentCode() const { 4254 return (pic() == PageOffsetDependentCode) || 4255 (pic() == PositionDependentCode); 4256 } 4257 4258 static const Register& AppropriateZeroRegFor(const CPURegister& reg) { 4259 return reg.Is64Bits() ?
xzr : wzr; 4260 } 4261 4262 4263 protected: 4264 void LoadStore(const CPURegister& rt, 4265 const MemOperand& addr, 4266 LoadStoreOp op, 4267 LoadStoreScalingOption option = PreferScaledOffset); 4268 4269 void LoadStorePair(const CPURegister& rt, 4270 const CPURegister& rt2, 4271 const MemOperand& addr, 4272 LoadStorePairOp op); 4273 void LoadStoreStruct(const VRegister& vt, 4274 const MemOperand& addr, 4275 NEONLoadStoreMultiStructOp op); 4276 void LoadStoreStruct1(const VRegister& vt, 4277 int reg_count, 4278 const MemOperand& addr); 4279 void LoadStoreStructSingle(const VRegister& vt, 4280 uint32_t lane, 4281 const MemOperand& addr, 4282 NEONLoadStoreSingleStructOp op); 4283 void LoadStoreStructSingleAllLanes(const VRegister& vt, 4284 const MemOperand& addr, 4285 NEONLoadStoreSingleStructOp op); 4286 void LoadStoreStructVerify(const VRegister& vt, 4287 const MemOperand& addr, 4288 Instr op); 4289 4290 void Prefetch(PrefetchOperation op, 4291 const MemOperand& addr, 4292 LoadStoreScalingOption option = PreferScaledOffset); 4293 4294 // TODO(all): The third parameter should be passed by reference, but gcc 4295 // 4.8.2 then reports a bogus uninitialised warning. 4296 void Logical(const Register& rd, 4297 const Register& rn, 4298 const Operand operand, 4299 LogicalOp op); 4300 void LogicalImmediate(const Register& rd, 4301 const Register& rn, 4302 unsigned n, 4303 unsigned imm_s, 4304 unsigned imm_r, 4305 LogicalOp op); 4306 4307 void ConditionalCompare(const Register& rn, 4308 const Operand& operand, 4309 StatusFlags nzcv, 4310 Condition cond, 4311 ConditionalCompareOp op); 4312 4313 void AddSubWithCarry(const Register& rd, 4314 const Register& rn, 4315 const Operand& operand, 4316 FlagsUpdate S, 4317 AddSubWithCarryOp op); 4318 4319 4320 // Functions for emulating operands not directly supported by the instruction 4321 // set. 4322 void EmitShift(const Register& rd, 4323 const Register& rn, 4324 Shift shift, 4325 unsigned amount); 4326 void EmitExtendShift(const Register& rd, 4327 const Register& rn, 4328 Extend extend, 4329 unsigned left_shift); 4330 4331 void AddSub(const Register& rd, 4332 const Register& rn, 4333 const Operand& operand, 4334 FlagsUpdate S, 4335 AddSubOp op); 4336 4337 void NEONTable(const VRegister& vd, 4338 const VRegister& vn, 4339 const VRegister& vm, 4340 NEONTableOp op); 4341 4342 // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified 4343 // registers. Only simple loads are supported; sign- and zero-extension (such 4344 // as in LDPSW_x or LDRB_w) are not supported. 4345 static LoadStoreOp LoadOpFor(const CPURegister& rt); 4346 static LoadStorePairOp LoadPairOpFor(const CPURegister& rt, 4347 const CPURegister& rt2); 4348 static LoadStoreOp StoreOpFor(const CPURegister& rt); 4349 static LoadStorePairOp StorePairOpFor(const CPURegister& rt, 4350 const CPURegister& rt2); 4351 static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor( 4352 const CPURegister& rt, const CPURegister& rt2); 4353 static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor( 4354 const CPURegister& rt, const CPURegister& rt2); 4355 static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt); 4356 4357 4358 private: 4359 static uint32_t FP32ToImm8(float imm); 4360 static uint32_t FP64ToImm8(double imm); 4361 4362 // Instruction helpers.
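  // Each of these packs its operands with the static encoding helpers above
  // and hands the result to Emit(). As a sketch (assuming 'mov_op' carries
  // the fixed opcode bits of MOVZ, MOVN or MOVK), MoveWide() below emits
  // roughly:
  //
  //   Emit(SF(rd) | mov_op | ShiftMoveWide(shift / 16) |
  //        ImmMoveWide(imm) | Rd(rd));
  //
  // with 'shift' given in bits (0, 16, 32 or 48) and encoded as a two-bit
  // multiple-of-16 count.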
4363 void MoveWide(const Register& rd, 4364 uint64_t imm, 4365 int shift, 4366 MoveWideImmediateOp mov_op); 4367 void DataProcShiftedRegister(const Register& rd, 4368 const Register& rn, 4369 const Operand& operand, 4370 FlagsUpdate S, 4371 Instr op); 4372 void DataProcExtendedRegister(const Register& rd, 4373 const Register& rn, 4374 const Operand& operand, 4375 FlagsUpdate S, 4376 Instr op); 4377 void LoadStorePairNonTemporal(const CPURegister& rt, 4378 const CPURegister& rt2, 4379 const MemOperand& addr, 4380 LoadStorePairNonTemporalOp op); 4381 void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op); 4382 void ConditionalSelect(const Register& rd, 4383 const Register& rn, 4384 const Register& rm, 4385 Condition cond, 4386 ConditionalSelectOp op); 4387 void DataProcessing1Source(const Register& rd, 4388 const Register& rn, 4389 DataProcessing1SourceOp op); 4390 void DataProcessing3Source(const Register& rd, 4391 const Register& rn, 4392 const Register& rm, 4393 const Register& ra, 4394 DataProcessing3SourceOp op); 4395 void FPDataProcessing1Source(const VRegister& fd, 4396 const VRegister& fn, 4397 FPDataProcessing1SourceOp op); 4398 void FPDataProcessing3Source(const VRegister& fd, 4399 const VRegister& fn, 4400 const VRegister& fm, 4401 const VRegister& fa, 4402 FPDataProcessing3SourceOp op); 4403 void NEONAcrossLanesL(const VRegister& vd, 4404 const VRegister& vn, 4405 NEONAcrossLanesOp op); 4406 void NEONAcrossLanes(const VRegister& vd, 4407 const VRegister& vn, 4408 NEONAcrossLanesOp op); 4409 void NEONModifiedImmShiftLsl(const VRegister& vd, 4410 const int imm8, 4411 const int left_shift, 4412 NEONModifiedImmediateOp op); 4413 void NEONModifiedImmShiftMsl(const VRegister& vd, 4414 const int imm8, 4415 const int shift_amount, 4416 NEONModifiedImmediateOp op); 4417 void NEONFP2Same(const VRegister& vd, 4418 const VRegister& vn, 4419 Instr vop); 4420 void NEON3Same(const VRegister& vd, 4421 const VRegister& vn, 4422 const VRegister& vm, 4423 NEON3SameOp vop); 4424 void NEONFP3Same(const VRegister& vd, 4425 const VRegister& vn, 4426 const VRegister& vm, 4427 Instr op); 4428 void NEON3DifferentL(const VRegister& vd, 4429 const VRegister& vn, 4430 const VRegister& vm, 4431 NEON3DifferentOp vop); 4432 void NEON3DifferentW(const VRegister& vd, 4433 const VRegister& vn, 4434 const VRegister& vm, 4435 NEON3DifferentOp vop); 4436 void NEON3DifferentHN(const VRegister& vd, 4437 const VRegister& vn, 4438 const VRegister& vm, 4439 NEON3DifferentOp vop); 4440 void NEONFP2RegMisc(const VRegister& vd, 4441 const VRegister& vn, 4442 NEON2RegMiscOp vop, 4443 double value = 0.0); 4444 void NEON2RegMisc(const VRegister& vd, 4445 const VRegister& vn, 4446 NEON2RegMiscOp vop, 4447 int value = 0); 4448 void NEONFP2RegMisc(const VRegister& vd, 4449 const VRegister& vn, 4450 Instr op); 4451 void NEONAddlp(const VRegister& vd, 4452 const VRegister& vn, 4453 NEON2RegMiscOp op); 4454 void NEONPerm(const VRegister& vd, 4455 const VRegister& vn, 4456 const VRegister& vm, 4457 NEONPermOp op); 4458 void NEONFPByElement(const VRegister& vd, 4459 const VRegister& vn, 4460 const VRegister& vm, 4461 int vm_index, 4462 NEONByIndexedElementOp op); 4463 void NEONByElement(const VRegister& vd, 4464 const VRegister& vn, 4465 const VRegister& vm, 4466 int vm_index, 4467 NEONByIndexedElementOp op); 4468 void NEONByElementL(const VRegister& vd, 4469 const VRegister& vn, 4470 const VRegister& vm, 4471 int vm_index, 4472 NEONByIndexedElementOp op); 4473 void NEONShiftImmediate(const VRegister& vd, 4474 const 
VRegister& vn, 4475 NEONShiftImmediateOp op, 4476 int immh_immb); 4477 void NEONShiftLeftImmediate(const VRegister& vd, 4478 const VRegister& vn, 4479 int shift, 4480 NEONShiftImmediateOp op); 4481 void NEONShiftRightImmediate(const VRegister& vd, 4482 const VRegister& vn, 4483 int shift, 4484 NEONShiftImmediateOp op); 4485 void NEONShiftImmediateL(const VRegister& vd, 4486 const VRegister& vn, 4487 int shift, 4488 NEONShiftImmediateOp op); 4489 void NEONShiftImmediateN(const VRegister& vd, 4490 const VRegister& vn, 4491 int shift, 4492 NEONShiftImmediateOp op); 4493 void NEONXtn(const VRegister& vd, 4494 const VRegister& vn, 4495 NEON2RegMiscOp vop); 4496 4497 Instr LoadStoreStructAddrModeField(const MemOperand& addr); 4498 4499 // Encode the specified MemOperand for the specified access size and scaling 4500 // preference. 4501 Instr LoadStoreMemOperand(const MemOperand& addr, 4502 unsigned access_size, 4503 LoadStoreScalingOption option); 4504 4505 // Link the current (not-yet-emitted) instruction to the specified label, then 4506 // return an offset to be encoded in the instruction. If the label is not yet 4507 // bound, an offset of 0 is returned. 4508 ptrdiff_t LinkAndGetByteOffsetTo(Label * label); 4509 ptrdiff_t LinkAndGetInstructionOffsetTo(Label * label); 4510 ptrdiff_t LinkAndGetPageOffsetTo(Label * label); 4511 4512 // A common implementation for the LinkAndGet<Type>OffsetTo helpers. 4513 template <int element_shift> 4514 ptrdiff_t LinkAndGetOffsetTo(Label* label); 4515 4516 // Literal load offsets are in words (32-bit). 4517 ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal); 4518 4519 // Emit the instruction in buffer_. 4520 void Emit(Instr instruction) { 4521 VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize); 4522 VIXL_ASSERT(buffer_monitor_ > 0); 4523 buffer_->Emit32(instruction); 4524 } 4525 4526 // Buffer where the code is emitted. 4527 CodeBuffer* buffer_; 4528 PositionIndependentCodeOption pic_; 4529 4530#ifdef VIXL_DEBUG 4531 int64_t buffer_monitor_; 4532#endif 4533}; 4534 4535 4536// All Assembler emits MUST acquire/release the underlying code buffer. The 4537// helper scope below will do so and optionally ensure the buffer is big enough 4538// to receive the emit. It is possible to request the scope not to perform any 4539// checks (kNoCheck) if, for example, it is known in advance that the buffer 4540// size is adequate or there is some other size checking mechanism in place. 4541class CodeBufferCheckScope { 4542 public: 4543 // Tell whether or not the scope needs to ensure the associated CodeBuffer 4544 // has enough space for the requested size. 4545 enum CheckPolicy { 4546 kNoCheck, 4547 kCheck 4548 }; 4549 4550 // Tell whether or not the scope should assert that the amount of code 4551 // emitted within the scope is consistent with the requested amount. 4552 enum AssertPolicy { 4553 kNoAssert, // No assert required. 4554 kExactSize, // The code emitted must be exactly size bytes. 4555 kMaximumSize // The code emitted must be at most size bytes. 4556 }; 4557 4558 CodeBufferCheckScope(Assembler* assm, 4559 size_t size, 4560 CheckPolicy check_policy = kCheck, 4561 AssertPolicy assert_policy = kMaximumSize) 4562 : assm_(assm) { 4563 if (check_policy == kCheck) assm->EnsureSpaceFor(size); 4564#ifdef VIXL_DEBUG 4565 assm->bind(&start_); 4566 size_ = size; 4567 assert_policy_ = assert_policy; 4568 assm->AcquireBuffer(); 4569#else 4570 USE(assert_policy); 4571#endif 4572 } 4573 4574 // This is a shortcut for CodeBufferCheckScope(assm, 0, kNoCheck, kNoAssert).
4575 explicit CodeBufferCheckScope(Assembler* assm) : assm_(assm) { 4576#ifdef VIXL_DEBUG 4577 size_ = 0; 4578 assert_policy_ = kNoAssert; 4579 assm->AcquireBuffer(); 4580#endif 4581 } 4582 4583 ~CodeBufferCheckScope() { 4584#ifdef VIXL_DEBUG 4585 assm_->ReleaseBuffer(); 4586 switch (assert_policy_) { 4587 case kNoAssert: break; 4588 case kExactSize: 4589 VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) == size_); 4590 break; 4591 case kMaximumSize: 4592 VIXL_ASSERT(assm_->SizeOfCodeGeneratedSince(&start_) <= size_); 4593 break; 4594 default: 4595 VIXL_UNREACHABLE(); 4596 } 4597#endif 4598 } 4599 4600 protected: 4601 Assembler* assm_; 4602#ifdef VIXL_DEBUG 4603 Label start_; 4604 size_t size_; 4605 AssertPolicy assert_policy_; 4606#endif 4607}; 4608 4609 4610template <typename T> 4611void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) { 4612 return UpdateValue(new_value, assembler->GetStartAddress<uint8_t*>()); 4613} 4614 4615 4616template <typename T> 4617void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) { 4618 return UpdateValue(high64, low64, assembler->GetStartAddress<uint8_t*>()); 4619} 4620 4621 4622} // namespace vixl 4623 4624#endif // VIXL_A64_ASSEMBLER_A64_H_