sve_mem.isa revision 14028:44edf7dbe672
1// Copyright (c) 2017-2018 ARM Limited 2// All rights reserved 3// 4// The license below extends only to copyright in the software and shall 5// not be construed as granting a license to any other intellectual 6// property including but not limited to intellectual property relating 7// to a hardware implementation of the functionality of the software 8// licensed hereunder. You may use the software subject to the license 9// terms below provided that you ensure that this notice is replicated 10// unmodified and in its entirety in all distributions of the software, 11// modified or unmodified, in source code or in binary form. 12// 13// Redistribution and use in source and binary forms, with or without 14// modification, are permitted provided that the following conditions are 15// met: redistributions of source code must retain the above copyright 16// notice, this list of conditions and the following disclaimer; 17// redistributions in binary form must reproduce the above copyright 18// notice, this list of conditions and the following disclaimer in the 19// documentation and/or other materials provided with the distribution; 20// neither the name of the copyright holders nor the names of its 21// contributors may be used to endorse or promote products derived from 22// this software without specific prior written permission. 23// 24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 30// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 31// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 32// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35// 36// Authors: Giacomo Gabrielli 37 38// @file Definition of SVE memory access instructions. 39 40output header {{ 41 42 // Decodes SVE contiguous load instructions, scalar plus scalar form. 43 template <template <typename T1, typename T2> class Base> 44 StaticInstPtr 45 decodeSveContigLoadSSInsts(uint8_t dtype, ExtMachInst machInst, 46 IntRegIndex zt, IntRegIndex pg, IntRegIndex rn, 47 IntRegIndex rm, bool firstFaulting) 48 { 49 const char* mn = firstFaulting ? "ldff1" : "ld1"; 50 switch (dtype) { 51 case 0x0: 52 return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, rm); 53 case 0x1: 54 return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, rm); 55 case 0x2: 56 return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, rm); 57 case 0x3: 58 return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, rm); 59 case 0x4: 60 return new Base<int64_t, int32_t>(mn, machInst, zt, pg, rn, rm); 61 case 0x5: 62 return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, rm); 63 case 0x6: 64 return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, rm); 65 case 0x7: 66 return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, rm); 67 case 0x8: 68 return new Base<int64_t, int16_t>(mn, machInst, zt, pg, rn, rm); 69 case 0x9: 70 return new Base<int32_t, int16_t>(mn, machInst, zt, pg, rn, rm); 71 case 0xa: 72 return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, rm); 73 case 0xb: 74 return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, rm); 75 case 0xc: 76 return new Base<int64_t, int8_t>(mn, machInst, zt, pg, rn, rm); 77 case 0xd: 78 return new Base<int32_t, int8_t>(mn, machInst, zt, pg, rn, rm); 79 case 0xe: 80 return new Base<int16_t, int8_t>(mn, machInst, zt, pg, rn, rm); 81 case 0xf: 82 return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, rm); 83 } 84 return new Unknown64(machInst); 85 } 86 87 // Decodes SVE contiguous load instructions, scalar plus immediate form. 88 template <template <typename T1, typename T2> class Base> 89 StaticInstPtr 90 decodeSveContigLoadSIInsts(uint8_t dtype, ExtMachInst machInst, 91 IntRegIndex zt, IntRegIndex pg, IntRegIndex rn, 92 uint64_t imm, bool firstFaulting, 93 bool replicate = false) 94 { 95 assert(!(replicate && firstFaulting)); 96 97 const char* mn = replicate ? "ld1r" : 98 (firstFaulting ? "ldff1" : "ld1"); 99 switch (dtype) { 100 case 0x0: 101 return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, imm); 102 case 0x1: 103 return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, imm); 104 case 0x2: 105 return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, imm); 106 case 0x3: 107 return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, imm); 108 case 0x4: 109 return new Base<int64_t, int32_t>(mn, machInst, zt, pg, rn, imm); 110 case 0x5: 111 return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, imm); 112 case 0x6: 113 return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, imm); 114 case 0x7: 115 return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, imm); 116 case 0x8: 117 return new Base<int64_t, int16_t>(mn, machInst, zt, pg, rn, imm); 118 case 0x9: 119 return new Base<int32_t, int16_t>(mn, machInst, zt, pg, rn, imm); 120 case 0xa: 121 return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, imm); 122 case 0xb: 123 return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, imm); 124 case 0xc: 125 return new Base<int64_t, int8_t>(mn, machInst, zt, pg, rn, imm); 126 case 0xd: 127 return new Base<int32_t, int8_t>(mn, machInst, zt, pg, rn, imm); 128 case 0xe: 129 return new Base<int16_t, int8_t>(mn, machInst, zt, pg, rn, imm); 130 case 0xf: 131 return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, imm); 132 } 133 return new Unknown64(machInst); 134 } 135 136 // Decodes SVE contiguous store instructions, scalar plus scalar form. 137 template <template <typename T1, typename T2> class Base> 138 StaticInstPtr 139 decodeSveContigStoreSSInsts(uint8_t dtype, ExtMachInst machInst, 140 IntRegIndex zt, IntRegIndex pg, IntRegIndex rn, 141 IntRegIndex rm) 142 { 143 const char* mn = "st1"; 144 switch (dtype) { 145 case 0x0: 146 return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, rm); 147 case 0x1: 148 return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, rm); 149 case 0x2: 150 return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, rm); 151 case 0x3: 152 return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, rm); 153 case 0x5: 154 return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, rm); 155 case 0x6: 156 return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, rm); 157 case 0x7: 158 return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, rm); 159 case 0xa: 160 return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, rm); 161 case 0xb: 162 return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, rm); 163 case 0xf: 164 return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, rm); 165 } 166 return new Unknown64(machInst); 167 } 168 169 // Decodes SVE contiguous store instructions, scalar plus immediate form. 170 template <template <typename T1, typename T2> class Base> 171 StaticInstPtr 172 decodeSveContigStoreSIInsts(uint8_t dtype, ExtMachInst machInst, 173 IntRegIndex zt, IntRegIndex pg, IntRegIndex rn, 174 int8_t imm) 175 { 176 const char* mn = "st1"; 177 switch (dtype) { 178 case 0x0: 179 return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, imm); 180 case 0x1: 181 return new Base<uint16_t, uint8_t>(mn, machInst, zt, pg, rn, imm); 182 case 0x2: 183 return new Base<uint32_t, uint8_t>(mn, machInst, zt, pg, rn, imm); 184 case 0x3: 185 return new Base<uint64_t, uint8_t>(mn, machInst, zt, pg, rn, imm); 186 case 0x5: 187 return new Base<uint16_t, uint16_t>(mn, machInst, zt, pg, rn, imm); 188 case 0x6: 189 return new Base<uint32_t, uint16_t>(mn, machInst, zt, pg, rn, imm); 190 case 0x7: 191 return new Base<uint64_t, uint16_t>(mn, machInst, zt, pg, rn, imm); 192 case 0xa: 193 return new Base<uint32_t, uint32_t>(mn, machInst, zt, pg, rn, imm); 194 case 0xb: 195 return new Base<uint64_t, uint32_t>(mn, machInst, zt, pg, rn, imm); 196 case 0xf: 197 return new Base<uint64_t, uint64_t>(mn, machInst, zt, pg, rn, imm); 198 } 199 return new Unknown64(machInst); 200 } 201 202 // NOTE: SVE load-and-replicate instructions are decoded with 203 // decodeSveContigLoadSIInsts(...). 204 205}}; 206 207output decoder {{ 208 209 StaticInstPtr 210 decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst, 211 IntRegIndex zt, IntRegIndex pg, IntRegIndex zn, 212 uint64_t imm, bool esizeIs32, 213 bool firstFaulting) 214 { 215 const char* mn = firstFaulting ? "ldff1" : "ld1"; 216 switch (dtype) { 217 case 0x0: 218 if (esizeIs32) { 219 return new SveIndexedMemVI<int32_t, int8_t, 220 SveGatherLoadVIMicroop>( 221 mn, machInst, MemReadOp, zt, pg, zn, imm); 222 } else { 223 return new SveIndexedMemVI<int64_t, int8_t, 224 SveGatherLoadVIMicroop>( 225 mn, machInst, MemReadOp, zt, pg, zn, imm); 226 } 227 case 0x1: 228 if (esizeIs32) { 229 return new SveIndexedMemVI<uint32_t, uint8_t, 230 SveGatherLoadVIMicroop>( 231 mn, machInst, MemReadOp, zt, pg, zn, imm); 232 } else { 233 return new SveIndexedMemVI<uint64_t, uint8_t, 234 SveGatherLoadVIMicroop>( 235 mn, machInst, MemReadOp, zt, pg, zn, imm); 236 } 237 case 0x2: 238 if (esizeIs32) { 239 return new SveIndexedMemVI<int32_t, int16_t, 240 SveGatherLoadVIMicroop>( 241 mn, machInst, MemReadOp, zt, pg, zn, imm); 242 } else { 243 return new SveIndexedMemVI<int64_t, int16_t, 244 SveGatherLoadVIMicroop>( 245 mn, machInst, MemReadOp, zt, pg, zn, imm); 246 } 247 case 0x3: 248 if (esizeIs32) { 249 return new SveIndexedMemVI<uint32_t, uint16_t, 250 SveGatherLoadVIMicroop>( 251 mn, machInst, MemReadOp, zt, pg, zn, imm); 252 } else { 253 return new SveIndexedMemVI<uint64_t, uint16_t, 254 SveGatherLoadVIMicroop>( 255 mn, machInst, MemReadOp, zt, pg, zn, imm); 256 } 257 case 0x4: 258 if (esizeIs32) { 259 break; 260 } else { 261 return new SveIndexedMemVI<int64_t, int32_t, 262 SveGatherLoadVIMicroop>( 263 mn, machInst, MemReadOp, zt, pg, zn, imm); 264 } 265 case 0x5: 266 if (esizeIs32) { 267 return new SveIndexedMemVI<uint32_t, uint32_t, 268 SveGatherLoadVIMicroop>( 269 mn, machInst, MemReadOp, zt, pg, zn, imm); 270 } else { 271 return new SveIndexedMemVI<uint64_t, uint32_t, 272 SveGatherLoadVIMicroop>( 273 mn, machInst, MemReadOp, zt, pg, zn, imm); 274 } 275 case 0x7: 276 if (esizeIs32) { 277 break; 278 } else { 279 return new SveIndexedMemVI<uint64_t, uint64_t, 280 SveGatherLoadVIMicroop>( 281 mn, machInst, MemReadOp, zt, pg, zn, imm); 282 } 283 } 284 return new Unknown64(machInst); 285 } 286 287 StaticInstPtr 288 decodeSveGatherLoadSVInsts(uint8_t dtype, ExtMachInst machInst, 289 IntRegIndex zt, IntRegIndex pg, IntRegIndex rn, 290 IntRegIndex zm, bool esizeIs32, bool offsetIs32, 291 bool offsetIsSigned, bool offsetIsScaled, 292 bool firstFaulting) 293 { 294 const char* mn = firstFaulting ? "ldff1" : "ld1"; 295 switch (dtype) { 296 case 0x0: 297 if (esizeIs32) { 298 return new SveIndexedMemSV<int32_t, int8_t, 299 SveGatherLoadSVMicroop>( 300 mn, machInst, MemReadOp, zt, pg, rn, zm, 301 offsetIs32, offsetIsSigned, offsetIsScaled); 302 } else { 303 return new SveIndexedMemSV<int64_t, int8_t, 304 SveGatherLoadSVMicroop>( 305 mn, machInst, MemReadOp, zt, pg, rn, zm, 306 offsetIs32, offsetIsSigned, offsetIsScaled); 307 } 308 case 0x1: 309 if (esizeIs32) { 310 return new SveIndexedMemSV<uint32_t, uint8_t, 311 SveGatherLoadSVMicroop>( 312 mn, machInst, MemReadOp, zt, pg, rn, zm, 313 offsetIs32, offsetIsSigned, offsetIsScaled); 314 } else { 315 return new SveIndexedMemSV<uint64_t, uint8_t, 316 SveGatherLoadSVMicroop>( 317 mn, machInst, MemReadOp, zt, pg, rn, zm, 318 offsetIs32, offsetIsSigned, offsetIsScaled); 319 } 320 case 0x2: 321 if (esizeIs32) { 322 return new SveIndexedMemSV<int32_t, int16_t, 323 SveGatherLoadSVMicroop>( 324 mn, machInst, MemReadOp, zt, pg, rn, zm, 325 offsetIs32, offsetIsSigned, offsetIsScaled); 326 } else { 327 return new SveIndexedMemSV<int64_t, int16_t, 328 SveGatherLoadSVMicroop>( 329 mn, machInst, MemReadOp, zt, pg, rn, zm, 330 offsetIs32, offsetIsSigned, offsetIsScaled); 331 } 332 case 0x3: 333 if (esizeIs32) { 334 return new SveIndexedMemSV<uint32_t, uint16_t, 335 SveGatherLoadSVMicroop>( 336 mn, machInst, MemReadOp, zt, pg, rn, zm, 337 offsetIs32, offsetIsSigned, offsetIsScaled); 338 } else { 339 return new SveIndexedMemSV<uint64_t, uint16_t, 340 SveGatherLoadSVMicroop>( 341 mn, machInst, MemReadOp, zt, pg, rn, zm, 342 offsetIs32, offsetIsSigned, offsetIsScaled); 343 } 344 case 0x4: 345 if (esizeIs32) { 346 break; 347 } else { 348 return new SveIndexedMemSV<int64_t, int32_t, 349 SveGatherLoadSVMicroop>( 350 mn, machInst, MemReadOp, zt, pg, rn, zm, 351 offsetIs32, offsetIsSigned, offsetIsScaled); 352 } 353 case 0x5: 354 if (esizeIs32) { 355 return new SveIndexedMemSV<uint32_t, uint32_t, 356 SveGatherLoadSVMicroop>( 357 mn, machInst, MemReadOp, zt, pg, rn, zm, 358 offsetIs32, offsetIsSigned, offsetIsScaled); 359 } else { 360 return new SveIndexedMemSV<uint64_t, uint32_t, 361 SveGatherLoadSVMicroop>( 362 mn, machInst, MemReadOp, zt, pg, rn, zm, 363 offsetIs32, offsetIsSigned, offsetIsScaled); 364 } 365 case 0x7: 366 if (esizeIs32) { 367 break; 368 } else { 369 return new SveIndexedMemSV<uint64_t, uint64_t, 370 SveGatherLoadSVMicroop>( 371 mn, machInst, MemReadOp, zt, pg, rn, zm, 372 offsetIs32, offsetIsSigned, offsetIsScaled); 373 } 374 } 375 return new Unknown64(machInst); 376 } 377 378 StaticInstPtr 379 decodeSveScatterStoreVIInsts(uint8_t msz, ExtMachInst machInst, 380 IntRegIndex zt, IntRegIndex pg, 381 IntRegIndex zn, uint64_t imm, 382 bool esizeIs32) 383 { 384 const char* mn = "st1"; 385 switch (msz) { 386 case 0x0: 387 if (esizeIs32) { 388 return new SveIndexedMemVI<uint32_t, uint8_t, 389 SveScatterStoreVIMicroop>( 390 mn, machInst, MemWriteOp, zt, pg, zn, imm); 391 } else { 392 return new SveIndexedMemVI<uint64_t, uint8_t, 393 SveScatterStoreVIMicroop>( 394 mn, machInst, MemWriteOp, zt, pg, zn, imm); 395 } 396 case 0x1: 397 if (esizeIs32) { 398 return new SveIndexedMemVI<uint32_t, uint16_t, 399 SveScatterStoreVIMicroop>( 400 mn, machInst, MemWriteOp, zt, pg, zn, imm); 401 } else { 402 return new SveIndexedMemVI<uint64_t, uint16_t, 403 SveScatterStoreVIMicroop>( 404 mn, machInst, MemWriteOp, zt, pg, zn, imm); 405 } 406 case 0x2: 407 if (esizeIs32) { 408 return new SveIndexedMemVI<uint32_t, uint32_t, 409 SveScatterStoreVIMicroop>( 410 mn, machInst, MemWriteOp, zt, pg, zn, imm); 411 } else { 412 return new SveIndexedMemVI<uint64_t, uint32_t, 413 SveScatterStoreVIMicroop>( 414 mn, machInst, MemWriteOp, zt, pg, zn, imm); 415 } 416 case 0x3: 417 if (esizeIs32) { 418 break; 419 } else { 420 return new SveIndexedMemVI<uint64_t, uint64_t, 421 SveScatterStoreVIMicroop>( 422 mn, machInst, MemWriteOp, zt, pg, zn, imm); 423 } 424 } 425 return new Unknown64(machInst); 426 } 427 428 StaticInstPtr 429 decodeSveScatterStoreSVInsts(uint8_t msz, ExtMachInst machInst, 430 IntRegIndex zt, IntRegIndex pg, 431 IntRegIndex rn, IntRegIndex zm, 432 bool esizeIs32, bool offsetIs32, 433 bool offsetIsSigned, bool offsetIsScaled) 434 { 435 const char* mn = "st1"; 436 switch (msz) { 437 case 0x0: 438 if (esizeIs32) { 439 return new SveIndexedMemSV<uint32_t, uint8_t, 440 SveScatterStoreSVMicroop>( 441 mn, machInst, MemWriteOp, zt, pg, rn, zm, 442 offsetIs32, offsetIsSigned, offsetIsScaled); 443 } else { 444 return new SveIndexedMemSV<uint64_t, uint8_t, 445 SveScatterStoreSVMicroop>( 446 mn, machInst, MemWriteOp, zt, pg, rn, zm, 447 offsetIs32, offsetIsSigned, offsetIsScaled); 448 } 449 case 0x1: 450 if (esizeIs32) { 451 return new SveIndexedMemSV<uint32_t, uint16_t, 452 SveScatterStoreSVMicroop>( 453 mn, machInst, MemWriteOp, zt, pg, rn, zm, 454 offsetIs32, offsetIsSigned, offsetIsScaled); 455 } else { 456 return new SveIndexedMemSV<uint64_t, uint16_t, 457 SveScatterStoreSVMicroop>( 458 mn, machInst, MemWriteOp, zt, pg, rn, zm, 459 offsetIs32, offsetIsSigned, offsetIsScaled); 460 } 461 case 0x2: 462 if (esizeIs32) { 463 return new SveIndexedMemSV<uint32_t, uint32_t, 464 SveScatterStoreSVMicroop>( 465 mn, machInst, MemWriteOp, zt, pg, rn, zm, 466 offsetIs32, offsetIsSigned, offsetIsScaled); 467 } else { 468 return new SveIndexedMemSV<uint64_t, uint32_t, 469 SveScatterStoreSVMicroop>( 470 mn, machInst, MemWriteOp, zt, pg, rn, zm, 471 offsetIs32, offsetIsSigned, offsetIsScaled); 472 } 473 case 0x3: 474 if (esizeIs32) { 475 break; 476 } else { 477 return new SveIndexedMemSV<uint64_t, uint64_t, 478 SveScatterStoreSVMicroop>( 479 mn, machInst, MemWriteOp, zt, pg, rn, zm, 480 offsetIs32, offsetIsSigned, offsetIsScaled); 481 } 482 } 483 return new Unknown64(machInst); 484 } 485 486}}; 487 488 489let {{ 490 491 header_output = '' 492 exec_output = '' 493 decoders = { 'Generic': {} } 494 495 SPAlignmentCheckCode = ''' 496 if (this->baseIsSP && bits(XBase, 3, 0) && 497 SPAlignmentCheckEnabled(xc->tcBase())) { 498 return std::make_shared<SPAlignmentFault>(); 499 } 500 ''' 501 502 def emitSveMemFillSpill(isPred): 503 global header_output, exec_output, decoders 504 eaCode = SPAlignmentCheckCode + ''' 505 int memAccessSize = %(memacc_size)s; 506 EA = XBase + ((int64_t) imm * %(memacc_size)s)''' % { 507 'memacc_size': 'eCount / 8' if isPred else 'eCount'} 508 if isPred: 509 loadMemAccCode = ''' 510 int index = 0; 511 uint8_t byte; 512 for (int i = 0; i < eCount / 8; i++) { 513 byte = memDataView[i]; 514 for (int j = 0; j < 8; j++, index++) { 515 PDest_x[index] = (byte >> j) & 1; 516 } 517 } 518 ''' 519 storeMemAccCode = ''' 520 int index = 0; 521 uint8_t byte; 522 for (int i = 0; i < eCount / 8; i++) { 523 byte = 0; 524 for (int j = 0; j < 8; j++, index++) { 525 byte |= PDest_x[index] << j; 526 } 527 memDataView[i] = byte; 528 } 529 ''' 530 storeWrEnableCode = ''' 531 auto wrEn = std::vector<bool>(eCount / 8, true); 532 ''' 533 else: 534 loadMemAccCode = ''' 535 for (int i = 0; i < eCount; i++) { 536 AA64FpDest_x[i] = memDataView[i]; 537 } 538 ''' 539 storeMemAccCode = ''' 540 for (int i = 0; i < eCount; i++) { 541 memDataView[i] = AA64FpDest_x[i]; 542 } 543 ''' 544 storeWrEnableCode = ''' 545 auto wrEn = std::vector<bool>(sizeof(MemElemType) * eCount, true); 546 ''' 547 loadIop = InstObjParams('ldr', 548 'SveLdrPred' if isPred else 'SveLdrVec', 549 'SveMemPredFillSpill' if isPred else 'SveMemVecFillSpill', 550 {'tpl_header': '', 551 'tpl_args': '', 552 'memacc_code': loadMemAccCode, 553 'ea_code' : sveEnabledCheckCode + eaCode, 554 'fa_code' : ''}, 555 ['IsMemRef', 'IsLoad']) 556 storeIop = InstObjParams('str', 557 'SveStrPred' if isPred else 'SveStrVec', 558 'SveMemPredFillSpill' if isPred else 'SveMemVecFillSpill', 559 {'tpl_header': '', 560 'tpl_args': '', 561 'wren_code': storeWrEnableCode, 562 'memacc_code': storeMemAccCode, 563 'ea_code' : sveEnabledCheckCode + eaCode, 564 'fa_code' : ''}, 565 ['IsMemRef', 'IsStore']) 566 header_output += SveMemFillSpillOpDeclare.subst(loadIop) 567 header_output += SveMemFillSpillOpDeclare.subst(storeIop) 568 exec_output += ( 569 SveContigLoadExecute.subst(loadIop) + 570 SveContigLoadInitiateAcc.subst(loadIop) + 571 SveContigLoadCompleteAcc.subst(loadIop) + 572 SveContigStoreExecute.subst(storeIop) + 573 SveContigStoreInitiateAcc.subst(storeIop) + 574 SveContigStoreCompleteAcc.subst(storeIop)) 575 576 loadTplArgs = ( 577 ('uint8_t', 'uint8_t'), 578 ('uint16_t', 'uint8_t'), 579 ('uint32_t', 'uint8_t'), 580 ('uint64_t', 'uint8_t'), 581 ('int64_t', 'int32_t'), 582 ('uint16_t', 'uint16_t'), 583 ('uint32_t', 'uint16_t'), 584 ('uint64_t', 'uint16_t'), 585 ('int64_t', 'int16_t'), 586 ('int32_t', 'int16_t'), 587 ('uint32_t', 'uint32_t'), 588 ('uint64_t', 'uint32_t'), 589 ('int64_t', 'int8_t'), 590 ('int32_t', 'int8_t'), 591 ('int16_t', 'int8_t'), 592 ('uint64_t', 'uint64_t'), 593 ) 594 595 storeTplArgs = ( 596 ('uint8_t', 'uint8_t'), 597 ('uint16_t', 'uint8_t'), 598 ('uint32_t', 'uint8_t'), 599 ('uint64_t', 'uint8_t'), 600 ('uint16_t', 'uint16_t'), 601 ('uint32_t', 'uint16_t'), 602 ('uint64_t', 'uint16_t'), 603 ('uint32_t', 'uint32_t'), 604 ('uint64_t', 'uint32_t'), 605 ('uint64_t', 'uint64_t'), 606 ) 607 608 gatherLoadTplArgs = ( 609 ('int32_t', 'int8_t'), 610 ('int64_t', 'int8_t'), 611 ('uint32_t', 'uint8_t'), 612 ('uint64_t', 'uint8_t'), 613 ('int32_t', 'int16_t'), 614 ('int64_t', 'int16_t'), 615 ('uint32_t', 'uint16_t'), 616 ('uint64_t', 'uint16_t'), 617 ('int64_t', 'int32_t'), 618 ('uint32_t', 'uint32_t'), 619 ('uint64_t', 'uint32_t'), 620 ('uint64_t', 'uint64_t'), 621 ) 622 623 scatterStoreTplArgs = ( 624 ('uint32_t', 'uint8_t'), 625 ('uint64_t', 'uint8_t'), 626 ('uint32_t', 'uint16_t'), 627 ('uint64_t', 'uint16_t'), 628 ('uint32_t', 'uint32_t'), 629 ('uint64_t', 'uint32_t'), 630 ('uint64_t', 'uint64_t'), 631 ) 632 633 # Generates definitions for SVE contiguous loads 634 def emitSveContigMemInsts(offsetIsImm): 635 global header_output, exec_output, decoders 636 tplHeader = 'template <class RegElemType, class MemElemType>' 637 tplArgs = '<RegElemType, MemElemType>' 638 eaCode = SPAlignmentCheckCode + ''' 639 int memAccessSize = eCount * sizeof(MemElemType); 640 EA = XBase + ''' 641 if offsetIsImm: 642 eaCode += '((int64_t) this->imm * eCount * sizeof(MemElemType))' 643 else: 644 eaCode += '(XOffset * sizeof(MemElemType));' 645 loadMemAccCode = ''' 646 for (int i = 0; i < eCount; i++) { 647 if (GpOp_x[i]) { 648 AA64FpDest_x[i] = memDataView[i]; 649 } else { 650 AA64FpDest_x[i] = 0; 651 } 652 } 653 ''' 654 storeMemAccCode = ''' 655 for (int i = 0; i < eCount; i++) { 656 if (GpOp_x[i]) { 657 memDataView[i] = AA64FpDest_x[i]; 658 } else { 659 memDataView[i] = 0; 660 for (int j = 0; j < sizeof(MemElemType); j++) { 661 wrEn[sizeof(MemElemType) * i + j] = false; 662 } 663 } 664 } 665 ''' 666 storeWrEnableCode = ''' 667 auto wrEn = std::vector<bool>(sizeof(MemElemType) * eCount, true); 668 ''' 669 loadIop = InstObjParams('ld1', 670 'SveContigLoadSI' if offsetIsImm else 'SveContigLoadSS', 671 'SveContigMemSI' if offsetIsImm else 'SveContigMemSS', 672 {'tpl_header': tplHeader, 673 'tpl_args': tplArgs, 674 'memacc_code': loadMemAccCode, 675 'ea_code' : sveEnabledCheckCode + eaCode, 676 'fa_code' : ''}, 677 ['IsMemRef', 'IsLoad']) 678 storeIop = InstObjParams('st1', 679 'SveContigStoreSI' if offsetIsImm else 'SveContigStoreSS', 680 'SveContigMemSI' if offsetIsImm else 'SveContigMemSS', 681 {'tpl_header': tplHeader, 682 'tpl_args': tplArgs, 683 'wren_code': storeWrEnableCode, 684 'memacc_code': storeMemAccCode, 685 'ea_code' : sveEnabledCheckCode + eaCode, 686 'fa_code' : ''}, 687 ['IsMemRef', 'IsStore']) 688 if offsetIsImm: 689 header_output += SveContigMemSIOpDeclare.subst(loadIop) 690 header_output += SveContigMemSIOpDeclare.subst(storeIop) 691 else: 692 header_output += SveContigMemSSOpDeclare.subst(loadIop) 693 header_output += SveContigMemSSOpDeclare.subst(storeIop) 694 exec_output += ( 695 SveContigLoadExecute.subst(loadIop) + 696 SveContigLoadInitiateAcc.subst(loadIop) + 697 SveContigLoadCompleteAcc.subst(loadIop) + 698 SveContigStoreExecute.subst(storeIop) + 699 SveContigStoreInitiateAcc.subst(storeIop) + 700 SveContigStoreCompleteAcc.subst(storeIop)) 701 for args in loadTplArgs: 702 substDict = {'tpl_args': '<%s>' % ', '.join(args), 703 'class_name': 'SveContigLoadSI' if offsetIsImm 704 else 'SveContigLoadSS'} 705 exec_output += SveContigMemExecDeclare.subst(substDict) 706 for args in storeTplArgs: 707 substDict = {'tpl_args': '<%s>' % ', '.join(args), 708 'class_name': 'SveContigStoreSI' if offsetIsImm 709 else 'SveContigStoreSS'} 710 exec_output += SveContigMemExecDeclare.subst(substDict) 711 712 # Generates definitions for SVE load-and-replicate instructions 713 def emitSveLoadAndRepl(): 714 global header_output, exec_output, decoders 715 tplHeader = 'template <class RegElemType, class MemElemType>' 716 tplArgs = '<RegElemType, MemElemType>' 717 eaCode = SPAlignmentCheckCode + ''' 718 EA = XBase + imm * sizeof(MemElemType);''' 719 memAccCode = ''' 720 for (int i = 0; i < eCount; i++) { 721 if (GpOp_x[i]) { 722 AA64FpDest_x[i] = memData; 723 } else { 724 AA64FpDest_x[i] = 0; 725 } 726 } 727 ''' 728 iop = InstObjParams('ld1r', 729 'SveLoadAndRepl', 730 'SveContigMemSI', 731 {'tpl_header': tplHeader, 732 'tpl_args': tplArgs, 733 'memacc_code': memAccCode, 734 'ea_code' : sveEnabledCheckCode + eaCode, 735 'fa_code' : ''}, 736 ['IsMemRef', 'IsLoad']) 737 header_output += SveContigMemSIOpDeclare.subst(iop) 738 exec_output += ( 739 SveLoadAndReplExecute.subst(iop) + 740 SveLoadAndReplInitiateAcc.subst(iop) + 741 SveLoadAndReplCompleteAcc.subst(iop)) 742 for args in loadTplArgs: 743 substDict = {'tpl_args': '<%s>' % ', '.join(args), 744 'class_name': 'SveLoadAndRepl'} 745 exec_output += SveContigMemExecDeclare.subst(substDict) 746 747 class IndexedAddrForm: 748 VEC_PLUS_IMM = 0 749 SCA_PLUS_VEC = 1 750 751 # Generates definitions for the transfer microops of SVE indexed memory 752 # operations (gather loads, scatter stores) 753 def emitSveIndexedMemMicroops(indexed_addr_form): 754 assert indexed_addr_form in (IndexedAddrForm.VEC_PLUS_IMM, 755 IndexedAddrForm.SCA_PLUS_VEC) 756 global header_output, exec_output, decoders 757 tplHeader = 'template <class RegElemType, class MemElemType>' 758 tplArgs = '<RegElemType, MemElemType>' 759 if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM: 760 eaCode = ''' 761 EA = AA64FpBase_x[elemIndex] + imm * sizeof(MemElemType)''' 762 else: 763 eaCode = ''' 764 uint64_t offset = AA64FpOffset_x[elemIndex]; 765 if (offsetIs32) { 766 offset &= (1ULL << 32) - 1; 767 } 768 if (offsetIsSigned) { 769 offset = sext<32>(offset); 770 } 771 if (offsetIsScaled) { 772 offset *= sizeof(MemElemType); 773 } 774 EA = XBase + offset''' 775 loadMemAccCode = ''' 776 if (GpOp_x[elemIndex]) { 777 AA64FpDest_x[elemIndex] = memData; 778 } else { 779 AA64FpDest_x[elemIndex] = 0; 780 } 781 ''' 782 storeMemAccCode = ''' 783 memData = AA64FpDest_x[elemIndex]; 784 ''' 785 predCheckCode = 'GpOp_x[elemIndex]' 786 loadIop = InstObjParams('ld1', 787 ('SveGatherLoadVIMicroop' 788 if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM 789 else 'SveGatherLoadSVMicroop'), 790 'MicroOp', 791 {'tpl_header': tplHeader, 792 'tpl_args': tplArgs, 793 'memacc_code': loadMemAccCode, 794 'ea_code' : sveEnabledCheckCode + eaCode, 795 'pred_check_code' : predCheckCode, 796 'fa_code' : ''}, 797 ['IsMicroop', 'IsMemRef', 'IsLoad']) 798 storeIop = InstObjParams('st1', 799 ('SveScatterStoreVIMicroop' 800 if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM 801 else 'SveScatterStoreSVMicroop'), 802 'MicroOp', 803 {'tpl_header': tplHeader, 804 'tpl_args': tplArgs, 805 'memacc_code': storeMemAccCode, 806 'ea_code' : sveEnabledCheckCode + eaCode, 807 'pred_check_code' : predCheckCode, 808 'fa_code' : ''}, 809 ['IsMicroop', 'IsMemRef', 'IsStore']) 810 if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM: 811 header_output += SveIndexedMemVIMicroopDeclare.subst(loadIop) 812 header_output += SveIndexedMemVIMicroopDeclare.subst(storeIop) 813 else: 814 header_output += SveIndexedMemSVMicroopDeclare.subst(loadIop) 815 header_output += SveIndexedMemSVMicroopDeclare.subst(storeIop) 816 exec_output += ( 817 SveGatherLoadMicroopExecute.subst(loadIop) + 818 SveGatherLoadMicroopInitiateAcc.subst(loadIop) + 819 SveGatherLoadMicroopCompleteAcc.subst(loadIop) + 820 SveScatterStoreMicroopExecute.subst(storeIop) + 821 SveScatterStoreMicroopInitiateAcc.subst(storeIop) + 822 SveScatterStoreMicroopCompleteAcc.subst(storeIop)) 823 for args in gatherLoadTplArgs: 824 substDict = {'tpl_args': '<%s>' % ', '.join(args), 825 'class_name': ( 826 'SveGatherLoadVIMicroop' 827 if indexed_addr_form == \ 828 IndexedAddrForm.VEC_PLUS_IMM 829 else 'SveGatherLoadSVMicroop')} 830 # TODO: this should become SveMemExecDeclare 831 exec_output += SveContigMemExecDeclare.subst(substDict) 832 for args in scatterStoreTplArgs: 833 substDict = {'tpl_args': '<%s>' % ', '.join(args), 834 'class_name': ( 835 'SveScatterStoreVIMicroop' 836 if indexed_addr_form == \ 837 IndexedAddrForm.VEC_PLUS_IMM 838 else 'SveScatterStoreSVMicroop')} 839 # TODO: this should become SveMemExecDeclare 840 exec_output += SveContigMemExecDeclare.subst(substDict) 841 842 # Generates definitions for the first microop of SVE gather loads, required 843 # to propagate the source vector register to the transfer microops 844 def emitSveGatherLoadCpySrcVecMicroop(): 845 global header_output, exec_output, decoders 846 code = sveEnabledCheckCode + ''' 847 unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>( 848 xc->tcBase()); 849 for (unsigned i = 0; i < eCount; i++) { 850 AA64FpUreg0_ub[i] = AA64FpOp1_ub[i]; 851 }''' 852 iop = InstObjParams('ld1', 853 'SveGatherLoadCpySrcVecMicroop', 854 'MicroOp', 855 {'code': code}, 856 ['IsMicroop']) 857 header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop) 858 exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop) 859 860 # LD1[S]{B,H,W,D} (scalar plus immediate) 861 # ST1[S]{B,H,W,D} (scalar plus immediate) 862 emitSveContigMemInsts(True) 863 # LD1[S]{B,H,W,D} (scalar plus scalar) 864 # ST1[S]{B,H,W,D} (scalar plus scalar) 865 emitSveContigMemInsts(False) 866 867 # LD1R[S]{B,H,W,D} 868 emitSveLoadAndRepl() 869 870 # LDR (predicate), STR (predicate) 871 emitSveMemFillSpill(True) 872 # LDR (vector), STR (vector) 873 emitSveMemFillSpill(False) 874 875 # LD1[S]{B,H,W,D} (vector plus immediate) 876 # ST1[S]{B,H,W,D} (vector plus immediate) 877 emitSveIndexedMemMicroops(IndexedAddrForm.VEC_PLUS_IMM) 878 # LD1[S]{B,H,W,D} (scalar plus vector) 879 # ST1[S]{B,H,W,D} (scalar plus vector) 880 emitSveIndexedMemMicroops(IndexedAddrForm.SCA_PLUS_VEC) 881 882 # Source vector copy microop for gather loads 883 emitSveGatherLoadCpySrcVecMicroop() 884 885}}; 886