pseudo_inst.cc revision 11308:7d8836fd043d
1/* 2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Marc Orr 34 */ 35 36#include <csignal> 37 38#include "arch/hsail/insts/decl.hh" 39#include "arch/hsail/insts/mem.hh" 40 41namespace HsailISA 42{ 43 // Pseudo (or magic) instructions are overloaded on the hsail call 44 // instruction, because of its flexible parameter signature. 
    // To add a new magic instruction:
    // 1. Add an entry to the enum.
    // 2. Implement it in the switch statement below (Call::exec).
    // 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h,
    //    so its easy to call from an OpenCL kernel.

    // This enum should be identical to the enum in
    // hsa/hsail-gpu-compute/util/magicinst.h
    enum
    {
        MAGIC_PRINT_WF_32 = 0,
        MAGIC_PRINT_WF_64,
        MAGIC_PRINT_LANE,
        MAGIC_PRINT_LANE_64,
        MAGIC_PRINT_WF_FLOAT,
        MAGIC_SIM_BREAK,
        MAGIC_PREF_SUM,
        MAGIC_REDUCTION,
        MAGIC_MASKLANE_LOWER,
        MAGIC_MASKLANE_UPPER,
        MAGIC_JOIN_WF_BAR,
        MAGIC_WAIT_WF_BAR,
        MAGIC_PANIC,
        MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG,
        MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG,
        MAGIC_LOAD_GLOBAL_U32_REG,
        MAGIC_XACT_CAS_LD,
        MAGIC_MOST_SIG_THD,
        MAGIC_MOST_SIG_BROADCAST,
        MAGIC_PRINT_WFID_32,
        MAGIC_PRINT_WFID_64
    };

    // Dispatcher for all magic instructions. Reads the magic opcode from
    // operand 0 of src1 (which must be identical across every active lane
    // of the wavefront -- mixing opcodes under one PC is a fatal error)
    // and forwards to the matching Magic* handler below.
    void
    Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
    {
        const VectorMask &mask = w->get_pred();

        int op = 0;
        bool got_op = false;

        // Validate that all active lanes agree on the opcode.
        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                int src_val0 = src1.get<int>(w, lane, 0);
                if (got_op) {
                    if (src_val0 != op) {
                        fatal("Multiple magic instructions per PC not "
                              "supported\n");
                    }
                } else {
                    op = src_val0;
                    got_op = true;
                }
            }
        }

        switch(op) {
          case MAGIC_PRINT_WF_32:
            MagicPrintWF32(w);
            break;
          case MAGIC_PRINT_WF_64:
            MagicPrintWF64(w);
            break;
          case MAGIC_PRINT_LANE:
            MagicPrintLane(w);
            break;
          case MAGIC_PRINT_LANE_64:
            MagicPrintLane64(w);
            break;
          case MAGIC_PRINT_WF_FLOAT:
            MagicPrintWFFloat(w);
            break;
          case MAGIC_SIM_BREAK:
            MagicSimBreak(w);
            break;
          case MAGIC_PREF_SUM:
            MagicPrefixSum(w);
            break;
          case MAGIC_REDUCTION:
            MagicReduction(w);
            break;
          case MAGIC_MASKLANE_LOWER:
            MagicMaskLower(w);
            break;
          case MAGIC_MASKLANE_UPPER:
            MagicMaskUpper(w);
            break;
          case MAGIC_JOIN_WF_BAR:
            MagicJoinWFBar(w);
            break;
          case MAGIC_WAIT_WF_BAR:
            MagicWaitWFBar(w);
            break;
          case MAGIC_PANIC:
            MagicPanic(w);
            break;

          // atomic instructions
          case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG:
            MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst);
            break;

          case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG:
            MagicAtomicNRAddGroupU32Reg(w, gpuDynInst);
            break;

          case MAGIC_LOAD_GLOBAL_U32_REG:
            MagicLoadGlobalU32Reg(w, gpuDynInst);
            break;

          case MAGIC_XACT_CAS_LD:
            MagicXactCasLd(w);
            break;

          case MAGIC_MOST_SIG_THD:
            MagicMostSigThread(w);
            break;

          case MAGIC_MOST_SIG_BROADCAST:
            MagicMostSigBroadcast(w);
            break;

          case MAGIC_PRINT_WFID_32:
            MagicPrintWF32ID(w);
            break;

          case MAGIC_PRINT_WFID_64:
            MagicPrintWFID64(w);
            break;

          default: fatal("unrecognized magic instruction: %d\n", op);
        }
    }

    // Print a 32-bit value (operand 1) for each active lane, one DPRINTFN
    // line per lane. Operand 2 selects hex (non-zero) vs decimal output.
    // Compiled out entirely when tracing is disabled.
    void
    Call::MagicPrintLane(Wavefront *w)
    {
    #if TRACING_ON
        const VectorMask &mask = w->get_pred();
        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                int src_val1 = src1.get<int>(w, lane, 1);
                int src_val2 = src1.get<int>(w, lane, 2);
                if (src_val2) {
                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
                             disassemble(), w->computeUnit->cu_id, w->simdId,
                             w->wfSlotId, lane, src_val1);
                } else {
                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
                             disassemble(), w->computeUnit->cu_id, w->simdId,
                             w->wfSlotId, lane, src_val1);
                }
            }
        }
    #endif
    }

    // 64-bit variant of MagicPrintLane: operand 1 is read as int64_t.
    // NOTE(review): the format strings still use 32-bit "%x"/"%d" for an
    // int64_t argument -- presumably safe because gem5's csprintf formats
    // by argument type rather than trusting the specifier; confirm.
    void
    Call::MagicPrintLane64(Wavefront *w)
    {
    #if TRACING_ON
        const VectorMask &mask = w->get_pred();
        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
                int src_val2 = src1.get<int>(w, lane, 2);
                if (src_val2) {
                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
                             disassemble(), w->computeUnit->cu_id, w->simdId,
                             w->wfSlotId, lane, src_val1);
                } else {
                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
                             disassemble(), w->computeUnit->cu_id, w->simdId,
                             w->wfSlotId, lane, src_val1);
                }
            }
        }
    #endif
    }

    // Print a 32-bit value (operand 1) for the entire wavefront as a
    // table, eight lanes per row, each row prefixed with the dynamic
    // wavefront id. Inactive lanes print as "xxxxxxxx". Operand 2 selects
    // hex vs decimal.
    void
    Call::MagicPrintWF32(Wavefront *w)
    {
    #if TRACING_ON
        const VectorMask &mask = w->get_pred();
        std::string res_str;
        res_str = csprintf("krl_prt (%s)\n", disassemble());

        for (int lane = 0; lane < VSZ; ++lane) {
            // start a new row every 8 lanes
            if (!(lane & 7)) {
                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
            }

            if (mask[lane]) {
                int src_val1 = src1.get<int>(w, lane, 1);
                int src_val2 = src1.get<int>(w, lane, 2);

                if (src_val2) {
                    res_str += csprintf("%08x", src_val1);
                } else {
                    res_str += csprintf("%08d", src_val1);
                }
            } else {
                res_str += csprintf("xxxxxxxx");
            }

            if ((lane & 7) == 7) {
                res_str += csprintf("\n");
            } else {
                res_str += csprintf(" ");
            }
        }

        res_str += "\n\n";
        DPRINTFN(res_str.c_str());
    #endif
    }

    // Same table dump as MagicPrintWF32, but operand 3 carries a target
    // wavefront id and the output is emitted only when this wavefront's
    // wfDynId matches it (src_val3 keeps the value from the last active
    // lane; it stays -1, and thus suppresses output, if no lane is active).
    void
    Call::MagicPrintWF32ID(Wavefront *w)
    {
    #if TRACING_ON
        const VectorMask &mask = w->get_pred();
        std::string res_str;
        int src_val3 = -1;
        res_str = csprintf("krl_prt (%s)\n", disassemble());

        for (int lane = 0; lane < VSZ; ++lane) {
            if (!(lane & 7)) {
                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
            }

            if (mask[lane]) {
                int src_val1 = src1.get<int>(w, lane, 1);
                int src_val2 = src1.get<int>(w, lane, 2);
                src_val3 = src1.get<int>(w, lane, 3);

                if (src_val2) {
                    res_str += csprintf("%08x", src_val1);
                } else {
                    res_str += csprintf("%08d", src_val1);
                }
            } else {
                res_str += csprintf("xxxxxxxx");
            }

            if ((lane & 7) == 7) {
                res_str += csprintf("\n");
            } else {
                res_str += csprintf(" ");
            }
        }

        res_str += "\n\n";
        if (w->wfDynId == src_val3) {
            DPRINTFN(res_str.c_str());
        }
    #endif
    }

    // 64-bit wavefront table dump: four lanes per row, 16-character-wide
    // fields, inactive lanes as 16 x's. Operand 2 selects hex vs decimal.
    // NOTE(review): "%016x"/"%016d" with an int64_t -- same csprintf
    // type-driven formatting assumption as MagicPrintLane64; confirm.
    void
    Call::MagicPrintWF64(Wavefront *w)
    {
    #if TRACING_ON
        const VectorMask &mask = w->get_pred();
        std::string res_str;
        res_str = csprintf("krl_prt (%s)\n", disassemble());

        for (int lane = 0; lane < VSZ; ++lane) {
            // new row every 4 lanes (64-bit values are twice as wide)
            if (!(lane & 3)) {
                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
            }

            if (mask[lane]) {
                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
                int src_val2 = src1.get<int>(w, lane, 2);

                if (src_val2) {
                    res_str += csprintf("%016x", src_val1);
                } else {
                    res_str += csprintf("%016d", src_val1);
                }
            } else {
                res_str += csprintf("xxxxxxxxxxxxxxxx");
            }

            if ((lane & 3) == 3) {
                res_str += csprintf("\n");
            } else {
                res_str += csprintf(" ");
            }
        }

        res_str += "\n\n";
        DPRINTFN(res_str.c_str());
    #endif
    }

    // 64-bit variant of MagicPrintWF32ID: table dump filtered so that only
    // the wavefront whose wfDynId equals operand 3 prints.
    void
    Call::MagicPrintWFID64(Wavefront *w)
    {
    #if TRACING_ON
        const VectorMask &mask = w->get_pred();
        std::string res_str;
        int src_val3 = -1;
        res_str = csprintf("krl_prt (%s)\n", disassemble());

        for (int lane = 0; lane < VSZ; ++lane) {
            if (!(lane & 3)) {
                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
            }

            if (mask[lane]) {
                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
                int src_val2 = src1.get<int>(w, lane, 2);
                src_val3 = src1.get<int>(w, lane, 3);

                if (src_val2) {
                    res_str += csprintf("%016x", src_val1);
                } else {
                    res_str += csprintf("%016d", src_val1);
                }
            } else {
                res_str += csprintf("xxxxxxxxxxxxxxxx");
            }

            if ((lane & 3) == 3) {
                res_str += csprintf("\n");
            } else {
                res_str += csprintf(" ");
            }
        }

        res_str += "\n\n";
        if (w->wfDynId == src_val3) {
            DPRINTFN(res_str.c_str());
        }
    #endif
    }

    // Float wavefront table dump: operand 1 read as float, eight lanes per
    // row, inactive lanes as "xxxxxxxx". No hex/decimal selector here.
    void
    Call::MagicPrintWFFloat(Wavefront *w)
    {
    #if TRACING_ON
        const VectorMask &mask = w->get_pred();
        std::string res_str;
        res_str = csprintf("krl_prt (%s)\n", disassemble());

        for (int lane = 0; lane < VSZ; ++lane) {
            if (!(lane & 7)) {
                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
            }

            if (mask[lane]) {
                float src_val1 = src1.get<float>(w, lane, 1);
                res_str += csprintf("%08f", src_val1);
            } else {
                res_str += csprintf("xxxxxxxx");
            }

            if ((lane & 7) == 7) {
                res_str += csprintf("\n");
            } else {
                res_str += csprintf(" ");
            }
        }

        res_str += "\n\n";
        DPRINTFN(res_str.c_str());
    #endif
    }

    // raises a signal that GDB will catch
    // when done with the break, type "signal 0" in gdb to continue
    //
    // Dumps this wavefront's identity and execution mask (MSB-to-LSB,
    // grouped in bytes) to the trace stream, flushes stdout so the state
    // is visible before the stop, then raises SIGTRAP.
    void
    Call::MagicSimBreak(Wavefront *w)
    {
        std::string res_str;
        // print out state for this wavefront and then break
        res_str = csprintf("Breakpoint encountered for wavefront %i\n",
                           w->wfSlotId);

        res_str += csprintf("  Kern ID: %i\n", w->kern_id);
        res_str += csprintf("  Phase ID: %i\n", w->simdId);
        res_str += csprintf("  Executing on CU #%i\n", w->computeUnit->cu_id);
        res_str += csprintf("  Exec mask: ");

        // walk the mask from the most-significant lane down
        for (int i = VSZ - 1; i >= 0; --i) {
            if (w->execMask(i))
                res_str += "1";
            else
                res_str += "0";

            if ((i & 7) == 7)
                res_str += " ";
        }

        res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong());

        res_str += "\nHelpful debugging hints:\n";
        res_str += "   Check out w->s_reg / w->d_reg for register state\n";

        res_str += "\n\n";
        DPRINTFN(res_str.c_str());
        fflush(stdout);

        raise(SIGTRAP);
    }

    // Exclusive prefix sum across active lanes: each active lane receives
    // the sum of operand 1 over all lower-numbered active lanes (the first
    // active lane gets 0). Inactive lanes are skipped entirely.
    void
    Call::MagicPrefixSum(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        int res = 0;

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                int src_val1 = src1.get<int>(w, lane, 1);
                dest.set<int>(w, lane, res);
                res += src_val1;
            }
        }
    }

    // reduction magic instruction
    // The reduction instruction takes up to 64 inputs (one from
    // each thread in a WF) and sums them. It returns the sum to
    // each thread in the WF.
    void
    Call::MagicReduction(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        int res = 0;

        // first pass: accumulate operand 1 over the active lanes
        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                int src_val1 = src1.get<int>(w, lane, 1);
                res += src_val1;
            }
        }

        // second pass: broadcast the total back to every active lane
        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                dest.set<int>(w, lane, res);
            }
        }
    }

    // Build a 32-bit bitmask with one bit per lane in the LOWER half of
    // the wavefront (lanes [0, VSZ/2)) whose operand 1 is non-zero, then
    // write that mask to every active lane's dest.
    void
    Call::MagicMaskLower(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        int res = 0;

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                int src_val1 = src1.get<int>(w, lane, 1);

                if (src_val1) {
                    if (lane < (VSZ/2)) {
                        res = res | ((uint32_t)(1) << lane);
                    }
                }
            }
        }

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                dest.set<int>(w, lane, res);
            }
        }
    }

    // Counterpart of MagicMaskLower for the UPPER half: bit (lane - VSZ/2)
    // is set for each active lane in [VSZ/2, VSZ) with non-zero operand 1;
    // the resulting mask is written to every active lane.
    void
    Call::MagicMaskUpper(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        int res = 0;
        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                int src_val1 = src1.get<int>(w, lane, 1);

                if (src_val1) {
                    if (lane >= (VSZ/2)) {
                        res = res | ((uint32_t)(1) << (lane - (VSZ/2)));
                    }
                }
            }
        }

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                dest.set<int>(w, lane, res);
            }
        }
    }

    // Join side of the intra-wavefront barrier: bump each active lane's
    // barrier count and raise the wavefront-wide maximum if a lane now
    // exceeds it.
    void
    Call::MagicJoinWFBar(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        int max_cnt = 0;

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                w->bar_cnt[lane]++;

                if (w->bar_cnt[lane] > max_cnt) {
                    max_cnt = w->bar_cnt[lane];
                }
            }
        }

        if (max_cnt > w->max_bar_cnt) {
            w->max_bar_cnt = max_cnt;
        }
    }

    // Wait side of the intra-wavefront barrier: decrement each active
    // lane's barrier count, recompute the maximum over ALL lanes (the max
    // scan intentionally runs outside the mask check, unlike JoinWFBar,
    // since inactive lanes may still hold the highest count), and lower
    // max_bar_cnt if the overall maximum dropped. Finally squash every
    // buffered instruction after the current one and drop any in-flight
    // fetch, forcing a refetch after the barrier resolves.
    void
    Call::MagicWaitWFBar(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        int max_cnt = 0;

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                w->bar_cnt[lane]--;
            }

            if (w->bar_cnt[lane] > max_cnt) {
                max_cnt = w->bar_cnt[lane];
            }
        }

        if (max_cnt < w->max_bar_cnt) {
            w->max_bar_cnt = max_cnt;
        }

        w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
                                   w->instructionBuffer.end());
        if (w->pendingFetch)
            w->dropFetch = true;
    }

    // Kernel-side assertion failure: panic the simulator with the
    // assertion id (operand 1) of the first active lane encountered.
    // NOTE(review): "%s" is used for the int 'lane' argument -- looks like
    // it should be "%d"; presumably harmless because csprintf formats by
    // argument type, but confirm before relying on the message text.
    void
    Call::MagicPanic(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                int src_val1 = src1.get<int>(w, lane, 1);
                panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
                      src_val1, lane);
            }
        }
    }

    // Assemble each lane's 64-bit memory address from operand 1 (upper 32
    // bits) and operand 2 (lower 32 bits) of src1 and store it in the
    // dynamic instruction. Note this runs for every lane, active or not.
    void
    Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
    {
        // the address is in src1 | src2
        for (int lane = 0; lane < VSZ; ++lane) {
            int src_val1 = src1.get<int>(w, lane, 1);
            int src_val2 = src1.get<int>(w, lane, 2);
            Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);

            m->addr[lane] = addr;
        }

    }

    // Issue a no-return atomic add of a 32-bit value (operand 3) to global
    // memory at the per-lane address from calcAddr. Fills in the dynamic
    // instruction's memory-op metadata, pushes it into the global memory
    // pipe's request FIFO, and moves the request accounting from
    // "in pipe" to "outstanding".
    // NOTE(review): both the write- and read-side outstanding counters are
    // bumped here -- presumably because a no-return atomic occupies both
    // paths; confirm against the pipeline bookkeeping.
    void
    Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
    {
        GPUDynInstPtr m = gpuDynInst;

        calcAddr(w, m);

        // per-lane addend comes from operand 3
        for (int lane = 0; lane < VSZ; ++lane) {
            ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
        }

        m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
                                        Brig::BRIG_ATOMIC_ADD);
        m->m_type = U32::memType;
        m->v_type = U32::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = Enums::MEMORY_ORDER_NONE;
        m->scope = Enums::MEMORY_SCOPE_NONE;

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        m->s_type = SEG_GLOBAL;
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(64));
        w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
        w->outstanding_reqs_wr_gm++;
        w->wr_gm_reqs_in_pipe--;
        w->outstanding_reqs_rd_gm++;
        w->rd_gm_reqs_in_pipe--;
        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    // Group-segment variant of the no-return atomic add.
    // NOTE(review): two asymmetries vs the GLOBAL variant to confirm:
    // the addend is read from operand 1 (not 3), and s_type is still set
    // to SEG_GLOBAL with the request pushed to the global memory pipe
    // despite the "Group" name.
    void
    Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
    {
        GPUDynInstPtr m = gpuDynInst;
        calcAddr(w, m);

        for (int lane = 0; lane < VSZ; ++lane) {
            ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
        }

        m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
                                        Brig::BRIG_ATOMIC_ADD);
        m->m_type = U32::memType;
        m->v_type = U32::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = Enums::MEMORY_ORDER_NONE;
        m->scope = Enums::MEMORY_SCOPE_NONE;

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        m->s_type = SEG_GLOBAL;
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(64));
        w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
        w->outstanding_reqs_wr_gm++;
        w->wr_gm_reqs_in_pipe--;
        w->outstanding_reqs_rd_gm++;
        w->rd_gm_reqs_in_pipe--;
        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    // Issue a 32-bit load from global memory at the per-lane address from
    // calcAddr, via the global memory pipe. Only the read-side request
    // counters are updated. The destination register wiring is still
    // pending (see FIXME below).
    void
    Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
    {
        GPUDynInstPtr m = gpuDynInst;
        // calculate the address
        calcAddr(w, m);

        m->m_op = Enums::MO_LD;
        m->m_type = U32::memType;  //MemDataType::memType;
        m->v_type = U32::vgprType; //DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;
        m->n_reg = 1;
        m->memoryOrder = Enums::MEMORY_ORDER_NONE;
        m->scope = Enums::MEMORY_SCOPE_NONE;

        // FIXME
        //m->dst_reg = this->dest.regIndex();

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        m->s_type = SEG_GLOBAL;
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(1));
        w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
        w->outstanding_reqs_rd_gm++;
        w->rd_gm_reqs_in_pipe--;
        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    // Register this wavefront in the compute unit's transactional
    // CAS-load map, keyed by operand 1 of the first active lane (key
    // defaults to 0 if no lane is active). Creates an empty wave queue
    // for a first-seen key, then appends this wave's (simd, slot) id.
    void
    Call::MagicXactCasLd(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        int src_val1 = 0;

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                src_val1 = src1.get<int>(w, lane, 1);
                break;
            }
        }

        if (!w->computeUnit->xactCasLoadMap.count(src_val1)) {
            w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue();
            w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear();
        }

        w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
            .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
    }

    // Write 1 to dest of the most significant (highest-numbered) active
    // lane and 0 to every other active lane, identifying a single leader
    // lane within the wavefront.
    void
    Call::MagicMostSigThread(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        unsigned mst = true;

        for (int lane = VSZ - 1; lane >= 0; --lane) {
            if (mask[lane]) {
                dest.set<int>(w, lane, mst);
                mst = false;
            }
        }
    }

    // Broadcast operand 1 of the most significant active lane to the dest
    // of every active lane (res stays 0 if no lane is active, but then
    // nothing is written either).
    void
    Call::MagicMostSigBroadcast(Wavefront *w)
    {
        const VectorMask &mask = w->get_pred();
        int res = 0;
        bool got_res = false;

        for (int lane = VSZ - 1; lane >= 0; --lane) {
            if (mask[lane]) {
                if (!got_res) {
                    res = src1.get<int>(w, lane, 1);
                    got_res = true;
                }
                dest.set<int>(w, lane, res);
            }
        }
    }

} // namespace HsailISA