pseudo_inst.cc (11534:7106f550afad) | pseudo_inst.cc (11639:2e8d4bd8108d) |
---|---|
1/* 2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 65 unchanged lines hidden (view full) --- 74 MAGIC_MOST_SIG_BROADCAST, 75 MAGIC_PRINT_WFID_32, 76 MAGIC_PRINT_WFID_64 77 }; 78 79 void 80 Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst) 81 { | 1/* 2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 65 unchanged lines hidden (view full) --- 74 MAGIC_MOST_SIG_BROADCAST, 75 MAGIC_PRINT_WFID_32, 76 MAGIC_PRINT_WFID_64 77 }; 78 79 void 80 Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst) 81 { |
82 const VectorMask &mask = w->get_pred(); | 82 const VectorMask &mask = w->getPred(); |
83 84 int op = 0; 85 bool got_op = false; 86 87 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 88 if (mask[lane]) { 89 int src_val0 = src1.get<int>(w, lane, 0); 90 if (got_op) { --- 85 unchanged lines hidden (view full) --- 176 default: fatal("unrecognized magic instruction: %d\n", op); 177 } 178 } 179 180 void 181 Call::MagicPrintLane(Wavefront *w) 182 { 183 #if TRACING_ON | 83 84 int op = 0; 85 bool got_op = false; 86 87 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 88 if (mask[lane]) { 89 int src_val0 = src1.get<int>(w, lane, 0); 90 if (got_op) { --- 85 unchanged lines hidden (view full) --- 176 default: fatal("unrecognized magic instruction: %d\n", op); 177 } 178 } 179 180 void 181 Call::MagicPrintLane(Wavefront *w) 182 { 183 #if TRACING_ON |
184 const VectorMask &mask = w->get_pred(); | 184 const VectorMask &mask = w->getPred(); |
185 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 186 if (mask[lane]) { 187 int src_val1 = src1.get<int>(w, lane, 1); 188 int src_val2 = src1.get<int>(w, lane, 2); 189 if (src_val2) { 190 DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n", 191 disassemble(), w->computeUnit->cu_id, w->simdId, 192 w->wfSlotId, lane, src_val1); --- 6 unchanged lines hidden (view full) --- 199 } 200 #endif 201 } 202 203 void 204 Call::MagicPrintLane64(Wavefront *w) 205 { 206 #if TRACING_ON | 185 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 186 if (mask[lane]) { 187 int src_val1 = src1.get<int>(w, lane, 1); 188 int src_val2 = src1.get<int>(w, lane, 2); 189 if (src_val2) { 190 DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n", 191 disassemble(), w->computeUnit->cu_id, w->simdId, 192 w->wfSlotId, lane, src_val1); --- 6 unchanged lines hidden (view full) --- 199 } 200 #endif 201 } 202 203 void 204 Call::MagicPrintLane64(Wavefront *w) 205 { 206 #if TRACING_ON |
207 const VectorMask &mask = w->get_pred(); | 207 const VectorMask &mask = w->getPred(); |
208 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 209 if (mask[lane]) { 210 int64_t src_val1 = src1.get<int64_t>(w, lane, 1); 211 int src_val2 = src1.get<int>(w, lane, 2); 212 if (src_val2) { 213 DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n", 214 disassemble(), w->computeUnit->cu_id, w->simdId, 215 w->wfSlotId, lane, src_val1); --- 6 unchanged lines hidden (view full) --- 222 } 223 #endif 224 } 225 226 void 227 Call::MagicPrintWF32(Wavefront *w) 228 { 229 #if TRACING_ON | 208 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 209 if (mask[lane]) { 210 int64_t src_val1 = src1.get<int64_t>(w, lane, 1); 211 int src_val2 = src1.get<int>(w, lane, 2); 212 if (src_val2) { 213 DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n", 214 disassemble(), w->computeUnit->cu_id, w->simdId, 215 w->wfSlotId, lane, src_val1); --- 6 unchanged lines hidden (view full) --- 222 } 223 #endif 224 } 225 226 void 227 Call::MagicPrintWF32(Wavefront *w) 228 { 229 #if TRACING_ON |
230 const VectorMask &mask = w->get_pred(); | 230 const VectorMask &mask = w->getPred(); |
231 std::string res_str; 232 res_str = csprintf("krl_prt (%s)\n", disassemble()); 233 234 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 235 if (!(lane & 7)) { 236 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 237 } 238 --- 21 unchanged lines hidden (view full) --- 260 DPRINTFN(res_str.c_str()); 261 #endif 262 } 263 264 void 265 Call::MagicPrintWF32ID(Wavefront *w) 266 { 267 #if TRACING_ON | 231 std::string res_str; 232 res_str = csprintf("krl_prt (%s)\n", disassemble()); 233 234 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 235 if (!(lane & 7)) { 236 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 237 } 238 --- 21 unchanged lines hidden (view full) --- 260 DPRINTFN(res_str.c_str()); 261 #endif 262 } 263 264 void 265 Call::MagicPrintWF32ID(Wavefront *w) 266 { 267 #if TRACING_ON |
268 const VectorMask &mask = w->get_pred(); | 268 const VectorMask &mask = w->getPred(); |
269 std::string res_str; 270 int src_val3 = -1; 271 res_str = csprintf("krl_prt (%s)\n", disassemble()); 272 273 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 274 if (!(lane & 7)) { 275 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 276 } --- 25 unchanged lines hidden (view full) --- 302 } 303 #endif 304 } 305 306 void 307 Call::MagicPrintWF64(Wavefront *w) 308 { 309 #if TRACING_ON | 269 std::string res_str; 270 int src_val3 = -1; 271 res_str = csprintf("krl_prt (%s)\n", disassemble()); 272 273 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 274 if (!(lane & 7)) { 275 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 276 } --- 25 unchanged lines hidden (view full) --- 302 } 303 #endif 304 } 305 306 void 307 Call::MagicPrintWF64(Wavefront *w) 308 { 309 #if TRACING_ON |
310 const VectorMask &mask = w->get_pred(); | 310 const VectorMask &mask = w->getPred(); |
311 std::string res_str; 312 res_str = csprintf("krl_prt (%s)\n", disassemble()); 313 314 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 315 if (!(lane & 3)) { 316 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 317 } 318 --- 21 unchanged lines hidden (view full) --- 340 DPRINTFN(res_str.c_str()); 341 #endif 342 } 343 344 void 345 Call::MagicPrintWFID64(Wavefront *w) 346 { 347 #if TRACING_ON | 311 std::string res_str; 312 res_str = csprintf("krl_prt (%s)\n", disassemble()); 313 314 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 315 if (!(lane & 3)) { 316 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 317 } 318 --- 21 unchanged lines hidden (view full) --- 340 DPRINTFN(res_str.c_str()); 341 #endif 342 } 343 344 void 345 Call::MagicPrintWFID64(Wavefront *w) 346 { 347 #if TRACING_ON |
348 const VectorMask &mask = w->get_pred(); | 348 const VectorMask &mask = w->getPred(); |
349 std::string res_str; 350 int src_val3 = -1; 351 res_str = csprintf("krl_prt (%s)\n", disassemble()); 352 353 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 354 if (!(lane & 3)) { 355 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 356 } --- 25 unchanged lines hidden (view full) --- 382 } 383 #endif 384 } 385 386 void 387 Call::MagicPrintWFFloat(Wavefront *w) 388 { 389 #if TRACING_ON | 349 std::string res_str; 350 int src_val3 = -1; 351 res_str = csprintf("krl_prt (%s)\n", disassemble()); 352 353 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 354 if (!(lane & 3)) { 355 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 356 } --- 25 unchanged lines hidden (view full) --- 382 } 383 #endif 384 } 385 386 void 387 Call::MagicPrintWFFloat(Wavefront *w) 388 { 389 #if TRACING_ON |
390 const VectorMask &mask = w->get_pred(); | 390 const VectorMask &mask = w->getPred(); |
391 std::string res_str; 392 res_str = csprintf("krl_prt (%s)\n", disassemble()); 393 394 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 395 if (!(lane & 7)) { 396 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 397 } 398 --- 21 unchanged lines hidden (view full) --- 420 void 421 Call::MagicSimBreak(Wavefront *w) 422 { 423 std::string res_str; 424 // print out state for this wavefront and then break 425 res_str = csprintf("Breakpoint encountered for wavefront %i\n", 426 w->wfSlotId); 427 | 391 std::string res_str; 392 res_str = csprintf("krl_prt (%s)\n", disassemble()); 393 394 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 395 if (!(lane & 7)) { 396 res_str += csprintf("DB%03d: ", (int)w->wfDynId); 397 } 398 --- 21 unchanged lines hidden (view full) --- 420 void 421 Call::MagicSimBreak(Wavefront *w) 422 { 423 std::string res_str; 424 // print out state for this wavefront and then break 425 res_str = csprintf("Breakpoint encountered for wavefront %i\n", 426 w->wfSlotId); 427 |
428 res_str += csprintf(" Kern ID: %i\n", w->kern_id); | 428 res_str += csprintf(" Kern ID: %i\n", w->kernId); |
429 res_str += csprintf(" Phase ID: %i\n", w->simdId); 430 res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id); 431 res_str += csprintf(" Exec mask: "); 432 433 for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) { 434 if (w->execMask(i)) 435 res_str += "1"; 436 else --- 13 unchanged lines hidden (view full) --- 450 fflush(stdout); 451 452 raise(SIGTRAP); 453 } 454 455 void 456 Call::MagicPrefixSum(Wavefront *w) 457 { | 429 res_str += csprintf(" Phase ID: %i\n", w->simdId); 430 res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id); 431 res_str += csprintf(" Exec mask: "); 432 433 for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) { 434 if (w->execMask(i)) 435 res_str += "1"; 436 else --- 13 unchanged lines hidden (view full) --- 450 fflush(stdout); 451 452 raise(SIGTRAP); 453 } 454 455 void 456 Call::MagicPrefixSum(Wavefront *w) 457 { |
458 const VectorMask &mask = w->get_pred(); | 458 const VectorMask &mask = w->getPred(); |
459 int res = 0; 460 461 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 462 if (mask[lane]) { 463 int src_val1 = src1.get<int>(w, lane, 1); 464 dest.set<int>(w, lane, res); 465 res += src_val1; 466 } 467 } 468 } 469 470 void 471 Call::MagicReduction(Wavefront *w) 472 { 473 // reduction magic instruction 474 // The reduction instruction takes up to 64 inputs (one from 475 // each thread in a WF) and sums them. It returns the sum to 476 // each thread in the WF. | 459 int res = 0; 460 461 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 462 if (mask[lane]) { 463 int src_val1 = src1.get<int>(w, lane, 1); 464 dest.set<int>(w, lane, res); 465 res += src_val1; 466 } 467 } 468 } 469 470 void 471 Call::MagicReduction(Wavefront *w) 472 { 473 // reduction magic instruction 474 // The reduction instruction takes up to 64 inputs (one from 475 // each thread in a WF) and sums them. It returns the sum to 476 // each thread in the WF. |
477 const VectorMask &mask = w->get_pred(); | 477 const VectorMask &mask = w->getPred(); |
478 int res = 0; 479 480 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 481 if (mask[lane]) { 482 int src_val1 = src1.get<int>(w, lane, 1); 483 res += src_val1; 484 } 485 } 486 487 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 488 if (mask[lane]) { 489 dest.set<int>(w, lane, res); 490 } 491 } 492 } 493 494 void 495 Call::MagicMaskLower(Wavefront *w) 496 { | 478 int res = 0; 479 480 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 481 if (mask[lane]) { 482 int src_val1 = src1.get<int>(w, lane, 1); 483 res += src_val1; 484 } 485 } 486 487 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 488 if (mask[lane]) { 489 dest.set<int>(w, lane, res); 490 } 491 } 492 } 493 494 void 495 Call::MagicMaskLower(Wavefront *w) 496 { |
497 const VectorMask &mask = w->get_pred(); | 497 const VectorMask &mask = w->getPred(); |
498 int res = 0; 499 500 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 501 if (mask[lane]) { 502 int src_val1 = src1.get<int>(w, lane, 1); 503 504 if (src_val1) { 505 if (lane < (w->computeUnit->wfSize()/2)) { --- 8 unchanged lines hidden (view full) --- 514 dest.set<int>(w, lane, res); 515 } 516 } 517 } 518 519 void 520 Call::MagicMaskUpper(Wavefront *w) 521 { | 498 int res = 0; 499 500 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 501 if (mask[lane]) { 502 int src_val1 = src1.get<int>(w, lane, 1); 503 504 if (src_val1) { 505 if (lane < (w->computeUnit->wfSize()/2)) { --- 8 unchanged lines hidden (view full) --- 514 dest.set<int>(w, lane, res); 515 } 516 } 517 } 518 519 void 520 Call::MagicMaskUpper(Wavefront *w) 521 { |
522 const VectorMask &mask = w->get_pred(); | 522 const VectorMask &mask = w->getPred(); |
523 int res = 0; 524 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 525 if (mask[lane]) { 526 int src_val1 = src1.get<int>(w, lane, 1); 527 528 if (src_val1) { 529 if (lane >= (w->computeUnit->wfSize()/2)) { 530 res = res | ((uint32_t)(1) << --- 8 unchanged lines hidden (view full) --- 539 dest.set<int>(w, lane, res); 540 } 541 } 542 } 543 544 void 545 Call::MagicJoinWFBar(Wavefront *w) 546 { | 523 int res = 0; 524 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 525 if (mask[lane]) { 526 int src_val1 = src1.get<int>(w, lane, 1); 527 528 if (src_val1) { 529 if (lane >= (w->computeUnit->wfSize()/2)) { 530 res = res | ((uint32_t)(1) << --- 8 unchanged lines hidden (view full) --- 539 dest.set<int>(w, lane, res); 540 } 541 } 542 } 543 544 void 545 Call::MagicJoinWFBar(Wavefront *w) 546 { |
547 const VectorMask &mask = w->get_pred(); | 547 const VectorMask &mask = w->getPred(); |
548 int max_cnt = 0; 549 550 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 551 if (mask[lane]) { | 548 int max_cnt = 0; 549 550 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 551 if (mask[lane]) { |
552 w->bar_cnt[lane]++; | 552 w->barCnt[lane]++; |
553 | 553 |
554 if (w->bar_cnt[lane] > max_cnt) { 555 max_cnt = w->bar_cnt[lane]; | 554 if (w->barCnt[lane] > max_cnt) { 555 max_cnt = w->barCnt[lane]; |
556 } 557 } 558 } 559 | 556 } 557 } 558 } 559 |
560 if (max_cnt > w->max_bar_cnt) { 561 w->max_bar_cnt = max_cnt; | 560 if (max_cnt > w->maxBarCnt) { 561 w->maxBarCnt = max_cnt; |
562 } 563 } 564 565 void 566 Call::MagicWaitWFBar(Wavefront *w) 567 { | 562 } 563 } 564 565 void 566 Call::MagicWaitWFBar(Wavefront *w) 567 { |
568 const VectorMask &mask = w->get_pred(); | 568 const VectorMask &mask = w->getPred(); |
569 int max_cnt = 0; 570 571 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 572 if (mask[lane]) { | 569 int max_cnt = 0; 570 571 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 572 if (mask[lane]) { |
573 w->bar_cnt[lane]--; | 573 w->barCnt[lane]--; |
574 } 575 | 574 } 575 |
576 if (w->bar_cnt[lane] > max_cnt) { 577 max_cnt = w->bar_cnt[lane]; | 576 if (w->barCnt[lane] > max_cnt) { 577 max_cnt = w->barCnt[lane]; |
578 } 579 } 580 | 578 } 579 } 580 |
581 if (max_cnt < w->max_bar_cnt) { 582 w->max_bar_cnt = max_cnt; | 581 if (max_cnt < w->maxBarCnt) { 582 w->maxBarCnt = max_cnt; |
583 } 584 585 w->instructionBuffer.erase(w->instructionBuffer.begin() + 1, 586 w->instructionBuffer.end()); 587 if (w->pendingFetch) 588 w->dropFetch = true; 589 } 590 591 void 592 Call::MagicPanic(Wavefront *w) 593 { | 583 } 584 585 w->instructionBuffer.erase(w->instructionBuffer.begin() + 1, 586 w->instructionBuffer.end()); 587 if (w->pendingFetch) 588 w->dropFetch = true; 589 } 590 591 void 592 Call::MagicPanic(Wavefront *w) 593 { |
594 const VectorMask &mask = w->get_pred(); | 594 const VectorMask &mask = w->getPred(); |
595 596 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 597 if (mask[lane]) { 598 int src_val1 = src1.get<int>(w, lane, 1); 599 panic("OpenCL Code failed assertion #%d. Triggered by lane %s", 600 src_val1, lane); 601 } 602 } --- 40 unchanged lines hidden (view full) --- 643 m->wfSlotId = w->wfSlotId; 644 m->wfDynId = w->wfDynId; 645 m->latency.init(&w->computeUnit->shader->tick_cnt); 646 647 m->s_type = SEG_GLOBAL; 648 m->pipeId = GLBMEM_PIPE; 649 m->latency.set(w->computeUnit->shader->ticks(64)); 650 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); | 595 596 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 597 if (mask[lane]) { 598 int src_val1 = src1.get<int>(w, lane, 1); 599 panic("OpenCL Code failed assertion #%d. Triggered by lane %s", 600 src_val1, lane); 601 } 602 } --- 40 unchanged lines hidden (view full) --- 643 m->wfSlotId = w->wfSlotId; 644 m->wfDynId = w->wfDynId; 645 m->latency.init(&w->computeUnit->shader->tick_cnt); 646 647 m->s_type = SEG_GLOBAL; 648 m->pipeId = GLBMEM_PIPE; 649 m->latency.set(w->computeUnit->shader->ticks(64)); 650 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); |
651 w->outstanding_reqs_wr_gm++; 652 w->wr_gm_reqs_in_pipe--; 653 w->outstanding_reqs_rd_gm++; 654 w->rd_gm_reqs_in_pipe--; 655 w->outstanding_reqs++; 656 w->mem_reqs_in_pipe--; | 651 w->outstandingReqsWrGm++; 652 w->wrGmReqsInPipe--; 653 w->outstandingReqsRdGm++; 654 w->rdGmReqsInPipe--; 655 w->outstandingReqs++; 656 w->memReqsInPipe--; |
657 } 658 659 void 660 Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) 661 { 662 GPUDynInstPtr m = gpuDynInst; 663 calcAddr(w, m); 664 --- 17 unchanged lines hidden (view full) --- 682 m->wfSlotId = w->wfSlotId; 683 m->wfDynId = w->wfDynId; 684 m->latency.init(&w->computeUnit->shader->tick_cnt); 685 686 m->s_type = SEG_GLOBAL; 687 m->pipeId = GLBMEM_PIPE; 688 m->latency.set(w->computeUnit->shader->ticks(64)); 689 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); | 657 } 658 659 void 660 Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) 661 { 662 GPUDynInstPtr m = gpuDynInst; 663 calcAddr(w, m); 664 --- 17 unchanged lines hidden (view full) --- 682 m->wfSlotId = w->wfSlotId; 683 m->wfDynId = w->wfDynId; 684 m->latency.init(&w->computeUnit->shader->tick_cnt); 685 686 m->s_type = SEG_GLOBAL; 687 m->pipeId = GLBMEM_PIPE; 688 m->latency.set(w->computeUnit->shader->ticks(64)); 689 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); |
690 w->outstanding_reqs_wr_gm++; 691 w->wr_gm_reqs_in_pipe--; 692 w->outstanding_reqs_rd_gm++; 693 w->rd_gm_reqs_in_pipe--; 694 w->outstanding_reqs++; 695 w->mem_reqs_in_pipe--; | 690 w->outstandingReqsWrGm++; 691 w->wrGmReqsInPipe--; 692 w->outstandingReqsRdGm++; 693 w->rdGmReqsInPipe--; 694 w->outstandingReqs++; 695 w->memReqsInPipe--; |
696 } 697 698 void 699 Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) 700 { 701 GPUDynInstPtr m = gpuDynInst; 702 // calculate the address 703 calcAddr(w, m); --- 16 unchanged lines hidden (view full) --- 720 m->wfSlotId = w->wfSlotId; 721 m->wfDynId = w->wfDynId; 722 m->latency.init(&w->computeUnit->shader->tick_cnt); 723 724 m->s_type = SEG_GLOBAL; 725 m->pipeId = GLBMEM_PIPE; 726 m->latency.set(w->computeUnit->shader->ticks(1)); 727 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); | 696 } 697 698 void 699 Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) 700 { 701 GPUDynInstPtr m = gpuDynInst; 702 // calculate the address 703 calcAddr(w, m); --- 16 unchanged lines hidden (view full) --- 720 m->wfSlotId = w->wfSlotId; 721 m->wfDynId = w->wfDynId; 722 m->latency.init(&w->computeUnit->shader->tick_cnt); 723 724 m->s_type = SEG_GLOBAL; 725 m->pipeId = GLBMEM_PIPE; 726 m->latency.set(w->computeUnit->shader->ticks(1)); 727 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); |
728 w->outstanding_reqs_rd_gm++; 729 w->rd_gm_reqs_in_pipe--; 730 w->outstanding_reqs++; 731 w->mem_reqs_in_pipe--; | 728 w->outstandingReqsRdGm++; 729 w->rdGmReqsInPipe--; 730 w->outstandingReqs++; 731 w->memReqsInPipe--; |
732 } 733 734 void 735 Call::MagicXactCasLd(Wavefront *w) 736 { | 732 } 733 734 void 735 Call::MagicXactCasLd(Wavefront *w) 736 { |
737 const VectorMask &mask = w->get_pred(); | 737 const VectorMask &mask = w->getPred(); |
738 int src_val1 = 0; 739 740 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 741 if (mask[lane]) { 742 src_val1 = src1.get<int>(w, lane, 1); 743 break; 744 } 745 } --- 5 unchanged lines hidden (view full) --- 751 752 w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue 753 .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId)); 754 } 755 756 void 757 Call::MagicMostSigThread(Wavefront *w) 758 { | 738 int src_val1 = 0; 739 740 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 741 if (mask[lane]) { 742 src_val1 = src1.get<int>(w, lane, 1); 743 break; 744 } 745 } --- 5 unchanged lines hidden (view full) --- 751 752 w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue 753 .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId)); 754 } 755 756 void 757 Call::MagicMostSigThread(Wavefront *w) 758 { |
759 const VectorMask &mask = w->get_pred(); | 759 const VectorMask &mask = w->getPred(); |
760 unsigned mst = true; 761 762 for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { 763 if (mask[lane]) { 764 dest.set<int>(w, lane, mst); 765 mst = false; 766 } 767 } 768 } 769 770 void 771 Call::MagicMostSigBroadcast(Wavefront *w) 772 { | 760 unsigned mst = true; 761 762 for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { 763 if (mask[lane]) { 764 dest.set<int>(w, lane, mst); 765 mst = false; 766 } 767 } 768 } 769 770 void 771 Call::MagicMostSigBroadcast(Wavefront *w) 772 { |
773 const VectorMask &mask = w->get_pred(); | 773 const VectorMask &mask = w->getPred(); |
774 int res = 0; 775 bool got_res = false; 776 777 for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { 778 if (mask[lane]) { 779 if (!got_res) { 780 res = src1.get<int>(w, lane, 1); 781 got_res = true; 782 } 783 dest.set<int>(w, lane, res); 784 } 785 } 786 } 787 788} // namespace HsailISA | 774 int res = 0; 775 bool got_res = false; 776 777 for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { 778 if (mask[lane]) { 779 if (!got_res) { 780 res = src1.get<int>(w, lane, 1); 781 got_res = true; 782 } 783 dest.set<int>(w, lane, res); 784 } 785 } 786 } 787 788} // namespace HsailISA |