pseudo_inst.cc (11534:7106f550afad) pseudo_inst.cc (11639:2e8d4bd8108d)
1/*
2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 65 unchanged lines hidden (view full) ---

74 MAGIC_MOST_SIG_BROADCAST,
75 MAGIC_PRINT_WFID_32,
76 MAGIC_PRINT_WFID_64
77 };
78
79 void
80 Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
81 {
1/*
2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 65 unchanged lines hidden (view full) ---

74 MAGIC_MOST_SIG_BROADCAST,
75 MAGIC_PRINT_WFID_32,
76 MAGIC_PRINT_WFID_64
77 };
78
79 void
80 Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
81 {
82 const VectorMask &mask = w->get_pred();
82 const VectorMask &mask = w->getPred();
83
84 int op = 0;
85 bool got_op = false;
86
87 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
88 if (mask[lane]) {
89 int src_val0 = src1.get<int>(w, lane, 0);
90 if (got_op) {

--- 85 unchanged lines hidden (view full) ---

176 default: fatal("unrecognized magic instruction: %d\n", op);
177 }
178 }
179
180 void
181 Call::MagicPrintLane(Wavefront *w)
182 {
183 #if TRACING_ON
83
84 int op = 0;
85 bool got_op = false;
86
87 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
88 if (mask[lane]) {
89 int src_val0 = src1.get<int>(w, lane, 0);
90 if (got_op) {

--- 85 unchanged lines hidden (view full) ---

176 default: fatal("unrecognized magic instruction: %d\n", op);
177 }
178 }
179
180 void
181 Call::MagicPrintLane(Wavefront *w)
182 {
183 #if TRACING_ON
184 const VectorMask &mask = w->get_pred();
184 const VectorMask &mask = w->getPred();
185 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
186 if (mask[lane]) {
187 int src_val1 = src1.get<int>(w, lane, 1);
188 int src_val2 = src1.get<int>(w, lane, 2);
189 if (src_val2) {
190 DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
191 disassemble(), w->computeUnit->cu_id, w->simdId,
192 w->wfSlotId, lane, src_val1);

--- 6 unchanged lines hidden (view full) ---

199 }
200 #endif
201 }
202
203 void
204 Call::MagicPrintLane64(Wavefront *w)
205 {
206 #if TRACING_ON
185 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
186 if (mask[lane]) {
187 int src_val1 = src1.get<int>(w, lane, 1);
188 int src_val2 = src1.get<int>(w, lane, 2);
189 if (src_val2) {
190 DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
191 disassemble(), w->computeUnit->cu_id, w->simdId,
192 w->wfSlotId, lane, src_val1);

--- 6 unchanged lines hidden (view full) ---

199 }
200 #endif
201 }
202
203 void
204 Call::MagicPrintLane64(Wavefront *w)
205 {
206 #if TRACING_ON
207 const VectorMask &mask = w->get_pred();
207 const VectorMask &mask = w->getPred();
208 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
209 if (mask[lane]) {
210 int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
211 int src_val2 = src1.get<int>(w, lane, 2);
212 if (src_val2) {
213 DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
214 disassemble(), w->computeUnit->cu_id, w->simdId,
215 w->wfSlotId, lane, src_val1);

--- 6 unchanged lines hidden (view full) ---

222 }
223 #endif
224 }
225
226 void
227 Call::MagicPrintWF32(Wavefront *w)
228 {
229 #if TRACING_ON
208 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
209 if (mask[lane]) {
210 int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
211 int src_val2 = src1.get<int>(w, lane, 2);
212 if (src_val2) {
213 DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
214 disassemble(), w->computeUnit->cu_id, w->simdId,
215 w->wfSlotId, lane, src_val1);

--- 6 unchanged lines hidden (view full) ---

222 }
223 #endif
224 }
225
226 void
227 Call::MagicPrintWF32(Wavefront *w)
228 {
229 #if TRACING_ON
230 const VectorMask &mask = w->get_pred();
230 const VectorMask &mask = w->getPred();
231 std::string res_str;
232 res_str = csprintf("krl_prt (%s)\n", disassemble());
233
234 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
235 if (!(lane & 7)) {
236 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
237 }
238

--- 21 unchanged lines hidden (view full) ---

260 DPRINTFN(res_str.c_str());
261 #endif
262 }
263
264 void
265 Call::MagicPrintWF32ID(Wavefront *w)
266 {
267 #if TRACING_ON
231 std::string res_str;
232 res_str = csprintf("krl_prt (%s)\n", disassemble());
233
234 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
235 if (!(lane & 7)) {
236 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
237 }
238

--- 21 unchanged lines hidden (view full) ---

260 DPRINTFN(res_str.c_str());
261 #endif
262 }
263
264 void
265 Call::MagicPrintWF32ID(Wavefront *w)
266 {
267 #if TRACING_ON
268 const VectorMask &mask = w->get_pred();
268 const VectorMask &mask = w->getPred();
269 std::string res_str;
270 int src_val3 = -1;
271 res_str = csprintf("krl_prt (%s)\n", disassemble());
272
273 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
274 if (!(lane & 7)) {
275 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
276 }

--- 25 unchanged lines hidden (view full) ---

302 }
303 #endif
304 }
305
306 void
307 Call::MagicPrintWF64(Wavefront *w)
308 {
309 #if TRACING_ON
269 std::string res_str;
270 int src_val3 = -1;
271 res_str = csprintf("krl_prt (%s)\n", disassemble());
272
273 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
274 if (!(lane & 7)) {
275 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
276 }

--- 25 unchanged lines hidden (view full) ---

302 }
303 #endif
304 }
305
306 void
307 Call::MagicPrintWF64(Wavefront *w)
308 {
309 #if TRACING_ON
310 const VectorMask &mask = w->get_pred();
310 const VectorMask &mask = w->getPred();
311 std::string res_str;
312 res_str = csprintf("krl_prt (%s)\n", disassemble());
313
314 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
315 if (!(lane & 3)) {
316 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
317 }
318

--- 21 unchanged lines hidden (view full) ---

340 DPRINTFN(res_str.c_str());
341 #endif
342 }
343
344 void
345 Call::MagicPrintWFID64(Wavefront *w)
346 {
347 #if TRACING_ON
311 std::string res_str;
312 res_str = csprintf("krl_prt (%s)\n", disassemble());
313
314 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
315 if (!(lane & 3)) {
316 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
317 }
318

--- 21 unchanged lines hidden (view full) ---

340 DPRINTFN(res_str.c_str());
341 #endif
342 }
343
344 void
345 Call::MagicPrintWFID64(Wavefront *w)
346 {
347 #if TRACING_ON
348 const VectorMask &mask = w->get_pred();
348 const VectorMask &mask = w->getPred();
349 std::string res_str;
350 int src_val3 = -1;
351 res_str = csprintf("krl_prt (%s)\n", disassemble());
352
353 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
354 if (!(lane & 3)) {
355 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
356 }

--- 25 unchanged lines hidden (view full) ---

382 }
383 #endif
384 }
385
386 void
387 Call::MagicPrintWFFloat(Wavefront *w)
388 {
389 #if TRACING_ON
349 std::string res_str;
350 int src_val3 = -1;
351 res_str = csprintf("krl_prt (%s)\n", disassemble());
352
353 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
354 if (!(lane & 3)) {
355 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
356 }

--- 25 unchanged lines hidden (view full) ---

382 }
383 #endif
384 }
385
386 void
387 Call::MagicPrintWFFloat(Wavefront *w)
388 {
389 #if TRACING_ON
390 const VectorMask &mask = w->get_pred();
390 const VectorMask &mask = w->getPred();
391 std::string res_str;
392 res_str = csprintf("krl_prt (%s)\n", disassemble());
393
394 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
395 if (!(lane & 7)) {
396 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
397 }
398

--- 21 unchanged lines hidden (view full) ---

420 void
421 Call::MagicSimBreak(Wavefront *w)
422 {
423 std::string res_str;
424 // print out state for this wavefront and then break
425 res_str = csprintf("Breakpoint encountered for wavefront %i\n",
426 w->wfSlotId);
427
391 std::string res_str;
392 res_str = csprintf("krl_prt (%s)\n", disassemble());
393
394 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
395 if (!(lane & 7)) {
396 res_str += csprintf("DB%03d: ", (int)w->wfDynId);
397 }
398

--- 21 unchanged lines hidden (view full) ---

420 void
421 Call::MagicSimBreak(Wavefront *w)
422 {
423 std::string res_str;
424 // print out state for this wavefront and then break
425 res_str = csprintf("Breakpoint encountered for wavefront %i\n",
426 w->wfSlotId);
427
428 res_str += csprintf(" Kern ID: %i\n", w->kern_id);
428 res_str += csprintf(" Kern ID: %i\n", w->kernId);
429 res_str += csprintf(" Phase ID: %i\n", w->simdId);
430 res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
431 res_str += csprintf(" Exec mask: ");
432
433 for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
434 if (w->execMask(i))
435 res_str += "1";
436 else

--- 13 unchanged lines hidden (view full) ---

450 fflush(stdout);
451
452 raise(SIGTRAP);
453 }
454
455 void
456 Call::MagicPrefixSum(Wavefront *w)
457 {
429 res_str += csprintf(" Phase ID: %i\n", w->simdId);
430 res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
431 res_str += csprintf(" Exec mask: ");
432
433 for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
434 if (w->execMask(i))
435 res_str += "1";
436 else

--- 13 unchanged lines hidden (view full) ---

450 fflush(stdout);
451
452 raise(SIGTRAP);
453 }
454
455 void
456 Call::MagicPrefixSum(Wavefront *w)
457 {
458 const VectorMask &mask = w->get_pred();
458 const VectorMask &mask = w->getPred();
459 int res = 0;
460
461 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
462 if (mask[lane]) {
463 int src_val1 = src1.get<int>(w, lane, 1);
464 dest.set<int>(w, lane, res);
465 res += src_val1;
466 }
467 }
468 }
469
470 void
471 Call::MagicReduction(Wavefront *w)
472 {
473 // reduction magic instruction
474 // The reduction instruction takes up to 64 inputs (one from
475 // each thread in a WF) and sums them. It returns the sum to
476 // each thread in the WF.
459 int res = 0;
460
461 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
462 if (mask[lane]) {
463 int src_val1 = src1.get<int>(w, lane, 1);
464 dest.set<int>(w, lane, res);
465 res += src_val1;
466 }
467 }
468 }
469
470 void
471 Call::MagicReduction(Wavefront *w)
472 {
473 // reduction magic instruction
474 // The reduction instruction takes up to 64 inputs (one from
475 // each thread in a WF) and sums them. It returns the sum to
476 // each thread in the WF.
477 const VectorMask &mask = w->get_pred();
477 const VectorMask &mask = w->getPred();
478 int res = 0;
479
480 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
481 if (mask[lane]) {
482 int src_val1 = src1.get<int>(w, lane, 1);
483 res += src_val1;
484 }
485 }
486
487 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
488 if (mask[lane]) {
489 dest.set<int>(w, lane, res);
490 }
491 }
492 }
493
494 void
495 Call::MagicMaskLower(Wavefront *w)
496 {
478 int res = 0;
479
480 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
481 if (mask[lane]) {
482 int src_val1 = src1.get<int>(w, lane, 1);
483 res += src_val1;
484 }
485 }
486
487 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
488 if (mask[lane]) {
489 dest.set<int>(w, lane, res);
490 }
491 }
492 }
493
494 void
495 Call::MagicMaskLower(Wavefront *w)
496 {
497 const VectorMask &mask = w->get_pred();
497 const VectorMask &mask = w->getPred();
498 int res = 0;
499
500 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
501 if (mask[lane]) {
502 int src_val1 = src1.get<int>(w, lane, 1);
503
504 if (src_val1) {
505 if (lane < (w->computeUnit->wfSize()/2)) {

--- 8 unchanged lines hidden (view full) ---

514 dest.set<int>(w, lane, res);
515 }
516 }
517 }
518
519 void
520 Call::MagicMaskUpper(Wavefront *w)
521 {
498 int res = 0;
499
500 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
501 if (mask[lane]) {
502 int src_val1 = src1.get<int>(w, lane, 1);
503
504 if (src_val1) {
505 if (lane < (w->computeUnit->wfSize()/2)) {

--- 8 unchanged lines hidden (view full) ---

514 dest.set<int>(w, lane, res);
515 }
516 }
517 }
518
519 void
520 Call::MagicMaskUpper(Wavefront *w)
521 {
522 const VectorMask &mask = w->get_pred();
522 const VectorMask &mask = w->getPred();
523 int res = 0;
524 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
525 if (mask[lane]) {
526 int src_val1 = src1.get<int>(w, lane, 1);
527
528 if (src_val1) {
529 if (lane >= (w->computeUnit->wfSize()/2)) {
530 res = res | ((uint32_t)(1) <<

--- 8 unchanged lines hidden (view full) ---

539 dest.set<int>(w, lane, res);
540 }
541 }
542 }
543
544 void
545 Call::MagicJoinWFBar(Wavefront *w)
546 {
523 int res = 0;
524 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
525 if (mask[lane]) {
526 int src_val1 = src1.get<int>(w, lane, 1);
527
528 if (src_val1) {
529 if (lane >= (w->computeUnit->wfSize()/2)) {
530 res = res | ((uint32_t)(1) <<

--- 8 unchanged lines hidden (view full) ---

539 dest.set<int>(w, lane, res);
540 }
541 }
542 }
543
544 void
545 Call::MagicJoinWFBar(Wavefront *w)
546 {
547 const VectorMask &mask = w->get_pred();
547 const VectorMask &mask = w->getPred();
548 int max_cnt = 0;
549
550 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
551 if (mask[lane]) {
548 int max_cnt = 0;
549
550 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
551 if (mask[lane]) {
552 w->bar_cnt[lane]++;
552 w->barCnt[lane]++;
553
553
554 if (w->bar_cnt[lane] > max_cnt) {
555 max_cnt = w->bar_cnt[lane];
554 if (w->barCnt[lane] > max_cnt) {
555 max_cnt = w->barCnt[lane];
556 }
557 }
558 }
559
556 }
557 }
558 }
559
560 if (max_cnt > w->max_bar_cnt) {
561 w->max_bar_cnt = max_cnt;
560 if (max_cnt > w->maxBarCnt) {
561 w->maxBarCnt = max_cnt;
562 }
563 }
564
565 void
566 Call::MagicWaitWFBar(Wavefront *w)
567 {
562 }
563 }
564
565 void
566 Call::MagicWaitWFBar(Wavefront *w)
567 {
568 const VectorMask &mask = w->get_pred();
568 const VectorMask &mask = w->getPred();
569 int max_cnt = 0;
570
571 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
572 if (mask[lane]) {
569 int max_cnt = 0;
570
571 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
572 if (mask[lane]) {
573 w->bar_cnt[lane]--;
573 w->barCnt[lane]--;
574 }
575
574 }
575
576 if (w->bar_cnt[lane] > max_cnt) {
577 max_cnt = w->bar_cnt[lane];
576 if (w->barCnt[lane] > max_cnt) {
577 max_cnt = w->barCnt[lane];
578 }
579 }
580
578 }
579 }
580
581 if (max_cnt < w->max_bar_cnt) {
582 w->max_bar_cnt = max_cnt;
581 if (max_cnt < w->maxBarCnt) {
582 w->maxBarCnt = max_cnt;
583 }
584
585 w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
586 w->instructionBuffer.end());
587 if (w->pendingFetch)
588 w->dropFetch = true;
589 }
590
591 void
592 Call::MagicPanic(Wavefront *w)
593 {
583 }
584
585 w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
586 w->instructionBuffer.end());
587 if (w->pendingFetch)
588 w->dropFetch = true;
589 }
590
591 void
592 Call::MagicPanic(Wavefront *w)
593 {
594 const VectorMask &mask = w->get_pred();
594 const VectorMask &mask = w->getPred();
595
596 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
597 if (mask[lane]) {
598 int src_val1 = src1.get<int>(w, lane, 1);
599 panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
600 src_val1, lane);
601 }
602 }

--- 40 unchanged lines hidden (view full) ---

643 m->wfSlotId = w->wfSlotId;
644 m->wfDynId = w->wfDynId;
645 m->latency.init(&w->computeUnit->shader->tick_cnt);
646
647 m->s_type = SEG_GLOBAL;
648 m->pipeId = GLBMEM_PIPE;
649 m->latency.set(w->computeUnit->shader->ticks(64));
650 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
595
596 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
597 if (mask[lane]) {
598 int src_val1 = src1.get<int>(w, lane, 1);
599 panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
600 src_val1, lane);
601 }
602 }

--- 40 unchanged lines hidden (view full) ---

643 m->wfSlotId = w->wfSlotId;
644 m->wfDynId = w->wfDynId;
645 m->latency.init(&w->computeUnit->shader->tick_cnt);
646
647 m->s_type = SEG_GLOBAL;
648 m->pipeId = GLBMEM_PIPE;
649 m->latency.set(w->computeUnit->shader->ticks(64));
650 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
651 w->outstanding_reqs_wr_gm++;
652 w->wr_gm_reqs_in_pipe--;
653 w->outstanding_reqs_rd_gm++;
654 w->rd_gm_reqs_in_pipe--;
655 w->outstanding_reqs++;
656 w->mem_reqs_in_pipe--;
651 w->outstandingReqsWrGm++;
652 w->wrGmReqsInPipe--;
653 w->outstandingReqsRdGm++;
654 w->rdGmReqsInPipe--;
655 w->outstandingReqs++;
656 w->memReqsInPipe--;
657 }
658
659 void
660 Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
661 {
662 GPUDynInstPtr m = gpuDynInst;
663 calcAddr(w, m);
664

--- 17 unchanged lines hidden (view full) ---

682 m->wfSlotId = w->wfSlotId;
683 m->wfDynId = w->wfDynId;
684 m->latency.init(&w->computeUnit->shader->tick_cnt);
685
686 m->s_type = SEG_GLOBAL;
687 m->pipeId = GLBMEM_PIPE;
688 m->latency.set(w->computeUnit->shader->ticks(64));
689 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
657 }
658
659 void
660 Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
661 {
662 GPUDynInstPtr m = gpuDynInst;
663 calcAddr(w, m);
664

--- 17 unchanged lines hidden (view full) ---

682 m->wfSlotId = w->wfSlotId;
683 m->wfDynId = w->wfDynId;
684 m->latency.init(&w->computeUnit->shader->tick_cnt);
685
686 m->s_type = SEG_GLOBAL;
687 m->pipeId = GLBMEM_PIPE;
688 m->latency.set(w->computeUnit->shader->ticks(64));
689 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
690 w->outstanding_reqs_wr_gm++;
691 w->wr_gm_reqs_in_pipe--;
692 w->outstanding_reqs_rd_gm++;
693 w->rd_gm_reqs_in_pipe--;
694 w->outstanding_reqs++;
695 w->mem_reqs_in_pipe--;
690 w->outstandingReqsWrGm++;
691 w->wrGmReqsInPipe--;
692 w->outstandingReqsRdGm++;
693 w->rdGmReqsInPipe--;
694 w->outstandingReqs++;
695 w->memReqsInPipe--;
696 }
697
698 void
699 Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
700 {
701 GPUDynInstPtr m = gpuDynInst;
702 // calculate the address
703 calcAddr(w, m);

--- 16 unchanged lines hidden (view full) ---

720 m->wfSlotId = w->wfSlotId;
721 m->wfDynId = w->wfDynId;
722 m->latency.init(&w->computeUnit->shader->tick_cnt);
723
724 m->s_type = SEG_GLOBAL;
725 m->pipeId = GLBMEM_PIPE;
726 m->latency.set(w->computeUnit->shader->ticks(1));
727 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
696 }
697
698 void
699 Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
700 {
701 GPUDynInstPtr m = gpuDynInst;
702 // calculate the address
703 calcAddr(w, m);

--- 16 unchanged lines hidden (view full) ---

720 m->wfSlotId = w->wfSlotId;
721 m->wfDynId = w->wfDynId;
722 m->latency.init(&w->computeUnit->shader->tick_cnt);
723
724 m->s_type = SEG_GLOBAL;
725 m->pipeId = GLBMEM_PIPE;
726 m->latency.set(w->computeUnit->shader->ticks(1));
727 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
728 w->outstanding_reqs_rd_gm++;
729 w->rd_gm_reqs_in_pipe--;
730 w->outstanding_reqs++;
731 w->mem_reqs_in_pipe--;
728 w->outstandingReqsRdGm++;
729 w->rdGmReqsInPipe--;
730 w->outstandingReqs++;
731 w->memReqsInPipe--;
732 }
733
734 void
735 Call::MagicXactCasLd(Wavefront *w)
736 {
732 }
733
734 void
735 Call::MagicXactCasLd(Wavefront *w)
736 {
737 const VectorMask &mask = w->get_pred();
737 const VectorMask &mask = w->getPred();
738 int src_val1 = 0;
739
740 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
741 if (mask[lane]) {
742 src_val1 = src1.get<int>(w, lane, 1);
743 break;
744 }
745 }

--- 5 unchanged lines hidden (view full) ---

751
752 w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
753 .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
754 }
755
756 void
757 Call::MagicMostSigThread(Wavefront *w)
758 {
738 int src_val1 = 0;
739
740 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
741 if (mask[lane]) {
742 src_val1 = src1.get<int>(w, lane, 1);
743 break;
744 }
745 }

--- 5 unchanged lines hidden (view full) ---

751
752 w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
753 .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
754 }
755
756 void
757 Call::MagicMostSigThread(Wavefront *w)
758 {
759 const VectorMask &mask = w->get_pred();
759 const VectorMask &mask = w->getPred();
760 unsigned mst = true;
761
762 for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
763 if (mask[lane]) {
764 dest.set<int>(w, lane, mst);
765 mst = false;
766 }
767 }
768 }
769
770 void
771 Call::MagicMostSigBroadcast(Wavefront *w)
772 {
760 unsigned mst = true;
761
762 for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
763 if (mask[lane]) {
764 dest.set<int>(w, lane, mst);
765 mst = false;
766 }
767 }
768 }
769
770 void
771 Call::MagicMostSigBroadcast(Wavefront *w)
772 {
773 const VectorMask &mask = w->get_pred();
773 const VectorMask &mask = w->getPred();
774 int res = 0;
775 bool got_res = false;
776
777 for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
778 if (mask[lane]) {
779 if (!got_res) {
780 res = src1.get<int>(w, lane, 1);
781 got_res = true;
782 }
783 dest.set<int>(w, lane, res);
784 }
785 }
786 }
787
788} // namespace HsailISA
774 int res = 0;
775 bool got_res = false;
776
777 for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
778 if (mask[lane]) {
779 if (!got_res) {
780 res = src1.get<int>(w, lane, 1);
781 got_res = true;
782 }
783 dest.set<int>(w, lane, res);
784 }
785 }
786 }
787
788} // namespace HsailISA