wavefront.cc (11657:5fad5a37d6fc) wavefront.cc (11692:e772fdcd3809)
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 23 unchanged lines hidden (view full) ---

32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/wavefront.hh"
37
38#include "debug/GPUExec.hh"
39#include "debug/WavefrontStack.hh"
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 23 unchanged lines hidden (view full) ---

32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/wavefront.hh"
37
38#include "debug/GPUExec.hh"
39#include "debug/WavefrontStack.hh"
40#include "gpu-compute/code_enums.hh"
41#include "gpu-compute/compute_unit.hh"
42#include "gpu-compute/gpu_dyn_inst.hh"
43#include "gpu-compute/shader.hh"
44#include "gpu-compute/vector_register_file.hh"
45
46Wavefront*
47WavefrontParams::create()
48{

--- 111 unchanged lines hidden (view full) ---

160 wfDynId = _wf_dyn_id;
161 basePtr = _base_ptr;
162 status = S_RUNNING;
163}
164
165bool
166Wavefront::isGmInstruction(GPUDynInstPtr ii)
167{
40#include "gpu-compute/compute_unit.hh"
41#include "gpu-compute/gpu_dyn_inst.hh"
42#include "gpu-compute/shader.hh"
43#include "gpu-compute/vector_register_file.hh"
44
45Wavefront*
46WavefrontParams::create()
47{

--- 111 unchanged lines hidden (view full) ---

159 wfDynId = _wf_dyn_id;
160 basePtr = _base_ptr;
161 status = S_RUNNING;
162}
163
164bool
165Wavefront::isGmInstruction(GPUDynInstPtr ii)
166{
168 if (IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
169 IS_OT_ATOMIC_PM(ii->opType())) {
167 if (ii->isGlobalMem() || ii->isFlat())
170 return true;
168 return true;
171 }
172
169
173 if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
174 IS_OT_ATOMIC_GM(ii->opType())) {
175 return true;
176 }
177
178 if (IS_OT_FLAT(ii->opType())) {
179 return true;
180 }
181
182 return false;
183}
184
185bool
186Wavefront::isLmInstruction(GPUDynInstPtr ii)
187{
170 return false;
171}
172
173bool
174Wavefront::isLmInstruction(GPUDynInstPtr ii)
175{
188 if (IS_OT_READ_LM(ii->opType()) || IS_OT_WRITE_LM(ii->opType()) ||
189 IS_OT_ATOMIC_LM(ii->opType())) {
176 if (ii->isLocalMem()) {
190 return true;
191 }
192
193 return false;
194}
195
196bool
197Wavefront::isOldestInstALU()
198{
199 assert(!instructionBuffer.empty());
200 GPUDynInstPtr ii = instructionBuffer.front();
201
177 return true;
178 }
179
180 return false;
181}
182
183bool
184Wavefront::isOldestInstALU()
185{
186 assert(!instructionBuffer.empty());
187 GPUDynInstPtr ii = instructionBuffer.front();
188
202 if (status != S_STOPPED && (ii->opType() == Enums::OT_NOP ||
203 ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH ||
204 ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
205 ii->opType() == Enums::OT_KERN_READ)) {
189 if (status != S_STOPPED && (ii->isNop() ||
190 ii->isReturn() || ii->isBranch() ||
191 ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) {
206 return true;
207 }
208
209 return false;
210}
211
212bool
213Wavefront::isOldestInstBarrier()
214{
215 assert(!instructionBuffer.empty());
216 GPUDynInstPtr ii = instructionBuffer.front();
217
192 return true;
193 }
194
195 return false;
196}
197
198bool
199Wavefront::isOldestInstBarrier()
200{
201 assert(!instructionBuffer.empty());
202 GPUDynInstPtr ii = instructionBuffer.front();
203
218 if (status != S_STOPPED && ii->opType() == Enums::OT_BARRIER) {
204 if (status != S_STOPPED && ii->isBarrier()) {
219 return true;
220 }
221
222 return false;
223}
224
225bool
226Wavefront::isOldestInstGMem()
227{
228 assert(!instructionBuffer.empty());
229 GPUDynInstPtr ii = instructionBuffer.front();
230
205 return true;
206 }
207
208 return false;
209}
210
211bool
212Wavefront::isOldestInstGMem()
213{
214 assert(!instructionBuffer.empty());
215 GPUDynInstPtr ii = instructionBuffer.front();
216
231 if (status != S_STOPPED && (IS_OT_READ_GM(ii->opType()) ||
232 IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
233
217 if (status != S_STOPPED && ii->isGlobalMem()) {
234 return true;
235 }
236
237 return false;
238}
239
240bool
241Wavefront::isOldestInstLMem()
242{
243 assert(!instructionBuffer.empty());
244 GPUDynInstPtr ii = instructionBuffer.front();
245
218 return true;
219 }
220
221 return false;
222}
223
224bool
225Wavefront::isOldestInstLMem()
226{
227 assert(!instructionBuffer.empty());
228 GPUDynInstPtr ii = instructionBuffer.front();
229
246 if (status != S_STOPPED && (IS_OT_READ_LM(ii->opType()) ||
247 IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
248
230 if (status != S_STOPPED && ii->isLocalMem()) {
249 return true;
250 }
251
252 return false;
253}
254
255bool
256Wavefront::isOldestInstPrivMem()
257{
258 assert(!instructionBuffer.empty());
259 GPUDynInstPtr ii = instructionBuffer.front();
260
231 return true;
232 }
233
234 return false;
235}
236
237bool
238Wavefront::isOldestInstPrivMem()
239{
240 assert(!instructionBuffer.empty());
241 GPUDynInstPtr ii = instructionBuffer.front();
242
261 if (status != S_STOPPED && (IS_OT_READ_PM(ii->opType()) ||
262 IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
263
243 if (status != S_STOPPED && ii->isPrivateSeg()) {
264 return true;
265 }
266
267 return false;
268}
269
270bool
271Wavefront::isOldestInstFlatMem()
272{
273 assert(!instructionBuffer.empty());
274 GPUDynInstPtr ii = instructionBuffer.front();
275
244 return true;
245 }
246
247 return false;
248}
249
250bool
251Wavefront::isOldestInstFlatMem()
252{
253 assert(!instructionBuffer.empty());
254 GPUDynInstPtr ii = instructionBuffer.front();
255
276 if (status != S_STOPPED && IS_OT_FLAT(ii->opType())) {
277
256 if (status != S_STOPPED && ii->isFlat()) {
278 return true;
279 }
280
281 return false;
282}
283
284// Return true if the Wavefront's instruction
285// buffer has branch instruction.
286bool
287Wavefront::instructionBufferHasBranch()
288{
289 for (auto it : instructionBuffer) {
290 GPUDynInstPtr ii = it;
291
257 return true;
258 }
259
260 return false;
261}
262
263// Return true if the Wavefront's instruction
264// buffer has branch instruction.
265bool
266Wavefront::instructionBufferHasBranch()
267{
268 for (auto it : instructionBuffer) {
269 GPUDynInstPtr ii = it;
270
292 if (ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH) {
271 if (ii->isReturn() || ii->isBranch()) {
293 return true;
294 }
295 }
296
297 return false;
298}
299
300// Remap HSAIL register to physical VGPR.

--- 65 unchanged lines hidden (view full) ---

366 locMemIssueRdy = true;
367 }
368 }
369
370 // The following code is very error prone and the entire process for
371 // checking readiness will be fixed eventually. In the meantime, let's
372 // make sure that we do not silently let an instruction type slip
373 // through this logic and always return not ready.
272 return true;
273 }
274 }
275
276 return false;
277}
278
279// Remap HSAIL register to physical VGPR.

--- 65 unchanged lines hidden (view full) ---

345 locMemIssueRdy = true;
346 }
347 }
348
349 // The following code is very error prone and the entire process for
350 // checking readiness will be fixed eventually. In the meantime, let's
351 // make sure that we do not silently let an instruction type slip
352 // through this logic and always return not ready.
374 if (!(ii->opType() == Enums::OT_BARRIER || ii->opType() == Enums::OT_NOP ||
375 ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH ||
376 ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
377 ii->opType() == Enums::OT_KERN_READ ||
378 ii->opType() == Enums::OT_ARG ||
379 IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
380 IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_READ_LM(ii->opType()) ||
381 IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
382 IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) ||
383 IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_FLAT(ii->opType()))) {
353 if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
354 ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
355 ii->isMemFence() || ii->isFlat())) {
384 panic("next instruction: %s is of unknown type\n", ii->disassemble());
385 }
386
387 DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n",
388 computeUnit->cu_id, simdId, wfSlotId, ii->disassemble());
389
356 panic("next instruction: %s is of unknown type\n", ii->disassemble());
357 }
358
359 DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n",
360 computeUnit->cu_id, simdId, wfSlotId, ii->disassemble());
361
390 if (type == I_ALU && ii->opType() == Enums::OT_BARRIER) {
362 if (type == I_ALU && ii->isBarrier()) {
391 // Here for ALU instruction (barrier)
392 if (!computeUnit->wfWait[simdId].prerdy()) {
393 // Is wave slot free?
394 return 0;
395 }
396
397 // Are there in pipe or outstanding memory requests?
398 if ((outstandingReqs + memReqsInPipe) > 0) {
399 return 0;
400 }
401
402 ready_inst = true;
363 // Here for ALU instruction (barrier)
364 if (!computeUnit->wfWait[simdId].prerdy()) {
365 // Is wave slot free?
366 return 0;
367 }
368
369 // Are there in pipe or outstanding memory requests?
370 if ((outstandingReqs + memReqsInPipe) > 0) {
371 return 0;
372 }
373
374 ready_inst = true;
403 } else if (type == I_ALU && ii->opType() == Enums::OT_NOP) {
375 } else if (type == I_ALU && ii->isNop()) {
404 // Here for ALU instruction (nop)
405 if (!computeUnit->wfWait[simdId].prerdy()) {
406 // Is wave slot free?
407 return 0;
408 }
409
410 ready_inst = true;
376 // Here for ALU instruction (nop)
377 if (!computeUnit->wfWait[simdId].prerdy()) {
378 // Is wave slot free?
379 return 0;
380 }
381
382 ready_inst = true;
411 } else if (type == I_ALU && ii->opType() == Enums::OT_RET) {
383 } else if (type == I_ALU && ii->isReturn()) {
412 // Here for ALU instruction (return)
413 if (!computeUnit->wfWait[simdId].prerdy()) {
414 // Is wave slot free?
415 return 0;
416 }
417
418 // Are there in pipe or outstanding memory requests?
419 if ((outstandingReqs + memReqsInPipe) > 0) {
420 return 0;
421 }
422
423 ready_inst = true;
384 // Here for ALU instruction (return)
385 if (!computeUnit->wfWait[simdId].prerdy()) {
386 // Is wave slot free?
387 return 0;
388 }
389
390 // Are there in pipe or outstanding memory requests?
391 if ((outstandingReqs + memReqsInPipe) > 0) {
392 return 0;
393 }
394
395 ready_inst = true;
424 } else if (type == I_ALU && (ii->opType() == Enums::OT_BRANCH ||
425 ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) ||
426 ii->opType() == Enums::OT_KERN_READ ||
427 ii->opType() == Enums::OT_ARG)) {
396 } else if (type == I_ALU && (ii->isBranch() ||
397 ii->isALU() ||
398 (ii->isKernArgSeg() && ii->isLoad()) ||
399 ii->isArgSeg())) {
428 // Here for ALU instruction (all others)
429 if (!computeUnit->wfWait[simdId].prerdy()) {
430 // Is alu slot free?
431 return 0;
432 }
433 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
434 VrfAccessType::RD_WR)) {
435 return 0;
436 }
437
438 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
439 return 0;
440 }
441 ready_inst = true;
400 // Here for ALU instruction (all others)
401 if (!computeUnit->wfWait[simdId].prerdy()) {
402 // Is alu slot free?
403 return 0;
404 }
405 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
406 VrfAccessType::RD_WR)) {
407 return 0;
408 }
409
410 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
411 return 0;
412 }
413 ready_inst = true;
442 } else if (type == I_GLOBAL && (IS_OT_READ_GM(ii->opType()) ||
443 IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) {
414 } else if (type == I_GLOBAL && ii->isGlobalMem()) {
444 // Here Global memory instruction
415 // Here Global memory instruction
445 if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) {
416 if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
446 // Are there in pipe or outstanding global memory write requests?
447 if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
448 return 0;
449 }
450 }
451
417 // Are there in pipe or outstanding global memory write requests?
418 if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
419 return 0;
420 }
421 }
422
452 if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) ||
453 IS_OT_HIST_GM(ii->opType())) {
423 if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
454 // Are there in pipe or outstanding global memory read requests?
455 if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
456 return 0;
457 }
458
459 if (!glbMemIssueRdy) {
460 // Is WV issue slot free?
461 return 0;

--- 13 unchanged lines hidden (view full) ---

475 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
476 VrfAccessType::RD_WR)) {
477 return 0;
478 }
479 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
480 return 0;
481 }
482 ready_inst = true;
424 // Are there in pipe or outstanding global memory read requests?
425 if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
426 return 0;
427 }
428
429 if (!glbMemIssueRdy) {
430 // Is WV issue slot free?
431 return 0;

--- 13 unchanged lines hidden (view full) ---

445 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
446 VrfAccessType::RD_WR)) {
447 return 0;
448 }
449 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
450 return 0;
451 }
452 ready_inst = true;
483 } else if (type == I_SHARED && (IS_OT_READ_LM(ii->opType()) ||
484 IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
453 } else if (type == I_SHARED && ii->isLocalMem()) {
485 // Here for Shared memory instruction
454 // Here for Shared memory instruction
486 if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) {
455 if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) {
487 if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
488 return 0;
489 }
490 }
491
456 if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
457 return 0;
458 }
459 }
460
492 if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
493 IS_OT_HIST_LM(ii->opType())) {
461 if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) {
494 if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
495 return 0;
496 }
497 }
498
499 if (!locMemBusRdy) {
500 // Is there an available VRF->LDS read bus?
501 return 0;

--- 12 unchanged lines hidden (view full) ---

514 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
515 VrfAccessType::RD_WR)) {
516 return 0;
517 }
518 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
519 return 0;
520 }
521 ready_inst = true;
462 if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
463 return 0;
464 }
465 }
466
467 if (!locMemBusRdy) {
468 // Is there an available VRF->LDS read bus?
469 return 0;

--- 12 unchanged lines hidden (view full) ---

482 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
483 VrfAccessType::RD_WR)) {
484 return 0;
485 }
486 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
487 return 0;
488 }
489 ready_inst = true;
522 } else if (type == I_PRIVATE && (IS_OT_READ_PM(ii->opType()) ||
523 IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
524 // Here for Private memory instruction ------------------------ //
525 if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
526 if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
527 return 0;
528 }
529 }
530
531 if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
532 IS_OT_HIST_PM(ii->opType())) {
533 if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) {
534 return 0;
535 }
536 }
537
490 } else if (type == I_FLAT && ii->isFlat()) {
538 if (!glbMemBusRdy) {
539 // Is there an available VRF->Global memory read bus?
540 return 0;
541 }
542
491 if (!glbMemBusRdy) {
492 // Is there an available VRF->Global memory read bus?
493 return 0;
494 }
495
543 if (!glbMemIssueRdy) {
544 // Is wave slot free?
545 return 0;
546 }
547
548 if (!computeUnit->globalMemoryPipe.
549 isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
550 // Can we insert a new request to the Global Mem Request FIFO?
551 return 0;
552 }
553 // can we schedule source & destination operands on the VRF?
554 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii,
555 VrfAccessType::RD_WR)) {
556 return 0;
557 }
558 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) {
559 return 0;
560 }
561 ready_inst = true;
562 } else if (type == I_FLAT && IS_OT_FLAT(ii->opType())) {
563 if (!glbMemBusRdy) {
564 // Is there an available VRF->Global memory read bus?
565 return 0;
566 }
567
568 if (!locMemBusRdy) {
569 // Is there an available VRF->LDS read bus?
570 return 0;
571 }
572
573 if (!glbMemIssueRdy) {
574 // Is wave slot free?
575 return 0;

--- 37 unchanged lines hidden (view full) ---

613void
614Wavefront::updateResources()
615{
616 // Get current instruction
617 GPUDynInstPtr ii = instructionBuffer.front();
618 assert(ii);
619 computeUnit->vrf[simdId]->updateResources(this, ii);
620 // Single precision ALU or Branch or Return or Special instruction
496 if (!locMemBusRdy) {
497 // Is there an available VRF->LDS read bus?
498 return 0;
499 }
500
501 if (!glbMemIssueRdy) {
502 // Is wave slot free?
503 return 0;

--- 37 unchanged lines hidden (view full) ---

541void
542Wavefront::updateResources()
543{
544 // Get current instruction
545 GPUDynInstPtr ii = instructionBuffer.front();
546 assert(ii);
547 computeUnit->vrf[simdId]->updateResources(this, ii);
548 // Single precision ALU or Branch or Return or Special instruction
621 if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
622 ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
549 if (ii->isALU() || ii->isSpecialOp() ||
550 ii->isBranch() ||
623 // FIXME: Kernel argument loads are currently treated as ALU operations
624 // since we don't send memory packets at execution. If we fix that then
625 // we should map them to one of the memory pipelines
551 // FIXME: Kernel argument loads are currently treated as ALU operations
552 // since we don't send memory packets at execution. If we fix that then
553 // we should map them to one of the memory pipelines
626 ii->opType()==Enums::OT_KERN_READ ||
627 ii->opType()==Enums::OT_ARG ||
628 ii->opType()==Enums::OT_RET) {
554 (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
555 ii->isReturn()) {
629 computeUnit->aluPipe[simdId].preset(computeUnit->shader->
630 ticks(computeUnit->spBypassLength()));
631 // this is to enforce a fixed number of cycles per issue slot per SIMD
632 computeUnit->wfWait[simdId].preset(computeUnit->shader->
633 ticks(computeUnit->issuePeriod));
556 computeUnit->aluPipe[simdId].preset(computeUnit->shader->
557 ticks(computeUnit->spBypassLength()));
558 // this is to enforce a fixed number of cycles per issue slot per SIMD
559 computeUnit->wfWait[simdId].preset(computeUnit->shader->
560 ticks(computeUnit->issuePeriod));
634 } else if (ii->opType() == Enums::OT_BARRIER) {
561 } else if (ii->isBarrier()) {
635 computeUnit->wfWait[simdId].preset(computeUnit->shader->
636 ticks(computeUnit->issuePeriod));
562 computeUnit->wfWait[simdId].preset(computeUnit->shader->
563 ticks(computeUnit->issuePeriod));
637 } else if (ii->opType() == Enums::OT_FLAT_READ) {
564 } else if (ii->isLoad() && ii->isFlat()) {
638 assert(Enums::SC_NONE != ii->executedAs());
639 memReqsInPipe++;
640 rdGmReqsInPipe++;
641 if ( Enums::SC_SHARED == ii->executedAs() ) {
642 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
643 preset(computeUnit->shader->ticks(4));
644 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
645 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
646 } else {
647 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
648 preset(computeUnit->shader->ticks(4));
649 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
650 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
651 }
565 assert(Enums::SC_NONE != ii->executedAs());
566 memReqsInPipe++;
567 rdGmReqsInPipe++;
568 if ( Enums::SC_SHARED == ii->executedAs() ) {
569 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
570 preset(computeUnit->shader->ticks(4));
571 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
572 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
573 } else {
574 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
575 preset(computeUnit->shader->ticks(4));
576 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
577 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
578 }
652 } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
579 } else if (ii->isStore() && ii->isFlat()) {
653 assert(Enums::SC_NONE != ii->executedAs());
654 memReqsInPipe++;
655 wrGmReqsInPipe++;
656 if (Enums::SC_SHARED == ii->executedAs()) {
657 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
658 preset(computeUnit->shader->ticks(8));
659 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
660 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
661 } else {
662 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
663 preset(computeUnit->shader->ticks(8));
664 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
665 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
666 }
580 assert(Enums::SC_NONE != ii->executedAs());
581 memReqsInPipe++;
582 wrGmReqsInPipe++;
583 if (Enums::SC_SHARED == ii->executedAs()) {
584 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
585 preset(computeUnit->shader->ticks(8));
586 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
587 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
588 } else {
589 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
590 preset(computeUnit->shader->ticks(8));
591 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
592 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
593 }
667 } else if (IS_OT_READ_GM(ii->opType())) {
594 } else if (ii->isLoad() && ii->isGlobalMem()) {
668 memReqsInPipe++;
669 rdGmReqsInPipe++;
670 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
671 preset(computeUnit->shader->ticks(4));
672 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
673 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
595 memReqsInPipe++;
596 rdGmReqsInPipe++;
597 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
598 preset(computeUnit->shader->ticks(4));
599 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
600 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
674 } else if (IS_OT_WRITE_GM(ii->opType())) {
601 } else if (ii->isStore() && ii->isGlobalMem()) {
675 memReqsInPipe++;
676 wrGmReqsInPipe++;
677 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
678 preset(computeUnit->shader->ticks(8));
679 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
680 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
602 memReqsInPipe++;
603 wrGmReqsInPipe++;
604 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
605 preset(computeUnit->shader->ticks(8));
606 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
607 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
681 } else if (IS_OT_ATOMIC_GM(ii->opType())) {
608 } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
682 memReqsInPipe++;
683 wrGmReqsInPipe++;
684 rdGmReqsInPipe++;
685 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
686 preset(computeUnit->shader->ticks(8));
687 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
688 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
609 memReqsInPipe++;
610 wrGmReqsInPipe++;
611 rdGmReqsInPipe++;
612 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
613 preset(computeUnit->shader->ticks(8));
614 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
615 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
689 } else if (IS_OT_READ_LM(ii->opType())) {
616 } else if (ii->isLoad() && ii->isLocalMem()) {
690 memReqsInPipe++;
691 rdLmReqsInPipe++;
692 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
693 preset(computeUnit->shader->ticks(4));
694 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
695 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
617 memReqsInPipe++;
618 rdLmReqsInPipe++;
619 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
620 preset(computeUnit->shader->ticks(4));
621 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
622 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
696 } else if (IS_OT_WRITE_LM(ii->opType())) {
623 } else if (ii->isStore() && ii->isLocalMem()) {
697 memReqsInPipe++;
698 wrLmReqsInPipe++;
699 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
700 preset(computeUnit->shader->ticks(8));
701 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
702 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
624 memReqsInPipe++;
625 wrLmReqsInPipe++;
626 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
627 preset(computeUnit->shader->ticks(8));
628 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
629 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
703 } else if (IS_OT_ATOMIC_LM(ii->opType())) {
630 } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
704 memReqsInPipe++;
705 wrLmReqsInPipe++;
706 rdLmReqsInPipe++;
707 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
708 preset(computeUnit->shader->ticks(8));
709 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
710 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
631 memReqsInPipe++;
632 wrLmReqsInPipe++;
633 rdLmReqsInPipe++;
634 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
635 preset(computeUnit->shader->ticks(8));
636 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
637 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
711 } else if (IS_OT_READ_PM(ii->opType())) {
712 memReqsInPipe++;
713 rdGmReqsInPipe++;
714 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
715 preset(computeUnit->shader->ticks(4));
716 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
717 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
718 } else if (IS_OT_WRITE_PM(ii->opType())) {
719 memReqsInPipe++;
720 wrGmReqsInPipe++;
721 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
722 preset(computeUnit->shader->ticks(8));
723 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
724 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
725 } else if (IS_OT_ATOMIC_PM(ii->opType())) {
726 memReqsInPipe++;
727 wrGmReqsInPipe++;
728 rdGmReqsInPipe++;
729 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
730 preset(computeUnit->shader->ticks(8));
731 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
732 preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
733 }
734}
735
736void
737Wavefront::exec()
738{
739 // ---- Exit if wavefront is inactive ----------------------------- //
740

--- 5 unchanged lines hidden (view full) ---

746 // Get current instruction
747
748 GPUDynInstPtr ii = instructionBuffer.front();
749
750 const uint32_t old_pc = pc();
751 DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
752 "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
753 ii->disassemble(), old_pc);
638 }
639}
640
641void
642Wavefront::exec()
643{
644 // ---- Exit if wavefront is inactive ----------------------------- //
645

--- 5 unchanged lines hidden (view full) ---

651 // Get current instruction
652
653 GPUDynInstPtr ii = instructionBuffer.front();
654
655 const uint32_t old_pc = pc();
656 DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
657 "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId,
658 ii->disassemble(), old_pc);
754 ii->execute();
659 ii->execute(ii);
755 // access the VRF
756 computeUnit->vrf[simdId]->exec(ii, this);
757 srcRegOpDist.sample(ii->numSrcRegOperands());
758 dstRegOpDist.sample(ii->numDstRegOperands());
759 computeUnit->numInstrExecuted++;
760 computeUnit->execRateDist.sample(computeUnit->totalCycles.value() -
761 computeUnit->lastExecCycle[simdId]);
762 computeUnit->lastExecCycle[simdId] = computeUnit->totalCycles.value();

--- 17 unchanged lines hidden (view full) ---

780 computeUnit->activeLanesPerGMemInstrDist.sample(num_active_lanes);
781 } else if (isLmInstruction(ii)) {
782 computeUnit->activeLanesPerLMemInstrDist.sample(num_active_lanes);
783 }
784 }
785
786 // ---- Update Vector ALU pipeline and other resources ------------------ //
787 // Single precision ALU or Branch or Return or Special instruction
660 // access the VRF
661 computeUnit->vrf[simdId]->exec(ii, this);
662 srcRegOpDist.sample(ii->numSrcRegOperands());
663 dstRegOpDist.sample(ii->numDstRegOperands());
664 computeUnit->numInstrExecuted++;
665 computeUnit->execRateDist.sample(computeUnit->totalCycles.value() -
666 computeUnit->lastExecCycle[simdId]);
667 computeUnit->lastExecCycle[simdId] = computeUnit->totalCycles.value();

--- 17 unchanged lines hidden (view full) ---

685 computeUnit->activeLanesPerGMemInstrDist.sample(num_active_lanes);
686 } else if (isLmInstruction(ii)) {
687 computeUnit->activeLanesPerLMemInstrDist.sample(num_active_lanes);
688 }
689 }
690
691 // ---- Update Vector ALU pipeline and other resources ------------------ //
692 // Single precision ALU or Branch or Return or Special instruction
788 if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL ||
789 ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) ||
693 if (ii->isALU() || ii->isSpecialOp() ||
694 ii->isBranch() ||
790 // FIXME: Kernel argument loads are currently treated as ALU operations
791 // since we don't send memory packets at execution. If we fix that then
792 // we should map them to one of the memory pipelines
695 // FIXME: Kernel argument loads are currently treated as ALU operations
696 // since we don't send memory packets at execution. If we fix that then
697 // we should map them to one of the memory pipelines
793 ii->opType() == Enums::OT_KERN_READ ||
794 ii->opType() == Enums::OT_ARG ||
795 ii->opType() == Enums::OT_RET) {
698 (ii->isKernArgSeg() && ii->isLoad()) ||
699 ii->isArgSeg() ||
700 ii->isReturn()) {
796 computeUnit->aluPipe[simdId].set(computeUnit->shader->
797 ticks(computeUnit->spBypassLength()));
798
799 // this is to enforce a fixed number of cycles per issue slot per SIMD
800 computeUnit->wfWait[simdId].set(computeUnit->shader->
801 ticks(computeUnit->issuePeriod));
701 computeUnit->aluPipe[simdId].set(computeUnit->shader->
702 ticks(computeUnit->spBypassLength()));
703
704 // this is to enforce a fixed number of cycles per issue slot per SIMD
705 computeUnit->wfWait[simdId].set(computeUnit->shader->
706 ticks(computeUnit->issuePeriod));
802 } else if (ii->opType() == Enums::OT_BARRIER) {
707 } else if (ii->isBarrier()) {
803 computeUnit->wfWait[simdId].set(computeUnit->shader->
804 ticks(computeUnit->issuePeriod));
708 computeUnit->wfWait[simdId].set(computeUnit->shader->
709 ticks(computeUnit->issuePeriod));
805 } else if (ii->opType() == Enums::OT_FLAT_READ) {
710 } else if (ii->isLoad() && ii->isFlat()) {
806 assert(Enums::SC_NONE != ii->executedAs());
807
808 if (Enums::SC_SHARED == ii->executedAs()) {
809 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
810 set(computeUnit->shader->ticks(4));
811 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
812 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
813 } else {
814 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
815 set(computeUnit->shader->ticks(4));
816 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
817 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
818 }
711 assert(Enums::SC_NONE != ii->executedAs());
712
713 if (Enums::SC_SHARED == ii->executedAs()) {
714 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
715 set(computeUnit->shader->ticks(4));
716 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
717 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
718 } else {
719 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
720 set(computeUnit->shader->ticks(4));
721 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
722 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
723 }
819 } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
724 } else if (ii->isStore() && ii->isFlat()) {
820 assert(Enums::SC_NONE != ii->executedAs());
821 if (Enums::SC_SHARED == ii->executedAs()) {
822 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
823 set(computeUnit->shader->ticks(8));
824 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
825 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
826 } else {
827 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
828 set(computeUnit->shader->ticks(8));
829 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
830 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
831 }
725 assert(Enums::SC_NONE != ii->executedAs());
726 if (Enums::SC_SHARED == ii->executedAs()) {
727 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
728 set(computeUnit->shader->ticks(8));
729 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
730 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
731 } else {
732 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
733 set(computeUnit->shader->ticks(8));
734 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
735 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
736 }
832 } else if (IS_OT_READ_GM(ii->opType())) {
737 } else if (ii->isLoad() && ii->isGlobalMem()) {
833 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
834 set(computeUnit->shader->ticks(4));
835 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
836 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
738 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
739 set(computeUnit->shader->ticks(4));
740 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
741 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
837 } else if (IS_OT_WRITE_GM(ii->opType())) {
742 } else if (ii->isStore() && ii->isGlobalMem()) {
838 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
839 set(computeUnit->shader->ticks(8));
840 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
841 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
743 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
744 set(computeUnit->shader->ticks(8));
745 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
746 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
842 } else if (IS_OT_ATOMIC_GM(ii->opType())) {
747 } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) {
843 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
844 set(computeUnit->shader->ticks(8));
845 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
846 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
748 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
749 set(computeUnit->shader->ticks(8));
750 computeUnit->wfWait[computeUnit->GlbMemUnitId()].
751 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
847 } else if (IS_OT_READ_LM(ii->opType())) {
752 } else if (ii->isLoad() && ii->isLocalMem()) {
848 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
849 set(computeUnit->shader->ticks(4));
850 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
851 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
753 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
754 set(computeUnit->shader->ticks(4));
755 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
756 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
852 } else if (IS_OT_WRITE_LM(ii->opType())) {
757 } else if (ii->isStore() && ii->isLocalMem()) {
853 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
854 set(computeUnit->shader->ticks(8));
855 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
856 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
758 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
759 set(computeUnit->shader->ticks(8));
760 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
761 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
857 } else if (IS_OT_ATOMIC_LM(ii->opType())) {
762 } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) {
858 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
859 set(computeUnit->shader->ticks(8));
860 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
861 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
862 }
863}
864
865bool

--- 214 unchanged lines hidden ---
763 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
764 set(computeUnit->shader->ticks(8));
765 computeUnit->wfWait[computeUnit->ShrMemUnitId()].
766 set(computeUnit->shader->ticks(computeUnit->issuePeriod));
767 }
768}
769
770bool

--- 214 unchanged lines hidden ---