wavefront.cc (11657:5fad5a37d6fc) | wavefront.cc (11692:e772fdcd3809) |
---|---|
1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 23 unchanged lines hidden (view full) --- 32 * 33 * Author: Lisa Hsu 34 */ 35 36#include "gpu-compute/wavefront.hh" 37 38#include "debug/GPUExec.hh" 39#include "debug/WavefrontStack.hh" | 1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 23 unchanged lines hidden (view full) --- 32 * 33 * Author: Lisa Hsu 34 */ 35 36#include "gpu-compute/wavefront.hh" 37 38#include "debug/GPUExec.hh" 39#include "debug/WavefrontStack.hh" |
40#include "gpu-compute/code_enums.hh" | |
41#include "gpu-compute/compute_unit.hh" 42#include "gpu-compute/gpu_dyn_inst.hh" 43#include "gpu-compute/shader.hh" 44#include "gpu-compute/vector_register_file.hh" 45 46Wavefront* 47WavefrontParams::create() 48{ --- 111 unchanged lines hidden (view full) --- 160 wfDynId = _wf_dyn_id; 161 basePtr = _base_ptr; 162 status = S_RUNNING; 163} 164 165bool 166Wavefront::isGmInstruction(GPUDynInstPtr ii) 167{ | 40#include "gpu-compute/compute_unit.hh" 41#include "gpu-compute/gpu_dyn_inst.hh" 42#include "gpu-compute/shader.hh" 43#include "gpu-compute/vector_register_file.hh" 44 45Wavefront* 46WavefrontParams::create() 47{ --- 111 unchanged lines hidden (view full) --- 159 wfDynId = _wf_dyn_id; 160 basePtr = _base_ptr; 161 status = S_RUNNING; 162} 163 164bool 165Wavefront::isGmInstruction(GPUDynInstPtr ii) 166{ |
168 if (IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) || 169 IS_OT_ATOMIC_PM(ii->opType())) { | 167 if (ii->isGlobalMem() || ii->isFlat()) |
170 return true; | 168 return true; |
171 } | |
172 | 169 |
173 if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) || 174 IS_OT_ATOMIC_GM(ii->opType())) { 175 return true; 176 } 177 178 if (IS_OT_FLAT(ii->opType())) { 179 return true; 180 } 181 | |
182 return false; 183} 184 185bool 186Wavefront::isLmInstruction(GPUDynInstPtr ii) 187{ | 170 return false; 171} 172 173bool 174Wavefront::isLmInstruction(GPUDynInstPtr ii) 175{ |
188 if (IS_OT_READ_LM(ii->opType()) || IS_OT_WRITE_LM(ii->opType()) || 189 IS_OT_ATOMIC_LM(ii->opType())) { | 176 if (ii->isLocalMem()) { |
190 return true; 191 } 192 193 return false; 194} 195 196bool 197Wavefront::isOldestInstALU() 198{ 199 assert(!instructionBuffer.empty()); 200 GPUDynInstPtr ii = instructionBuffer.front(); 201 | 177 return true; 178 } 179 180 return false; 181} 182 183bool 184Wavefront::isOldestInstALU() 185{ 186 assert(!instructionBuffer.empty()); 187 GPUDynInstPtr ii = instructionBuffer.front(); 188 |
202 if (status != S_STOPPED && (ii->opType() == Enums::OT_NOP || 203 ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH || 204 ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || 205 ii->opType() == Enums::OT_KERN_READ)) { | 189 if (status != S_STOPPED && (ii->isNop() || 190 ii->isReturn() || ii->isBranch() || 191 ii->isALU() || (ii->isKernArgSeg() && ii->isLoad()))) { |
206 return true; 207 } 208 209 return false; 210} 211 212bool 213Wavefront::isOldestInstBarrier() 214{ 215 assert(!instructionBuffer.empty()); 216 GPUDynInstPtr ii = instructionBuffer.front(); 217 | 192 return true; 193 } 194 195 return false; 196} 197 198bool 199Wavefront::isOldestInstBarrier() 200{ 201 assert(!instructionBuffer.empty()); 202 GPUDynInstPtr ii = instructionBuffer.front(); 203 |
218 if (status != S_STOPPED && ii->opType() == Enums::OT_BARRIER) { | 204 if (status != S_STOPPED && ii->isBarrier()) { |
219 return true; 220 } 221 222 return false; 223} 224 225bool 226Wavefront::isOldestInstGMem() 227{ 228 assert(!instructionBuffer.empty()); 229 GPUDynInstPtr ii = instructionBuffer.front(); 230 | 205 return true; 206 } 207 208 return false; 209} 210 211bool 212Wavefront::isOldestInstGMem() 213{ 214 assert(!instructionBuffer.empty()); 215 GPUDynInstPtr ii = instructionBuffer.front(); 216 |
231 if (status != S_STOPPED && (IS_OT_READ_GM(ii->opType()) || 232 IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) { 233 | 217 if (status != S_STOPPED && ii->isGlobalMem()) { |
234 return true; 235 } 236 237 return false; 238} 239 240bool 241Wavefront::isOldestInstLMem() 242{ 243 assert(!instructionBuffer.empty()); 244 GPUDynInstPtr ii = instructionBuffer.front(); 245 | 218 return true; 219 } 220 221 return false; 222} 223 224bool 225Wavefront::isOldestInstLMem() 226{ 227 assert(!instructionBuffer.empty()); 228 GPUDynInstPtr ii = instructionBuffer.front(); 229 |
246 if (status != S_STOPPED && (IS_OT_READ_LM(ii->opType()) || 247 IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) { 248 | 230 if (status != S_STOPPED && ii->isLocalMem()) { |
249 return true; 250 } 251 252 return false; 253} 254 255bool 256Wavefront::isOldestInstPrivMem() 257{ 258 assert(!instructionBuffer.empty()); 259 GPUDynInstPtr ii = instructionBuffer.front(); 260 | 231 return true; 232 } 233 234 return false; 235} 236 237bool 238Wavefront::isOldestInstPrivMem() 239{ 240 assert(!instructionBuffer.empty()); 241 GPUDynInstPtr ii = instructionBuffer.front(); 242 |
261 if (status != S_STOPPED && (IS_OT_READ_PM(ii->opType()) || 262 IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) { 263 | 243 if (status != S_STOPPED && ii->isPrivateSeg()) { |
264 return true; 265 } 266 267 return false; 268} 269 270bool 271Wavefront::isOldestInstFlatMem() 272{ 273 assert(!instructionBuffer.empty()); 274 GPUDynInstPtr ii = instructionBuffer.front(); 275 | 244 return true; 245 } 246 247 return false; 248} 249 250bool 251Wavefront::isOldestInstFlatMem() 252{ 253 assert(!instructionBuffer.empty()); 254 GPUDynInstPtr ii = instructionBuffer.front(); 255 |
276 if (status != S_STOPPED && IS_OT_FLAT(ii->opType())) { 277 | 256 if (status != S_STOPPED && ii->isFlat()) { |
278 return true; 279 } 280 281 return false; 282} 283 284// Return true if the Wavefront's instruction 285// buffer has branch instruction. 286bool 287Wavefront::instructionBufferHasBranch() 288{ 289 for (auto it : instructionBuffer) { 290 GPUDynInstPtr ii = it; 291 | 257 return true; 258 } 259 260 return false; 261} 262 263// Return true if the Wavefront's instruction 264// buffer has branch instruction. 265bool 266Wavefront::instructionBufferHasBranch() 267{ 268 for (auto it : instructionBuffer) { 269 GPUDynInstPtr ii = it; 270 |
292 if (ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH) { | 271 if (ii->isReturn() || ii->isBranch()) { |
293 return true; 294 } 295 } 296 297 return false; 298} 299 300// Remap HSAIL register to physical VGPR. --- 65 unchanged lines hidden (view full) --- 366 locMemIssueRdy = true; 367 } 368 } 369 370 // The following code is very error prone and the entire process for 371 // checking readiness will be fixed eventually. In the meantime, let's 372 // make sure that we do not silently let an instruction type slip 373 // through this logic and always return not ready. | 272 return true; 273 } 274 } 275 276 return false; 277} 278 279// Remap HSAIL register to physical VGPR. --- 65 unchanged lines hidden (view full) --- 345 locMemIssueRdy = true; 346 } 347 } 348 349 // The following code is very error prone and the entire process for 350 // checking readiness will be fixed eventually. In the meantime, let's 351 // make sure that we do not silently let an instruction type slip 352 // through this logic and always return not ready. |
374 if (!(ii->opType() == Enums::OT_BARRIER || ii->opType() == Enums::OT_NOP || 375 ii->opType() == Enums::OT_RET || ii->opType() == Enums::OT_BRANCH || 376 ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || 377 ii->opType() == Enums::OT_KERN_READ || 378 ii->opType() == Enums::OT_ARG || 379 IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) || 380 IS_OT_ATOMIC_GM(ii->opType()) || IS_OT_READ_LM(ii->opType()) || 381 IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) || 382 IS_OT_READ_PM(ii->opType()) || IS_OT_WRITE_PM(ii->opType()) || 383 IS_OT_ATOMIC_PM(ii->opType()) || IS_OT_FLAT(ii->opType()))) { | 353 if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() || 354 ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() || 355 ii->isMemFence() || ii->isFlat())) { |
384 panic("next instruction: %s is of unknown type\n", ii->disassemble()); 385 } 386 387 DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n", 388 computeUnit->cu_id, simdId, wfSlotId, ii->disassemble()); 389 | 356 panic("next instruction: %s is of unknown type\n", ii->disassemble()); 357 } 358 359 DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Read for Inst : %s\n", 360 computeUnit->cu_id, simdId, wfSlotId, ii->disassemble()); 361 |
390 if (type == I_ALU && ii->opType() == Enums::OT_BARRIER) { | 362 if (type == I_ALU && ii->isBarrier()) { |
391 // Here for ALU instruction (barrier) 392 if (!computeUnit->wfWait[simdId].prerdy()) { 393 // Is wave slot free? 394 return 0; 395 } 396 397 // Are there in pipe or outstanding memory requests? 398 if ((outstandingReqs + memReqsInPipe) > 0) { 399 return 0; 400 } 401 402 ready_inst = true; | 363 // Here for ALU instruction (barrier) 364 if (!computeUnit->wfWait[simdId].prerdy()) { 365 // Is wave slot free? 366 return 0; 367 } 368 369 // Are there in pipe or outstanding memory requests? 370 if ((outstandingReqs + memReqsInPipe) > 0) { 371 return 0; 372 } 373 374 ready_inst = true; |
403 } else if (type == I_ALU && ii->opType() == Enums::OT_NOP) { | 375 } else if (type == I_ALU && ii->isNop()) { |
404 // Here for ALU instruction (nop) 405 if (!computeUnit->wfWait[simdId].prerdy()) { 406 // Is wave slot free? 407 return 0; 408 } 409 410 ready_inst = true; | 376 // Here for ALU instruction (nop) 377 if (!computeUnit->wfWait[simdId].prerdy()) { 378 // Is wave slot free? 379 return 0; 380 } 381 382 ready_inst = true; |
411 } else if (type == I_ALU && ii->opType() == Enums::OT_RET) { | 383 } else if (type == I_ALU && ii->isReturn()) { |
412 // Here for ALU instruction (return) 413 if (!computeUnit->wfWait[simdId].prerdy()) { 414 // Is wave slot free? 415 return 0; 416 } 417 418 // Are there in pipe or outstanding memory requests? 419 if ((outstandingReqs + memReqsInPipe) > 0) { 420 return 0; 421 } 422 423 ready_inst = true; | 384 // Here for ALU instruction (return) 385 if (!computeUnit->wfWait[simdId].prerdy()) { 386 // Is wave slot free? 387 return 0; 388 } 389 390 // Are there in pipe or outstanding memory requests? 391 if ((outstandingReqs + memReqsInPipe) > 0) { 392 return 0; 393 } 394 395 ready_inst = true; |
424 } else if (type == I_ALU && (ii->opType() == Enums::OT_BRANCH || 425 ii->opType() == Enums::OT_ALU || IS_OT_LDAS(ii->opType()) || 426 ii->opType() == Enums::OT_KERN_READ || 427 ii->opType() == Enums::OT_ARG)) { | 396 } else if (type == I_ALU && (ii->isBranch() || 397 ii->isALU() || 398 (ii->isKernArgSeg() && ii->isLoad()) || 399 ii->isArgSeg())) { |
428 // Here for ALU instruction (all others) 429 if (!computeUnit->wfWait[simdId].prerdy()) { 430 // Is alu slot free? 431 return 0; 432 } 433 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 434 VrfAccessType::RD_WR)) { 435 return 0; 436 } 437 438 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 439 return 0; 440 } 441 ready_inst = true; | 400 // Here for ALU instruction (all others) 401 if (!computeUnit->wfWait[simdId].prerdy()) { 402 // Is alu slot free? 403 return 0; 404 } 405 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 406 VrfAccessType::RD_WR)) { 407 return 0; 408 } 409 410 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 411 return 0; 412 } 413 ready_inst = true; |
442 } else if (type == I_GLOBAL && (IS_OT_READ_GM(ii->opType()) || 443 IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()))) { | 414 } else if (type == I_GLOBAL && ii->isGlobalMem()) { |
444 // Here Global memory instruction | 415 // Here Global memory instruction |
445 if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) { | 416 if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) { |
446 // Are there in pipe or outstanding global memory write requests? 447 if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { 448 return 0; 449 } 450 } 451 | 417 // Are there in pipe or outstanding global memory write requests? 418 if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { 419 return 0; 420 } 421 } 422 |
452 if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) || 453 IS_OT_HIST_GM(ii->opType())) { | 423 if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) { |
454 // Are there in pipe or outstanding global memory read requests? 455 if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) 456 return 0; 457 } 458 459 if (!glbMemIssueRdy) { 460 // Is WV issue slot free? 461 return 0; --- 13 unchanged lines hidden (view full) --- 475 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 476 VrfAccessType::RD_WR)) { 477 return 0; 478 } 479 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 480 return 0; 481 } 482 ready_inst = true; | 424 // Are there in pipe or outstanding global memory read requests? 425 if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) 426 return 0; 427 } 428 429 if (!glbMemIssueRdy) { 430 // Is WV issue slot free? 431 return 0; --- 13 unchanged lines hidden (view full) --- 445 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 446 VrfAccessType::RD_WR)) { 447 return 0; 448 } 449 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 450 return 0; 451 } 452 ready_inst = true; |
483 } else if (type == I_SHARED && (IS_OT_READ_LM(ii->opType()) || 484 IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) { | 453 } else if (type == I_SHARED && ii->isLocalMem()) { |
485 // Here for Shared memory instruction | 454 // Here for Shared memory instruction |
486 if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) { | 455 if (ii->isLoad() || ii->isAtomic() || ii->isMemFence()) { |
487 if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) { 488 return 0; 489 } 490 } 491 | 456 if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) { 457 return 0; 458 } 459 } 460 |
492 if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) || 493 IS_OT_HIST_LM(ii->opType())) { | 461 if (ii->isStore() || ii->isAtomic() || ii->isMemFence()) { |
494 if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) { 495 return 0; 496 } 497 } 498 499 if (!locMemBusRdy) { 500 // Is there an available VRF->LDS read bus? 501 return 0; --- 12 unchanged lines hidden (view full) --- 514 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 515 VrfAccessType::RD_WR)) { 516 return 0; 517 } 518 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 519 return 0; 520 } 521 ready_inst = true; | 462 if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) { 463 return 0; 464 } 465 } 466 467 if (!locMemBusRdy) { 468 // Is there an available VRF->LDS read bus? 469 return 0; --- 12 unchanged lines hidden (view full) --- 482 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 483 VrfAccessType::RD_WR)) { 484 return 0; 485 } 486 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 487 return 0; 488 } 489 ready_inst = true; |
522 } else if (type == I_PRIVATE && (IS_OT_READ_PM(ii->opType()) || 523 IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) { 524 // Here for Private memory instruction ------------------------ // 525 if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) { 526 if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) { 527 return 0; 528 } 529 } 530 531 if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) || 532 IS_OT_HIST_PM(ii->opType())) { 533 if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) { 534 return 0; 535 } 536 } 537 | 490 } else if (type == I_FLAT && ii->isFlat()) { |
538 if (!glbMemBusRdy) { 539 // Is there an available VRF->Global memory read bus? 540 return 0; 541 } 542 | 491 if (!glbMemBusRdy) { 492 // Is there an available VRF->Global memory read bus? 493 return 0; 494 } 495 |
543 if (!glbMemIssueRdy) { 544 // Is wave slot free? 545 return 0; 546 } 547 548 if (!computeUnit->globalMemoryPipe. 549 isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) { 550 // Can we insert a new request to the Global Mem Request FIFO? 551 return 0; 552 } 553 // can we schedule source & destination operands on the VRF? 554 if (!computeUnit->vrf[simdId]->vrfOperandAccessReady(this, ii, 555 VrfAccessType::RD_WR)) { 556 return 0; 557 } 558 if (!computeUnit->vrf[simdId]->operandsReady(this, ii)) { 559 return 0; 560 } 561 ready_inst = true; 562 } else if (type == I_FLAT && IS_OT_FLAT(ii->opType())) { 563 if (!glbMemBusRdy) { 564 // Is there an available VRF->Global memory read bus? 565 return 0; 566 } 567 | |
568 if (!locMemBusRdy) { 569 // Is there an available VRF->LDS read bus? 570 return 0; 571 } 572 573 if (!glbMemIssueRdy) { 574 // Is wave slot free? 575 return 0; --- 37 unchanged lines hidden (view full) --- 613void 614Wavefront::updateResources() 615{ 616 // Get current instruction 617 GPUDynInstPtr ii = instructionBuffer.front(); 618 assert(ii); 619 computeUnit->vrf[simdId]->updateResources(this, ii); 620 // Single precision ALU or Branch or Return or Special instruction | 496 if (!locMemBusRdy) { 497 // Is there an available VRF->LDS read bus? 498 return 0; 499 } 500 501 if (!glbMemIssueRdy) { 502 // Is wave slot free? 503 return 0; --- 37 unchanged lines hidden (view full) --- 541void 542Wavefront::updateResources() 543{ 544 // Get current instruction 545 GPUDynInstPtr ii = instructionBuffer.front(); 546 assert(ii); 547 computeUnit->vrf[simdId]->updateResources(this, ii); 548 // Single precision ALU or Branch or Return or Special instruction |
621 if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL || 622 ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) || | 549 if (ii->isALU() || ii->isSpecialOp() || 550 ii->isBranch() || |
623 // FIXME: Kernel argument loads are currently treated as ALU operations 624 // since we don't send memory packets at execution. If we fix that then 625 // we should map them to one of the memory pipelines | 551 // FIXME: Kernel argument loads are currently treated as ALU operations 552 // since we don't send memory packets at execution. If we fix that then 553 // we should map them to one of the memory pipelines |
626 ii->opType()==Enums::OT_KERN_READ || 627 ii->opType()==Enums::OT_ARG || 628 ii->opType()==Enums::OT_RET) { | 554 (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() || 555 ii->isReturn()) { |
629 computeUnit->aluPipe[simdId].preset(computeUnit->shader-> 630 ticks(computeUnit->spBypassLength())); 631 // this is to enforce a fixed number of cycles per issue slot per SIMD 632 computeUnit->wfWait[simdId].preset(computeUnit->shader-> 633 ticks(computeUnit->issuePeriod)); | 556 computeUnit->aluPipe[simdId].preset(computeUnit->shader-> 557 ticks(computeUnit->spBypassLength())); 558 // this is to enforce a fixed number of cycles per issue slot per SIMD 559 computeUnit->wfWait[simdId].preset(computeUnit->shader-> 560 ticks(computeUnit->issuePeriod)); |
634 } else if (ii->opType() == Enums::OT_BARRIER) { | 561 } else if (ii->isBarrier()) { |
635 computeUnit->wfWait[simdId].preset(computeUnit->shader-> 636 ticks(computeUnit->issuePeriod)); | 562 computeUnit->wfWait[simdId].preset(computeUnit->shader-> 563 ticks(computeUnit->issuePeriod)); |
637 } else if (ii->opType() == Enums::OT_FLAT_READ) { | 564 } else if (ii->isLoad() && ii->isFlat()) { |
638 assert(Enums::SC_NONE != ii->executedAs()); 639 memReqsInPipe++; 640 rdGmReqsInPipe++; 641 if ( Enums::SC_SHARED == ii->executedAs() ) { 642 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 643 preset(computeUnit->shader->ticks(4)); 644 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 645 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 646 } else { 647 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 648 preset(computeUnit->shader->ticks(4)); 649 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 650 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 651 } | 565 assert(Enums::SC_NONE != ii->executedAs()); 566 memReqsInPipe++; 567 rdGmReqsInPipe++; 568 if ( Enums::SC_SHARED == ii->executedAs() ) { 569 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 570 preset(computeUnit->shader->ticks(4)); 571 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 572 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 573 } else { 574 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 575 preset(computeUnit->shader->ticks(4)); 576 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 577 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 578 } |
652 } else if (ii->opType() == Enums::OT_FLAT_WRITE) { | 579 } else if (ii->isStore() && ii->isFlat()) { |
653 assert(Enums::SC_NONE != ii->executedAs()); 654 memReqsInPipe++; 655 wrGmReqsInPipe++; 656 if (Enums::SC_SHARED == ii->executedAs()) { 657 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 658 preset(computeUnit->shader->ticks(8)); 659 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 660 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 661 } else { 662 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 663 preset(computeUnit->shader->ticks(8)); 664 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 665 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 666 } | 580 assert(Enums::SC_NONE != ii->executedAs()); 581 memReqsInPipe++; 582 wrGmReqsInPipe++; 583 if (Enums::SC_SHARED == ii->executedAs()) { 584 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 585 preset(computeUnit->shader->ticks(8)); 586 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 587 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 588 } else { 589 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 590 preset(computeUnit->shader->ticks(8)); 591 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 592 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 593 } |
667 } else if (IS_OT_READ_GM(ii->opType())) { | 594 } else if (ii->isLoad() && ii->isGlobalMem()) { |
668 memReqsInPipe++; 669 rdGmReqsInPipe++; 670 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 671 preset(computeUnit->shader->ticks(4)); 672 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 673 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 595 memReqsInPipe++; 596 rdGmReqsInPipe++; 597 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 598 preset(computeUnit->shader->ticks(4)); 599 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 600 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
674 } else if (IS_OT_WRITE_GM(ii->opType())) { | 601 } else if (ii->isStore() && ii->isGlobalMem()) { |
675 memReqsInPipe++; 676 wrGmReqsInPipe++; 677 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 678 preset(computeUnit->shader->ticks(8)); 679 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 680 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 602 memReqsInPipe++; 603 wrGmReqsInPipe++; 604 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 605 preset(computeUnit->shader->ticks(8)); 606 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 607 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
681 } else if (IS_OT_ATOMIC_GM(ii->opType())) { | 608 } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) { |
682 memReqsInPipe++; 683 wrGmReqsInPipe++; 684 rdGmReqsInPipe++; 685 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 686 preset(computeUnit->shader->ticks(8)); 687 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 688 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 609 memReqsInPipe++; 610 wrGmReqsInPipe++; 611 rdGmReqsInPipe++; 612 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 613 preset(computeUnit->shader->ticks(8)); 614 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 615 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
689 } else if (IS_OT_READ_LM(ii->opType())) { | 616 } else if (ii->isLoad() && ii->isLocalMem()) { |
690 memReqsInPipe++; 691 rdLmReqsInPipe++; 692 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 693 preset(computeUnit->shader->ticks(4)); 694 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 695 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 617 memReqsInPipe++; 618 rdLmReqsInPipe++; 619 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 620 preset(computeUnit->shader->ticks(4)); 621 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 622 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
696 } else if (IS_OT_WRITE_LM(ii->opType())) { | 623 } else if (ii->isStore() && ii->isLocalMem()) { |
697 memReqsInPipe++; 698 wrLmReqsInPipe++; 699 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 700 preset(computeUnit->shader->ticks(8)); 701 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 702 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 624 memReqsInPipe++; 625 wrLmReqsInPipe++; 626 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 627 preset(computeUnit->shader->ticks(8)); 628 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 629 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
703 } else if (IS_OT_ATOMIC_LM(ii->opType())) { | 630 } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) { |
704 memReqsInPipe++; 705 wrLmReqsInPipe++; 706 rdLmReqsInPipe++; 707 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 708 preset(computeUnit->shader->ticks(8)); 709 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 710 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 631 memReqsInPipe++; 632 wrLmReqsInPipe++; 633 rdLmReqsInPipe++; 634 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 635 preset(computeUnit->shader->ticks(8)); 636 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 637 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
711 } else if (IS_OT_READ_PM(ii->opType())) { 712 memReqsInPipe++; 713 rdGmReqsInPipe++; 714 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 715 preset(computeUnit->shader->ticks(4)); 716 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 717 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 718 } else if (IS_OT_WRITE_PM(ii->opType())) { 719 memReqsInPipe++; 720 wrGmReqsInPipe++; 721 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 722 preset(computeUnit->shader->ticks(8)); 723 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 724 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); 725 } else if (IS_OT_ATOMIC_PM(ii->opType())) { 726 memReqsInPipe++; 727 wrGmReqsInPipe++; 728 rdGmReqsInPipe++; 729 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 730 preset(computeUnit->shader->ticks(8)); 731 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 732 preset(computeUnit->shader->ticks(computeUnit->issuePeriod)); | |
733 } 734} 735 736void 737Wavefront::exec() 738{ 739 // ---- Exit if wavefront is inactive ----------------------------- // 740 --- 5 unchanged lines hidden (view full) --- 746 // Get current instruction 747 748 GPUDynInstPtr ii = instructionBuffer.front(); 749 750 const uint32_t old_pc = pc(); 751 DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s " 752 "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId, 753 ii->disassemble(), old_pc); | 638 } 639} 640 641void 642Wavefront::exec() 643{ 644 // ---- Exit if wavefront is inactive ----------------------------- // 645 --- 5 unchanged lines hidden (view full) --- 651 // Get current instruction 652 653 GPUDynInstPtr ii = instructionBuffer.front(); 654 655 const uint32_t old_pc = pc(); 656 DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s " 657 "(pc: %i)\n", computeUnit->cu_id, simdId, wfSlotId, wfDynId, 658 ii->disassemble(), old_pc); |
754 ii->execute(); | 659 ii->execute(ii); |
755 // access the VRF 756 computeUnit->vrf[simdId]->exec(ii, this); 757 srcRegOpDist.sample(ii->numSrcRegOperands()); 758 dstRegOpDist.sample(ii->numDstRegOperands()); 759 computeUnit->numInstrExecuted++; 760 computeUnit->execRateDist.sample(computeUnit->totalCycles.value() - 761 computeUnit->lastExecCycle[simdId]); 762 computeUnit->lastExecCycle[simdId] = computeUnit->totalCycles.value(); --- 17 unchanged lines hidden (view full) --- 780 computeUnit->activeLanesPerGMemInstrDist.sample(num_active_lanes); 781 } else if (isLmInstruction(ii)) { 782 computeUnit->activeLanesPerLMemInstrDist.sample(num_active_lanes); 783 } 784 } 785 786 // ---- Update Vector ALU pipeline and other resources ------------------ // 787 // Single precision ALU or Branch or Return or Special instruction | 660 // access the VRF 661 computeUnit->vrf[simdId]->exec(ii, this); 662 srcRegOpDist.sample(ii->numSrcRegOperands()); 663 dstRegOpDist.sample(ii->numDstRegOperands()); 664 computeUnit->numInstrExecuted++; 665 computeUnit->execRateDist.sample(computeUnit->totalCycles.value() - 666 computeUnit->lastExecCycle[simdId]); 667 computeUnit->lastExecCycle[simdId] = computeUnit->totalCycles.value(); --- 17 unchanged lines hidden (view full) --- 685 computeUnit->activeLanesPerGMemInstrDist.sample(num_active_lanes); 686 } else if (isLmInstruction(ii)) { 687 computeUnit->activeLanesPerLMemInstrDist.sample(num_active_lanes); 688 } 689 } 690 691 // ---- Update Vector ALU pipeline and other resources ------------------ // 692 // Single precision ALU or Branch or Return or Special instruction |
788 if (ii->opType() == Enums::OT_ALU || ii->opType() == Enums::OT_SPECIAL || 789 ii->opType() == Enums::OT_BRANCH || IS_OT_LDAS(ii->opType()) || | 693 if (ii->isALU() || ii->isSpecialOp() || 694 ii->isBranch() || |
790 // FIXME: Kernel argument loads are currently treated as ALU operations 791 // since we don't send memory packets at execution. If we fix that then 792 // we should map them to one of the memory pipelines | 695 // FIXME: Kernel argument loads are currently treated as ALU operations 696 // since we don't send memory packets at execution. If we fix that then 697 // we should map them to one of the memory pipelines |
793 ii->opType() == Enums::OT_KERN_READ || 794 ii->opType() == Enums::OT_ARG || 795 ii->opType() == Enums::OT_RET) { | 698 (ii->isKernArgSeg() && ii->isLoad()) || 699 ii->isArgSeg() || 700 ii->isReturn()) { |
796 computeUnit->aluPipe[simdId].set(computeUnit->shader-> 797 ticks(computeUnit->spBypassLength())); 798 799 // this is to enforce a fixed number of cycles per issue slot per SIMD 800 computeUnit->wfWait[simdId].set(computeUnit->shader-> 801 ticks(computeUnit->issuePeriod)); | 701 computeUnit->aluPipe[simdId].set(computeUnit->shader-> 702 ticks(computeUnit->spBypassLength())); 703 704 // this is to enforce a fixed number of cycles per issue slot per SIMD 705 computeUnit->wfWait[simdId].set(computeUnit->shader-> 706 ticks(computeUnit->issuePeriod)); |
802 } else if (ii->opType() == Enums::OT_BARRIER) { | 707 } else if (ii->isBarrier()) { |
803 computeUnit->wfWait[simdId].set(computeUnit->shader-> 804 ticks(computeUnit->issuePeriod)); | 708 computeUnit->wfWait[simdId].set(computeUnit->shader-> 709 ticks(computeUnit->issuePeriod)); |
805 } else if (ii->opType() == Enums::OT_FLAT_READ) { | 710 } else if (ii->isLoad() && ii->isFlat()) { |
806 assert(Enums::SC_NONE != ii->executedAs()); 807 808 if (Enums::SC_SHARED == ii->executedAs()) { 809 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 810 set(computeUnit->shader->ticks(4)); 811 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 812 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 813 } else { 814 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 815 set(computeUnit->shader->ticks(4)); 816 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 817 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 818 } | 711 assert(Enums::SC_NONE != ii->executedAs()); 712 713 if (Enums::SC_SHARED == ii->executedAs()) { 714 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 715 set(computeUnit->shader->ticks(4)); 716 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 717 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 718 } else { 719 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 720 set(computeUnit->shader->ticks(4)); 721 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 722 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 723 } |
819 } else if (ii->opType() == Enums::OT_FLAT_WRITE) { | 724 } else if (ii->isStore() && ii->isFlat()) { |
820 assert(Enums::SC_NONE != ii->executedAs()); 821 if (Enums::SC_SHARED == ii->executedAs()) { 822 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 823 set(computeUnit->shader->ticks(8)); 824 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 825 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 826 } else { 827 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 828 set(computeUnit->shader->ticks(8)); 829 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 830 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 831 } | 725 assert(Enums::SC_NONE != ii->executedAs()); 726 if (Enums::SC_SHARED == ii->executedAs()) { 727 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 728 set(computeUnit->shader->ticks(8)); 729 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 730 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 731 } else { 732 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 733 set(computeUnit->shader->ticks(8)); 734 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 735 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 736 } |
832 } else if (IS_OT_READ_GM(ii->opType())) { | 737 } else if (ii->isLoad() && ii->isGlobalMem()) { |
833 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 834 set(computeUnit->shader->ticks(4)); 835 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 836 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 738 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 739 set(computeUnit->shader->ticks(4)); 740 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 741 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
837 } else if (IS_OT_WRITE_GM(ii->opType())) { | 742 } else if (ii->isStore() && ii->isGlobalMem()) { |
838 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 839 set(computeUnit->shader->ticks(8)); 840 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 841 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 743 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 744 set(computeUnit->shader->ticks(8)); 745 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 746 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
842 } else if (IS_OT_ATOMIC_GM(ii->opType())) { | 747 } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isGlobalMem()) { |
843 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 844 set(computeUnit->shader->ticks(8)); 845 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 846 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 748 computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()]. 749 set(computeUnit->shader->ticks(8)); 750 computeUnit->wfWait[computeUnit->GlbMemUnitId()]. 751 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
847 } else if (IS_OT_READ_LM(ii->opType())) { | 752 } else if (ii->isLoad() && ii->isLocalMem()) { |
848 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 849 set(computeUnit->shader->ticks(4)); 850 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 851 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 753 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 754 set(computeUnit->shader->ticks(4)); 755 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 756 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
852 } else if (IS_OT_WRITE_LM(ii->opType())) { | 757 } else if (ii->isStore() && ii->isLocalMem()) { |
853 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 854 set(computeUnit->shader->ticks(8)); 855 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 856 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); | 758 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 759 set(computeUnit->shader->ticks(8)); 760 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 761 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); |
857 } else if (IS_OT_ATOMIC_LM(ii->opType())) { | 762 } else if ((ii->isAtomic() || ii->isMemFence()) && ii->isLocalMem()) { |
858 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 859 set(computeUnit->shader->ticks(8)); 860 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 861 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 862 } 863} 864 865bool --- 214 unchanged lines hidden --- | 763 computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()]. 764 set(computeUnit->shader->ticks(8)); 765 computeUnit->wfWait[computeUnit->ShrMemUnitId()]. 766 set(computeUnit->shader->ticks(computeUnit->issuePeriod)); 767 } 768} 769 770bool --- 214 unchanged lines hidden --- |