src/gpu-compute/compute_unit.cc

323         GPUDynInstPtr gpuDynInst =
327         gpuDynInst->useContinuation = false;
328         injectGlobalMemFence(gpuDynInst, true);
628     GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
633             computeUnit->wfList[gpuDynInst->simdId][gpuDynInst->wfSlotId];
648                 computeUnit->cu_id, gpuDynInst->simdId,
649                 gpuDynInst->wfSlotId, w->barrierCnt);
651         if (gpuDynInst->useContinuation) {
652             assert(!gpuDynInst->isNoScope());
653             gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
654                                            gpuDynInst);
661         if (gpuDynInst->useContinuation) {
662             assert(!gpuDynInst->isNoScope());
663             gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
664                                            gpuDynInst);
676             computeUnit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
693         GPUDynInstPtr gpuDynInst M5_VAR_USED = retries.front().second;
695                 computeUnit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
743 ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
751     pkt->req->setPC(gpuDynInst->wavefront()->pc());
753     pkt->req->setReqInstSeqNum(gpuDynInst->seqNum());
782                       cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId, vaddr);
790                           cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
797         pkt->senderState = new DTLBPort::SenderState(gpuDynInst, index);
839             pkt->senderState = new ComputeUnit::DataPort::SenderState(gpuDynInst,
842             gpuDynInst->memStatusVector[pkt->getAddr()].push_back(index);
843             gpuDynInst->tlbHitLevel[index] = hit_level;
852                     "scheduled\n", cu_id, gpuDynInst->simdId,
853                     gpuDynInst->wfSlotId, index, pkt->req->getPaddr());
860                     "failed!\n", cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
872                     "failed!\n", cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
879                    cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId, tmp_vaddr);
883             gpuDynInst->statusBitVector = VectorMask(0);
885             gpuDynInst->statusBitVector &= (~(1ll << index));
908                 gpuDynInst->simdId, gpuDynInst->wfSlotId, index,
923 ComputeUnit::sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
930     pkt->senderState = new ComputeUnit::DataPort::SenderState(gpuDynInst, index,
934             cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId, index,
941 ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch,
944     assert(gpuDynInst->isGlobalSeg());
948             0, 0, 0, 0, masterId(), 0, gpuDynInst->wfDynId);
959     gpuDynInst->setRequestFlags(req, kernelLaunch);
969         new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr);
972     sendSyncRequest(gpuDynInst, 0, pkt);
981     GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
984     assert(gpuDynInst);
987             compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
993         int index = gpuDynInst->memStatusVector[paddr].back();
998         gpuDynInst->memStatusVector[paddr].pop_back();
999         gpuDynInst->pAddr = pkt->req->getPaddr();
1003             if (gpuDynInst->n_reg <= MAX_REGS_FOR_NON_VEC_MEM_INST) {
1004                 gpuDynInst->statusBitVector &= (~(1ULL << index));
1006                 assert(gpuDynInst->statusVector[index] > 0);
1007                 gpuDynInst->statusVector[index]--;
1009                 if (!gpuDynInst->statusVector[index])
1010                     gpuDynInst->statusBitVector &= (~(1ULL << index));
1014                     gpuDynInst->statusBitVector);
1016             if (gpuDynInst->statusBitVector == VectorMask(0)) {
1017                 auto iter = gpuDynInst->memStatusVector.begin();
1018                 auto end = gpuDynInst->memStatusVector.end();
1025                 gpuDynInst->memStatusVector.clear();
1027                 if (gpuDynInst->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
1028                     gpuDynInst->statusVector.clear();
1030                 compute_unit->globalMemoryPipe.handleResponse(gpuDynInst);
1033                         compute_unit->cu_id, gpuDynInst->simdId,
1034                         gpuDynInst->wfSlotId);
1040                 if (gpuDynInst->useContinuation) {
1041                     assert(!gpuDynInst->isNoScope());
1042                     gpuDynInst->execContinuation(
1043                         gpuDynInst->staticInstruction(),
1044                         gpuDynInst);
1049         gpuDynInst->statusBitVector = VectorMask(0);
1051         if (gpuDynInst->useContinuation) {
1052             assert(!gpuDynInst->isNoScope());
1053             gpuDynInst->execContinuation(gpuDynInst->staticInstruction(),
1054                                          gpuDynInst);
1113     GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
1116     gpuDynInst->memStatusVector[line].push_back(mp_index);
1117     gpuDynInst->tlbHitLevel[mp_index] = hit_level;
1133         int simdId = gpuDynInst->simdId;
1134         int wfSlotId = gpuDynInst->wfSlotId;
1219             new ComputeUnit::DataPort::SenderState(gpuDynInst, mp_index,
1228             computeUnit->cu_id, gpuDynInst->simdId,
1229             gpuDynInst->wfSlotId, mp_index, new_pkt->req->getPaddr());
1257     GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
1261         retries.push_back(std::make_pair(pkt, gpuDynInst));
1265                 compute_unit->cu_id, gpuDynInst->simdId,
1266                 gpuDynInst->wfSlotId, index,
1271                 compute_unit->cu_id, gpuDynInst->simdId,
1272                 gpuDynInst->wfSlotId, index,
1669 ComputeUnit::updateInstStats(GPUDynInstPtr gpuDynInst)
1671     if (gpuDynInst->isScalar()) {
1672         if (gpuDynInst->isALU() && !gpuDynInst->isWaitcnt()) {
1675         } else if (gpuDynInst->isLoad()) {
1677         } else if (gpuDynInst->isStore()) {
1681         if (gpuDynInst->isALU()) {
1684             threadCyclesVALU += gpuDynInst->wavefront()->execMask().count();
1685         } else if (gpuDynInst->isFlat()) {
1686             if (gpuDynInst->isLocalMem()) {
1691         } else if (gpuDynInst->isLocalMem()) {
1693         } else if (gpuDynInst->isLoad()) {
1695         } else if (gpuDynInst->isStore()) {
1797 ComputeUnit::sendToLds(GPUDynInstPtr gpuDynInst)
1808     newPacket->senderState = new LDSPort::SenderState(gpuDynInst);
1824     GPUDynInstPtr gpuDynInst = senderState->getMemInst();
1829     computeUnit->localMemoryPipe.getLMRespFIFO().push(gpuDynInst);
1845     GPUDynInstPtr gpuDynInst M5_VAR_USED = sender_state->getMemInst();
1853                         computeUnit->cu_id, gpuDynInst->simdId,
1854                         gpuDynInst->wfSlotId);
1863                 computeUnit->cu_id, gpuDynInst->simdId,
1864                 gpuDynInst->wfSlotId, pkt->req->getPaddr());
1868                 computeUnit->cu_id, gpuDynInst->simdId,
1869                 gpuDynInst->wfSlotId, pkt->req->getPaddr());