1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Steve Reinhardt 34 */ 35 36#include "arch/hsail/insts/decl.hh" 37#include "debug/GPUExec.hh" 38#include "gpu-compute/dispatcher.hh" 39#include "gpu-compute/simple_pool_manager.hh" 40 41namespace HsailISA 42{ 43 template<> const char *B1::label = "b1"; 44 template<> const char *B8::label = "b8"; 45 template<> const char *B16::label = "b16"; 46 template<> const char *B32::label = "b32"; 47 template<> const char *B64::label = "b64"; 48 49 template<> const char *S8::label = "s8"; 50 template<> const char *S16::label = "s16"; 51 template<> const char *S32::label = "s32"; 52 template<> const char *S64::label = "s64"; 53 54 template<> const char *U8::label = "u8"; 55 template<> const char *U16::label = "u16"; 56 template<> const char *U32::label = "u32"; 57 template<> const char *U64::label = "u64"; 58 59 template<> const char *F32::label = "f32"; 60 template<> const char *F64::label = "f64"; 61 62 const char* 63 cmpOpToString(Brig::BrigCompareOperation cmpOp) 64 { 65 using namespace Brig; 66 67 switch (cmpOp) { 68 case BRIG_COMPARE_EQ: 69 return "eq"; 70 case BRIG_COMPARE_NE: 71 return "ne"; 72 case BRIG_COMPARE_LT: 73 return "lt"; 74 case BRIG_COMPARE_LE: 75 return "le"; 76 case BRIG_COMPARE_GT: 77 return "gt"; 78 case BRIG_COMPARE_GE: 79 return "ge"; 80 case BRIG_COMPARE_EQU: 81 return "equ"; 82 case BRIG_COMPARE_NEU: 83 return "neu"; 84 case BRIG_COMPARE_LTU: 85 return "ltu"; 86 case BRIG_COMPARE_LEU: 87 return "leu"; 88 case BRIG_COMPARE_GTU: 89 return "gtu"; 90 case BRIG_COMPARE_GEU: 91 return "geu"; 92 case BRIG_COMPARE_NUM: 93 return "num"; 94 case BRIG_COMPARE_NAN: 95 return "nan"; 96 case BRIG_COMPARE_SEQ: 97 return "seq"; 98 case BRIG_COMPARE_SNE: 99 return "sne"; 100 case BRIG_COMPARE_SLT: 101 return "slt"; 102 case BRIG_COMPARE_SLE: 103 return "sle"; 104 case BRIG_COMPARE_SGT: 105 return "sgt"; 106 case BRIG_COMPARE_SGE: 107 return "sge"; 108 case BRIG_COMPARE_SGEU: 109 return "sgeu"; 110 case BRIG_COMPARE_SEQU: 111 return "sequ"; 112 case BRIG_COMPARE_SNEU: 113 return "sneu"; 114 case BRIG_COMPARE_SLTU: 115 return "sltu"; 116 case BRIG_COMPARE_SLEU: 117 return "sleu"; 118 case BRIG_COMPARE_SNUM: 119 return "snum"; 120 case BRIG_COMPARE_SNAN: 121 return "snan"; 122 case BRIG_COMPARE_SGTU: 123 return "sgtu"; 124 default: 125 return "unknown"; 126 } 127 } 128 129 void 130 Ret::execute(GPUDynInstPtr gpuDynInst) 131 { 132 Wavefront *w = gpuDynInst->wavefront(); 133 134 const VectorMask &mask = w->getPred(); 135 136 // mask off completed work-items 137 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 138 if (mask[lane]) { 139 w->initMask[lane] = 0; 140 } 141 142 } 143 144 // delete extra instructions fetched for completed work-items 145 w->instructionBuffer.erase(w->instructionBuffer.begin() + 1, 146 w->instructionBuffer.end()); 147 if (w->pendingFetch) { 148 w->dropFetch = true; 149 } 150 151 // if all work-items have completed, then wave-front is done 152 if (w->initMask.none()) { 153 w->status = Wavefront::S_STOPPED; 154 155 int32_t refCount = w->computeUnit->getLds(). 156 decreaseRefCounter(w->dispatchId, w->wgId); 157 158 DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n", 159 w->computeUnit->cu_id, w->wgId, refCount); 160 161 // free the vector registers of the completed wavefront 162 w->computeUnit->vectorRegsReserved[w->simdId] -= 163 w->reservedVectorRegs; 164 165 assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0); 166 167 uint32_t endIndex = (w->startVgprIndex + 168 w->reservedVectorRegs - 1) % 169 w->computeUnit->vrf[w->simdId]->numRegs(); 170 171 w->computeUnit->vrf[w->simdId]->manager-> 172 freeRegion(w->startVgprIndex, endIndex); 173 174 w->reservedVectorRegs = 0; 175 w->startVgprIndex = 0; 176 w->computeUnit->completedWfs++; 177 178 DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n", 179 w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId); 180 181 if (!refCount) { 182 setFlag(SystemScope); 183 setFlag(Release); 184 setFlag(GlobalSegment); 185 // Notify Memory System of Kernel Completion 186 // Kernel End = isKernel + isRelease 187 w->status = Wavefront::S_RETURNING; 188 GPUDynInstPtr local_mempacket = gpuDynInst; 189 local_mempacket->useContinuation = false; 190 local_mempacket->simdId = w->simdId; 191 local_mempacket->wfSlotId = w->wfSlotId; 192 local_mempacket->wfDynId = w->wfDynId; 193 w->computeUnit->injectGlobalMemFence(local_mempacket, true); 194 } else { 195 w->computeUnit->shader->dispatcher->scheduleDispatch(); 196 } 197 } 198 } 199 200 void 201 Barrier::execute(GPUDynInstPtr gpuDynInst) 202 { 203 Wavefront *w = gpuDynInst->wavefront(); 204 205 assert(w->barrierCnt == w->oldBarrierCnt); 206 w->barrierCnt = w->oldBarrierCnt + 1; 207 w->stalledAtBarrier = true; 208 } 209} // namespace HsailISA 210