Deleted Added
sdiff udiff text old ( 11534:7106f550afad ) new ( 11639:2e8d4bd8108d )
full compact
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#include "arch/hsail/insts/decl.hh"
37#include "debug/GPUExec.hh"
38#include "gpu-compute/dispatcher.hh"
39#include "gpu-compute/simple_pool_manager.hh"
40
41namespace HsailISA
42{
43 template<> const char *B1::label = "b1";
44 template<> const char *B8::label = "b8";
45 template<> const char *B16::label = "b16";
46 template<> const char *B32::label = "b32";
47 template<> const char *B64::label = "b64";
48
49 template<> const char *S8::label = "s8";
50 template<> const char *S16::label = "s16";
51 template<> const char *S32::label = "s32";
52 template<> const char *S64::label = "s64";
53
54 template<> const char *U8::label = "u8";
55 template<> const char *U16::label = "u16";
56 template<> const char *U32::label = "u32";
57 template<> const char *U64::label = "u64";
58
59 template<> const char *F32::label = "f32";
60 template<> const char *F64::label = "f64";
61
62 const char*
63 cmpOpToString(Brig::BrigCompareOperation cmpOp)
64 {
65 using namespace Brig;
66
67 switch (cmpOp) {
68 case BRIG_COMPARE_EQ:
69 return "eq";
70 case BRIG_COMPARE_NE:
71 return "ne";
72 case BRIG_COMPARE_LT:
73 return "lt";
74 case BRIG_COMPARE_LE:
75 return "le";
76 case BRIG_COMPARE_GT:
77 return "gt";
78 case BRIG_COMPARE_GE:
79 return "ge";
80 case BRIG_COMPARE_EQU:
81 return "equ";
82 case BRIG_COMPARE_NEU:
83 return "neu";
84 case BRIG_COMPARE_LTU:
85 return "ltu";
86 case BRIG_COMPARE_LEU:
87 return "leu";
88 case BRIG_COMPARE_GTU:
89 return "gtu";
90 case BRIG_COMPARE_GEU:
91 return "geu";
92 case BRIG_COMPARE_NUM:
93 return "num";
94 case BRIG_COMPARE_NAN:
95 return "nan";
96 case BRIG_COMPARE_SEQ:
97 return "seq";
98 case BRIG_COMPARE_SNE:
99 return "sne";
100 case BRIG_COMPARE_SLT:
101 return "slt";
102 case BRIG_COMPARE_SLE:
103 return "sle";
104 case BRIG_COMPARE_SGT:
105 return "sgt";
106 case BRIG_COMPARE_SGE:
107 return "sge";
108 case BRIG_COMPARE_SGEU:
109 return "sgeu";
110 case BRIG_COMPARE_SEQU:
111 return "sequ";
112 case BRIG_COMPARE_SNEU:
113 return "sneu";
114 case BRIG_COMPARE_SLTU:
115 return "sltu";
116 case BRIG_COMPARE_SLEU:
117 return "sleu";
118 case BRIG_COMPARE_SNUM:
119 return "snum";
120 case BRIG_COMPARE_SNAN:
121 return "snan";
122 case BRIG_COMPARE_SGTU:
123 return "sgtu";
124 default:
125 return "unknown";
126 }
127 }
128
129 void
130 Ret::execute(GPUDynInstPtr gpuDynInst)
131 {
132 Wavefront *w = gpuDynInst->wavefront();
133
134 const VectorMask &mask = w->getPred();
135
136 // mask off completed work-items
137 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
138 if (mask[lane]) {
139 w->initMask[lane] = 0;
140 }
141
142 }
143
144 // delete extra instructions fetched for completed work-items
145 w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
146 w->instructionBuffer.end());
147 if (w->pendingFetch) {
148 w->dropFetch = true;
149 }
150
151 // if all work-items have completed, then wave-front is done
152 if (w->initMask.none()) {
153 w->status = Wavefront::S_STOPPED;
154
155 int32_t refCount = w->computeUnit->getLds().
156 decreaseRefCounter(w->dispatchId, w->wgId);
157
158 DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
159 w->computeUnit->cu_id, w->wgId, refCount);
160
161 // free the vector registers of the completed wavefront
162 w->computeUnit->vectorRegsReserved[w->simdId] -=
163 w->reservedVectorRegs;
164
165 assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0);
166
167 uint32_t endIndex = (w->startVgprIndex +
168 w->reservedVectorRegs - 1) %
169 w->computeUnit->vrf[w->simdId]->numRegs();
170
171 w->computeUnit->vrf[w->simdId]->manager->
172 freeRegion(w->startVgprIndex, endIndex);
173
174 w->reservedVectorRegs = 0;
175 w->startVgprIndex = 0;
176 w->computeUnit->completedWfs++;
177
178 DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
179 w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
180
181 if (!refCount) {
182 // Notify Memory System of Kernel Completion
183 // Kernel End = isKernel + isRelease
184 w->status = Wavefront::S_RETURNING;
185 GPUDynInstPtr local_mempacket = gpuDynInst;
186 local_mempacket->memoryOrder = Enums::MEMORY_ORDER_SC_RELEASE;
187 local_mempacket->scope = Enums::MEMORY_SCOPE_SYSTEM;
188 local_mempacket->useContinuation = false;
189 local_mempacket->simdId = w->simdId;
190 local_mempacket->wfSlotId = w->wfSlotId;
191 local_mempacket->wfDynId = w->wfDynId;
192 w->computeUnit->injectGlobalMemFence(local_mempacket, true);
193 } else {
194 w->computeUnit->shader->dispatcher->scheduleDispatch();
195 }
196 }
197 }
198
199 void
200 Barrier::execute(GPUDynInstPtr gpuDynInst)
201 {
202 Wavefront *w = gpuDynInst->wavefront();
203
204 assert(w->barrierCnt == w->oldBarrierCnt);
205 w->barrierCnt = w->oldBarrierCnt + 1;
206 w->stalledAtBarrier = true;
207 }
208} // namespace HsailISA