gpu_dyn_inst.cc revision 11692:e772fdcd3809
1/*
2 * Copyright (c) 2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Anthony Gutierrez
34 */
35
36#include "gpu-compute/gpu_dyn_inst.hh"
37
38#include "debug/GPUMem.hh"
39#include "gpu-compute/gpu_static_inst.hh"
40#include "gpu-compute/shader.hh"
41#include "gpu-compute/wavefront.hh"
42
43GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
44                       GPUStaticInst *static_inst, uint64_t instSeqNum)
45    : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
46      n_reg(0), useContinuation(false),
47      statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
48{
49    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
50    d_data = new uint8_t[computeUnit()->wfSize() * 16];
51    a_data = new uint8_t[computeUnit()->wfSize() * 8];
52    x_data = new uint8_t[computeUnit()->wfSize() * 8];
53    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
54        a_data[i] = 0;
55        x_data[i] = 0;
56    }
57    for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) {
58        d_data[i] = 0;
59    }
60}
61
62GPUDynInst::~GPUDynInst()
63{
64    delete[] d_data;
65    delete[] a_data;
66    delete[] x_data;
67}
68
69void
70GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
71{
72    _staticInst->execute(gpuDynInst);
73}
74
75int
76GPUDynInst::numSrcRegOperands()
77{
78    return _staticInst->numSrcRegOperands();
79}
80
81int
82GPUDynInst::numDstRegOperands()
83{
84    return _staticInst->numDstRegOperands();
85}
86
87int
88GPUDynInst::getNumOperands()
89{
90    return _staticInst->getNumOperands();
91}
92
93bool
94GPUDynInst::isVectorRegister(int operandIdx)
95{
96    return _staticInst->isVectorRegister(operandIdx);
97}
98
99bool
100GPUDynInst::isScalarRegister(int operandIdx)
101{
102    return _staticInst->isScalarRegister(operandIdx);
103}
104
105int
106GPUDynInst::getRegisterIndex(int operandIdx)
107{
108    return _staticInst->getRegisterIndex(operandIdx);
109}
110
111int
112GPUDynInst::getOperandSize(int operandIdx)
113{
114    return _staticInst->getOperandSize(operandIdx);
115}
116
117bool
118GPUDynInst::isDstOperand(int operandIdx)
119{
120    return _staticInst->isDstOperand(operandIdx);
121}
122
123bool
124GPUDynInst::isSrcOperand(int operandIdx)
125{
126    return _staticInst->isSrcOperand(operandIdx);
127}
128
129const std::string&
130GPUDynInst::disassemble() const
131{
132    return _staticInst->disassemble();
133}
134
135uint64_t
136GPUDynInst::seqNum() const
137{
138    return _seqNum;
139}
140
141Enums::StorageClassType
142GPUDynInst::executedAs()
143{
144    return _staticInst->executed_as;
145}
146
147// Process a memory instruction and (if necessary) submit timing request
148void
149GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
150{
151    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
152            cu->cu_id, simdId, wfSlotId, exec_mask);
153
154    _staticInst->initiateAcc(gpuDynInst);
155    time = 0;
156}
157
158/**
159 * accessor methods for the attributes of
160 * the underlying GPU static instruction
161 */
162bool
163GPUDynInst::isALU() const
164{
165    return _staticInst->isALU();
166}
167
168bool
169GPUDynInst::isBranch() const
170{
171    return _staticInst->isBranch();
172}
173
174bool
175GPUDynInst::isNop() const
176{
177    return _staticInst->isNop();
178}
179
180bool
181GPUDynInst::isReturn() const
182{
183    return _staticInst->isReturn();
184}
185
186bool
187GPUDynInst::isUnconditionalJump() const
188{
189    return _staticInst->isUnconditionalJump();
190}
191
192bool
193GPUDynInst::isSpecialOp() const
194{
195    return _staticInst->isSpecialOp();
196}
197
198bool
199GPUDynInst::isWaitcnt() const
200{
201    return _staticInst->isWaitcnt();
202}
203
204bool
205GPUDynInst::isBarrier() const
206{
207    return _staticInst->isBarrier();
208}
209
210bool
211GPUDynInst::isMemFence() const
212{
213    return _staticInst->isMemFence();
214}
215
216bool
217GPUDynInst::isMemRef() const
218{
219    return _staticInst->isMemRef();
220}
221
222bool
223GPUDynInst::isFlat() const
224{
225    return _staticInst->isFlat();
226}
227
228bool
229GPUDynInst::isLoad() const
230{
231    return _staticInst->isLoad();
232}
233
234bool
235GPUDynInst::isStore() const
236{
237    return _staticInst->isStore();
238}
239
240bool
241GPUDynInst::isAtomic() const
242{
243    return _staticInst->isAtomic();
244}
245
246bool
247GPUDynInst::isAtomicNoRet() const
248{
249    return _staticInst->isAtomicNoRet();
250}
251
252bool
253GPUDynInst::isAtomicRet() const
254{
255    return _staticInst->isAtomicRet();
256}
257
258bool
259GPUDynInst::isScalar() const
260{
261    return _staticInst->isScalar();
262}
263
264bool
265GPUDynInst::readsSCC() const
266{
267    return _staticInst->readsSCC();
268}
269
270bool
271GPUDynInst::writesSCC() const
272{
273    return _staticInst->writesSCC();
274}
275
276bool
277GPUDynInst::readsVCC() const
278{
279    return _staticInst->readsVCC();
280}
281
282bool
283GPUDynInst::writesVCC() const
284{
285    return _staticInst->writesVCC();
286}
287
288bool
289GPUDynInst::isAtomicAnd() const
290{
291    return _staticInst->isAtomicAnd();
292}
293
294bool
295GPUDynInst::isAtomicOr() const
296{
297    return _staticInst->isAtomicOr();
298}
299
300bool
301GPUDynInst::isAtomicXor() const
302{
303    return _staticInst->isAtomicXor();
304}
305
306bool
307GPUDynInst::isAtomicCAS() const
308{
309    return _staticInst->isAtomicCAS();
310}
311
312bool GPUDynInst::isAtomicExch() const
313{
314    return _staticInst->isAtomicExch();
315}
316
317bool
318GPUDynInst::isAtomicAdd() const
319{
320    return _staticInst->isAtomicAdd();
321}
322
323bool
324GPUDynInst::isAtomicSub() const
325{
326    return _staticInst->isAtomicSub();
327}
328
329bool
330GPUDynInst::isAtomicInc() const
331{
332    return _staticInst->isAtomicInc();
333}
334
335bool
336GPUDynInst::isAtomicDec() const
337{
338    return _staticInst->isAtomicDec();
339}
340
341bool
342GPUDynInst::isAtomicMax() const
343{
344    return _staticInst->isAtomicMax();
345}
346
347bool
348GPUDynInst::isAtomicMin() const
349{
350    return _staticInst->isAtomicMin();
351}
352
353bool
354GPUDynInst::isArgLoad() const
355{
356    return _staticInst->isArgLoad();
357}
358
359bool
360GPUDynInst::isGlobalMem() const
361{
362    return _staticInst->isGlobalMem();
363}
364
365bool
366GPUDynInst::isLocalMem() const
367{
368    return _staticInst->isLocalMem();
369}
370
371bool
372GPUDynInst::isArgSeg() const
373{
374    return _staticInst->isArgSeg();
375}
376
377bool
378GPUDynInst::isGlobalSeg() const
379{
380    return _staticInst->isGlobalSeg();
381}
382
383bool
384GPUDynInst::isGroupSeg() const
385{
386    return _staticInst->isGroupSeg();
387}
388
389bool
390GPUDynInst::isKernArgSeg() const
391{
392    return _staticInst->isKernArgSeg();
393}
394
395bool
396GPUDynInst::isPrivateSeg() const
397{
398    return _staticInst->isPrivateSeg();
399}
400
401bool
402GPUDynInst::isReadOnlySeg() const
403{
404    return _staticInst->isReadOnlySeg();
405}
406
407bool
408GPUDynInst::isSpillSeg() const
409{
410    return _staticInst->isSpillSeg();
411}
412
413bool
414GPUDynInst::isWorkitemScope() const
415{
416    return _staticInst->isWorkitemScope();
417}
418
419bool
420GPUDynInst::isWavefrontScope() const
421{
422    return _staticInst->isWavefrontScope();
423}
424
425bool
426GPUDynInst::isWorkgroupScope() const
427{
428    return _staticInst->isWorkgroupScope();
429}
430
431bool
432GPUDynInst::isDeviceScope() const
433{
434    return _staticInst->isDeviceScope();
435}
436
437bool
438GPUDynInst::isSystemScope() const
439{
440    return _staticInst->isSystemScope();
441}
442
443bool
444GPUDynInst::isNoScope() const
445{
446    return _staticInst->isNoScope();
447}
448
449bool
450GPUDynInst::isRelaxedOrder() const
451{
452    return _staticInst->isRelaxedOrder();
453}
454
455bool
456GPUDynInst::isAcquire() const
457{
458    return _staticInst->isAcquire();
459}
460
461bool
462GPUDynInst::isRelease() const
463{
464    return _staticInst->isRelease();
465}
466
467bool
468GPUDynInst::isAcquireRelease() const
469{
470    return _staticInst->isAcquireRelease();
471}
472
473bool
474GPUDynInst::isNoOrder() const
475{
476    return _staticInst->isNoOrder();
477}
478
479bool
480GPUDynInst::isGloballyCoherent() const
481{
482    return _staticInst->isGloballyCoherent();
483}
484
485bool
486GPUDynInst::isSystemCoherent() const
487{
488    return _staticInst->isSystemCoherent();
489}
490
491void
492GPUDynInst::updateStats()
493{
494    if (_staticInst->isLocalMem()) {
495        // access to LDS (shared) memory
496        cu->dynamicLMemInstrCnt++;
497    } else {
498        // access to global memory
499
500        // update PageDivergence histogram
501        int number_pages_touched = cu->pagesTouched.size();
502        assert(number_pages_touched);
503        cu->pageDivergenceDist.sample(number_pages_touched);
504
505        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;
506
507        for (auto it : cu->pagesTouched) {
508            // see if this page has been touched before. if not, this also
509            // inserts the page into the table.
510            ret = cu->pageAccesses
511                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
512                        std::make_pair(1, it.second)));
513
514            // if yes, then update the stats
515            if (!ret.second) {
516                ret.first->second.first++;
517                ret.first->second.second += it.second;
518            }
519        }
520
521        cu->pagesTouched.clear();
522
523        // total number of memory instructions (dynamic)
524        // Atomics are counted as a single memory instruction.
525        // this is # memory instructions per wavefronts, not per workitem
526        cu->dynamicGMemInstrCnt++;
527    }
528}
529