1/*
2 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Anthony Gutierrez
34 */
35
36#include "gpu-compute/gpu_dyn_inst.hh"
37
38#include "debug/GPUMem.hh"
39#include "gpu-compute/gpu_static_inst.hh"
40#include "gpu-compute/shader.hh"
41#include "gpu-compute/wavefront.hh"
42
43GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
44                       GPUStaticInst *static_inst, uint64_t instSeqNum)
45    : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
46      n_reg(0), useContinuation(false),
47      statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
48{
49    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
50    d_data = new uint8_t[computeUnit()->wfSize() * 16];
51    a_data = new uint8_t[computeUnit()->wfSize() * 8];
52    x_data = new uint8_t[computeUnit()->wfSize() * 8];
53    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
54        a_data[i] = 0;
55        x_data[i] = 0;
56    }
57    for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) {
58        d_data[i] = 0;
59    }
60}
61
62GPUDynInst::~GPUDynInst()
63{
64    delete[] d_data;
65    delete[] a_data;
66    delete[] x_data;
67}
68
69void
70GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
71{
72    _staticInst->execute(gpuDynInst);
73}
74
75int
76GPUDynInst::numSrcRegOperands()
77{
78    return _staticInst->numSrcRegOperands();
79}
80
81int
82GPUDynInst::numDstRegOperands()
83{
84    return _staticInst->numDstRegOperands();
85}
86
87int
88GPUDynInst::getNumOperands()
89{
90    return _staticInst->getNumOperands();
91}
92
93bool
94GPUDynInst::isVectorRegister(int operandIdx)
95{
96    return _staticInst->isVectorRegister(operandIdx);
97}
98
99bool
100GPUDynInst::isScalarRegister(int operandIdx)
101{
102    return _staticInst->isScalarRegister(operandIdx);
103}
104
105bool
106GPUDynInst::isCondRegister(int operandIdx)
107{
108    return _staticInst->isCondRegister(operandIdx);
109}
110
111int
112GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
113{
114    return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
115}
116
117int
118GPUDynInst::getOperandSize(int operandIdx)
119{
120    return _staticInst->getOperandSize(operandIdx);
121}
122
123bool
124GPUDynInst::isDstOperand(int operandIdx)
125{
126    return _staticInst->isDstOperand(operandIdx);
127}
128
129bool
130GPUDynInst::isSrcOperand(int operandIdx)
131{
132    return _staticInst->isSrcOperand(operandIdx);
133}
134
135const std::string&
136GPUDynInst::disassemble() const
137{
138    return _staticInst->disassemble();
139}
140
141uint64_t
142GPUDynInst::seqNum() const
143{
144    return _seqNum;
145}
146
147Enums::StorageClassType
148GPUDynInst::executedAs()
149{
150    return _staticInst->executed_as;
151}
152
153// Process a memory instruction and (if necessary) submit timing request
154void
155GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
156{
157    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
158            cu->cu_id, simdId, wfSlotId, exec_mask);
159
160    _staticInst->initiateAcc(gpuDynInst);
161    time = 0;
162}
163
164void
165GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
166{
167    _staticInst->completeAcc(gpuDynInst);
168}
169
170/**
171 * accessor methods for the attributes of
172 * the underlying GPU static instruction
173 */
174bool
175GPUDynInst::isALU() const
176{
177    return _staticInst->isALU();
178}
179
180bool
181GPUDynInst::isBranch() const
182{
183    return _staticInst->isBranch();
184}
185
186bool
187GPUDynInst::isNop() const
188{
189    return _staticInst->isNop();
190}
191
192bool
193GPUDynInst::isReturn() const
194{
195    return _staticInst->isReturn();
196}
197
198bool
199GPUDynInst::isUnconditionalJump() const
200{
201    return _staticInst->isUnconditionalJump();
202}
203
204bool
205GPUDynInst::isSpecialOp() const
206{
207    return _staticInst->isSpecialOp();
208}
209
210bool
211GPUDynInst::isWaitcnt() const
212{
213    return _staticInst->isWaitcnt();
214}
215
216bool
217GPUDynInst::isBarrier() const
218{
219    return _staticInst->isBarrier();
220}
221
222bool
223GPUDynInst::isMemFence() const
224{
225    return _staticInst->isMemFence();
226}
227
228bool
229GPUDynInst::isMemRef() const
230{
231    return _staticInst->isMemRef();
232}
233
234bool
235GPUDynInst::isFlat() const
236{
237    return _staticInst->isFlat();
238}
239
240bool
241GPUDynInst::isLoad() const
242{
243    return _staticInst->isLoad();
244}
245
246bool
247GPUDynInst::isStore() const
248{
249    return _staticInst->isStore();
250}
251
252bool
253GPUDynInst::isAtomic() const
254{
255    return _staticInst->isAtomic();
256}
257
258bool
259GPUDynInst::isAtomicNoRet() const
260{
261    return _staticInst->isAtomicNoRet();
262}
263
264bool
265GPUDynInst::isAtomicRet() const
266{
267    return _staticInst->isAtomicRet();
268}
269
270bool
271GPUDynInst::isScalar() const
272{
273    return _staticInst->isScalar();
274}
275
276bool
277GPUDynInst::readsSCC() const
278{
279    return _staticInst->readsSCC();
280}
281
282bool
283GPUDynInst::writesSCC() const
284{
285    return _staticInst->writesSCC();
286}
287
288bool
289GPUDynInst::readsVCC() const
290{
291    return _staticInst->readsVCC();
292}
293
294bool
295GPUDynInst::writesVCC() const
296{
297    return _staticInst->writesVCC();
298}
299
300bool
301GPUDynInst::isAtomicAnd() const
302{
303    return _staticInst->isAtomicAnd();
304}
305
306bool
307GPUDynInst::isAtomicOr() const
308{
309    return _staticInst->isAtomicOr();
310}
311
312bool
313GPUDynInst::isAtomicXor() const
314{
315    return _staticInst->isAtomicXor();
316}
317
318bool
319GPUDynInst::isAtomicCAS() const
320{
321    return _staticInst->isAtomicCAS();
322}
323
324bool GPUDynInst::isAtomicExch() const
325{
326    return _staticInst->isAtomicExch();
327}
328
329bool
330GPUDynInst::isAtomicAdd() const
331{
332    return _staticInst->isAtomicAdd();
333}
334
335bool
336GPUDynInst::isAtomicSub() const
337{
338    return _staticInst->isAtomicSub();
339}
340
341bool
342GPUDynInst::isAtomicInc() const
343{
344    return _staticInst->isAtomicInc();
345}
346
347bool
348GPUDynInst::isAtomicDec() const
349{
350    return _staticInst->isAtomicDec();
351}
352
353bool
354GPUDynInst::isAtomicMax() const
355{
356    return _staticInst->isAtomicMax();
357}
358
359bool
360GPUDynInst::isAtomicMin() const
361{
362    return _staticInst->isAtomicMin();
363}
364
365bool
366GPUDynInst::isArgLoad() const
367{
368    return _staticInst->isArgLoad();
369}
370
371bool
372GPUDynInst::isGlobalMem() const
373{
374    return _staticInst->isGlobalMem();
375}
376
377bool
378GPUDynInst::isLocalMem() const
379{
380    return _staticInst->isLocalMem();
381}
382
383bool
384GPUDynInst::isArgSeg() const
385{
386    return _staticInst->isArgSeg();
387}
388
389bool
390GPUDynInst::isGlobalSeg() const
391{
392    return _staticInst->isGlobalSeg();
393}
394
395bool
396GPUDynInst::isGroupSeg() const
397{
398    return _staticInst->isGroupSeg();
399}
400
401bool
402GPUDynInst::isKernArgSeg() const
403{
404    return _staticInst->isKernArgSeg();
405}
406
407bool
408GPUDynInst::isPrivateSeg() const
409{
410    return _staticInst->isPrivateSeg();
411}
412
413bool
414GPUDynInst::isReadOnlySeg() const
415{
416    return _staticInst->isReadOnlySeg();
417}
418
419bool
420GPUDynInst::isSpillSeg() const
421{
422    return _staticInst->isSpillSeg();
423}
424
425bool
426GPUDynInst::isWorkitemScope() const
427{
428    return _staticInst->isWorkitemScope();
429}
430
431bool
432GPUDynInst::isWavefrontScope() const
433{
434    return _staticInst->isWavefrontScope();
435}
436
437bool
438GPUDynInst::isWorkgroupScope() const
439{
440    return _staticInst->isWorkgroupScope();
441}
442
443bool
444GPUDynInst::isDeviceScope() const
445{
446    return _staticInst->isDeviceScope();
447}
448
449bool
450GPUDynInst::isSystemScope() const
451{
452    return _staticInst->isSystemScope();
453}
454
455bool
456GPUDynInst::isNoScope() const
457{
458    return _staticInst->isNoScope();
459}
460
461bool
462GPUDynInst::isRelaxedOrder() const
463{
464    return _staticInst->isRelaxedOrder();
465}
466
467bool
468GPUDynInst::isAcquire() const
469{
470    return _staticInst->isAcquire();
471}
472
473bool
474GPUDynInst::isRelease() const
475{
476    return _staticInst->isRelease();
477}
478
479bool
480GPUDynInst::isAcquireRelease() const
481{
482    return _staticInst->isAcquireRelease();
483}
484
485bool
486GPUDynInst::isNoOrder() const
487{
488    return _staticInst->isNoOrder();
489}
490
491bool
492GPUDynInst::isGloballyCoherent() const
493{
494    return _staticInst->isGloballyCoherent();
495}
496
497bool
498GPUDynInst::isSystemCoherent() const
499{
500    return _staticInst->isSystemCoherent();
501}
502
503void
504GPUDynInst::updateStats()
505{
506    if (_staticInst->isLocalMem()) {
507        // access to LDS (shared) memory
508        cu->dynamicLMemInstrCnt++;
509    } else {
510        // access to global memory
511
512        // update PageDivergence histogram
513        int number_pages_touched = cu->pagesTouched.size();
514        assert(number_pages_touched);
515        cu->pageDivergenceDist.sample(number_pages_touched);
516
517        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;
518
519        for (auto it : cu->pagesTouched) {
520            // see if this page has been touched before. if not, this also
521            // inserts the page into the table.
522            ret = cu->pageAccesses
523                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
524                        std::make_pair(1, it.second)));
525
526            // if yes, then update the stats
527            if (!ret.second) {
528                ret.first->second.first++;
529                ret.first->second.second += it.second;
530            }
531        }
532
533        cu->pagesTouched.clear();
534
535        // total number of memory instructions (dynamic)
536        // Atomics are counted as a single memory instruction.
537        // this is # memory instructions per wavefronts, not per workitem
538        cu->dynamicGMemInstrCnt++;
539    }
540}
541