gpu_dyn_inst.cc revision 11693:bc1f702c25b9
1/*
2 * Copyright (c) 2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Anthony Gutierrez
34 */
35
36#include "gpu-compute/gpu_dyn_inst.hh"
37
38#include "debug/GPUMem.hh"
39#include "gpu-compute/gpu_static_inst.hh"
40#include "gpu-compute/shader.hh"
41#include "gpu-compute/wavefront.hh"
42
43GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
44                       GPUStaticInst *static_inst, uint64_t instSeqNum)
45    : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
46      n_reg(0), useContinuation(false),
47      statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
48{
49    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
50    d_data = new uint8_t[computeUnit()->wfSize() * 16];
51    a_data = new uint8_t[computeUnit()->wfSize() * 8];
52    x_data = new uint8_t[computeUnit()->wfSize() * 8];
53    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
54        a_data[i] = 0;
55        x_data[i] = 0;
56    }
57    for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) {
58        d_data[i] = 0;
59    }
60}
61
62GPUDynInst::~GPUDynInst()
63{
64    delete[] d_data;
65    delete[] a_data;
66    delete[] x_data;
67}
68
69void
70GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
71{
72    _staticInst->execute(gpuDynInst);
73}
74
75int
76GPUDynInst::numSrcRegOperands()
77{
78    return _staticInst->numSrcRegOperands();
79}
80
81int
82GPUDynInst::numDstRegOperands()
83{
84    return _staticInst->numDstRegOperands();
85}
86
87int
88GPUDynInst::getNumOperands()
89{
90    return _staticInst->getNumOperands();
91}
92
93bool
94GPUDynInst::isVectorRegister(int operandIdx)
95{
96    return _staticInst->isVectorRegister(operandIdx);
97}
98
99bool
100GPUDynInst::isScalarRegister(int operandIdx)
101{
102    return _staticInst->isScalarRegister(operandIdx);
103}
104
105int
106GPUDynInst::getRegisterIndex(int operandIdx)
107{
108    return _staticInst->getRegisterIndex(operandIdx);
109}
110
111int
112GPUDynInst::getOperandSize(int operandIdx)
113{
114    return _staticInst->getOperandSize(operandIdx);
115}
116
117bool
118GPUDynInst::isDstOperand(int operandIdx)
119{
120    return _staticInst->isDstOperand(operandIdx);
121}
122
123bool
124GPUDynInst::isSrcOperand(int operandIdx)
125{
126    return _staticInst->isSrcOperand(operandIdx);
127}
128
129const std::string&
130GPUDynInst::disassemble() const
131{
132    return _staticInst->disassemble();
133}
134
135uint64_t
136GPUDynInst::seqNum() const
137{
138    return _seqNum;
139}
140
141Enums::StorageClassType
142GPUDynInst::executedAs()
143{
144    return _staticInst->executed_as;
145}
146
147// Process a memory instruction and (if necessary) submit timing request
148void
149GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
150{
151    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
152            cu->cu_id, simdId, wfSlotId, exec_mask);
153
154    _staticInst->initiateAcc(gpuDynInst);
155    time = 0;
156}
157
158void
159GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
160{
161    _staticInst->completeAcc(gpuDynInst);
162}
163
164/**
165 * accessor methods for the attributes of
166 * the underlying GPU static instruction
167 */
168bool
169GPUDynInst::isALU() const
170{
171    return _staticInst->isALU();
172}
173
174bool
175GPUDynInst::isBranch() const
176{
177    return _staticInst->isBranch();
178}
179
180bool
181GPUDynInst::isNop() const
182{
183    return _staticInst->isNop();
184}
185
186bool
187GPUDynInst::isReturn() const
188{
189    return _staticInst->isReturn();
190}
191
192bool
193GPUDynInst::isUnconditionalJump() const
194{
195    return _staticInst->isUnconditionalJump();
196}
197
198bool
199GPUDynInst::isSpecialOp() const
200{
201    return _staticInst->isSpecialOp();
202}
203
204bool
205GPUDynInst::isWaitcnt() const
206{
207    return _staticInst->isWaitcnt();
208}
209
210bool
211GPUDynInst::isBarrier() const
212{
213    return _staticInst->isBarrier();
214}
215
216bool
217GPUDynInst::isMemFence() const
218{
219    return _staticInst->isMemFence();
220}
221
222bool
223GPUDynInst::isMemRef() const
224{
225    return _staticInst->isMemRef();
226}
227
228bool
229GPUDynInst::isFlat() const
230{
231    return _staticInst->isFlat();
232}
233
234bool
235GPUDynInst::isLoad() const
236{
237    return _staticInst->isLoad();
238}
239
240bool
241GPUDynInst::isStore() const
242{
243    return _staticInst->isStore();
244}
245
246bool
247GPUDynInst::isAtomic() const
248{
249    return _staticInst->isAtomic();
250}
251
252bool
253GPUDynInst::isAtomicNoRet() const
254{
255    return _staticInst->isAtomicNoRet();
256}
257
258bool
259GPUDynInst::isAtomicRet() const
260{
261    return _staticInst->isAtomicRet();
262}
263
264bool
265GPUDynInst::isScalar() const
266{
267    return _staticInst->isScalar();
268}
269
270bool
271GPUDynInst::readsSCC() const
272{
273    return _staticInst->readsSCC();
274}
275
276bool
277GPUDynInst::writesSCC() const
278{
279    return _staticInst->writesSCC();
280}
281
282bool
283GPUDynInst::readsVCC() const
284{
285    return _staticInst->readsVCC();
286}
287
288bool
289GPUDynInst::writesVCC() const
290{
291    return _staticInst->writesVCC();
292}
293
294bool
295GPUDynInst::isAtomicAnd() const
296{
297    return _staticInst->isAtomicAnd();
298}
299
300bool
301GPUDynInst::isAtomicOr() const
302{
303    return _staticInst->isAtomicOr();
304}
305
306bool
307GPUDynInst::isAtomicXor() const
308{
309    return _staticInst->isAtomicXor();
310}
311
312bool
313GPUDynInst::isAtomicCAS() const
314{
315    return _staticInst->isAtomicCAS();
316}
317
318bool GPUDynInst::isAtomicExch() const
319{
320    return _staticInst->isAtomicExch();
321}
322
323bool
324GPUDynInst::isAtomicAdd() const
325{
326    return _staticInst->isAtomicAdd();
327}
328
329bool
330GPUDynInst::isAtomicSub() const
331{
332    return _staticInst->isAtomicSub();
333}
334
335bool
336GPUDynInst::isAtomicInc() const
337{
338    return _staticInst->isAtomicInc();
339}
340
341bool
342GPUDynInst::isAtomicDec() const
343{
344    return _staticInst->isAtomicDec();
345}
346
347bool
348GPUDynInst::isAtomicMax() const
349{
350    return _staticInst->isAtomicMax();
351}
352
353bool
354GPUDynInst::isAtomicMin() const
355{
356    return _staticInst->isAtomicMin();
357}
358
359bool
360GPUDynInst::isArgLoad() const
361{
362    return _staticInst->isArgLoad();
363}
364
365bool
366GPUDynInst::isGlobalMem() const
367{
368    return _staticInst->isGlobalMem();
369}
370
371bool
372GPUDynInst::isLocalMem() const
373{
374    return _staticInst->isLocalMem();
375}
376
377bool
378GPUDynInst::isArgSeg() const
379{
380    return _staticInst->isArgSeg();
381}
382
383bool
384GPUDynInst::isGlobalSeg() const
385{
386    return _staticInst->isGlobalSeg();
387}
388
389bool
390GPUDynInst::isGroupSeg() const
391{
392    return _staticInst->isGroupSeg();
393}
394
395bool
396GPUDynInst::isKernArgSeg() const
397{
398    return _staticInst->isKernArgSeg();
399}
400
401bool
402GPUDynInst::isPrivateSeg() const
403{
404    return _staticInst->isPrivateSeg();
405}
406
407bool
408GPUDynInst::isReadOnlySeg() const
409{
410    return _staticInst->isReadOnlySeg();
411}
412
413bool
414GPUDynInst::isSpillSeg() const
415{
416    return _staticInst->isSpillSeg();
417}
418
419bool
420GPUDynInst::isWorkitemScope() const
421{
422    return _staticInst->isWorkitemScope();
423}
424
425bool
426GPUDynInst::isWavefrontScope() const
427{
428    return _staticInst->isWavefrontScope();
429}
430
431bool
432GPUDynInst::isWorkgroupScope() const
433{
434    return _staticInst->isWorkgroupScope();
435}
436
437bool
438GPUDynInst::isDeviceScope() const
439{
440    return _staticInst->isDeviceScope();
441}
442
443bool
444GPUDynInst::isSystemScope() const
445{
446    return _staticInst->isSystemScope();
447}
448
449bool
450GPUDynInst::isNoScope() const
451{
452    return _staticInst->isNoScope();
453}
454
455bool
456GPUDynInst::isRelaxedOrder() const
457{
458    return _staticInst->isRelaxedOrder();
459}
460
461bool
462GPUDynInst::isAcquire() const
463{
464    return _staticInst->isAcquire();
465}
466
467bool
468GPUDynInst::isRelease() const
469{
470    return _staticInst->isRelease();
471}
472
473bool
474GPUDynInst::isAcquireRelease() const
475{
476    return _staticInst->isAcquireRelease();
477}
478
479bool
480GPUDynInst::isNoOrder() const
481{
482    return _staticInst->isNoOrder();
483}
484
485bool
486GPUDynInst::isGloballyCoherent() const
487{
488    return _staticInst->isGloballyCoherent();
489}
490
491bool
492GPUDynInst::isSystemCoherent() const
493{
494    return _staticInst->isSystemCoherent();
495}
496
497void
498GPUDynInst::updateStats()
499{
500    if (_staticInst->isLocalMem()) {
501        // access to LDS (shared) memory
502        cu->dynamicLMemInstrCnt++;
503    } else {
504        // access to global memory
505
506        // update PageDivergence histogram
507        int number_pages_touched = cu->pagesTouched.size();
508        assert(number_pages_touched);
509        cu->pageDivergenceDist.sample(number_pages_touched);
510
511        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;
512
513        for (auto it : cu->pagesTouched) {
514            // see if this page has been touched before. if not, this also
515            // inserts the page into the table.
516            ret = cu->pageAccesses
517                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
518                        std::make_pair(1, it.second)));
519
520            // if yes, then update the stats
521            if (!ret.second) {
522                ret.first->second.first++;
523                ret.first->second.second += it.second;
524            }
525        }
526
527        cu->pagesTouched.clear();
528
529        // total number of memory instructions (dynamic)
530        // Atomics are counted as a single memory instruction.
531        // this is # memory instructions per wavefronts, not per workitem
532        cu->dynamicGMemInstrCnt++;
533    }
534}
535