1/*
2 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: John Kalamatianos,
34 *          Sooraj Puthoor
35 */
36
37#include "gpu-compute/exec_stage.hh"
38
39#include "gpu-compute/compute_unit.hh"
40#include "gpu-compute/wavefront.hh"
41
42ExecStage::ExecStage(const ComputeUnitParams *p) : numSIMDs(p->num_SIMDs),
43    numMemUnits(p->num_global_mem_pipes + p->num_shared_mem_pipes),
44    vectorAluInstAvail(nullptr), glbMemInstAvail(nullptr),
45    shrMemInstAvail(nullptr), lastTimeInstExecuted(false),
46    thisTimeInstExecuted(false), instrExecuted (false),
47    executionResourcesUsed(0)
48{
49    numTransActiveIdle = 0;
50    idle_dur = 0;
51}
52
53void
54ExecStage::init(ComputeUnit *cu)
55{
56    computeUnit = cu;
57    _name = computeUnit->name() + ".ExecStage";
58    dispatchList = &computeUnit->dispatchList;
59    vectorAluInstAvail = &(computeUnit->vectorAluInstAvail);
60    glbMemInstAvail= &(computeUnit->glbMemInstAvail);
61    shrMemInstAvail= &(computeUnit->shrMemInstAvail);
62    idle_dur = 0;
63}
64
65void
66ExecStage::collectStatistics(enum STAT_STATUS stage, int unitId) {
67    if (stage == IdleExec) {
68        // count cycles of no vector ALU instruction executed
69        // even if one was the oldest in a WV of that vector SIMD unit
70        if (computeUnit->isVecAlu(unitId) && vectorAluInstAvail->at(unitId)) {
71            numCyclesWithNoInstrTypeIssued[unitId]++;
72        }
73
74        // count cycles of no global memory (vector) instruction executed
75        // even if one was the oldest in a WV of that vector SIMD unit
76        if (computeUnit->isGlbMem(unitId) && *glbMemInstAvail > 0) {
77            numCyclesWithNoInstrTypeIssued[unitId]++;
78            (*glbMemInstAvail)--;
79        }
80
81        // count cycles of no shared memory (vector) instruction executed
82        // even if one was the oldest in a WV of that vector SIMD unit
83        if (computeUnit->isShrMem(unitId) && *shrMemInstAvail > 0) {
84            numCyclesWithNoInstrTypeIssued[unitId]++;
85            (*shrMemInstAvail)--;
86        }
87    } else if (stage == BusyExec) {
88        // count the number of cycles an instruction to a specific unit
89        // was issued
90        numCyclesWithInstrTypeIssued[unitId]++;
91        thisTimeInstExecuted = true;
92        instrExecuted = true;
93        ++executionResourcesUsed;
94    } else if (stage == PostExec) {
95        // count the number of transitions from active to idle
96        if (lastTimeInstExecuted && !thisTimeInstExecuted) {
97            ++numTransActiveIdle;
98        }
99
100        if (!lastTimeInstExecuted && thisTimeInstExecuted) {
101            idleDur.sample(idle_dur);
102            idle_dur = 0;
103        } else if (!thisTimeInstExecuted) {
104            idle_dur++;
105        }
106
107        lastTimeInstExecuted = thisTimeInstExecuted;
108        // track the number of cycles we either issued one vector instruction
109        // or issued no instructions at all
110        if (instrExecuted) {
111            numCyclesWithInstrIssued++;
112        } else {
113            numCyclesWithNoIssue++;
114        }
115
116        spc.sample(executionResourcesUsed);
117    }
118}
119
120void
121ExecStage::initStatistics()
122{
123    instrExecuted = false;
124    executionResourcesUsed = 0;
125    thisTimeInstExecuted = false;
126}
127
128void
129ExecStage::exec()
130{
131    initStatistics();
132
133    for (int unitId = 0; unitId < (numSIMDs + numMemUnits); ++unitId) {
134         // if dispatch list for this execution resource is empty,
135         // skip this execution resource this cycle
136         if (dispatchList->at(unitId).second == EMPTY) {
137             collectStatistics(IdleExec, unitId);
138             continue;
139         }
140
141         collectStatistics(BusyExec, unitId);
142         // execute an instruction for the WF
143         dispatchList->at(unitId).first->exec();
144         // clear the dispatch list entry
145         dispatchList->at(unitId).second = EMPTY;
146         dispatchList->at(unitId).first = (Wavefront*)nullptr;
147    }
148
149    collectStatistics(PostExec, 0);
150}
151
152void
153ExecStage::regStats()
154{
155    numTransActiveIdle
156       .name(name() + ".num_transitions_active_to_idle")
157       .desc("number of CU transitions from active to idle")
158        ;
159
160    numCyclesWithNoIssue
161        .name(name() + ".num_cycles_with_no_issue")
162        .desc("number of cycles the CU issues nothing")
163        ;
164
165    numCyclesWithInstrIssued
166        .name(name() + ".num_cycles_with_instr_issued")
167        .desc("number of cycles the CU issued at least one instruction")
168        ;
169
170    spc
171        .init(0, numSIMDs + numMemUnits, 1)
172        .name(name() + ".spc")
173        .desc("Execution units active per cycle (Exec unit=SIMD,MemPipe)")
174        ;
175
176    idleDur
177        .init(0,75,5)
178        .name(name() + ".idle_duration_in_cycles")
179        .desc("duration of idle periods in cycles")
180        ;
181
182    numCyclesWithInstrTypeIssued
183        .init(numSIMDs + numMemUnits)
184        .name(name() + ".num_cycles_with_instrtype_issue")
185        .desc("Number of cycles at least one instruction of specific type "
186              "issued")
187        ;
188
189    numCyclesWithNoInstrTypeIssued
190        .init(numSIMDs + numMemUnits)
191       .name(name() + ".num_cycles_with_instr_type_no_issue")
192       .desc("Number of cycles no instruction of specific type issued")
193       ;
194
195    for (int i = 0; i < numSIMDs; ++i) {
196        numCyclesWithInstrTypeIssued.subname(i, csprintf("ALU%d",i));
197        numCyclesWithNoInstrTypeIssued.subname(i, csprintf("ALU%d",i));
198    }
199
200    numCyclesWithInstrTypeIssued.subname(numSIMDs, csprintf("GM"));
201    numCyclesWithNoInstrTypeIssued.subname(numSIMDs, csprintf("GM"));
202    numCyclesWithInstrTypeIssued.subname(numSIMDs + 1, csprintf("LM"));
203    numCyclesWithNoInstrTypeIssued.subname(numSIMDs + 1, csprintf("LM"));
204}
205