exec_stage.cc revision 11308
1/*
2 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: John Kalamatianos, Sooraj Puthoor
34 */
35
36#include "gpu-compute/exec_stage.hh"
37
38#include "gpu-compute/compute_unit.hh"
39#include "gpu-compute/wavefront.hh"
40
41ExecStage::ExecStage(const ComputeUnitParams *p) : numSIMDs(p->num_SIMDs),
42    numMemUnits(p->num_global_mem_pipes + p->num_shared_mem_pipes),
43    vectorAluInstAvail(nullptr), glbMemInstAvail(nullptr),
44    shrMemInstAvail(nullptr), lastTimeInstExecuted(false),
45    thisTimeInstExecuted(false), instrExecuted (false),
46    executionResourcesUsed(0)
47{
48    numTransActiveIdle = 0;
49    idle_dur = 0;
50}
51
52void
53ExecStage::init(ComputeUnit *cu)
54{
55    computeUnit = cu;
56    _name = computeUnit->name() + ".ExecStage";
57    dispatchList = &computeUnit->dispatchList;
58    vectorAluInstAvail = &(computeUnit->vectorAluInstAvail);
59    glbMemInstAvail= &(computeUnit->glbMemInstAvail);
60    shrMemInstAvail= &(computeUnit->shrMemInstAvail);
61    idle_dur = 0;
62}
63
64void
65ExecStage::collectStatistics(enum STAT_STATUS stage, int unitId) {
66    if (stage == IdleExec) {
67        // count cycles of no vector ALU instruction executed
68        // even if one was the oldest in a WV of that vector SIMD unit
69        if (computeUnit->isVecAlu(unitId) && vectorAluInstAvail->at(unitId)) {
70            numCyclesWithNoInstrTypeIssued[unitId]++;
71        }
72
73        // count cycles of no global memory (vector) instruction executed
74        // even if one was the oldest in a WV of that vector SIMD unit
75        if (computeUnit->isGlbMem(unitId) && *glbMemInstAvail > 0) {
76            numCyclesWithNoInstrTypeIssued[unitId]++;
77            (*glbMemInstAvail)--;
78        }
79
80        // count cycles of no shared memory (vector) instruction executed
81        // even if one was the oldest in a WV of that vector SIMD unit
82        if (computeUnit->isShrMem(unitId) && *shrMemInstAvail > 0) {
83            numCyclesWithNoInstrTypeIssued[unitId]++;
84            (*shrMemInstAvail)--;
85        }
86    } else if (stage == BusyExec) {
87        // count the number of cycles an instruction to a specific unit
88        // was issued
89        numCyclesWithInstrTypeIssued[unitId]++;
90        thisTimeInstExecuted = true;
91        instrExecuted = true;
92        ++executionResourcesUsed;
93    } else if (stage == PostExec) {
94        // count the number of transitions from active to idle
95        if (lastTimeInstExecuted && !thisTimeInstExecuted) {
96            ++numTransActiveIdle;
97        }
98
99        if (!lastTimeInstExecuted && thisTimeInstExecuted) {
100            idleDur.sample(idle_dur);
101            idle_dur = 0;
102        } else if (!thisTimeInstExecuted) {
103            idle_dur++;
104        }
105
106        lastTimeInstExecuted = thisTimeInstExecuted;
107        // track the number of cycles we either issued one vector instruction
108        // or issued no instructions at all
109        if (instrExecuted) {
110            numCyclesWithInstrIssued++;
111        } else {
112            numCyclesWithNoIssue++;
113        }
114
115        spc.sample(executionResourcesUsed);
116    }
117}
118
119void
120ExecStage::initStatistics()
121{
122    instrExecuted = false;
123    executionResourcesUsed = 0;
124    thisTimeInstExecuted = false;
125}
126
127void
128ExecStage::exec()
129{
130    initStatistics();
131
132    for (int unitId = 0; unitId < (numSIMDs + numMemUnits); ++unitId) {
133         // if dispatch list for this execution resource is empty,
134         // skip this execution resource this cycle
135         if (dispatchList->at(unitId).second == EMPTY) {
136             collectStatistics(IdleExec, unitId);
137             continue;
138         }
139
140         collectStatistics(BusyExec, unitId);
141         // execute an instruction for the WF
142         dispatchList->at(unitId).first->exec();
143         // clear the dispatch list entry
144         dispatchList->at(unitId).second = EMPTY;
145         dispatchList->at(unitId).first = (Wavefront*)nullptr;
146    }
147
148    collectStatistics(PostExec, 0);
149}
150
151void
152ExecStage::regStats()
153{
154    numTransActiveIdle
155       .name(name() + ".num_transitions_active_to_idle")
156       .desc("number of CU transitions from active to idle")
157        ;
158
159    numCyclesWithNoIssue
160        .name(name() + ".num_cycles_with_no_issue")
161        .desc("number of cycles the CU issues nothing")
162        ;
163
164    numCyclesWithInstrIssued
165        .name(name() + ".num_cycles_with_instr_issued")
166        .desc("number of cycles the CU issued at least one instruction")
167        ;
168
169    spc
170        .init(0, numSIMDs + numMemUnits, 1)
171        .name(name() + ".spc")
172        .desc("Execution units active per cycle (Exec unit=SIMD,MemPipe)")
173        ;
174
175    idleDur
176        .init(0,75,5)
177        .name(name() + ".idle_duration_in_cycles")
178        .desc("duration of idle periods in cycles")
179        ;
180
181    numCyclesWithInstrTypeIssued
182        .init(numSIMDs + numMemUnits)
183        .name(name() + ".num_cycles_with_instrtype_issue")
184        .desc("Number of cycles at least one instruction of specific type "
185              "issued")
186        ;
187
188    numCyclesWithNoInstrTypeIssued
189        .init(numSIMDs + numMemUnits)
190       .name(name() + ".num_cycles_with_instr_type_no_issue")
191       .desc("Number of cycles no instruction of specific type issued")
192       ;
193
194    for (int i = 0; i < numSIMDs; ++i) {
195        numCyclesWithInstrTypeIssued.subname(i, csprintf("ALU%d",i));
196        numCyclesWithNoInstrTypeIssued.subname(i, csprintf("ALU%d",i));
197    }
198
199    numCyclesWithInstrTypeIssued.subname(numSIMDs, csprintf("GM"));
200    numCyclesWithNoInstrTypeIssued.subname(numSIMDs, csprintf("GM"));
201    numCyclesWithInstrTypeIssued.subname(numSIMDs + 1, csprintf("LM"));
202    numCyclesWithNoInstrTypeIssued.subname(numSIMDs + 1, csprintf("LM"));
203}
204