1/* 2 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its 18 * contributors may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Authors: John Kalamatianos, 34 * Sooraj Puthoor 35 */ 36 37#include "gpu-compute/exec_stage.hh" 38 39#include "gpu-compute/compute_unit.hh" 40#include "gpu-compute/wavefront.hh" 41 42ExecStage::ExecStage(const ComputeUnitParams *p) : numSIMDs(p->num_SIMDs), 43 numMemUnits(p->num_global_mem_pipes + p->num_shared_mem_pipes), 44 vectorAluInstAvail(nullptr), glbMemInstAvail(nullptr), 45 shrMemInstAvail(nullptr), lastTimeInstExecuted(false), 46 thisTimeInstExecuted(false), instrExecuted (false), 47 executionResourcesUsed(0) 48{ 49 numTransActiveIdle = 0; 50 idle_dur = 0; 51} 52 53void 54ExecStage::init(ComputeUnit *cu) 55{ 56 computeUnit = cu; 57 _name = computeUnit->name() + ".ExecStage"; 58 dispatchList = &computeUnit->dispatchList; 59 vectorAluInstAvail = &(computeUnit->vectorAluInstAvail); 60 glbMemInstAvail= &(computeUnit->glbMemInstAvail); 61 shrMemInstAvail= &(computeUnit->shrMemInstAvail); 62 idle_dur = 0; 63} 64 65void 66ExecStage::collectStatistics(enum STAT_STATUS stage, int unitId) { 67 if (stage == IdleExec) { 68 // count cycles of no vector ALU instruction executed 69 // even if one was the oldest in a WV of that vector SIMD unit 70 if (computeUnit->isVecAlu(unitId) && vectorAluInstAvail->at(unitId)) { 71 numCyclesWithNoInstrTypeIssued[unitId]++; 72 } 73 74 // count cycles of no global memory (vector) instruction executed 75 // even if one was the oldest in a WV of that vector SIMD unit 76 if (computeUnit->isGlbMem(unitId) && *glbMemInstAvail > 0) { 77 numCyclesWithNoInstrTypeIssued[unitId]++; 78 (*glbMemInstAvail)--; 79 } 80 81 // count cycles of no shared memory (vector) instruction executed 82 // even if one was the oldest in a WV of that vector SIMD unit 83 if (computeUnit->isShrMem(unitId) && *shrMemInstAvail > 0) { 84 numCyclesWithNoInstrTypeIssued[unitId]++; 85 (*shrMemInstAvail)--; 86 } 87 } else if (stage == BusyExec) { 88 // count the number of cycles an instruction to a specific unit 89 // was issued 90 numCyclesWithInstrTypeIssued[unitId]++; 91 thisTimeInstExecuted = true; 92 instrExecuted = true; 93 ++executionResourcesUsed; 94 } else if (stage == PostExec) { 95 // count the number of transitions from active to idle 96 if (lastTimeInstExecuted && !thisTimeInstExecuted) { 97 ++numTransActiveIdle; 98 } 99 100 if (!lastTimeInstExecuted && thisTimeInstExecuted) { 101 idleDur.sample(idle_dur); 102 idle_dur = 0; 103 } else if (!thisTimeInstExecuted) { 104 idle_dur++; 105 } 106 107 lastTimeInstExecuted = thisTimeInstExecuted; 108 // track the number of cycles we either issued one vector instruction 109 // or issued no instructions at all 110 if (instrExecuted) { 111 numCyclesWithInstrIssued++; 112 } else { 113 numCyclesWithNoIssue++; 114 } 115 116 spc.sample(executionResourcesUsed); 117 } 118} 119 120void 121ExecStage::initStatistics() 122{ 123 instrExecuted = false; 124 executionResourcesUsed = 0; 125 thisTimeInstExecuted = false; 126} 127 128void 129ExecStage::exec() 130{ 131 initStatistics(); 132 133 for (int unitId = 0; unitId < (numSIMDs + numMemUnits); ++unitId) { 134 // if dispatch list for this execution resource is empty, 135 // skip this execution resource this cycle 136 if (dispatchList->at(unitId).second == EMPTY) { 137 collectStatistics(IdleExec, unitId); 138 continue; 139 } 140 141 collectStatistics(BusyExec, unitId); 142 // execute an instruction for the WF 143 dispatchList->at(unitId).first->exec(); 144 // clear the dispatch list entry 145 dispatchList->at(unitId).second = EMPTY; 146 dispatchList->at(unitId).first = (Wavefront*)nullptr; 147 } 148 149 collectStatistics(PostExec, 0); 150} 151 152void 153ExecStage::regStats() 154{ 155 numTransActiveIdle 156 .name(name() + ".num_transitions_active_to_idle") 157 .desc("number of CU transitions from active to idle") 158 ; 159 160 numCyclesWithNoIssue 161 .name(name() + ".num_cycles_with_no_issue") 162 .desc("number of cycles the CU issues nothing") 163 ; 164 165 numCyclesWithInstrIssued 166 .name(name() + ".num_cycles_with_instr_issued") 167 .desc("number of cycles the CU issued at least one instruction") 168 ; 169 170 spc 171 .init(0, numSIMDs + numMemUnits, 1) 172 .name(name() + ".spc") 173 .desc("Execution units active per cycle (Exec unit=SIMD,MemPipe)") 174 ; 175 176 idleDur 177 .init(0,75,5) 178 .name(name() + ".idle_duration_in_cycles") 179 .desc("duration of idle periods in cycles") 180 ; 181 182 numCyclesWithInstrTypeIssued 183 .init(numSIMDs + numMemUnits) 184 .name(name() + ".num_cycles_with_instrtype_issue") 185 .desc("Number of cycles at least one instruction of specific type " 186 "issued") 187 ; 188 189 numCyclesWithNoInstrTypeIssued 190 .init(numSIMDs + numMemUnits) 191 .name(name() + ".num_cycles_with_instr_type_no_issue") 192 .desc("Number of cycles no instruction of specific type issued") 193 ; 194 195 for (int i = 0; i < numSIMDs; ++i) { 196 numCyclesWithInstrTypeIssued.subname(i, csprintf("ALU%d",i)); 197 numCyclesWithNoInstrTypeIssued.subname(i, csprintf("ALU%d",i)); 198 } 199 200 numCyclesWithInstrTypeIssued.subname(numSIMDs, csprintf("GM")); 201 numCyclesWithNoInstrTypeIssued.subname(numSIMDs, csprintf("GM")); 202 numCyclesWithInstrTypeIssued.subname(numSIMDs + 1, csprintf("LM")); 203 numCyclesWithNoInstrTypeIssued.subname(numSIMDs + 1, csprintf("LM")); 204} 205