1/* 2 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its 18 * contributors may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Authors: Sooraj Puthoor 34 */ 35 36#include "gpu-compute/local_memory_pipeline.hh" 37 38#include "debug/GPUPort.hh" 39#include "gpu-compute/compute_unit.hh" 40#include "gpu-compute/gpu_dyn_inst.hh" 41#include "gpu-compute/shader.hh" 42#include "gpu-compute/vector_register_file.hh" 43#include "gpu-compute/wavefront.hh" 44 45LocalMemPipeline::LocalMemPipeline(const ComputeUnitParams* p) : 46 computeUnit(nullptr), lmQueueSize(p->local_mem_queue_size) 47{ 48} 49 50void 51LocalMemPipeline::init(ComputeUnit *cu) 52{ 53 computeUnit = cu; 54 _name = computeUnit->name() + ".LocalMemPipeline"; 55} 56 57void 58LocalMemPipeline::exec() 59{ 60 // apply any returned shared (LDS) memory operations 61 GPUDynInstPtr m = !lmReturnedRequests.empty() ? 62 lmReturnedRequests.front() : nullptr; 63 64 bool accessVrf = true; 65 Wavefront *w = nullptr; 66 67 if ((m) && (m->isLoad() || m->isAtomicRet())) { 68 w = m->wavefront(); 69 70 accessVrf = 71 w->computeUnit->vrf[w->simdId]-> 72 vrfOperandAccessReady(m->seqNum(), w, m, 73 VrfAccessType::WRITE); 74 } 75 76 if (!lmReturnedRequests.empty() && m->latency.rdy() && accessVrf && 77 computeUnit->locMemToVrfBus.rdy() && (computeUnit->shader->coissue_return 78 || computeUnit->wfWait.at(m->pipeId).rdy())) { 79 80 lmReturnedRequests.pop(); 81 w = m->wavefront(); 82 83 m->completeAcc(m); 84 85 // Decrement outstanding request count 86 computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1); 87 88 if (m->isStore() || m->isAtomic()) { 89 computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm, 90 m->time, -1); 91 } 92 93 if (m->isLoad() || m->isAtomic()) { 94 computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm, 95 m->time, -1); 96 } 97 98 // Mark write bus busy for appropriate amount of time 99 computeUnit->locMemToVrfBus.set(m->time); 100 if (computeUnit->shader->coissue_return == 0) 101 w->computeUnit->wfWait.at(m->pipeId).set(m->time); 102 } 103 104 // If pipeline has executed a local memory instruction 105 // execute local memory packet and issue the packets 106 // to LDS 107 if (!lmIssuedRequests.empty() && lmReturnedRequests.size() < lmQueueSize) { 108 109 GPUDynInstPtr m = lmIssuedRequests.front(); 110 111 bool returnVal = computeUnit->sendToLds(m); 112 if (!returnVal) { 113 DPRINTF(GPUPort, "packet was nack'd and put in retry queue"); 114 } 115 lmIssuedRequests.pop(); 116 } 117} 118 119void 120LocalMemPipeline::regStats() 121{ 122 loadVrfBankConflictCycles 123 .name(name() + ".load_vrf_bank_conflict_cycles") 124 .desc("total number of cycles LDS data are delayed before updating " 125 "the VRF") 126 ; 127} 128