lds_state.cc revision 11308
19814Sandreas.hansson@arm.com/* 22292SN/A * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 313590Srekai.gonzalezalberquilla@arm.com * All rights reserved. 410239Sbinhpham@cs.rutgers.edu * 57597Sminkyu.jeong@arm.com * For use for simulation and test purposes only 67597Sminkyu.jeong@arm.com * 77597Sminkyu.jeong@arm.com * Redistribution and use in source and binary forms, with or without 87597Sminkyu.jeong@arm.com * modification, are permitted provided that the following conditions are met: 97597Sminkyu.jeong@arm.com * 107597Sminkyu.jeong@arm.com * 1. Redistributions of source code must retain the above copyright notice, 117597Sminkyu.jeong@arm.com * this list of conditions and the following disclaimer. 127597Sminkyu.jeong@arm.com * 137597Sminkyu.jeong@arm.com * 2. Redistributions in binary form must reproduce the above copyright notice, 147597Sminkyu.jeong@arm.com * this list of conditions and the following disclaimer in the documentation 157597Sminkyu.jeong@arm.com * and/or other materials provided with the distribution. 162292SN/A * 172292SN/A * 3. Neither the name of the copyright holder nor the names of its contributors 182292SN/A * may be used to endorse or promote products derived from this software 192292SN/A * without specific prior written permission. 202292SN/A * 212292SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 222292SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 232292SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 242292SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 252292SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 262292SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 272292SN/A * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 282292SN/A * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 292292SN/A * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 302292SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 312292SN/A * POSSIBILITY OF SUCH DAMAGE. 322292SN/A * 332292SN/A * Author: John Kalamatianos, Joe Gross 342292SN/A */ 352292SN/A 362292SN/A#include "gpu-compute/lds_state.hh" 372292SN/A 382292SN/A#include <array> 392292SN/A#include <cstdio> 402292SN/A#include <cstdlib> 412689Sktlim@umich.edu 422689Sktlim@umich.edu#include "gpu-compute/compute_unit.hh" 432689Sktlim@umich.edu#include "gpu-compute/gpu_dyn_inst.hh" 442292SN/A#include "gpu-compute/shader.hh" 452292SN/A 469944Smatt.horsnell@ARM.com/** 479944Smatt.horsnell@ARM.com * the default constructor that works with SWIG 489944Smatt.horsnell@ARM.com */ 498591Sgblack@eecs.umich.eduLdsState::LdsState(const Params *params) : 503326Sktlim@umich.edu MemObject(params), 518229Snate@binkert.org tickEvent(this), 526658Snate@binkert.org cuPort(name() + ".port", this), 538887Sgeoffrey.blake@arm.com maximumSize(params->size), 542907Sktlim@umich.edu range(params->range), 552292SN/A bankConflictPenalty(params->bankConflictPenalty), 568232Snate@binkert.org banks(params->banks) 578232Snate@binkert.org{ 588232Snate@binkert.org fatal_if(params->banks <= 0, 599527SMatt.Horsnell@arm.com "Number of LDS banks should be positive number"); 602722Sktlim@umich.edu fatal_if((params->banks & (params->banks - 1)) != 0, 612669Sktlim@umich.edu "Number of LDS banks should be a power of 2"); 622292SN/A fatal_if(params->size <= 0, 632669Sktlim@umich.edu "cannot allocate an LDS with a size less than 1"); 6413429Srekai.gonzalezalberquilla@arm.com fatal_if(params->size % 2, 6513429Srekai.gonzalezalberquilla@arm.com "the LDS should be an even number"); 668581Ssteve.reinhardt@amd.com} 678581Ssteve.reinhardt@amd.com 682292SN/A/** 6913590Srekai.gonzalezalberquilla@arm.com * Needed by the SWIG compiler 7013590Srekai.gonzalezalberquilla@arm.com */ 712292SN/ALdsState * 722292SN/ALdsStateParams::create() 732669Sktlim@umich.edu{ 742292SN/A return new LdsState(this); 752678Sktlim@umich.edu} 762292SN/A 779444SAndreas.Sandberg@ARM.com/** 789444SAndreas.Sandberg@ARM.com * set the parent and name based on the parent 799444SAndreas.Sandberg@ARM.com */ 804319Sktlim@umich.eduvoid 8113590Srekai.gonzalezalberquilla@arm.comLdsState::setParent(ComputeUnit *x_parent) 8213590Srekai.gonzalezalberquilla@arm.com{ 832678Sktlim@umich.edu // check that this gets assigned to the same thing each time 842678Sktlim@umich.edu fatal_if(!x_parent, "x_parent should not be nullptr"); 852292SN/A fatal_if(x_parent == parent, 862678Sktlim@umich.edu "should not be setting the parent twice"); 872678Sktlim@umich.edu 885336Shines@cs.fsu.edu parent = x_parent; 892678Sktlim@umich.edu _name = x_parent->name() + ".LdsState"; 904873Sstever@eecs.umich.edu} 912678Sktlim@umich.edu 922292SN/A/** 9313590Srekai.gonzalezalberquilla@arm.com * derive the gpu mem packet from the packet and then count the bank conflicts 9413590Srekai.gonzalezalberquilla@arm.com */ 9513590Srekai.gonzalezalberquilla@arm.comunsigned 9613590Srekai.gonzalezalberquilla@arm.comLdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses) 9713590Srekai.gonzalezalberquilla@arm.com{ 9813590Srekai.gonzalezalberquilla@arm.com Packet::SenderState *baseSenderState = packet->senderState; 9913590Srekai.gonzalezalberquilla@arm.com while (baseSenderState->predecessor) { 10013590Srekai.gonzalezalberquilla@arm.com baseSenderState = baseSenderState->predecessor; 10113590Srekai.gonzalezalberquilla@arm.com } 10213590Srekai.gonzalezalberquilla@arm.com const ComputeUnit::LDSPort::SenderState *senderState = 10313590Srekai.gonzalezalberquilla@arm.com dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState); 10413590Srekai.gonzalezalberquilla@arm.com 10513590Srekai.gonzalezalberquilla@arm.com fatal_if(!senderState, 10613590Srekai.gonzalezalberquilla@arm.com "did not get the right sort of sender state"); 10713590Srekai.gonzalezalberquilla@arm.com 10813590Srekai.gonzalezalberquilla@arm.com GPUDynInstPtr gpuDynInst = senderState->getMemInst(); 10913590Srekai.gonzalezalberquilla@arm.com 11013590Srekai.gonzalezalberquilla@arm.com return countBankConflicts(gpuDynInst, bankAccesses); 1112678Sktlim@umich.edu} 1122678Sktlim@umich.edu 1132678Sktlim@umich.edu// Count the total number of bank conflicts for the local memory packet 1142678Sktlim@umich.eduunsigned 1152678Sktlim@umich.eduLdsState::countBankConflicts(GPUDynInstPtr gpuDynInst, 1162678Sktlim@umich.edu unsigned *numBankAccesses) 1172344SN/A{ 11813590Srekai.gonzalezalberquilla@arm.com int bank_conflicts = 0; 1192678Sktlim@umich.edu std::vector<int> bank; 12013590Srekai.gonzalezalberquilla@arm.com // the number of LDS banks being touched by the memory instruction 12113590Srekai.gonzalezalberquilla@arm.com int numBanks = std::min(parent->wfSize(), banks); 12213590Srekai.gonzalezalberquilla@arm.com // if the wavefront size is larger than the number of LDS banks, we 1236974Stjones1@inf.ed.ac.uk // need to iterate over all work items to calculate the total 1249444SAndreas.Sandberg@ARM.com // number of bank conflicts 12510327Smitch.hayenga@arm.com int groups = (parent->wfSize() > numBanks) ? 12613590Srekai.gonzalezalberquilla@arm.com (parent->wfSize() / numBanks) : 1; 12713652Sqtt2@cornell.edu for (int i = 0; i < groups; i++) { 12812216Snikos.nikoleris@arm.com // Address Array holding all the work item addresses of an instruction 12913652Sqtt2@cornell.edu std::vector<Addr> addr_array; 13013652Sqtt2@cornell.edu addr_array.resize(numBanks, 0); 13113590Srekai.gonzalezalberquilla@arm.com bank.clear(); 13213652Sqtt2@cornell.edu bank.resize(banks, 0); 13313590Srekai.gonzalezalberquilla@arm.com int max_bank = 0; 13413590Srekai.gonzalezalberquilla@arm.com 13513590Srekai.gonzalezalberquilla@arm.com // populate the address array for all active work items 1366974Stjones1@inf.ed.ac.uk for (int j = 0; j < numBanks; j++) { 13713590Srekai.gonzalezalberquilla@arm.com if (gpuDynInst->exec_mask[(i*numBanks)+j]) { 13813652Sqtt2@cornell.edu addr_array[j] = gpuDynInst->addr[(i*numBanks)+j]; 13913652Sqtt2@cornell.edu } else { 14013590Srekai.gonzalezalberquilla@arm.com addr_array[j] = std::numeric_limits<Addr>::max(); 1412678Sktlim@umich.edu } 1422344SN/A } 1432292SN/A 1442292SN/A if (gpuDynInst->m_op == Enums::MO_LD || 1452292SN/A gpuDynInst->m_op == Enums::MO_ST) { 14613472Srekai.gonzalezalberquilla@arm.com // mask identical addresses 14713472Srekai.gonzalezalberquilla@arm.com for (int j = 0; j < numBanks; ++j) { 14813472Srekai.gonzalezalberquilla@arm.com for (int j0 = 0; j0 < j; j0++) { 14913590Srekai.gonzalezalberquilla@arm.com if (addr_array[j] != std::numeric_limits<Addr>::max() 15013590Srekai.gonzalezalberquilla@arm.com && addr_array[j] == addr_array[j0]) { 1512292SN/A addr_array[j] = std::numeric_limits<Addr>::max(); 1522292SN/A } 1532292SN/A } 1542292SN/A } 1552292SN/A } 1565529Snate@binkert.org // calculate bank conflicts 15713472Srekai.gonzalezalberquilla@arm.com for (int j = 0; j < numBanks; ++j) { 1582292SN/A if (addr_array[j] != std::numeric_limits<Addr>::max()) { 15913472Srekai.gonzalezalberquilla@arm.com int bankId = addr_array[j] % banks; 16013472Srekai.gonzalezalberquilla@arm.com bank[bankId]++; 1614329Sktlim@umich.edu max_bank = std::max(max_bank, bank[bankId]); 1624329Sktlim@umich.edu // Count the number of LDS banks accessed. 1634329Sktlim@umich.edu // Since we have masked identical addresses all remaining 1642907Sktlim@umich.edu // accesses will need to be serialized if they access 1652907Sktlim@umich.edu // the same bank (bank conflict). 16613472Srekai.gonzalezalberquilla@arm.com (*numBankAccesses)++; 1672292SN/A } 1688199SAli.Saidi@ARM.com } 1698199SAli.Saidi@ARM.com bank_conflicts += max_bank; 1709444SAndreas.Sandberg@ARM.com } 1719444SAndreas.Sandberg@ARM.com panic_if(bank_conflicts > parent->wfSize(), 1729444SAndreas.Sandberg@ARM.com "Max bank conflicts should match num of work items per instr"); 1739444SAndreas.Sandberg@ARM.com return bank_conflicts; 1749444SAndreas.Sandberg@ARM.com} 1759444SAndreas.Sandberg@ARM.com 1769444SAndreas.Sandberg@ARM.com/** 1779444SAndreas.Sandberg@ARM.com * receive the packet from the CU 1789444SAndreas.Sandberg@ARM.com */ 1799444SAndreas.Sandberg@ARM.combool 1809444SAndreas.Sandberg@ARM.comLdsState::CuSidePort::recvTimingReq(PacketPtr packet) 1818199SAli.Saidi@ARM.com{ 1822292SN/A return ownerLds->processPacket(packet); 18313590Srekai.gonzalezalberquilla@arm.com} 1842292SN/A 1853492Sktlim@umich.eduGPUDynInstPtr 1862329SN/ALdsState::getDynInstr(PacketPtr packet) 1872292SN/A{ 1889444SAndreas.Sandberg@ARM.com ComputeUnit::LDSPort::SenderState *ss = 1899444SAndreas.Sandberg@ARM.com dynamic_cast<ComputeUnit::LDSPort::SenderState *>( 1909814Sandreas.hansson@arm.com packet->senderState); 1912292SN/A return ss->getMemInst(); 1922292SN/A} 1932292SN/A 1942292SN/A/** 1952292SN/A * process an incoming packet, add it to the return queue 1962292SN/A */ 1972292SN/Abool 1982292SN/ALdsState::processPacket(PacketPtr packet) 1992292SN/A{ 20010386Sandreas.hansson@arm.com unsigned bankAccesses = 0; 2012292SN/A // the number of conflicts this packet will have when accessing the LDS 2022292SN/A unsigned bankConflicts = countBankConflicts(packet, &bankAccesses); 2032292SN/A // count the total number of physical LDS bank accessed 2042292SN/A parent->ldsBankAccesses += bankAccesses; 2052292SN/A // count the LDS bank conflicts. A number set to 1 indicates one 2062727Sktlim@umich.edu // access per bank maximum so there are no bank conflicts 2072727Sktlim@umich.edu parent->ldsBankConflictDist.sample(bankConflicts-1); 2082727Sktlim@umich.edu 2092727Sktlim@umich.edu GPUDynInstPtr dynInst = getDynInstr(packet); 2102727Sktlim@umich.edu // account for the LDS bank conflict overhead 2112727Sktlim@umich.edu int busLength = (dynInst->m_op == Enums::MO_LD) ? parent->loadBusLength() : 2122727Sktlim@umich.edu (dynInst->m_op == Enums::MO_ST) ? parent->storeBusLength() : 2132727Sktlim@umich.edu parent->loadBusLength(); 2142727Sktlim@umich.edu // delay for accessing the LDS 2152727Sktlim@umich.edu Tick processingTime = 2162727Sktlim@umich.edu parent->shader->ticks(bankConflicts * bankConflictPenalty) + 2172727Sktlim@umich.edu parent->shader->ticks(busLength); 2182727Sktlim@umich.edu // choose (delay + last packet in queue) or (now + delay) as the time to 2192727Sktlim@umich.edu // return this 2202727Sktlim@umich.edu Tick doneAt = earliestReturnTime() + processingTime; 2212727Sktlim@umich.edu // then store it for processing 2222727Sktlim@umich.edu return returnQueuePush(std::make_pair(doneAt, packet)); 2232727Sktlim@umich.edu} 2242361SN/A 2252361SN/A/** 2262361SN/A * add this to the queue of packets to be returned 2272361SN/A */ 2282727Sktlim@umich.edubool 2292727Sktlim@umich.eduLdsState::returnQueuePush(std::pair<Tick, PacketPtr> thePair) 2302727Sktlim@umich.edu{ 2312727Sktlim@umich.edu // TODO add time limits (e.g. one packet per cycle) and queue size limits 2322727Sktlim@umich.edu // and implement flow control 2332727Sktlim@umich.edu returnQueue.push(thePair); 2342727Sktlim@umich.edu 2352727Sktlim@umich.edu // if there is no set wakeup time, look through the queue 2362727Sktlim@umich.edu if (!tickEvent.scheduled()) { 2372727Sktlim@umich.edu process(); 2382727Sktlim@umich.edu } 2392727Sktlim@umich.edu 2402727Sktlim@umich.edu return true; 2412727Sktlim@umich.edu} 2422727Sktlim@umich.edu 2432727Sktlim@umich.edu/** 2442727Sktlim@umich.edu * receive a packet in functional mode 2452727Sktlim@umich.edu */ 2462727Sktlim@umich.eduvoid 2472727Sktlim@umich.eduLdsState::CuSidePort::recvFunctional(PacketPtr pkt) 2482727Sktlim@umich.edu{ 2492727Sktlim@umich.edu fatal("not implemented"); 2502727Sktlim@umich.edu} 2518922Swilliam.wang@arm.com 2524329Sktlim@umich.edu/** 2534329Sktlim@umich.edu * receive a retry for a response 2544329Sktlim@umich.edu */ 2554329Sktlim@umich.eduvoid 2564329Sktlim@umich.eduLdsState::CuSidePort::recvRespRetry() 2574329Sktlim@umich.edu{ 2589444SAndreas.Sandberg@ARM.com // TODO verify that this is the right way to do this 2592307SN/A assert(ownerLds->isRetryResp()); 26013590Srekai.gonzalezalberquilla@arm.com ownerLds->setRetryResp(false); 26113590Srekai.gonzalezalberquilla@arm.com ownerLds->process(); 2622307SN/A} 2632329SN/A 2649444SAndreas.Sandberg@ARM.com/** 2652307SN/A * receive a retry 2662307SN/A */ 2672307SN/Avoid 2682307SN/ALdsState::CuSidePort::recvRetry() 2692307SN/A{ 2702307SN/A fatal("not implemented"); 2719444SAndreas.Sandberg@ARM.com} 2722307SN/A 2732307SN/A/** 2742292SN/A * look for packets to return at this time 2752292SN/A */ 27613429Srekai.gonzalezalberquilla@arm.combool 2772292SN/ALdsState::process() 2782292SN/A{ 2792292SN/A Tick now = clockEdge(); 28013652Sqtt2@cornell.edu 2812292SN/A // send back completed packets 2822292SN/A while (!returnQueue.empty() && returnQueue.front().first <= now) { 2832292SN/A PacketPtr packet = returnQueue.front().second; 2842292SN/A 2852292SN/A ComputeUnit::LDSPort::SenderState *ss = 2862292SN/A dynamic_cast<ComputeUnit::LDSPort::SenderState *>( 2872292SN/A packet->senderState); 2882292SN/A 2892292SN/A GPUDynInstPtr gpuDynInst = ss->getMemInst(); 2902292SN/A 2912292SN/A gpuDynInst->initiateAcc(gpuDynInst); 2922292SN/A 29313429Srekai.gonzalezalberquilla@arm.com packet->makeTimingResponse(); 2942292SN/A 29513590Srekai.gonzalezalberquilla@arm.com returnQueue.pop(); 29613590Srekai.gonzalezalberquilla@arm.com 2972292SN/A bool success = cuPort.sendTimingResp(packet); 2987720Sgblack@eecs.umich.edu 29913590Srekai.gonzalezalberquilla@arm.com if (!success) { 3002292SN/A retryResp = true; 30113590Srekai.gonzalezalberquilla@arm.com panic("have not handled timing responses being NACK'd when sent" 30213590Srekai.gonzalezalberquilla@arm.com "back"); 3032292SN/A } 30413590Srekai.gonzalezalberquilla@arm.com } 3052292SN/A 30613590Srekai.gonzalezalberquilla@arm.com // determine the next wakeup time 30713590Srekai.gonzalezalberquilla@arm.com if (!returnQueue.empty()) { 30813590Srekai.gonzalezalberquilla@arm.com 30913590Srekai.gonzalezalberquilla@arm.com Tick next = returnQueue.front().first; 3102292SN/A 3112292SN/A if (tickEvent.scheduled()) { 3122292SN/A 3132292SN/A if (next < tickEvent.when()) { 3142292SN/A 3152292SN/A tickEvent.deschedule(); 31613590Srekai.gonzalezalberquilla@arm.com tickEvent.schedule(next); 3172292SN/A } 3182292SN/A } else { 31913590Srekai.gonzalezalberquilla@arm.com tickEvent.schedule(next); 32013590Srekai.gonzalezalberquilla@arm.com } 3212292SN/A } 3227720Sgblack@eecs.umich.edu 32313590Srekai.gonzalezalberquilla@arm.com return true; 32413590Srekai.gonzalezalberquilla@arm.com} 3252292SN/A 32613590Srekai.gonzalezalberquilla@arm.com/** 32713590Srekai.gonzalezalberquilla@arm.com * wake up at this time and perform specified actions 32813590Srekai.gonzalezalberquilla@arm.com */ 3292292SN/Avoid 33013590Srekai.gonzalezalberquilla@arm.comLdsState::TickEvent::process() 3312292SN/A{ 3322292SN/A ldsState->process(); 3332292SN/A} 3342292SN/A 3352292SN/A/** 3362292SN/A * 3372292SN/A */ 3382292SN/Avoid 3392292SN/ALdsState::regStats() 3402292SN/A{ 3412292SN/A} 3422292SN/A