// lds_state.cc revision 11692
111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1711308Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors 1811308Santhony.gutierrez@amd.com * may be used to endorse or promote products derived from this software 1911308Santhony.gutierrez@amd.com * without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3311308Santhony.gutierrez@amd.com * Author: John Kalamatianos, Joe Gross 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#include "gpu-compute/lds_state.hh" 3711308Santhony.gutierrez@amd.com 3811308Santhony.gutierrez@amd.com#include <array> 3911308Santhony.gutierrez@amd.com#include <cstdio> 4011308Santhony.gutierrez@amd.com#include <cstdlib> 4111308Santhony.gutierrez@amd.com 4211308Santhony.gutierrez@amd.com#include "gpu-compute/compute_unit.hh" 4311308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_dyn_inst.hh" 4411308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh" 4511308Santhony.gutierrez@amd.com 4611308Santhony.gutierrez@amd.com/** 4711308Santhony.gutierrez@amd.com * the default constructor that works with SWIG 4811308Santhony.gutierrez@amd.com */ 4911308Santhony.gutierrez@amd.comLdsState::LdsState(const Params *params) : 5011308Santhony.gutierrez@amd.com MemObject(params), 5111308Santhony.gutierrez@amd.com tickEvent(this), 5211308Santhony.gutierrez@amd.com cuPort(name() + ".port", this), 5311308Santhony.gutierrez@amd.com maximumSize(params->size), 5411308Santhony.gutierrez@amd.com range(params->range), 
5511308Santhony.gutierrez@amd.com bankConflictPenalty(params->bankConflictPenalty), 5611308Santhony.gutierrez@amd.com banks(params->banks) 5711308Santhony.gutierrez@amd.com{ 5811308Santhony.gutierrez@amd.com fatal_if(params->banks <= 0, 5911308Santhony.gutierrez@amd.com "Number of LDS banks should be positive number"); 6011308Santhony.gutierrez@amd.com fatal_if((params->banks & (params->banks - 1)) != 0, 6111308Santhony.gutierrez@amd.com "Number of LDS banks should be a power of 2"); 6211308Santhony.gutierrez@amd.com fatal_if(params->size <= 0, 6311308Santhony.gutierrez@amd.com "cannot allocate an LDS with a size less than 1"); 6411308Santhony.gutierrez@amd.com fatal_if(params->size % 2, 6511308Santhony.gutierrez@amd.com "the LDS should be an even number"); 6611308Santhony.gutierrez@amd.com} 6711308Santhony.gutierrez@amd.com 6811308Santhony.gutierrez@amd.com/** 6911308Santhony.gutierrez@amd.com * Needed by the SWIG compiler 7011308Santhony.gutierrez@amd.com */ 7111308Santhony.gutierrez@amd.comLdsState * 7211308Santhony.gutierrez@amd.comLdsStateParams::create() 7311308Santhony.gutierrez@amd.com{ 7411308Santhony.gutierrez@amd.com return new LdsState(this); 7511308Santhony.gutierrez@amd.com} 7611308Santhony.gutierrez@amd.com 7711308Santhony.gutierrez@amd.com/** 7811308Santhony.gutierrez@amd.com * set the parent and name based on the parent 7911308Santhony.gutierrez@amd.com */ 8011308Santhony.gutierrez@amd.comvoid 8111308Santhony.gutierrez@amd.comLdsState::setParent(ComputeUnit *x_parent) 8211308Santhony.gutierrez@amd.com{ 8311308Santhony.gutierrez@amd.com // check that this gets assigned to the same thing each time 8411308Santhony.gutierrez@amd.com fatal_if(!x_parent, "x_parent should not be nullptr"); 8511308Santhony.gutierrez@amd.com fatal_if(x_parent == parent, 8611308Santhony.gutierrez@amd.com "should not be setting the parent twice"); 8711308Santhony.gutierrez@amd.com 8811308Santhony.gutierrez@amd.com parent = x_parent; 8911308Santhony.gutierrez@amd.com _name = 
x_parent->name() + ".LdsState"; 9011308Santhony.gutierrez@amd.com} 9111308Santhony.gutierrez@amd.com 9211308Santhony.gutierrez@amd.com/** 9311308Santhony.gutierrez@amd.com * derive the gpu mem packet from the packet and then count the bank conflicts 9411308Santhony.gutierrez@amd.com */ 9511308Santhony.gutierrez@amd.comunsigned 9611308Santhony.gutierrez@amd.comLdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses) 9711308Santhony.gutierrez@amd.com{ 9811308Santhony.gutierrez@amd.com Packet::SenderState *baseSenderState = packet->senderState; 9911308Santhony.gutierrez@amd.com while (baseSenderState->predecessor) { 10011308Santhony.gutierrez@amd.com baseSenderState = baseSenderState->predecessor; 10111308Santhony.gutierrez@amd.com } 10211308Santhony.gutierrez@amd.com const ComputeUnit::LDSPort::SenderState *senderState = 10311308Santhony.gutierrez@amd.com dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState); 10411308Santhony.gutierrez@amd.com 10511308Santhony.gutierrez@amd.com fatal_if(!senderState, 10611308Santhony.gutierrez@amd.com "did not get the right sort of sender state"); 10711308Santhony.gutierrez@amd.com 10811308Santhony.gutierrez@amd.com GPUDynInstPtr gpuDynInst = senderState->getMemInst(); 10911308Santhony.gutierrez@amd.com 11011308Santhony.gutierrez@amd.com return countBankConflicts(gpuDynInst, bankAccesses); 11111308Santhony.gutierrez@amd.com} 11211308Santhony.gutierrez@amd.com 11311308Santhony.gutierrez@amd.com// Count the total number of bank conflicts for the local memory packet 11411308Santhony.gutierrez@amd.comunsigned 11511308Santhony.gutierrez@amd.comLdsState::countBankConflicts(GPUDynInstPtr gpuDynInst, 11611308Santhony.gutierrez@amd.com unsigned *numBankAccesses) 11711308Santhony.gutierrez@amd.com{ 11811308Santhony.gutierrez@amd.com int bank_conflicts = 0; 11911308Santhony.gutierrez@amd.com std::vector<int> bank; 12011308Santhony.gutierrez@amd.com // the number of LDS banks being touched by the memory instruction 
12111308Santhony.gutierrez@amd.com int numBanks = std::min(parent->wfSize(), banks); 12211308Santhony.gutierrez@amd.com // if the wavefront size is larger than the number of LDS banks, we 12311308Santhony.gutierrez@amd.com // need to iterate over all work items to calculate the total 12411308Santhony.gutierrez@amd.com // number of bank conflicts 12511308Santhony.gutierrez@amd.com int groups = (parent->wfSize() > numBanks) ? 12611308Santhony.gutierrez@amd.com (parent->wfSize() / numBanks) : 1; 12711308Santhony.gutierrez@amd.com for (int i = 0; i < groups; i++) { 12811308Santhony.gutierrez@amd.com // Address Array holding all the work item addresses of an instruction 12911308Santhony.gutierrez@amd.com std::vector<Addr> addr_array; 13011308Santhony.gutierrez@amd.com addr_array.resize(numBanks, 0); 13111308Santhony.gutierrez@amd.com bank.clear(); 13211308Santhony.gutierrez@amd.com bank.resize(banks, 0); 13311308Santhony.gutierrez@amd.com int max_bank = 0; 13411308Santhony.gutierrez@amd.com 13511308Santhony.gutierrez@amd.com // populate the address array for all active work items 13611308Santhony.gutierrez@amd.com for (int j = 0; j < numBanks; j++) { 13711308Santhony.gutierrez@amd.com if (gpuDynInst->exec_mask[(i*numBanks)+j]) { 13811308Santhony.gutierrez@amd.com addr_array[j] = gpuDynInst->addr[(i*numBanks)+j]; 13911308Santhony.gutierrez@amd.com } else { 14011308Santhony.gutierrez@amd.com addr_array[j] = std::numeric_limits<Addr>::max(); 14111308Santhony.gutierrez@amd.com } 14211308Santhony.gutierrez@amd.com } 14311308Santhony.gutierrez@amd.com 14411692Santhony.gutierrez@amd.com if (gpuDynInst->isLoad() || gpuDynInst->isStore()) { 14511308Santhony.gutierrez@amd.com // mask identical addresses 14611308Santhony.gutierrez@amd.com for (int j = 0; j < numBanks; ++j) { 14711308Santhony.gutierrez@amd.com for (int j0 = 0; j0 < j; j0++) { 14811308Santhony.gutierrez@amd.com if (addr_array[j] != std::numeric_limits<Addr>::max() 14911308Santhony.gutierrez@amd.com && addr_array[j] 
== addr_array[j0]) { 15011308Santhony.gutierrez@amd.com addr_array[j] = std::numeric_limits<Addr>::max(); 15111308Santhony.gutierrez@amd.com } 15211308Santhony.gutierrez@amd.com } 15311308Santhony.gutierrez@amd.com } 15411308Santhony.gutierrez@amd.com } 15511308Santhony.gutierrez@amd.com // calculate bank conflicts 15611308Santhony.gutierrez@amd.com for (int j = 0; j < numBanks; ++j) { 15711308Santhony.gutierrez@amd.com if (addr_array[j] != std::numeric_limits<Addr>::max()) { 15811308Santhony.gutierrez@amd.com int bankId = addr_array[j] % banks; 15911308Santhony.gutierrez@amd.com bank[bankId]++; 16011308Santhony.gutierrez@amd.com max_bank = std::max(max_bank, bank[bankId]); 16111308Santhony.gutierrez@amd.com // Count the number of LDS banks accessed. 16211308Santhony.gutierrez@amd.com // Since we have masked identical addresses all remaining 16311308Santhony.gutierrez@amd.com // accesses will need to be serialized if they access 16411308Santhony.gutierrez@amd.com // the same bank (bank conflict). 
16511308Santhony.gutierrez@amd.com (*numBankAccesses)++; 16611308Santhony.gutierrez@amd.com } 16711308Santhony.gutierrez@amd.com } 16811308Santhony.gutierrez@amd.com bank_conflicts += max_bank; 16911308Santhony.gutierrez@amd.com } 17011308Santhony.gutierrez@amd.com panic_if(bank_conflicts > parent->wfSize(), 17111308Santhony.gutierrez@amd.com "Max bank conflicts should match num of work items per instr"); 17211308Santhony.gutierrez@amd.com return bank_conflicts; 17311308Santhony.gutierrez@amd.com} 17411308Santhony.gutierrez@amd.com 17511308Santhony.gutierrez@amd.com/** 17611308Santhony.gutierrez@amd.com * receive the packet from the CU 17711308Santhony.gutierrez@amd.com */ 17811308Santhony.gutierrez@amd.combool 17911308Santhony.gutierrez@amd.comLdsState::CuSidePort::recvTimingReq(PacketPtr packet) 18011308Santhony.gutierrez@amd.com{ 18111308Santhony.gutierrez@amd.com return ownerLds->processPacket(packet); 18211308Santhony.gutierrez@amd.com} 18311308Santhony.gutierrez@amd.com 18411308Santhony.gutierrez@amd.comGPUDynInstPtr 18511308Santhony.gutierrez@amd.comLdsState::getDynInstr(PacketPtr packet) 18611308Santhony.gutierrez@amd.com{ 18711308Santhony.gutierrez@amd.com ComputeUnit::LDSPort::SenderState *ss = 18811308Santhony.gutierrez@amd.com dynamic_cast<ComputeUnit::LDSPort::SenderState *>( 18911308Santhony.gutierrez@amd.com packet->senderState); 19011308Santhony.gutierrez@amd.com return ss->getMemInst(); 19111308Santhony.gutierrez@amd.com} 19211308Santhony.gutierrez@amd.com 19311308Santhony.gutierrez@amd.com/** 19411308Santhony.gutierrez@amd.com * process an incoming packet, add it to the return queue 19511308Santhony.gutierrez@amd.com */ 19611308Santhony.gutierrez@amd.combool 19711308Santhony.gutierrez@amd.comLdsState::processPacket(PacketPtr packet) 19811308Santhony.gutierrez@amd.com{ 19911308Santhony.gutierrez@amd.com unsigned bankAccesses = 0; 20011308Santhony.gutierrez@amd.com // the number of conflicts this packet will have when accessing the LDS 
20111308Santhony.gutierrez@amd.com unsigned bankConflicts = countBankConflicts(packet, &bankAccesses); 20211308Santhony.gutierrez@amd.com // count the total number of physical LDS bank accessed 20311308Santhony.gutierrez@amd.com parent->ldsBankAccesses += bankAccesses; 20411308Santhony.gutierrez@amd.com // count the LDS bank conflicts. A number set to 1 indicates one 20511308Santhony.gutierrez@amd.com // access per bank maximum so there are no bank conflicts 20611308Santhony.gutierrez@amd.com parent->ldsBankConflictDist.sample(bankConflicts-1); 20711308Santhony.gutierrez@amd.com 20811308Santhony.gutierrez@amd.com GPUDynInstPtr dynInst = getDynInstr(packet); 20911308Santhony.gutierrez@amd.com // account for the LDS bank conflict overhead 21011692Santhony.gutierrez@amd.com int busLength = (dynInst->isLoad()) ? parent->loadBusLength() : 21111692Santhony.gutierrez@amd.com (dynInst->isStore()) ? parent->storeBusLength() : 21211308Santhony.gutierrez@amd.com parent->loadBusLength(); 21311308Santhony.gutierrez@amd.com // delay for accessing the LDS 21411308Santhony.gutierrez@amd.com Tick processingTime = 21511308Santhony.gutierrez@amd.com parent->shader->ticks(bankConflicts * bankConflictPenalty) + 21611308Santhony.gutierrez@amd.com parent->shader->ticks(busLength); 21711308Santhony.gutierrez@amd.com // choose (delay + last packet in queue) or (now + delay) as the time to 21811308Santhony.gutierrez@amd.com // return this 21911308Santhony.gutierrez@amd.com Tick doneAt = earliestReturnTime() + processingTime; 22011308Santhony.gutierrez@amd.com // then store it for processing 22111308Santhony.gutierrez@amd.com return returnQueuePush(std::make_pair(doneAt, packet)); 22211308Santhony.gutierrez@amd.com} 22311308Santhony.gutierrez@amd.com 22411308Santhony.gutierrez@amd.com/** 22511308Santhony.gutierrez@amd.com * add this to the queue of packets to be returned 22611308Santhony.gutierrez@amd.com */ 22711308Santhony.gutierrez@amd.combool 
22811308Santhony.gutierrez@amd.comLdsState::returnQueuePush(std::pair<Tick, PacketPtr> thePair) 22911308Santhony.gutierrez@amd.com{ 23011308Santhony.gutierrez@amd.com // TODO add time limits (e.g. one packet per cycle) and queue size limits 23111308Santhony.gutierrez@amd.com // and implement flow control 23211308Santhony.gutierrez@amd.com returnQueue.push(thePair); 23311308Santhony.gutierrez@amd.com 23411308Santhony.gutierrez@amd.com // if there is no set wakeup time, look through the queue 23511308Santhony.gutierrez@amd.com if (!tickEvent.scheduled()) { 23611308Santhony.gutierrez@amd.com process(); 23711308Santhony.gutierrez@amd.com } 23811308Santhony.gutierrez@amd.com 23911308Santhony.gutierrez@amd.com return true; 24011308Santhony.gutierrez@amd.com} 24111308Santhony.gutierrez@amd.com 24211308Santhony.gutierrez@amd.com/** 24311308Santhony.gutierrez@amd.com * receive a packet in functional mode 24411308Santhony.gutierrez@amd.com */ 24511308Santhony.gutierrez@amd.comvoid 24611308Santhony.gutierrez@amd.comLdsState::CuSidePort::recvFunctional(PacketPtr pkt) 24711308Santhony.gutierrez@amd.com{ 24811308Santhony.gutierrez@amd.com fatal("not implemented"); 24911308Santhony.gutierrez@amd.com} 25011308Santhony.gutierrez@amd.com 25111308Santhony.gutierrez@amd.com/** 25211308Santhony.gutierrez@amd.com * receive a retry for a response 25311308Santhony.gutierrez@amd.com */ 25411308Santhony.gutierrez@amd.comvoid 25511308Santhony.gutierrez@amd.comLdsState::CuSidePort::recvRespRetry() 25611308Santhony.gutierrez@amd.com{ 25711308Santhony.gutierrez@amd.com // TODO verify that this is the right way to do this 25811308Santhony.gutierrez@amd.com assert(ownerLds->isRetryResp()); 25911308Santhony.gutierrez@amd.com ownerLds->setRetryResp(false); 26011308Santhony.gutierrez@amd.com ownerLds->process(); 26111308Santhony.gutierrez@amd.com} 26211308Santhony.gutierrez@amd.com 26311308Santhony.gutierrez@amd.com/** 26411308Santhony.gutierrez@amd.com * receive a retry 
26511308Santhony.gutierrez@amd.com */ 26611308Santhony.gutierrez@amd.comvoid 26711308Santhony.gutierrez@amd.comLdsState::CuSidePort::recvRetry() 26811308Santhony.gutierrez@amd.com{ 26911308Santhony.gutierrez@amd.com fatal("not implemented"); 27011308Santhony.gutierrez@amd.com} 27111308Santhony.gutierrez@amd.com 27211308Santhony.gutierrez@amd.com/** 27311308Santhony.gutierrez@amd.com * look for packets to return at this time 27411308Santhony.gutierrez@amd.com */ 27511308Santhony.gutierrez@amd.combool 27611308Santhony.gutierrez@amd.comLdsState::process() 27711308Santhony.gutierrez@amd.com{ 27811308Santhony.gutierrez@amd.com Tick now = clockEdge(); 27911308Santhony.gutierrez@amd.com 28011308Santhony.gutierrez@amd.com // send back completed packets 28111308Santhony.gutierrez@amd.com while (!returnQueue.empty() && returnQueue.front().first <= now) { 28211308Santhony.gutierrez@amd.com PacketPtr packet = returnQueue.front().second; 28311308Santhony.gutierrez@amd.com 28411308Santhony.gutierrez@amd.com ComputeUnit::LDSPort::SenderState *ss = 28511308Santhony.gutierrez@amd.com dynamic_cast<ComputeUnit::LDSPort::SenderState *>( 28611308Santhony.gutierrez@amd.com packet->senderState); 28711308Santhony.gutierrez@amd.com 28811308Santhony.gutierrez@amd.com GPUDynInstPtr gpuDynInst = ss->getMemInst(); 28911308Santhony.gutierrez@amd.com 29011308Santhony.gutierrez@amd.com gpuDynInst->initiateAcc(gpuDynInst); 29111308Santhony.gutierrez@amd.com 29211308Santhony.gutierrez@amd.com packet->makeTimingResponse(); 29311308Santhony.gutierrez@amd.com 29411308Santhony.gutierrez@amd.com returnQueue.pop(); 29511308Santhony.gutierrez@amd.com 29611308Santhony.gutierrez@amd.com bool success = cuPort.sendTimingResp(packet); 29711308Santhony.gutierrez@amd.com 29811308Santhony.gutierrez@amd.com if (!success) { 29911308Santhony.gutierrez@amd.com retryResp = true; 30011308Santhony.gutierrez@amd.com panic("have not handled timing responses being NACK'd when sent" 30111308Santhony.gutierrez@amd.com 
"back"); 30211308Santhony.gutierrez@amd.com } 30311308Santhony.gutierrez@amd.com } 30411308Santhony.gutierrez@amd.com 30511308Santhony.gutierrez@amd.com // determine the next wakeup time 30611308Santhony.gutierrez@amd.com if (!returnQueue.empty()) { 30711308Santhony.gutierrez@amd.com 30811308Santhony.gutierrez@amd.com Tick next = returnQueue.front().first; 30911308Santhony.gutierrez@amd.com 31011308Santhony.gutierrez@amd.com if (tickEvent.scheduled()) { 31111308Santhony.gutierrez@amd.com 31211308Santhony.gutierrez@amd.com if (next < tickEvent.when()) { 31311308Santhony.gutierrez@amd.com 31411308Santhony.gutierrez@amd.com tickEvent.deschedule(); 31511308Santhony.gutierrez@amd.com tickEvent.schedule(next); 31611308Santhony.gutierrez@amd.com } 31711308Santhony.gutierrez@amd.com } else { 31811308Santhony.gutierrez@amd.com tickEvent.schedule(next); 31911308Santhony.gutierrez@amd.com } 32011308Santhony.gutierrez@amd.com } 32111308Santhony.gutierrez@amd.com 32211308Santhony.gutierrez@amd.com return true; 32311308Santhony.gutierrez@amd.com} 32411308Santhony.gutierrez@amd.com 32511308Santhony.gutierrez@amd.com/** 32611308Santhony.gutierrez@amd.com * wake up at this time and perform specified actions 32711308Santhony.gutierrez@amd.com */ 32811308Santhony.gutierrez@amd.comvoid 32911308Santhony.gutierrez@amd.comLdsState::TickEvent::process() 33011308Santhony.gutierrez@amd.com{ 33111308Santhony.gutierrez@amd.com ldsState->process(); 33211308Santhony.gutierrez@amd.com} 333