111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2014-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1712697Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its 1812697Santhony.gutierrez@amd.com * contributors may be used to endorse or promote products derived from this 1912697Santhony.gutierrez@amd.com * software without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3312697Santhony.gutierrez@amd.com * Authors: John Kalamatianos, 3412697Santhony.gutierrez@amd.com * Joe Gross 3511308Santhony.gutierrez@amd.com */ 3611308Santhony.gutierrez@amd.com 3711308Santhony.gutierrez@amd.com#include "gpu-compute/lds_state.hh" 3811308Santhony.gutierrez@amd.com 3911308Santhony.gutierrez@amd.com#include <array> 4011308Santhony.gutierrez@amd.com#include <cstdio> 4111308Santhony.gutierrez@amd.com#include <cstdlib> 4211308Santhony.gutierrez@amd.com 4311308Santhony.gutierrez@amd.com#include "gpu-compute/compute_unit.hh" 4411308Santhony.gutierrez@amd.com#include "gpu-compute/gpu_dyn_inst.hh" 4511308Santhony.gutierrez@amd.com#include "gpu-compute/shader.hh" 4611308Santhony.gutierrez@amd.com 4711308Santhony.gutierrez@amd.com/** 4811308Santhony.gutierrez@amd.com * the default constructor that works with SWIG 4911308Santhony.gutierrez@amd.com */ 5011308Santhony.gutierrez@amd.comLdsState::LdsState(const Params *params) : 5113892Sgabeblack@google.com ClockedObject(params), 5211308Santhony.gutierrez@amd.com tickEvent(this), 5311308Santhony.gutierrez@amd.com cuPort(name() + ".port", this), 5411308Santhony.gutierrez@amd.com maximumSize(params->size), 5511308Santhony.gutierrez@amd.com range(params->range), 5611308Santhony.gutierrez@amd.com bankConflictPenalty(params->bankConflictPenalty), 5711308Santhony.gutierrez@amd.com banks(params->banks) 5811308Santhony.gutierrez@amd.com{ 5911308Santhony.gutierrez@amd.com fatal_if(params->banks <= 0, 6011308Santhony.gutierrez@amd.com "Number of LDS banks should be positive number"); 6111308Santhony.gutierrez@amd.com fatal_if((params->banks & (params->banks - 1)) != 0, 6211308Santhony.gutierrez@amd.com "Number of LDS banks should be a power of 2"); 6311308Santhony.gutierrez@amd.com fatal_if(params->size <= 0, 6411308Santhony.gutierrez@amd.com "cannot allocate an LDS with a size less than 1"); 6511308Santhony.gutierrez@amd.com fatal_if(params->size % 2, 6611308Santhony.gutierrez@amd.com "the LDS should be an even number"); 6711308Santhony.gutierrez@amd.com} 6811308Santhony.gutierrez@amd.com 6911308Santhony.gutierrez@amd.com/** 7011308Santhony.gutierrez@amd.com * Needed by the SWIG compiler 7111308Santhony.gutierrez@amd.com */ 7211308Santhony.gutierrez@amd.comLdsState * 7311308Santhony.gutierrez@amd.comLdsStateParams::create() 7411308Santhony.gutierrez@amd.com{ 7511308Santhony.gutierrez@amd.com return new LdsState(this); 7611308Santhony.gutierrez@amd.com} 7711308Santhony.gutierrez@amd.com 7811308Santhony.gutierrez@amd.com/** 7911308Santhony.gutierrez@amd.com * set the parent and name based on the parent 8011308Santhony.gutierrez@amd.com */ 8111308Santhony.gutierrez@amd.comvoid 8211308Santhony.gutierrez@amd.comLdsState::setParent(ComputeUnit *x_parent) 8311308Santhony.gutierrez@amd.com{ 8411308Santhony.gutierrez@amd.com // check that this gets assigned to the same thing each time 8511308Santhony.gutierrez@amd.com fatal_if(!x_parent, "x_parent should not be nullptr"); 8611308Santhony.gutierrez@amd.com fatal_if(x_parent == parent, 8711308Santhony.gutierrez@amd.com "should not be setting the parent twice"); 8811308Santhony.gutierrez@amd.com 8911308Santhony.gutierrez@amd.com parent = x_parent; 9011308Santhony.gutierrez@amd.com _name = x_parent->name() + ".LdsState"; 9111308Santhony.gutierrez@amd.com} 9211308Santhony.gutierrez@amd.com 9311308Santhony.gutierrez@amd.com/** 9411308Santhony.gutierrez@amd.com * derive the gpu mem packet from the packet and then count the bank conflicts 9511308Santhony.gutierrez@amd.com */ 9611308Santhony.gutierrez@amd.comunsigned 9711308Santhony.gutierrez@amd.comLdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses) 9811308Santhony.gutierrez@amd.com{ 9911308Santhony.gutierrez@amd.com Packet::SenderState *baseSenderState = packet->senderState; 10011308Santhony.gutierrez@amd.com while (baseSenderState->predecessor) { 10111308Santhony.gutierrez@amd.com baseSenderState = baseSenderState->predecessor; 10211308Santhony.gutierrez@amd.com } 10311308Santhony.gutierrez@amd.com const ComputeUnit::LDSPort::SenderState *senderState = 10411308Santhony.gutierrez@amd.com dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState); 10511308Santhony.gutierrez@amd.com 10611308Santhony.gutierrez@amd.com fatal_if(!senderState, 10711308Santhony.gutierrez@amd.com "did not get the right sort of sender state"); 10811308Santhony.gutierrez@amd.com 10911308Santhony.gutierrez@amd.com GPUDynInstPtr gpuDynInst = senderState->getMemInst(); 11011308Santhony.gutierrez@amd.com 11111308Santhony.gutierrez@amd.com return countBankConflicts(gpuDynInst, bankAccesses); 11211308Santhony.gutierrez@amd.com} 11311308Santhony.gutierrez@amd.com 11411308Santhony.gutierrez@amd.com// Count the total number of bank conflicts for the local memory packet 11511308Santhony.gutierrez@amd.comunsigned 11611308Santhony.gutierrez@amd.comLdsState::countBankConflicts(GPUDynInstPtr gpuDynInst, 11711308Santhony.gutierrez@amd.com unsigned *numBankAccesses) 11811308Santhony.gutierrez@amd.com{ 11911308Santhony.gutierrez@amd.com int bank_conflicts = 0; 12011308Santhony.gutierrez@amd.com std::vector<int> bank; 12111308Santhony.gutierrez@amd.com // the number of LDS banks being touched by the memory instruction 12211308Santhony.gutierrez@amd.com int numBanks = std::min(parent->wfSize(), banks); 12311308Santhony.gutierrez@amd.com // if the wavefront size is larger than the number of LDS banks, we 12411308Santhony.gutierrez@amd.com // need to iterate over all work items to calculate the total 12511308Santhony.gutierrez@amd.com // number of bank conflicts 12611308Santhony.gutierrez@amd.com int groups = (parent->wfSize() > numBanks) ? 12711308Santhony.gutierrez@amd.com (parent->wfSize() / numBanks) : 1; 12811308Santhony.gutierrez@amd.com for (int i = 0; i < groups; i++) { 12911308Santhony.gutierrez@amd.com // Address Array holding all the work item addresses of an instruction 13011308Santhony.gutierrez@amd.com std::vector<Addr> addr_array; 13111308Santhony.gutierrez@amd.com addr_array.resize(numBanks, 0); 13211308Santhony.gutierrez@amd.com bank.clear(); 13311308Santhony.gutierrez@amd.com bank.resize(banks, 0); 13411308Santhony.gutierrez@amd.com int max_bank = 0; 13511308Santhony.gutierrez@amd.com 13611308Santhony.gutierrez@amd.com // populate the address array for all active work items 13711308Santhony.gutierrez@amd.com for (int j = 0; j < numBanks; j++) { 13811308Santhony.gutierrez@amd.com if (gpuDynInst->exec_mask[(i*numBanks)+j]) { 13911308Santhony.gutierrez@amd.com addr_array[j] = gpuDynInst->addr[(i*numBanks)+j]; 14011308Santhony.gutierrez@amd.com } else { 14111308Santhony.gutierrez@amd.com addr_array[j] = std::numeric_limits<Addr>::max(); 14211308Santhony.gutierrez@amd.com } 14311308Santhony.gutierrez@amd.com } 14411308Santhony.gutierrez@amd.com 14511692Santhony.gutierrez@amd.com if (gpuDynInst->isLoad() || gpuDynInst->isStore()) { 14611308Santhony.gutierrez@amd.com // mask identical addresses 14711308Santhony.gutierrez@amd.com for (int j = 0; j < numBanks; ++j) { 14811308Santhony.gutierrez@amd.com for (int j0 = 0; j0 < j; j0++) { 14911308Santhony.gutierrez@amd.com if (addr_array[j] != std::numeric_limits<Addr>::max() 15011308Santhony.gutierrez@amd.com && addr_array[j] == addr_array[j0]) { 15111308Santhony.gutierrez@amd.com addr_array[j] = std::numeric_limits<Addr>::max(); 15211308Santhony.gutierrez@amd.com } 15311308Santhony.gutierrez@amd.com } 15411308Santhony.gutierrez@amd.com } 15511308Santhony.gutierrez@amd.com } 15611308Santhony.gutierrez@amd.com // calculate bank conflicts 15711308Santhony.gutierrez@amd.com for (int j = 0; j < numBanks; ++j) { 15811308Santhony.gutierrez@amd.com if (addr_array[j] != std::numeric_limits<Addr>::max()) { 15911308Santhony.gutierrez@amd.com int bankId = addr_array[j] % banks; 16011308Santhony.gutierrez@amd.com bank[bankId]++; 16111308Santhony.gutierrez@amd.com max_bank = std::max(max_bank, bank[bankId]); 16211308Santhony.gutierrez@amd.com // Count the number of LDS banks accessed. 16311308Santhony.gutierrez@amd.com // Since we have masked identical addresses all remaining 16411308Santhony.gutierrez@amd.com // accesses will need to be serialized if they access 16511308Santhony.gutierrez@amd.com // the same bank (bank conflict). 16611308Santhony.gutierrez@amd.com (*numBankAccesses)++; 16711308Santhony.gutierrez@amd.com } 16811308Santhony.gutierrez@amd.com } 16911308Santhony.gutierrez@amd.com bank_conflicts += max_bank; 17011308Santhony.gutierrez@amd.com } 17111308Santhony.gutierrez@amd.com panic_if(bank_conflicts > parent->wfSize(), 17211308Santhony.gutierrez@amd.com "Max bank conflicts should match num of work items per instr"); 17311308Santhony.gutierrez@amd.com return bank_conflicts; 17411308Santhony.gutierrez@amd.com} 17511308Santhony.gutierrez@amd.com 17611308Santhony.gutierrez@amd.com/** 17711308Santhony.gutierrez@amd.com * receive the packet from the CU 17811308Santhony.gutierrez@amd.com */ 17911308Santhony.gutierrez@amd.combool 18011308Santhony.gutierrez@amd.comLdsState::CuSidePort::recvTimingReq(PacketPtr packet) 18111308Santhony.gutierrez@amd.com{ 18211308Santhony.gutierrez@amd.com return ownerLds->processPacket(packet); 18311308Santhony.gutierrez@amd.com} 18411308Santhony.gutierrez@amd.com 18511308Santhony.gutierrez@amd.comGPUDynInstPtr 18611308Santhony.gutierrez@amd.comLdsState::getDynInstr(PacketPtr packet) 18711308Santhony.gutierrez@amd.com{ 18811308Santhony.gutierrez@amd.com ComputeUnit::LDSPort::SenderState *ss = 18911308Santhony.gutierrez@amd.com dynamic_cast<ComputeUnit::LDSPort::SenderState *>( 19011308Santhony.gutierrez@amd.com packet->senderState); 19111308Santhony.gutierrez@amd.com return ss->getMemInst(); 19211308Santhony.gutierrez@amd.com} 19311308Santhony.gutierrez@amd.com 19411308Santhony.gutierrez@amd.com/** 19511308Santhony.gutierrez@amd.com * process an incoming packet, add it to the return queue 19611308Santhony.gutierrez@amd.com */ 19711308Santhony.gutierrez@amd.combool 19811308Santhony.gutierrez@amd.comLdsState::processPacket(PacketPtr packet) 19911308Santhony.gutierrez@amd.com{ 20011308Santhony.gutierrez@amd.com unsigned bankAccesses = 0; 20111308Santhony.gutierrez@amd.com // the number of conflicts this packet will have when accessing the LDS 20211308Santhony.gutierrez@amd.com unsigned bankConflicts = countBankConflicts(packet, &bankAccesses); 20311308Santhony.gutierrez@amd.com // count the total number of physical LDS bank accessed 20411308Santhony.gutierrez@amd.com parent->ldsBankAccesses += bankAccesses; 20511308Santhony.gutierrez@amd.com // count the LDS bank conflicts. A number set to 1 indicates one 20611308Santhony.gutierrez@amd.com // access per bank maximum so there are no bank conflicts 20711308Santhony.gutierrez@amd.com parent->ldsBankConflictDist.sample(bankConflicts-1); 20811308Santhony.gutierrez@amd.com 20911308Santhony.gutierrez@amd.com GPUDynInstPtr dynInst = getDynInstr(packet); 21011308Santhony.gutierrez@amd.com // account for the LDS bank conflict overhead 21111692Santhony.gutierrez@amd.com int busLength = (dynInst->isLoad()) ? parent->loadBusLength() : 21211692Santhony.gutierrez@amd.com (dynInst->isStore()) ? parent->storeBusLength() : 21311308Santhony.gutierrez@amd.com parent->loadBusLength(); 21411308Santhony.gutierrez@amd.com // delay for accessing the LDS 21511308Santhony.gutierrez@amd.com Tick processingTime = 21611308Santhony.gutierrez@amd.com parent->shader->ticks(bankConflicts * bankConflictPenalty) + 21711308Santhony.gutierrez@amd.com parent->shader->ticks(busLength); 21811308Santhony.gutierrez@amd.com // choose (delay + last packet in queue) or (now + delay) as the time to 21911308Santhony.gutierrez@amd.com // return this 22011308Santhony.gutierrez@amd.com Tick doneAt = earliestReturnTime() + processingTime; 22111308Santhony.gutierrez@amd.com // then store it for processing 22211308Santhony.gutierrez@amd.com return returnQueuePush(std::make_pair(doneAt, packet)); 22311308Santhony.gutierrez@amd.com} 22411308Santhony.gutierrez@amd.com 22511308Santhony.gutierrez@amd.com/** 22611308Santhony.gutierrez@amd.com * add this to the queue of packets to be returned 22711308Santhony.gutierrez@amd.com */ 22811308Santhony.gutierrez@amd.combool 22911308Santhony.gutierrez@amd.comLdsState::returnQueuePush(std::pair<Tick, PacketPtr> thePair) 23011308Santhony.gutierrez@amd.com{ 23111308Santhony.gutierrez@amd.com // TODO add time limits (e.g. one packet per cycle) and queue size limits 23211308Santhony.gutierrez@amd.com // and implement flow control 23311308Santhony.gutierrez@amd.com returnQueue.push(thePair); 23411308Santhony.gutierrez@amd.com 23511308Santhony.gutierrez@amd.com // if there is no set wakeup time, look through the queue 23611308Santhony.gutierrez@amd.com if (!tickEvent.scheduled()) { 23711308Santhony.gutierrez@amd.com process(); 23811308Santhony.gutierrez@amd.com } 23911308Santhony.gutierrez@amd.com 24011308Santhony.gutierrez@amd.com return true; 24111308Santhony.gutierrez@amd.com} 24211308Santhony.gutierrez@amd.com 24311308Santhony.gutierrez@amd.com/** 24411308Santhony.gutierrez@amd.com * receive a packet in functional mode 24511308Santhony.gutierrez@amd.com */ 24611308Santhony.gutierrez@amd.comvoid 24711308Santhony.gutierrez@amd.comLdsState::CuSidePort::recvFunctional(PacketPtr pkt) 24811308Santhony.gutierrez@amd.com{ 24911308Santhony.gutierrez@amd.com fatal("not implemented"); 25011308Santhony.gutierrez@amd.com} 25111308Santhony.gutierrez@amd.com 25211308Santhony.gutierrez@amd.com/** 25311308Santhony.gutierrez@amd.com * receive a retry for a response 25411308Santhony.gutierrez@amd.com */ 25511308Santhony.gutierrez@amd.comvoid 25611308Santhony.gutierrez@amd.comLdsState::CuSidePort::recvRespRetry() 25711308Santhony.gutierrez@amd.com{ 25811308Santhony.gutierrez@amd.com // TODO verify that this is the right way to do this 25911308Santhony.gutierrez@amd.com assert(ownerLds->isRetryResp()); 26011308Santhony.gutierrez@amd.com ownerLds->setRetryResp(false); 26111308Santhony.gutierrez@amd.com ownerLds->process(); 26211308Santhony.gutierrez@amd.com} 26311308Santhony.gutierrez@amd.com 26411308Santhony.gutierrez@amd.com/** 26511308Santhony.gutierrez@amd.com * receive a retry 26611308Santhony.gutierrez@amd.com */ 26711308Santhony.gutierrez@amd.comvoid 26811308Santhony.gutierrez@amd.comLdsState::CuSidePort::recvRetry() 26911308Santhony.gutierrez@amd.com{ 27011308Santhony.gutierrez@amd.com fatal("not implemented"); 27111308Santhony.gutierrez@amd.com} 27211308Santhony.gutierrez@amd.com 27311308Santhony.gutierrez@amd.com/** 27411308Santhony.gutierrez@amd.com * look for packets to return at this time 27511308Santhony.gutierrez@amd.com */ 27611308Santhony.gutierrez@amd.combool 27711308Santhony.gutierrez@amd.comLdsState::process() 27811308Santhony.gutierrez@amd.com{ 27911308Santhony.gutierrez@amd.com Tick now = clockEdge(); 28011308Santhony.gutierrez@amd.com 28111308Santhony.gutierrez@amd.com // send back completed packets 28211308Santhony.gutierrez@amd.com while (!returnQueue.empty() && returnQueue.front().first <= now) { 28311308Santhony.gutierrez@amd.com PacketPtr packet = returnQueue.front().second; 28411308Santhony.gutierrez@amd.com 28511308Santhony.gutierrez@amd.com ComputeUnit::LDSPort::SenderState *ss = 28611308Santhony.gutierrez@amd.com dynamic_cast<ComputeUnit::LDSPort::SenderState *>( 28711308Santhony.gutierrez@amd.com packet->senderState); 28811308Santhony.gutierrez@amd.com 28911308Santhony.gutierrez@amd.com GPUDynInstPtr gpuDynInst = ss->getMemInst(); 29011308Santhony.gutierrez@amd.com 29111308Santhony.gutierrez@amd.com gpuDynInst->initiateAcc(gpuDynInst); 29211308Santhony.gutierrez@amd.com 29311308Santhony.gutierrez@amd.com packet->makeTimingResponse(); 29411308Santhony.gutierrez@amd.com 29511308Santhony.gutierrez@amd.com returnQueue.pop(); 29611308Santhony.gutierrez@amd.com 29711308Santhony.gutierrez@amd.com bool success = cuPort.sendTimingResp(packet); 29811308Santhony.gutierrez@amd.com 29911308Santhony.gutierrez@amd.com if (!success) { 30011308Santhony.gutierrez@amd.com retryResp = true; 30111308Santhony.gutierrez@amd.com panic("have not handled timing responses being NACK'd when sent" 30211308Santhony.gutierrez@amd.com "back"); 30311308Santhony.gutierrez@amd.com } 30411308Santhony.gutierrez@amd.com } 30511308Santhony.gutierrez@amd.com 30611308Santhony.gutierrez@amd.com // determine the next wakeup time 30711308Santhony.gutierrez@amd.com if (!returnQueue.empty()) { 30811308Santhony.gutierrez@amd.com 30911308Santhony.gutierrez@amd.com Tick next = returnQueue.front().first; 31011308Santhony.gutierrez@amd.com 31111308Santhony.gutierrez@amd.com if (tickEvent.scheduled()) { 31211308Santhony.gutierrez@amd.com 31311308Santhony.gutierrez@amd.com if (next < tickEvent.when()) { 31411308Santhony.gutierrez@amd.com 31511308Santhony.gutierrez@amd.com tickEvent.deschedule(); 31611308Santhony.gutierrez@amd.com tickEvent.schedule(next); 31711308Santhony.gutierrez@amd.com } 31811308Santhony.gutierrez@amd.com } else { 31911308Santhony.gutierrez@amd.com tickEvent.schedule(next); 32011308Santhony.gutierrez@amd.com } 32111308Santhony.gutierrez@amd.com } 32211308Santhony.gutierrez@amd.com 32311308Santhony.gutierrez@amd.com return true; 32411308Santhony.gutierrez@amd.com} 32511308Santhony.gutierrez@amd.com 32611308Santhony.gutierrez@amd.com/** 32711308Santhony.gutierrez@amd.com * wake up at this time and perform specified actions 32811308Santhony.gutierrez@amd.com */ 32911308Santhony.gutierrez@amd.comvoid 33011308Santhony.gutierrez@amd.comLdsState::TickEvent::process() 33111308Santhony.gutierrez@amd.com{ 33211308Santhony.gutierrez@amd.com ldsState->process(); 33311308Santhony.gutierrez@amd.com} 334