lds_state.cc revision 11308
/*
 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: John Kalamatianos, Joe Gross
 */
352292SN/A
362292SN/A#include "gpu-compute/lds_state.hh"
372292SN/A
382292SN/A#include <array>
392292SN/A#include <cstdio>
402292SN/A#include <cstdlib>
412689Sktlim@umich.edu
422689Sktlim@umich.edu#include "gpu-compute/compute_unit.hh"
432689Sktlim@umich.edu#include "gpu-compute/gpu_dyn_inst.hh"
442292SN/A#include "gpu-compute/shader.hh"
452292SN/A
469944Smatt.horsnell@ARM.com/**
479944Smatt.horsnell@ARM.com * the default constructor that works with SWIG
489944Smatt.horsnell@ARM.com */
498591Sgblack@eecs.umich.eduLdsState::LdsState(const Params *params) :
503326Sktlim@umich.edu    MemObject(params),
518229Snate@binkert.org    tickEvent(this),
526658Snate@binkert.org    cuPort(name() + ".port", this),
538887Sgeoffrey.blake@arm.com    maximumSize(params->size),
542907Sktlim@umich.edu    range(params->range),
552292SN/A    bankConflictPenalty(params->bankConflictPenalty),
568232Snate@binkert.org    banks(params->banks)
578232Snate@binkert.org{
588232Snate@binkert.org    fatal_if(params->banks <= 0,
599527SMatt.Horsnell@arm.com             "Number of LDS banks should be positive number");
602722Sktlim@umich.edu    fatal_if((params->banks & (params->banks - 1)) != 0,
612669Sktlim@umich.edu             "Number of LDS banks should be a power of 2");
622292SN/A    fatal_if(params->size <= 0,
632669Sktlim@umich.edu             "cannot allocate an LDS with a size less than 1");
6413429Srekai.gonzalezalberquilla@arm.com    fatal_if(params->size % 2,
6513429Srekai.gonzalezalberquilla@arm.com          "the LDS should be an even number");
668581Ssteve.reinhardt@amd.com}
678581Ssteve.reinhardt@amd.com
682292SN/A/**
6913590Srekai.gonzalezalberquilla@arm.com * Needed by the SWIG compiler
7013590Srekai.gonzalezalberquilla@arm.com */
712292SN/ALdsState *
722292SN/ALdsStateParams::create()
732669Sktlim@umich.edu{
742292SN/A    return new LdsState(this);
752678Sktlim@umich.edu}
762292SN/A
779444SAndreas.Sandberg@ARM.com/**
789444SAndreas.Sandberg@ARM.com * set the parent and name based on the parent
799444SAndreas.Sandberg@ARM.com */
804319Sktlim@umich.eduvoid
8113590Srekai.gonzalezalberquilla@arm.comLdsState::setParent(ComputeUnit *x_parent)
8213590Srekai.gonzalezalberquilla@arm.com{
832678Sktlim@umich.edu    // check that this gets assigned to the same thing each time
842678Sktlim@umich.edu    fatal_if(!x_parent, "x_parent should not be nullptr");
852292SN/A    fatal_if(x_parent == parent,
862678Sktlim@umich.edu             "should not be setting the parent twice");
872678Sktlim@umich.edu
885336Shines@cs.fsu.edu    parent = x_parent;
892678Sktlim@umich.edu    _name = x_parent->name() + ".LdsState";
904873Sstever@eecs.umich.edu}
912678Sktlim@umich.edu
922292SN/A/**
9313590Srekai.gonzalezalberquilla@arm.com * derive the gpu mem packet from the packet and then count the bank conflicts
9413590Srekai.gonzalezalberquilla@arm.com */
9513590Srekai.gonzalezalberquilla@arm.comunsigned
9613590Srekai.gonzalezalberquilla@arm.comLdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
9713590Srekai.gonzalezalberquilla@arm.com{
9813590Srekai.gonzalezalberquilla@arm.com    Packet::SenderState *baseSenderState = packet->senderState;
9913590Srekai.gonzalezalberquilla@arm.com    while (baseSenderState->predecessor) {
10013590Srekai.gonzalezalberquilla@arm.com        baseSenderState = baseSenderState->predecessor;
10113590Srekai.gonzalezalberquilla@arm.com    }
10213590Srekai.gonzalezalberquilla@arm.com    const ComputeUnit::LDSPort::SenderState *senderState =
10313590Srekai.gonzalezalberquilla@arm.com            dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState);
10413590Srekai.gonzalezalberquilla@arm.com
10513590Srekai.gonzalezalberquilla@arm.com    fatal_if(!senderState,
10613590Srekai.gonzalezalberquilla@arm.com             "did not get the right sort of sender state");
10713590Srekai.gonzalezalberquilla@arm.com
10813590Srekai.gonzalezalberquilla@arm.com    GPUDynInstPtr gpuDynInst = senderState->getMemInst();
10913590Srekai.gonzalezalberquilla@arm.com
11013590Srekai.gonzalezalberquilla@arm.com    return countBankConflicts(gpuDynInst, bankAccesses);
1112678Sktlim@umich.edu}
1122678Sktlim@umich.edu
1132678Sktlim@umich.edu// Count the total number of bank conflicts for the local memory packet
1142678Sktlim@umich.eduunsigned
1152678Sktlim@umich.eduLdsState::countBankConflicts(GPUDynInstPtr gpuDynInst,
1162678Sktlim@umich.edu                             unsigned *numBankAccesses)
1172344SN/A{
11813590Srekai.gonzalezalberquilla@arm.com    int bank_conflicts = 0;
1192678Sktlim@umich.edu    std::vector<int> bank;
12013590Srekai.gonzalezalberquilla@arm.com    // the number of LDS banks being touched by the memory instruction
12113590Srekai.gonzalezalberquilla@arm.com    int numBanks = std::min(parent->wfSize(), banks);
12213590Srekai.gonzalezalberquilla@arm.com    // if the wavefront size is larger than the number of LDS banks, we
1236974Stjones1@inf.ed.ac.uk    // need to iterate over all work items to calculate the total
1249444SAndreas.Sandberg@ARM.com    // number of bank conflicts
12510327Smitch.hayenga@arm.com    int groups = (parent->wfSize() > numBanks) ?
12613590Srekai.gonzalezalberquilla@arm.com        (parent->wfSize() / numBanks) : 1;
12713652Sqtt2@cornell.edu    for (int i = 0; i < groups; i++) {
12812216Snikos.nikoleris@arm.com        // Address Array holding all the work item addresses of an instruction
12913652Sqtt2@cornell.edu        std::vector<Addr> addr_array;
13013652Sqtt2@cornell.edu        addr_array.resize(numBanks, 0);
13113590Srekai.gonzalezalberquilla@arm.com        bank.clear();
13213652Sqtt2@cornell.edu        bank.resize(banks, 0);
13313590Srekai.gonzalezalberquilla@arm.com        int max_bank = 0;
13413590Srekai.gonzalezalberquilla@arm.com
13513590Srekai.gonzalezalberquilla@arm.com        // populate the address array for all active work items
1366974Stjones1@inf.ed.ac.uk        for (int j = 0; j < numBanks; j++) {
13713590Srekai.gonzalezalberquilla@arm.com            if (gpuDynInst->exec_mask[(i*numBanks)+j]) {
13813652Sqtt2@cornell.edu                addr_array[j] = gpuDynInst->addr[(i*numBanks)+j];
13913652Sqtt2@cornell.edu            } else {
14013590Srekai.gonzalezalberquilla@arm.com                addr_array[j] = std::numeric_limits<Addr>::max();
1412678Sktlim@umich.edu            }
1422344SN/A        }
1432292SN/A
1442292SN/A        if (gpuDynInst->m_op == Enums::MO_LD ||
1452292SN/A            gpuDynInst->m_op == Enums::MO_ST) {
14613472Srekai.gonzalezalberquilla@arm.com            // mask identical addresses
14713472Srekai.gonzalezalberquilla@arm.com            for (int j = 0; j < numBanks; ++j) {
14813472Srekai.gonzalezalberquilla@arm.com                for (int j0 = 0; j0 < j; j0++) {
14913590Srekai.gonzalezalberquilla@arm.com                    if (addr_array[j] != std::numeric_limits<Addr>::max()
15013590Srekai.gonzalezalberquilla@arm.com                                    && addr_array[j] == addr_array[j0]) {
1512292SN/A                        addr_array[j] = std::numeric_limits<Addr>::max();
1522292SN/A                    }
1532292SN/A                }
1542292SN/A            }
1552292SN/A        }
1565529Snate@binkert.org        // calculate bank conflicts
15713472Srekai.gonzalezalberquilla@arm.com        for (int j = 0; j < numBanks; ++j) {
1582292SN/A            if (addr_array[j] != std::numeric_limits<Addr>::max()) {
15913472Srekai.gonzalezalberquilla@arm.com                int bankId = addr_array[j] % banks;
16013472Srekai.gonzalezalberquilla@arm.com                bank[bankId]++;
1614329Sktlim@umich.edu                max_bank = std::max(max_bank, bank[bankId]);
1624329Sktlim@umich.edu                // Count the number of LDS banks accessed.
1634329Sktlim@umich.edu                // Since we have masked identical addresses all remaining
1642907Sktlim@umich.edu                // accesses will need to be serialized if they access
1652907Sktlim@umich.edu                // the same bank (bank conflict).
16613472Srekai.gonzalezalberquilla@arm.com                (*numBankAccesses)++;
1672292SN/A            }
1688199SAli.Saidi@ARM.com        }
1698199SAli.Saidi@ARM.com        bank_conflicts += max_bank;
1709444SAndreas.Sandberg@ARM.com    }
1719444SAndreas.Sandberg@ARM.com    panic_if(bank_conflicts > parent->wfSize(),
1729444SAndreas.Sandberg@ARM.com             "Max bank conflicts should match num of work items per instr");
1739444SAndreas.Sandberg@ARM.com    return bank_conflicts;
1749444SAndreas.Sandberg@ARM.com}
1759444SAndreas.Sandberg@ARM.com
1769444SAndreas.Sandberg@ARM.com/**
1779444SAndreas.Sandberg@ARM.com * receive the packet from the CU
1789444SAndreas.Sandberg@ARM.com */
1799444SAndreas.Sandberg@ARM.combool
1809444SAndreas.Sandberg@ARM.comLdsState::CuSidePort::recvTimingReq(PacketPtr packet)
1818199SAli.Saidi@ARM.com{
1822292SN/A    return ownerLds->processPacket(packet);
18313590Srekai.gonzalezalberquilla@arm.com}
1842292SN/A
1853492Sktlim@umich.eduGPUDynInstPtr
1862329SN/ALdsState::getDynInstr(PacketPtr packet)
1872292SN/A{
1889444SAndreas.Sandberg@ARM.com    ComputeUnit::LDSPort::SenderState *ss =
1899444SAndreas.Sandberg@ARM.com        dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
1909814Sandreas.hansson@arm.com                     packet->senderState);
1912292SN/A    return ss->getMemInst();
1922292SN/A}
1932292SN/A
1942292SN/A/**
1952292SN/A * process an incoming packet, add it to the return queue
1962292SN/A */
1972292SN/Abool
1982292SN/ALdsState::processPacket(PacketPtr packet)
1992292SN/A{
20010386Sandreas.hansson@arm.com    unsigned bankAccesses = 0;
2012292SN/A    // the number of conflicts this packet will have when accessing the LDS
2022292SN/A    unsigned bankConflicts = countBankConflicts(packet, &bankAccesses);
2032292SN/A    // count the total number of physical LDS bank accessed
2042292SN/A    parent->ldsBankAccesses += bankAccesses;
2052292SN/A    // count the LDS bank conflicts. A number set to 1 indicates one
2062727Sktlim@umich.edu    // access per bank maximum so there are no bank conflicts
2072727Sktlim@umich.edu    parent->ldsBankConflictDist.sample(bankConflicts-1);
2082727Sktlim@umich.edu
2092727Sktlim@umich.edu    GPUDynInstPtr dynInst = getDynInstr(packet);
2102727Sktlim@umich.edu    // account for the LDS bank conflict overhead
2112727Sktlim@umich.edu    int busLength = (dynInst->m_op == Enums::MO_LD) ? parent->loadBusLength() :
2122727Sktlim@umich.edu        (dynInst->m_op == Enums::MO_ST) ? parent->storeBusLength() :
2132727Sktlim@umich.edu        parent->loadBusLength();
2142727Sktlim@umich.edu    // delay for accessing the LDS
2152727Sktlim@umich.edu    Tick processingTime =
2162727Sktlim@umich.edu        parent->shader->ticks(bankConflicts * bankConflictPenalty) +
2172727Sktlim@umich.edu        parent->shader->ticks(busLength);
2182727Sktlim@umich.edu    // choose (delay + last packet in queue) or (now + delay) as the time to
2192727Sktlim@umich.edu    // return this
2202727Sktlim@umich.edu    Tick doneAt = earliestReturnTime() + processingTime;
2212727Sktlim@umich.edu    // then store it for processing
2222727Sktlim@umich.edu    return returnQueuePush(std::make_pair(doneAt, packet));
2232727Sktlim@umich.edu}
2242361SN/A
2252361SN/A/**
2262361SN/A * add this to the queue of packets to be returned
2272361SN/A */
2282727Sktlim@umich.edubool
2292727Sktlim@umich.eduLdsState::returnQueuePush(std::pair<Tick, PacketPtr> thePair)
2302727Sktlim@umich.edu{
2312727Sktlim@umich.edu    // TODO add time limits (e.g. one packet per cycle) and queue size limits
2322727Sktlim@umich.edu    // and implement flow control
2332727Sktlim@umich.edu    returnQueue.push(thePair);
2342727Sktlim@umich.edu
2352727Sktlim@umich.edu    // if there is no set wakeup time, look through the queue
2362727Sktlim@umich.edu    if (!tickEvent.scheduled()) {
2372727Sktlim@umich.edu        process();
2382727Sktlim@umich.edu    }
2392727Sktlim@umich.edu
2402727Sktlim@umich.edu    return true;
2412727Sktlim@umich.edu}
2422727Sktlim@umich.edu
2432727Sktlim@umich.edu/**
2442727Sktlim@umich.edu * receive a packet in functional mode
2452727Sktlim@umich.edu */
2462727Sktlim@umich.eduvoid
2472727Sktlim@umich.eduLdsState::CuSidePort::recvFunctional(PacketPtr pkt)
2482727Sktlim@umich.edu{
2492727Sktlim@umich.edu    fatal("not implemented");
2502727Sktlim@umich.edu}
2518922Swilliam.wang@arm.com
2524329Sktlim@umich.edu/**
2534329Sktlim@umich.edu * receive a retry for a response
2544329Sktlim@umich.edu */
2554329Sktlim@umich.eduvoid
2564329Sktlim@umich.eduLdsState::CuSidePort::recvRespRetry()
2574329Sktlim@umich.edu{
2589444SAndreas.Sandberg@ARM.com    // TODO verify that this is the right way to do this
2592307SN/A    assert(ownerLds->isRetryResp());
26013590Srekai.gonzalezalberquilla@arm.com    ownerLds->setRetryResp(false);
26113590Srekai.gonzalezalberquilla@arm.com    ownerLds->process();
2622307SN/A}
2632329SN/A
2649444SAndreas.Sandberg@ARM.com/**
2652307SN/A * receive a retry
2662307SN/A */
2672307SN/Avoid
2682307SN/ALdsState::CuSidePort::recvRetry()
2692307SN/A{
2702307SN/A    fatal("not implemented");
2719444SAndreas.Sandberg@ARM.com}
2722307SN/A
2732307SN/A/**
2742292SN/A * look for packets to return at this time
2752292SN/A */
27613429Srekai.gonzalezalberquilla@arm.combool
2772292SN/ALdsState::process()
2782292SN/A{
2792292SN/A    Tick now = clockEdge();
28013652Sqtt2@cornell.edu
2812292SN/A    // send back completed packets
2822292SN/A    while (!returnQueue.empty() && returnQueue.front().first <= now) {
2832292SN/A        PacketPtr packet = returnQueue.front().second;
2842292SN/A
2852292SN/A        ComputeUnit::LDSPort::SenderState *ss =
2862292SN/A            dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
2872292SN/A                            packet->senderState);
2882292SN/A
2892292SN/A        GPUDynInstPtr gpuDynInst = ss->getMemInst();
2902292SN/A
2912292SN/A        gpuDynInst->initiateAcc(gpuDynInst);
2922292SN/A
29313429Srekai.gonzalezalberquilla@arm.com        packet->makeTimingResponse();
2942292SN/A
29513590Srekai.gonzalezalberquilla@arm.com        returnQueue.pop();
29613590Srekai.gonzalezalberquilla@arm.com
2972292SN/A        bool success = cuPort.sendTimingResp(packet);
2987720Sgblack@eecs.umich.edu
29913590Srekai.gonzalezalberquilla@arm.com        if (!success) {
3002292SN/A            retryResp = true;
30113590Srekai.gonzalezalberquilla@arm.com            panic("have not handled timing responses being NACK'd when sent"
30213590Srekai.gonzalezalberquilla@arm.com                            "back");
3032292SN/A        }
30413590Srekai.gonzalezalberquilla@arm.com    }
3052292SN/A
30613590Srekai.gonzalezalberquilla@arm.com    // determine the next wakeup time
30713590Srekai.gonzalezalberquilla@arm.com    if (!returnQueue.empty()) {
30813590Srekai.gonzalezalberquilla@arm.com
30913590Srekai.gonzalezalberquilla@arm.com        Tick next = returnQueue.front().first;
3102292SN/A
3112292SN/A        if (tickEvent.scheduled()) {
3122292SN/A
3132292SN/A            if (next < tickEvent.when()) {
3142292SN/A
3152292SN/A                tickEvent.deschedule();
31613590Srekai.gonzalezalberquilla@arm.com                tickEvent.schedule(next);
3172292SN/A            }
3182292SN/A        } else {
31913590Srekai.gonzalezalberquilla@arm.com            tickEvent.schedule(next);
32013590Srekai.gonzalezalberquilla@arm.com        }
3212292SN/A    }
3227720Sgblack@eecs.umich.edu
32313590Srekai.gonzalezalberquilla@arm.com    return true;
32413590Srekai.gonzalezalberquilla@arm.com}
3252292SN/A
32613590Srekai.gonzalezalberquilla@arm.com/**
32713590Srekai.gonzalezalberquilla@arm.com * wake up at this time and perform specified actions
32813590Srekai.gonzalezalberquilla@arm.com */
3292292SN/Avoid
33013590Srekai.gonzalezalberquilla@arm.comLdsState::TickEvent::process()
3312292SN/A{
3322292SN/A    ldsState->process();
3332292SN/A}
3342292SN/A
3352292SN/A/**
3362292SN/A *
3372292SN/A */
3382292SN/Avoid
3392292SN/ALdsState::regStats()
3402292SN/A{
3412292SN/A}
3422292SN/A