global_memory_pipeline.cc revision 11308
/*
 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: John Kalamatianos, Sooraj Puthoor
 */
356242Sgblack@eecs.umich.edu
366242Sgblack@eecs.umich.edu#include "gpu-compute/global_memory_pipeline.hh"
376242Sgblack@eecs.umich.edu
386242Sgblack@eecs.umich.edu#include "debug/GPUMem.hh"
396242Sgblack@eecs.umich.edu#include "debug/GPUReg.hh"
406242Sgblack@eecs.umich.edu#include "gpu-compute/compute_unit.hh"
416242Sgblack@eecs.umich.edu#include "gpu-compute/gpu_dyn_inst.hh"
426242Sgblack@eecs.umich.edu#include "gpu-compute/shader.hh"
436242Sgblack@eecs.umich.edu#include "gpu-compute/vector_register_file.hh"
446242Sgblack@eecs.umich.edu#include "gpu-compute/wavefront.hh"
456242Sgblack@eecs.umich.edu
466242Sgblack@eecs.umich.eduGlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams* p) :
476242Sgblack@eecs.umich.edu    computeUnit(nullptr), gmQueueSize(p->global_mem_queue_size),
486242Sgblack@eecs.umich.edu    inflightStores(0), inflightLoads(0)
496242Sgblack@eecs.umich.edu{
506242Sgblack@eecs.umich.edu}
516242Sgblack@eecs.umich.edu
526242Sgblack@eecs.umich.eduvoid
536242Sgblack@eecs.umich.eduGlobalMemPipeline::init(ComputeUnit *cu)
546242Sgblack@eecs.umich.edu{
556242Sgblack@eecs.umich.edu    computeUnit = cu;
566242Sgblack@eecs.umich.edu    globalMemSize = computeUnit->shader->globalMemSize;
576242Sgblack@eecs.umich.edu    _name = computeUnit->name() + ".GlobalMemPipeline";
586242Sgblack@eecs.umich.edu}
596242Sgblack@eecs.umich.edu
606242Sgblack@eecs.umich.eduvoid
616242Sgblack@eecs.umich.eduGlobalMemPipeline::exec()
626242Sgblack@eecs.umich.edu{
636242Sgblack@eecs.umich.edu    // apply any returned global memory operations
646242Sgblack@eecs.umich.edu    GPUDynInstPtr m = !gmReturnedLoads.empty() ? gmReturnedLoads.front() :
657111Sgblack@eecs.umich.edu        !gmReturnedStores.empty() ? gmReturnedStores.front() : nullptr;
666242Sgblack@eecs.umich.edu
676242Sgblack@eecs.umich.edu    bool accessVrf = true;
686242Sgblack@eecs.umich.edu    // check the VRF to see if the operands of a load (or load component
696242Sgblack@eecs.umich.edu    // of an atomic) are accessible
706735Sgblack@eecs.umich.edu    if ((m) && (m->m_op==Enums::MO_LD || MO_A(m->m_op))) {
716242Sgblack@eecs.umich.edu        Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
726242Sgblack@eecs.umich.edu
736242Sgblack@eecs.umich.edu        accessVrf =
746723Sgblack@eecs.umich.edu            w->computeUnit->vrf[m->simdId]->
756242Sgblack@eecs.umich.edu            vrfOperandAccessReady(m->seqNum(), w, m,
766242Sgblack@eecs.umich.edu                                  VrfAccessType::WRITE);
776261Sgblack@eecs.umich.edu    }
786403Sgblack@eecs.umich.edu
796403Sgblack@eecs.umich.edu    if ((!gmReturnedStores.empty() || !gmReturnedLoads.empty()) &&
806403Sgblack@eecs.umich.edu        m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() &&
817259Sgblack@eecs.umich.edu        accessVrf && m->statusBitVector == VectorMask(0) &&
827259Sgblack@eecs.umich.edu        (computeUnit->shader->coissue_return ||
837259Sgblack@eecs.umich.edu         computeUnit->wfWait.at(m->pipeId).rdy())) {
847259Sgblack@eecs.umich.edu
857264Sgblack@eecs.umich.edu        if (m->v_type == VT_32 && m->m_type == Enums::M_U8)
867267Sgblack@eecs.umich.edu            doGmReturn<uint32_t, uint8_t>(m);
877265Sgblack@eecs.umich.edu        else if (m->v_type == VT_32 && m->m_type == Enums::M_U16)
887266Sgblack@eecs.umich.edu            doGmReturn<uint32_t, uint16_t>(m);
897266Sgblack@eecs.umich.edu        else if (m->v_type == VT_32 && m->m_type == Enums::M_U32)
907266Sgblack@eecs.umich.edu            doGmReturn<uint32_t, uint32_t>(m);
917268Sgblack@eecs.umich.edu        else if (m->v_type == VT_32 && m->m_type == Enums::M_S8)
927271Sgblack@eecs.umich.edu            doGmReturn<int32_t, int8_t>(m);
937259Sgblack@eecs.umich.edu        else if (m->v_type == VT_32 && m->m_type == Enums::M_S16)
947259Sgblack@eecs.umich.edu            doGmReturn<int32_t, int16_t>(m);
957259Sgblack@eecs.umich.edu        else if (m->v_type == VT_32 && m->m_type == Enums::M_S32)
967259Sgblack@eecs.umich.edu            doGmReturn<int32_t, int32_t>(m);
977259Sgblack@eecs.umich.edu        else if (m->v_type == VT_32 && m->m_type == Enums::M_F16)
987259Sgblack@eecs.umich.edu            doGmReturn<float, Float16>(m);
997259Sgblack@eecs.umich.edu        else if (m->v_type == VT_32 && m->m_type == Enums::M_F32)
1007259Sgblack@eecs.umich.edu            doGmReturn<float, float>(m);
1017259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_U8)
1027259Sgblack@eecs.umich.edu            doGmReturn<uint64_t, uint8_t>(m);
1037259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_U16)
1047259Sgblack@eecs.umich.edu            doGmReturn<uint64_t, uint16_t>(m);
1057259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_U32)
1067259Sgblack@eecs.umich.edu            doGmReturn<uint64_t, uint32_t>(m);
1077259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_U64)
1087259Sgblack@eecs.umich.edu            doGmReturn<uint64_t, uint64_t>(m);
1097259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_S8)
1107259Sgblack@eecs.umich.edu            doGmReturn<int64_t, int8_t>(m);
1117259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_S16)
1127259Sgblack@eecs.umich.edu            doGmReturn<int64_t, int16_t>(m);
1137259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_S32)
1147259Sgblack@eecs.umich.edu            doGmReturn<int64_t, int32_t>(m);
1157259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_S64)
1167259Sgblack@eecs.umich.edu            doGmReturn<int64_t, int64_t>(m);
1177259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_F16)
1187259Sgblack@eecs.umich.edu            doGmReturn<double, Float16>(m);
1197259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_F32)
1207259Sgblack@eecs.umich.edu            doGmReturn<double, float>(m);
1217259Sgblack@eecs.umich.edu        else if (m->v_type == VT_64 && m->m_type == Enums::M_F64)
1227259Sgblack@eecs.umich.edu            doGmReturn<double, double>(m);
1237259Sgblack@eecs.umich.edu    }
1247259Sgblack@eecs.umich.edu
1257259Sgblack@eecs.umich.edu    // If pipeline has executed a global memory instruction
1267259Sgblack@eecs.umich.edu    // execute global memory packets and issue global
1277259Sgblack@eecs.umich.edu    // memory packets to DTLB
1287259Sgblack@eecs.umich.edu    if (!gmIssuedRequests.empty()) {
1297259Sgblack@eecs.umich.edu        GPUDynInstPtr mp = gmIssuedRequests.front();
1307259Sgblack@eecs.umich.edu        if (mp->m_op == Enums::MO_LD ||
1317259Sgblack@eecs.umich.edu            (mp->m_op >= Enums::MO_AAND && mp->m_op <= Enums::MO_AMIN) ||
1327259Sgblack@eecs.umich.edu            (mp->m_op >= Enums::MO_ANRAND && mp->m_op <= Enums::MO_ANRMIN)) {
1337259Sgblack@eecs.umich.edu
1347259Sgblack@eecs.umich.edu            if (inflightLoads >= gmQueueSize) {
1357259Sgblack@eecs.umich.edu                return;
1367259Sgblack@eecs.umich.edu            } else {
1377259Sgblack@eecs.umich.edu                ++inflightLoads;
1387259Sgblack@eecs.umich.edu            }
1397259Sgblack@eecs.umich.edu        } else {
1407259Sgblack@eecs.umich.edu            if (inflightStores >= gmQueueSize) {
1417259Sgblack@eecs.umich.edu                return;
1427259Sgblack@eecs.umich.edu            } else {
1437259Sgblack@eecs.umich.edu                ++inflightStores;
1447259Sgblack@eecs.umich.edu            }
1457259Sgblack@eecs.umich.edu        }
1467259Sgblack@eecs.umich.edu
1477259Sgblack@eecs.umich.edu        mp->initiateAcc(mp);
1487259Sgblack@eecs.umich.edu        gmIssuedRequests.pop();
1497259Sgblack@eecs.umich.edu
1507259Sgblack@eecs.umich.edu        DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = %s\n",
1516735Sgblack@eecs.umich.edu                computeUnit->cu_id, mp->simdId, mp->wfSlotId,
1526261Sgblack@eecs.umich.edu                Enums::MemOpTypeStrings[mp->m_op]);
1536261Sgblack@eecs.umich.edu    }
1547259Sgblack@eecs.umich.edu}
1557259Sgblack@eecs.umich.edu
1567259Sgblack@eecs.umich.edutemplate<typename c0, typename c1>
1576261Sgblack@eecs.umich.eduvoid
1587259Sgblack@eecs.umich.eduGlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
1597259Sgblack@eecs.umich.edu{
1607259Sgblack@eecs.umich.edu    Wavefront *w = computeUnit->wfList[m->simdId][m->wfSlotId];
1617267Sgblack@eecs.umich.edu
1627267Sgblack@eecs.umich.edu    // Return data to registers
1637271Sgblack@eecs.umich.edu    if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
1647259Sgblack@eecs.umich.edu        gmReturnedLoads.pop();
1657259Sgblack@eecs.umich.edu        assert(inflightLoads > 0);
1667259Sgblack@eecs.umich.edu        --inflightLoads;
1677271Sgblack@eecs.umich.edu
1687259Sgblack@eecs.umich.edu        if (m->m_op == Enums::MO_LD || MO_A(m->m_op)) {
1697259Sgblack@eecs.umich.edu            std::vector<uint32_t> regVec;
1707259Sgblack@eecs.umich.edu            // iterate over number of destination register operands since
1717268Sgblack@eecs.umich.edu            // this is a load or atomic operation
1727267Sgblack@eecs.umich.edu            for (int k = 0; k < m->n_reg; ++k) {
1737259Sgblack@eecs.umich.edu                assert((sizeof(c1) * m->n_reg) <= MAX_WIDTH_FOR_MEM_INST);
1746242Sgblack@eecs.umich.edu                int dst = m->dst_reg + k;
1756242Sgblack@eecs.umich.edu
1766242Sgblack@eecs.umich.edu                if (m->n_reg > MAX_REGS_FOR_NON_VEC_MEM_INST)
1776242Sgblack@eecs.umich.edu                    dst = m->dst_reg_vec[k];
1786242Sgblack@eecs.umich.edu                // virtual->physical VGPR mapping
1796242Sgblack@eecs.umich.edu                int physVgpr = w->remap(dst, sizeof(c0), 1);
1806242Sgblack@eecs.umich.edu                // save the physical VGPR index
1816242Sgblack@eecs.umich.edu                regVec.push_back(physVgpr);
1826735Sgblack@eecs.umich.edu                c1 *p1 = &((c1*)m->d_data)[k * VSZ];
1836242Sgblack@eecs.umich.edu
1846242Sgblack@eecs.umich.edu                for (int i = 0; i < VSZ; ++i) {
1856735Sgblack@eecs.umich.edu                    if (m->exec_mask[i]) {
1866242Sgblack@eecs.umich.edu                        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
1876242Sgblack@eecs.umich.edu                                "$%s%d <- %d global ld done (src = wavefront "
1886242Sgblack@eecs.umich.edu                                "ld inst)\n", w->computeUnit->cu_id, w->simdId,
1896242Sgblack@eecs.umich.edu                                w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d",
1906242Sgblack@eecs.umich.edu                                dst, *p1);
1916242Sgblack@eecs.umich.edu                        // write the value into the physical VGPR. This is a
1926242Sgblack@eecs.umich.edu                        // purely functional operation. No timing is modeled.
1936735Sgblack@eecs.umich.edu                        w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr,
1946750Sgblack@eecs.umich.edu                                                                    *p1, i);
1956750Sgblack@eecs.umich.edu                    }
1966750Sgblack@eecs.umich.edu                    ++p1;
1976750Sgblack@eecs.umich.edu                }
1987093Sgblack@eecs.umich.edu            }
1997093Sgblack@eecs.umich.edu
2007093Sgblack@eecs.umich.edu            // Schedule the write operation of the load data on the VRF.
2017093Sgblack@eecs.umich.edu            // This simply models the timing aspect of the VRF write operation.
2027093Sgblack@eecs.umich.edu            // It does not modify the physical VGPR.
2037093Sgblack@eecs.umich.edu            loadVrfBankConflictCycles +=
2047093Sgblack@eecs.umich.edu                w->computeUnit->vrf[w->simdId]->exec(m->seqNum(),
2056735Sgblack@eecs.umich.edu                                                     w, regVec, sizeof(c0),
2066735Sgblack@eecs.umich.edu                                                     m->time);
2076735Sgblack@eecs.umich.edu        }
2086735Sgblack@eecs.umich.edu    } else {
2096735Sgblack@eecs.umich.edu        gmReturnedStores.pop();
2106735Sgblack@eecs.umich.edu        assert(inflightStores > 0);
2116735Sgblack@eecs.umich.edu        --inflightStores;
2126735Sgblack@eecs.umich.edu    }
2136735Sgblack@eecs.umich.edu
2146735Sgblack@eecs.umich.edu    // Decrement outstanding register count
2156735Sgblack@eecs.umich.edu    computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1);
2166735Sgblack@eecs.umich.edu
2176735Sgblack@eecs.umich.edu    if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) ||
2186735Sgblack@eecs.umich.edu        MO_H(m->m_op)) {
2196735Sgblack@eecs.umich.edu        computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_gm, m->time,
2206735Sgblack@eecs.umich.edu                                         -1);
2216735Sgblack@eecs.umich.edu    }
2226735Sgblack@eecs.umich.edu
2236735Sgblack@eecs.umich.edu    if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
2246735Sgblack@eecs.umich.edu        computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_gm, m->time,
2256735Sgblack@eecs.umich.edu                                         -1);
2266735Sgblack@eecs.umich.edu    }
2276735Sgblack@eecs.umich.edu
2286735Sgblack@eecs.umich.edu    // Mark write bus busy for appropriate amount of time
2296242Sgblack@eecs.umich.edu    computeUnit->glbMemToVrfBus.set(m->time);
2306242Sgblack@eecs.umich.edu    if (!computeUnit->shader->coissue_return)
2316242Sgblack@eecs.umich.edu        w->computeUnit->wfWait.at(m->pipeId).set(m->time);
232}
233
234void
235GlobalMemPipeline::regStats()
236{
237    loadVrfBankConflictCycles
238        .name(name() + ".load_vrf_bank_conflict_cycles")
239        .desc("total number of cycles GM data are delayed before updating "
240              "the VRF")
241        ;
242}
243