// gpu_dyn_inst.hh revision 11699
/*
 * Copyright (c) 2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Anthony Gutierrez
 */
352022SN/A
#ifndef __GPU_DYN_INST_HH__
#define __GPU_DYN_INST_HH__
382469SN/A
#include <cassert>
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>

#include "enums/MemType.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"
462944Sgblack@eecs.umich.edu
class GPUStaticInst;
482516SN/A
492580SN/Atemplate<typename T>
502580SN/Aclass AtomicOpAnd : public TypedAtomicOpFunctor<T>
512486SN/A{
522944Sgblack@eecs.umich.edu  public:
532646Ssaidi@eecs.umich.edu    T a;
542516SN/A
552580SN/A    AtomicOpAnd(T _a) : a(_a) { }
562580SN/A    void execute(T *b) { *b &= a; }
572486SN/A};
582482SN/A
592944Sgblack@eecs.umich.edutemplate<typename T>
602944Sgblack@eecs.umich.educlass AtomicOpOr : public TypedAtomicOpFunctor<T>
612944Sgblack@eecs.umich.edu{
622944Sgblack@eecs.umich.edu  public:
632944Sgblack@eecs.umich.edu    T a;
642944Sgblack@eecs.umich.edu    AtomicOpOr(T _a) : a(_a) { }
652516SN/A    void execute(T *b) { *b |= a; }
662516SN/A};
672516SN/A
682516SN/Atemplate<typename T>
692482SN/Aclass AtomicOpXor : public TypedAtomicOpFunctor<T>
702482SN/A{
712591SN/A  public:
722516SN/A    T a;
732580SN/A    AtomicOpXor(T _a) : a(_a) {}
742580SN/A    void execute(T *b) { *b ^= a; }
752482SN/A};
762482SN/A
772591SN/Atemplate<typename T>
782516SN/Aclass AtomicOpCAS : public TypedAtomicOpFunctor<T>
792580SN/A{
802580SN/A  public:
812482SN/A    T c;
822482SN/A    T s;
832591SN/A
842516SN/A    ComputeUnit *computeUnit;
852580SN/A
862580SN/A    AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
872482SN/A      : c(_c), s(_s), computeUnit(compute_unit) { }
882482SN/A
892591SN/A    void
902516SN/A    execute(T *b)
912580SN/A    {
922580SN/A        computeUnit->numCASOps++;
932482SN/A
942482SN/A        if (*b == c) {
952591SN/A            *b = s;
962516SN/A        } else {
972580SN/A            computeUnit->numFailedCASOps++;
982580SN/A        }
992482SN/A
1002482SN/A        if (computeUnit->xact_cas_mode) {
1012591SN/A            computeUnit->xactCasLoadMap.clear();
1022516SN/A        }
1032580SN/A    }
1042580SN/A};
1052482SN/A
1062469SN/Atemplate<typename T>
1072482SN/Aclass AtomicOpExch : public TypedAtomicOpFunctor<T>
1082516SN/A{
1093042Sgblack@eecs.umich.edu  public:
1102516SN/A    T a;
1112516SN/A    AtomicOpExch(T _a) : a(_a) { }
1122469SN/A    void execute(T *b) { *b = a; }
1132944Sgblack@eecs.umich.edu};
1142516SN/A
1152516SN/Atemplate<typename T>
1162469SN/Aclass AtomicOpAdd : public TypedAtomicOpFunctor<T>
1172469SN/A{
1182482SN/A  public:
1192482SN/A    T a;
1202974Sgblack@eecs.umich.edu    AtomicOpAdd(T _a) : a(_a) { }
1212974Sgblack@eecs.umich.edu    void execute(T *b) { *b += a; }
1222974Sgblack@eecs.umich.edu};
1232526SN/A
1242974Sgblack@eecs.umich.edutemplate<typename T>
1252974Sgblack@eecs.umich.educlass AtomicOpSub : public TypedAtomicOpFunctor<T>
1262974Sgblack@eecs.umich.edu{
1272646Ssaidi@eecs.umich.edu  public:
1282974Sgblack@eecs.umich.edu    T a;
1292469SN/A    AtomicOpSub(T _a) : a(_a) { }
1302516SN/A    void execute(T *b) { *b -= a; }
1312646Ssaidi@eecs.umich.edu};
1322482SN/A
1332469SN/Atemplate<typename T>
1342516SN/Aclass AtomicOpInc : public TypedAtomicOpFunctor<T>
1352646Ssaidi@eecs.umich.edu{
1362482SN/A  public:
1372954Sgblack@eecs.umich.edu    AtomicOpInc() { }
1382469SN/A    void execute(T *b) { *b += 1; }
1392516SN/A};
1402516SN/A
1412482SN/Atemplate<typename T>
1422469SN/Aclass AtomicOpDec : public TypedAtomicOpFunctor<T>
1432516SN/A{
1442482SN/A  public:
1452482SN/A    AtomicOpDec() {}
1462646Ssaidi@eecs.umich.edu    void execute(T *b) { *b -= 1; }
1472482SN/A};
1482482SN/A
1492482SN/Atemplate<typename T>
1502482SN/Aclass AtomicOpMax : public TypedAtomicOpFunctor<T>
1512482SN/A{
1522615SN/A  public:
1532469SN/A    T a;
1542469SN/A    AtomicOpMax(T _a) : a(_a) { }
1552482SN/A
1562646Ssaidi@eecs.umich.edu    void
1572482SN/A    execute(T *b)
1582482SN/A    {
1592482SN/A        if (a > *b)
1602588SN/A            *b = a;
1612482SN/A    }
1622526SN/A};
1632469SN/A
1642482SN/Atemplate<typename T>
1652469SN/Aclass AtomicOpMin : public TypedAtomicOpFunctor<T>
1662516SN/A{
1672469SN/A  public:
1682580SN/A    T a;
1692469SN/A    AtomicOpMin(T _a) : a(_a) {}
1702580SN/A
1712469SN/A    void
1722526SN/A    execute(T *b)
1732482SN/A    {
1742482SN/A        if (a < *b)
1752482SN/A            *b = a;
1762469SN/A    }
1772580SN/A};
1782580SN/A
// Return VGPR type selector. The enumerators stay unscoped so existing
// uses of VT_32 / VT_64 keep compiling; values are 0 and 1 respectively.
enum vgpr_type
{
    VT_32,
    VT_64,
};
1842526SN/A
1852482SN/Aclass GPUDynInst : public GPUExecContext
1862482SN/A{
1872482SN/A  public:
1882469SN/A    GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
1892516SN/A               uint64_t instSeqNum);
1902646Ssaidi@eecs.umich.edu    ~GPUDynInst();
1912469SN/A    void execute(GPUDynInstPtr gpuDynInst);
1922580SN/A    int numSrcRegOperands();
1932469SN/A    int numDstRegOperands();
1942580SN/A    int getNumOperands();
1952580SN/A    bool isVectorRegister(int operandIdx);
1962469SN/A    bool isScalarRegister(int operandIdx);
1972526SN/A    bool isCondRegister(int operandIdx);
1982469SN/A    int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
1992615SN/A    int getOperandSize(int operandIdx);
2002615SN/A    bool isDstOperand(int operandIdx);
2012646Ssaidi@eecs.umich.edu    bool isSrcOperand(int operandIdx);
2022526SN/A
2032469SN/A    const std::string &disassemble() const;
2042615SN/A
2052615SN/A    uint64_t seqNum() const;
2062646Ssaidi@eecs.umich.edu
2072526SN/A    Enums::StorageClassType executedAs();
2082469SN/A
2092516SN/A    // The address of the memory operation
2102646Ssaidi@eecs.umich.edu    std::vector<Addr> addr;
2112954Sgblack@eecs.umich.edu    Addr pAddr;
2122580SN/A
2132469SN/A    // The data to get written
2142580SN/A    uint8_t *d_data;
2152469SN/A    // Additional data (for atomics)
2162526SN/A    uint8_t *a_data;
2172469SN/A    // Additional data (for atomics)
2182615SN/A    uint8_t *x_data;
2192615SN/A    // The execution mask
2202526SN/A    VectorMask exec_mask;
2212469SN/A
2222615SN/A    // The memory type (M_U32, M_S32, ...)
2232989Ssaidi@eecs.umich.edu    Enums::MemType m_type;
2242469SN/A
2252469SN/A    // The equivalency class
2262224SN/A    int equiv;
2272646Ssaidi@eecs.umich.edu    // The return VGPR type (VT_32 or VT_64)
2282516SN/A    vgpr_type v_type;
2292516SN/A    // Number of VGPR's accessed (1, 2, or 4)
2302516SN/A    int n_reg;
2312469SN/A    // The return VGPR index
2322469SN/A    int dst_reg;
2332469SN/A    // There can be max 4 dest regs>
2342469SN/A    int dst_reg_vec[4];
2352469SN/A    // SIMD where the WF of the memory instruction has been mapped to
2362526SN/A    int simdId;
2372469SN/A    // unique id of the WF where the memory instruction belongs to
2382996Sgblack@eecs.umich.edu    int wfDynId;
2392996Sgblack@eecs.umich.edu    // The kernel id of the requesting wf
2402469SN/A    int kern_id;
2412469SN/A    // The CU id of the requesting wf
2422469SN/A    int cu_id;
2432996Sgblack@eecs.umich.edu    // HW slot id where the WF is mapped to inside a SIMD unit
2442996Sgblack@eecs.umich.edu    int wfSlotId;
2452996Sgblack@eecs.umich.edu    // execution pipeline id where the memory instruction has been scheduled
2462996Sgblack@eecs.umich.edu    int pipeId;
2472996Sgblack@eecs.umich.edu    // The execution time of this operation
2482469SN/A    Tick time;
2492469SN/A    // The latency of this operation
2502469SN/A    WaitClass latency;
2512469SN/A    // A list of bank conflicts for the 4 cycles.
2522469SN/A    uint32_t bc[4];
2532526SN/A
2542469SN/A    // A pointer to ROM
2552516SN/A    uint8_t *rom;
2562469SN/A    // The size of the READONLY segment
2572469SN/A    int sz_rom;
2582469SN/A
2592469SN/A    // Initiate the specified memory operation, by creating a
2602469SN/A    // memory request and sending it off to the memory system.
2612469SN/A    void initiateAcc(GPUDynInstPtr gpuDynInst);
2622526SN/A    // Complete the specified memory operation, by writing
2632469SN/A    // value back to the RF in the case of a load or atomic
2642516SN/A    // return or, in the case of a store, we do nothing
2652469SN/A    void completeAcc(GPUDynInstPtr gpuDynInst);
2662469SN/A
2672516SN/A    void updateStats();
2682469SN/A
2692469SN/A    GPUStaticInst* staticInstruction() { return _staticInst; }
2702469SN/A
2712526SN/A    bool isALU() const;
2722469SN/A    bool isBranch() const;
2732996Sgblack@eecs.umich.edu    bool isNop() const;
2742996Sgblack@eecs.umich.edu    bool isReturn() const;
2752954Sgblack@eecs.umich.edu    bool isUnconditionalJump() const;
2762954Sgblack@eecs.umich.edu    bool isSpecialOp() const;
2772469SN/A    bool isWaitcnt() const;
2782469SN/A
2792469SN/A    bool isBarrier() const;
2802469SN/A    bool isMemFence() const;
2812996Sgblack@eecs.umich.edu    bool isMemRef() const;
2822526SN/A    bool isFlat() const;
2832469SN/A    bool isLoad() const;
2842516SN/A    bool isStore() const;
2852469SN/A
2862469SN/A    bool isAtomic() const;
2872469SN/A    bool isAtomicNoRet() const;
2882469SN/A    bool isAtomicRet() const;
2892469SN/A
2902469SN/A    bool isScalar() const;
2912469SN/A    bool readsSCC() const;
2922526SN/A    bool writesSCC() const;
2932469SN/A    bool readsVCC() const;
2942516SN/A    bool writesVCC() const;
2952469SN/A
2962469SN/A    bool isAtomicAnd() const;
2972516SN/A    bool isAtomicOr() const;
2982646Ssaidi@eecs.umich.edu    bool isAtomicXor() const;
2992646Ssaidi@eecs.umich.edu    bool isAtomicCAS() const;
3002646Ssaidi@eecs.umich.edu    bool isAtomicExch() const;
3012469SN/A    bool isAtomicAdd() const;
3022469SN/A    bool isAtomicSub() const;
3032646Ssaidi@eecs.umich.edu    bool isAtomicInc() const;
3042469SN/A    bool isAtomicDec() const;
3052469SN/A    bool isAtomicMax() const;
3062469SN/A    bool isAtomicMin() const;
3072469SN/A
3082526SN/A    bool isArgLoad() const;
3092526SN/A    bool isGlobalMem() const;
3102526SN/A    bool isLocalMem() const;
3112526SN/A
3122526SN/A    bool isArgSeg() const;
3132526SN/A    bool isGlobalSeg() const;
3142526SN/A    bool isGroupSeg() const;
3152469SN/A    bool isKernArgSeg() const;
3162526SN/A    bool isPrivateSeg() const;
3172526SN/A    bool isReadOnlySeg() const;
3182526SN/A    bool isSpillSeg() const;
3192526SN/A
3202526SN/A    bool isWorkitemScope() const;
3212526SN/A    bool isWavefrontScope() const;
3222526SN/A    bool isWorkgroupScope() const;
3232526SN/A    bool isDeviceScope() const;
3242646Ssaidi@eecs.umich.edu    bool isSystemScope() const;
3252954Sgblack@eecs.umich.edu    bool isNoScope() const;
3262954Sgblack@eecs.umich.edu
3272954Sgblack@eecs.umich.edu    bool isRelaxedOrder() const;
3282954Sgblack@eecs.umich.edu    bool isAcquire() const;
3292954Sgblack@eecs.umich.edu    bool isRelease() const;
3302954Sgblack@eecs.umich.edu    bool isAcquireRelease() const;
3312646Ssaidi@eecs.umich.edu    bool isNoOrder() const;
3322954Sgblack@eecs.umich.edu
3332954Sgblack@eecs.umich.edu    bool isGloballyCoherent() const;
3342938Sgblack@eecs.umich.edu    bool isSystemCoherent() const;
3352646Ssaidi@eecs.umich.edu
3362646Ssaidi@eecs.umich.edu    /*
3372646Ssaidi@eecs.umich.edu     * Loads/stores/atomics may have acquire/release semantics associated
3382646Ssaidi@eecs.umich.edu     * withthem. Some protocols want to see the acquire/release as separate
3392938Sgblack@eecs.umich.edu     * requests from the load/store/atomic. We implement that separation
3402646Ssaidi@eecs.umich.edu     * using continuations (i.e., a function pointer with an object associated
3412646Ssaidi@eecs.umich.edu     * with it). When, for example, the front-end generates a store with
3422646Ssaidi@eecs.umich.edu     * release semantics, we will first issue a normal store and set the
3432646Ssaidi@eecs.umich.edu     * continuation in the GPUDynInst to a function that generate a
3442526SN/A     * release request. That continuation will be called when the normal
3452526SN/A     * store completes (in ComputeUnit::DataPort::recvTimingResponse). The
3462526SN/A     * continuation will be called in the context of the same GPUDynInst
3472526SN/A     * that generated the initial store.
3482646Ssaidi@eecs.umich.edu     */
3492526SN/A    std::function<void(GPUStaticInst*, GPUDynInstPtr)> execContinuation;
3502646Ssaidi@eecs.umich.edu
3512526SN/A    // when true, call execContinuation when response arrives
3522526SN/A    bool useContinuation;
3532526SN/A
3542469SN/A    template<typename c0> AtomicOpFunctor*
3552526SN/A    makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
3562526SN/A    {
3572526SN/A        if (isAtomicAnd()) {
3582526SN/A            return new AtomicOpAnd<c0>(*reg0);
3592646Ssaidi@eecs.umich.edu        } else if (isAtomicOr()) {
3602591SN/A            return new AtomicOpOr<c0>(*reg0);
3612591SN/A        } else if (isAtomicXor()) {
3622591SN/A            return new AtomicOpXor<c0>(*reg0);
3632526SN/A        } else if (isAtomicCAS()) {
3642526SN/A            return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
3652646Ssaidi@eecs.umich.edu        } else if (isAtomicExch()) {
3662591SN/A            return new AtomicOpExch<c0>(*reg0);
3672591SN/A        } else if (isAtomicAdd()) {
3682591SN/A            return new AtomicOpAdd<c0>(*reg0);
3692526SN/A        } else if (isAtomicSub()) {
3702224SN/A            return new AtomicOpSub<c0>(*reg0);
3712526SN/A        } else if (isAtomicInc()) {
3722526SN/A            return new AtomicOpInc<c0>();
3732615SN/A        } else if (isAtomicDec()) {
3742615SN/A            return new AtomicOpDec<c0>();
3752526SN/A        } else if (isAtomicMax()) {
3762526SN/A            return new AtomicOpMax<c0>(*reg0);
3772526SN/A        } else if (isAtomicMin()) {
3782526SN/A            return new AtomicOpMin<c0>(*reg0);
3792526SN/A        } else {
3802526SN/A            fatal("Unrecognized atomic operation");
3812526SN/A        }
3822526SN/A    }
3832469SN/A
3842526SN/A    void
3852526SN/A    setRequestFlags(Request *req, bool setMemOrder=true)
3862516SN/A    {
3872591SN/A        // currently these are the easy scopes to deduce
3882516SN/A        if (isPrivateSeg()) {
3892526SN/A            req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
3902526SN/A        } else if (isSpillSeg()) {
3912526SN/A            req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
3922615SN/A        } else if (isGlobalSeg()) {
3932615SN/A            req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
3942615SN/A        } else if (isReadOnlySeg()) {
3952615SN/A            req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
3962615SN/A        } else if (isGroupSeg()) {
3972615SN/A            req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
3982526SN/A        } else if (isFlat()) {
3992646Ssaidi@eecs.umich.edu            // TODO: translate to correct scope
4002646Ssaidi@eecs.umich.edu            assert(false);
4012646Ssaidi@eecs.umich.edu        } else {
4022526SN/A            fatal("%s has bad segment type\n", disassemble());
4032526SN/A        }
4042526SN/A
4052526SN/A        if (isWavefrontScope()) {
4062938Sgblack@eecs.umich.edu            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
4072646Ssaidi@eecs.umich.edu                                        Request::WAVEFRONT_SCOPE);
4082646Ssaidi@eecs.umich.edu        } else if (isWorkgroupScope()) {
4092938Sgblack@eecs.umich.edu            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
4102646Ssaidi@eecs.umich.edu                                        Request::WORKGROUP_SCOPE);
4112938Sgblack@eecs.umich.edu        } else if (isDeviceScope()) {
4122646Ssaidi@eecs.umich.edu            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
4132646Ssaidi@eecs.umich.edu                                        Request::DEVICE_SCOPE);
4142938Sgblack@eecs.umich.edu        } else if (isSystemScope()) {
4152646Ssaidi@eecs.umich.edu            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
4162954Sgblack@eecs.umich.edu                                        Request::SYSTEM_SCOPE);
4172963Sgblack@eecs.umich.edu        } else if (!isNoScope() && !isWorkitemScope()) {
4182963Sgblack@eecs.umich.edu            fatal("%s has bad scope type\n", disassemble());
4192963Sgblack@eecs.umich.edu        }
4202963Sgblack@eecs.umich.edu
4212963Sgblack@eecs.umich.edu        if (setMemOrder) {
4222963Sgblack@eecs.umich.edu            // set acquire and release flags
4232963Sgblack@eecs.umich.edu            if (isAcquire()) {
4242963Sgblack@eecs.umich.edu                req->setFlags(Request::ACQUIRE);
4252963Sgblack@eecs.umich.edu            } else if (isRelease()) {
4262963Sgblack@eecs.umich.edu                req->setFlags(Request::RELEASE);
4272963Sgblack@eecs.umich.edu            } else if (isAcquireRelease()) {
4282963Sgblack@eecs.umich.edu                req->setFlags(Request::ACQUIRE | Request::RELEASE);
4292963Sgblack@eecs.umich.edu            } else if (!isNoOrder()) {
4302963Sgblack@eecs.umich.edu                fatal("%s has bad memory order\n", disassemble());
4312963Sgblack@eecs.umich.edu            }
4322963Sgblack@eecs.umich.edu        }
4332963Sgblack@eecs.umich.edu
4342963Sgblack@eecs.umich.edu        // set atomic type
4352963Sgblack@eecs.umich.edu        // currently, the instruction genenerator only produces atomic return
4362963Sgblack@eecs.umich.edu        // but a magic instruction can produce atomic no return
4372963Sgblack@eecs.umich.edu        if (isAtomicRet()) {
4382963Sgblack@eecs.umich.edu            req->setFlags(Request::ATOMIC_RETURN_OP);
4392963Sgblack@eecs.umich.edu        } else if (isAtomicNoRet()) {
4402963Sgblack@eecs.umich.edu            req->setFlags(Request::ATOMIC_NO_RETURN_OP);
4412963Sgblack@eecs.umich.edu        }
4422963Sgblack@eecs.umich.edu    }
4432963Sgblack@eecs.umich.edu
4442963Sgblack@eecs.umich.edu    // Map returned packets and the addresses they satisfy with which lane they
4452963Sgblack@eecs.umich.edu    // were requested from
4462963Sgblack@eecs.umich.edu    typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
4472963Sgblack@eecs.umich.edu    StatusVector memStatusVector;
4482963Sgblack@eecs.umich.edu
4492963Sgblack@eecs.umich.edu    // Track the status of memory requests per lane, a bit per lane
4502963Sgblack@eecs.umich.edu    VectorMask statusBitVector;
4512963Sgblack@eecs.umich.edu    // for ld_v# or st_v#
4522963Sgblack@eecs.umich.edu    std::vector<int> statusVector;
4532963Sgblack@eecs.umich.edu    std::vector<int> tlbHitLevel;
4542963Sgblack@eecs.umich.edu
4552963Sgblack@eecs.umich.edu  private:
4562963Sgblack@eecs.umich.edu    GPUStaticInst *_staticInst;
4572963Sgblack@eecs.umich.edu    uint64_t _seqNum;
4582963Sgblack@eecs.umich.edu};
4592963Sgblack@eecs.umich.edu
#endif // __GPU_DYN_INST_HH__
4612963Sgblack@eecs.umich.edu