lds_state.hh revision 11308
/*
 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: John Kalamatianos, Joe Gross
 */
358229Snate@binkert.org
368229Snate@binkert.org#ifndef __LDS_STATE_HH__
377629Sgblack@eecs.umich.edu#define __LDS_STATE_HH__
387629Sgblack@eecs.umich.edu
#include <array>
#include <cstdint>
#include <cstring>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/misc.hh"
#include "mem/mem_object.hh"
#include "mem/port.hh"
#include "params/LdsState.hh"
526336Sgblack@eecs.umich.edu
536336Sgblack@eecs.umich.educlass ComputeUnit;
546313Sgblack@eecs.umich.edu
556313Sgblack@eecs.umich.edu/**
566313Sgblack@eecs.umich.edu * this represents a slice of the overall LDS, intended to be associated with an
576313Sgblack@eecs.umich.edu * individual workgroup
586336Sgblack@eecs.umich.edu */
596336Sgblack@eecs.umich.educlass LdsChunk
606336Sgblack@eecs.umich.edu{
616336Sgblack@eecs.umich.edu  public:
626336Sgblack@eecs.umich.edu    LdsChunk(const uint32_t x_size):
636313Sgblack@eecs.umich.edu        chunk(x_size)
646313Sgblack@eecs.umich.edu    {
656313Sgblack@eecs.umich.edu    }
666336Sgblack@eecs.umich.edu
676336Sgblack@eecs.umich.edu    LdsChunk() {}
686313Sgblack@eecs.umich.edu
696359Sgblack@eecs.umich.edu    /**
706359Sgblack@eecs.umich.edu     * a read operation
716359Sgblack@eecs.umich.edu     */
726361Sgblack@eecs.umich.edu    template<class T>
736359Sgblack@eecs.umich.edu    T
746359Sgblack@eecs.umich.edu    read(const uint32_t index)
756359Sgblack@eecs.umich.edu    {
766359Sgblack@eecs.umich.edu        fatal_if(!chunk.size(), "cannot read from an LDS chunk of size 0");
776359Sgblack@eecs.umich.edu        fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
786359Sgblack@eecs.umich.edu        T *p0 = (T *) (&(chunk.at(index)));
796359Sgblack@eecs.umich.edu        return *p0;
806359Sgblack@eecs.umich.edu    }
816359Sgblack@eecs.umich.edu
826359Sgblack@eecs.umich.edu    /**
836359Sgblack@eecs.umich.edu     * a write operation
846313Sgblack@eecs.umich.edu     */
856313Sgblack@eecs.umich.edu    template<class T>
866313Sgblack@eecs.umich.edu    void
876313Sgblack@eecs.umich.edu    write(const uint32_t index, const T value)
886313Sgblack@eecs.umich.edu    {
896313Sgblack@eecs.umich.edu        fatal_if(!chunk.size(), "cannot write to an LDS chunk of size 0");
906313Sgblack@eecs.umich.edu        fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
916313Sgblack@eecs.umich.edu        T *p0 = (T *) (&(chunk.at(index)));
92        *p0 = value;
93    }
94
95    /**
96     * get the size of this chunk
97     */
98    std::vector<uint8_t>::size_type
99    size() const
100    {
101        return chunk.size();
102    }
103
104  protected:
105    // the actual data store for this slice of the LDS
106    std::vector<uint8_t> chunk;
107};
108
109// Local Data Share (LDS) State per Wavefront (contents of the LDS region
110// allocated to the WorkGroup of this Wavefront)
111class LdsState: public MemObject
112{
113  protected:
114
115    /**
116     * an event to allow event-driven execution
117     */
118    class TickEvent: public Event
119    {
120      protected:
121
122        LdsState *ldsState = nullptr;
123
124        Tick nextTick = 0;
125
126      public:
127
128        TickEvent(LdsState *_ldsState) :
129            ldsState(_ldsState)
130        {
131        }
132
133        virtual void
134        process();
135
136        void
137        schedule(Tick when)
138        {
139            mainEventQueue[0]->schedule(this, when);
140        }
141
142        void
143        deschedule()
144        {
145            mainEventQueue[0]->deschedule(this);
146        }
147    };
148
149    /**
150     * CuSidePort is the LDS Port closer to the CU side
151     */
152    class CuSidePort: public SlavePort
153    {
154      public:
155        CuSidePort(const std::string &_name, LdsState *_ownerLds) :
156                SlavePort(_name, _ownerLds), ownerLds(_ownerLds)
157        {
158        }
159
160      protected:
161        LdsState *ownerLds;
162
163        virtual bool
164        recvTimingReq(PacketPtr pkt);
165
166        virtual Tick
167        recvAtomic(PacketPtr pkt)
168        {
169          return 0;
170        }
171
172        virtual void
173        recvFunctional(PacketPtr pkt);
174
175        virtual void
176        recvRangeChange()
177        {
178        }
179
180        virtual void
181        recvRetry();
182
183        virtual void
184        recvRespRetry();
185
186        virtual AddrRangeList
187        getAddrRanges() const
188        {
189          AddrRangeList ranges;
190          ranges.push_back(ownerLds->getAddrRange());
191          return ranges;
192        }
193
194        template<typename T>
195        void
196        loadData(PacketPtr packet);
197
198        template<typename T>
199        void
200        storeData(PacketPtr packet);
201
202        template<typename T>
203        void
204        atomicOperation(PacketPtr packet);
205    };
206
207  protected:
208
209    // the lds reference counter
210    // The key is the workgroup ID and dispatch ID
211    // The value is the number of wavefronts that reference this LDS, as
212    // wavefronts are launched, the counter goes up for that workgroup and when
213    // they return it decreases, once it reaches 0 then this chunk of the LDS is
214    // returned to the available pool. However,it is deallocated on the 1->0
215    // transition, not whenever the counter is 0 as it always starts with 0 when
216    // the workgroup asks for space
217    std::unordered_map<uint32_t,
218                       std::unordered_map<uint32_t, int32_t>> refCounter;
219
220    // the map that allows workgroups to access their own chunk of the LDS
221    std::unordered_map<uint32_t,
222                       std::unordered_map<uint32_t, LdsChunk>> chunkMap;
223
224    // an event to allow the LDS to wake up at a specified time
225    TickEvent tickEvent;
226
227    // the queue of packets that are going back to the CU after a
228    // read/write/atomic op
229    // TODO need to make this have a maximum size to create flow control
230    std::queue<std::pair<Tick, PacketPtr>> returnQueue;
231
232    // whether or not there are pending responses
233    bool retryResp = false;
234
235    bool
236    process();
237
238    GPUDynInstPtr
239    getDynInstr(PacketPtr packet);
240
241    bool
242    processPacket(PacketPtr packet);
243
244    unsigned
245    countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
246
247    unsigned
248    countBankConflicts(GPUDynInstPtr gpuDynInst,
249                       unsigned *numBankAccesses);
250
251  public:
252    typedef LdsStateParams Params;
253
254    LdsState(const Params *params);
255
256    // prevent copy construction
257    LdsState(const LdsState&) = delete;
258
259    ~LdsState()
260    {
261        parent = nullptr;
262    }
263
264    const Params *
265    params() const
266    {
267        return dynamic_cast<const Params *>(_params);
268    }
269
270    bool
271    isRetryResp() const
272    {
273        return retryResp;
274    }
275
276    void
277    setRetryResp(const bool value)
278    {
279        retryResp = value;
280    }
281
282    // prevent assignment
283    LdsState &
284    operator=(const LdsState &) = delete;
285
286    /**
287     * use the dynamic wave id to create or just increase the reference count
288     */
289    int
290    increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
291    {
292        int refCount = getRefCounter(dispatchId, wgId);
293        fatal_if(refCount < 0,
294                 "reference count should not be below zero");
295        return ++refCounter[dispatchId][wgId];
296    }
297
298    /**
299     * decrease the reference count after making sure it is in the list
300     * give back this chunk if the ref counter has reached 0
301     */
302    int
303    decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
304    {
305      int refCount = getRefCounter(dispatchId, wgId);
306
307      fatal_if(refCount <= 0,
308              "reference count should not be below zero or at zero to"
309              "decrement");
310
311      refCounter[dispatchId][wgId]--;
312
313      if (refCounter[dispatchId][wgId] == 0) {
314        releaseSpace(dispatchId, wgId);
315        return 0;
316      } else {
317        return refCounter[dispatchId][wgId];
318      }
319    }
320
321    /**
322     * return the current reference count for this workgroup id
323     */
324    int
325    getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
326    {
327      auto dispatchIter = chunkMap.find(dispatchId);
328      fatal_if(dispatchIter == chunkMap.end(),
329               "could not locate this dispatch id [%d]", dispatchId);
330
331      auto workgroup = dispatchIter->second.find(wgId);
332      fatal_if(workgroup == dispatchIter->second.end(),
333               "could not find this workgroup id within this dispatch id"
334               " did[%d] wgid[%d]", dispatchId, wgId);
335
336      auto refCountIter = refCounter.find(dispatchId);
337      if (refCountIter == refCounter.end()) {
338        fatal("could not locate this dispatch id [%d]", dispatchId);
339      } else {
340        auto workgroup = refCountIter->second.find(wgId);
341        if (workgroup == refCountIter->second.end()) {
342          fatal("could not find this workgroup id within this dispatch id"
343                  " did[%d] wgid[%d]", dispatchId, wgId);
344        } else {
345          return refCounter.at(dispatchId).at(wgId);
346        }
347      }
348
349      fatal("should not reach this point");
350      return 0;
351    }
352
353    /**
354     * assign a parent and request this amount of space be set aside
355     * for this wgid
356     */
357    LdsChunk *
358    reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
359            const uint32_t size)
360    {
361        if (chunkMap.find(dispatchId) != chunkMap.end()) {
362            fatal_if(
363                chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
364                "duplicate workgroup ID asking for space in the LDS "
365                "did[%d] wgid[%d]", dispatchId, wgId);
366        }
367
368        fatal_if(bytesAllocated + size > maximumSize,
369                 "request would ask for more space than is available");
370
371        bytesAllocated += size;
372
373        chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
374        // make an entry for this workgroup
375        refCounter[dispatchId][wgId] = 0;
376
377        return &chunkMap[dispatchId][wgId];
378    }
379
380    bool
381    returnQueuePush(std::pair<Tick, PacketPtr> thePair);
382
383    Tick
384    earliestReturnTime() const
385    {
386        // TODO set to max(lastCommand+1, curTick())
387        return returnQueue.empty() ? curTick() : returnQueue.back().first;
388    }
389
390    void
391    setParent(ComputeUnit *x_parent);
392
393    void
394    regStats();
395
396    // accessors
397    ComputeUnit *
398    getParent() const
399    {
400        return parent;
401    }
402
403    std::string
404    getName()
405    {
406        return _name;
407    }
408
409    int
410    getBanks() const
411    {
412        return banks;
413    }
414
415    ComputeUnit *
416    getComputeUnit() const
417    {
418        return parent;
419    }
420
421    int
422    getBankConflictPenalty() const
423    {
424        return bankConflictPenalty;
425    }
426
427    /**
428     * get the allocated size for this workgroup
429     */
430    std::size_t
431    ldsSize(const uint32_t x_wgId)
432    {
433        return chunkMap[x_wgId].size();
434    }
435
436    AddrRange
437    getAddrRange() const
438    {
439        return range;
440    }
441
442    virtual BaseSlavePort &
443    getSlavePort(const std::string& if_name, PortID idx)
444    {
445        if (if_name == "cuPort") {
446            // TODO need to set name dynamically at this point?
447            return cuPort;
448        } else {
449            fatal("cannot resolve the port name " + if_name);
450        }
451    }
452
453    /**
454     * can this much space be reserved for a workgroup?
455     */
456    bool
457    canReserve(uint32_t x_size) const
458    {
459      return bytesAllocated + x_size <= maximumSize;
460    }
461
462  private:
463    /**
464     * give back the space
465     */
466    bool
467    releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
468    {
469        auto dispatchIter = chunkMap.find(x_dispatchId);
470
471        if (dispatchIter == chunkMap.end()) {
472          fatal("dispatch id not found [%d]", x_dispatchId);
473        } else {
474          auto workgroupIter = dispatchIter->second.find(x_wgId);
475          if (workgroupIter == dispatchIter->second.end()) {
476            fatal("workgroup id [%d] not found in dispatch id [%d]",
477                    x_wgId, x_dispatchId);
478          }
479        }
480
481        fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
482                 "releasing more space than was allocated");
483
484        bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
485        chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
486        return true;
487    }
488
489    // the port that connects this LDS to its owner CU
490    CuSidePort cuPort;
491
492    ComputeUnit* parent = nullptr;
493
494    std::string _name;
495
496    // the number of bytes currently reserved by all workgroups
497    int bytesAllocated = 0;
498
499    // the size of the LDS, the most bytes available
500    int maximumSize;
501
502    // Address range of this memory
503    AddrRange range;
504
505    // the penalty, in cycles, for each LDS bank conflict
506    int bankConflictPenalty = 0;
507
508    // the number of banks in the LDS underlying data store
509    int banks = 0;
510};
511
512#endif // __LDS_STATE_HH__
513