/*
 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: John Kalamatianos, Joe Gross
 */

#ifndef __LDS_STATE_HH__
#define __LDS_STATE_HH__

#include <array>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/misc.hh"
#include "mem/mem_object.hh"
#include "mem/port.hh"
#include "params/LdsState.hh"

class ComputeUnit;

/**
 * this represents a slice of the overall LDS, intended to be associated
 * with an individual workgroup
 */
class LdsChunk
{
  public:
    LdsChunk(const uint32_t x_size):
        chunk(x_size)
    {
    }

    LdsChunk() {}

    /**
     * a read operation
     */
    template<class T>
    T
    read(const uint32_t index)
    {
        fatal_if(!chunk.size(), "cannot read from an LDS chunk of size 0");
        fatal_if(index >= chunk.size(),
                 "out-of-bounds access to an LDS chunk");
        T *p0 = (T *) (&(chunk.at(index)));
        return *p0;
    }

    /**
     * a write operation
     */
    template<class T>
    void
    write(const uint32_t index, const T value)
    {
        fatal_if(!chunk.size(), "cannot write to an LDS chunk of size 0");
        fatal_if(index >= chunk.size(),
                 "out-of-bounds access to an LDS chunk");
        T *p0 = (T *) (&(chunk.at(index)));
        *p0 = value;
    }

    /**
     * get the size of this chunk
     */
    std::vector<uint8_t>::size_type
    size() const
    {
        return chunk.size();
    }

  protected:
    // the actual data store for this slice of the LDS
    std::vector<uint8_t> chunk;
};
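
/*
 * Illustrative sketch only (not part of this interface): how an LdsChunk
 * might be used as byte-addressed backing store for typed accesses. The
 * chunk size and offsets below are hypothetical.
 *
 *     LdsChunk wgChunk(256);                     // 256 bytes of LDS space
 *     wgChunk.write<uint32_t>(16, 0xabcd);       // store a 32-bit value at
 *                                                // byte offset 16
 *     uint32_t v = wgChunk.read<uint32_t>(16);   // read it back
 *
 * Note that read/write indices are byte offsets into the underlying
 * std::vector<uint8_t>, not element indices of T.
 */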

// Local Data Share (LDS) State per Wavefront (contents of the LDS region
// allocated to the WorkGroup of this Wavefront)
class LdsState: public MemObject
{
  protected:

    /**
     * an event to allow event-driven execution
     */
    class TickEvent: public Event
    {
      protected:

        LdsState *ldsState = nullptr;

        Tick nextTick = 0;

      public:

        TickEvent(LdsState *_ldsState) :
            ldsState(_ldsState)
        {
        }

        virtual void
        process();

        void
        schedule(Tick when)
        {
            mainEventQueue[0]->schedule(this, when);
        }

        void
        deschedule()
        {
            mainEventQueue[0]->deschedule(this);
        }
    };

    /**
     * CuSidePort is the LDS Port closer to the CU side
     */
    class CuSidePort: public SlavePort
    {
      public:
        CuSidePort(const std::string &_name, LdsState *_ownerLds) :
            SlavePort(_name, _ownerLds), ownerLds(_ownerLds)
        {
        }

      protected:
        LdsState *ownerLds;

        virtual bool
        recvTimingReq(PacketPtr pkt);

        virtual Tick
        recvAtomic(PacketPtr pkt)
        {
            return 0;
        }

        virtual void
        recvFunctional(PacketPtr pkt);

        virtual void
        recvRangeChange()
        {
        }

        virtual void
        recvRetry();

        virtual void
        recvRespRetry();

        virtual AddrRangeList
        getAddrRanges() const
        {
            AddrRangeList ranges;
            ranges.push_back(ownerLds->getAddrRange());
            return ranges;
        }

        template<typename T>
        void
        loadData(PacketPtr packet);

        template<typename T>
        void
        storeData(PacketPtr packet);

        template<typename T>
        void
        atomicOperation(PacketPtr packet);
    };

  protected:

    // The LDS reference counter. The outer key is the dispatch ID and the
    // inner key is the workgroup ID; the value is the number of wavefronts
    // that currently reference this LDS chunk. The counter is incremented as
    // each wavefront of the workgroup launches and decremented as each one
    // returns. The chunk is returned to the available pool on the 1->0
    // transition, not simply whenever the counter is 0, because the counter
    // starts at 0 when the workgroup first asks for space.
    std::unordered_map<uint32_t,
                       std::unordered_map<uint32_t, int32_t>> refCounter;

    // the map that allows workgroups to access their own chunk of the LDS
    std::unordered_map<uint32_t,
                       std::unordered_map<uint32_t, LdsChunk>> chunkMap;

    // an event to allow the LDS to wake up at a specified time
    TickEvent tickEvent;

    // the queue of packets that are going back to the CU after a
    // read/write/atomic op
    // TODO need to make this have a maximum size to create flow control
    std::queue<std::pair<Tick, PacketPtr>> returnQueue;

    // whether or not there are pending responses
    bool retryResp = false;

    bool
    process();

    GPUDynInstPtr
    getDynInstr(PacketPtr packet);

    bool
    processPacket(PacketPtr packet);

    unsigned
    countBankConflicts(PacketPtr packet, unsigned *bankAccesses);

    unsigned
    countBankConflicts(GPUDynInstPtr gpuDynInst,
                       unsigned *numBankAccesses);

  public:
    typedef LdsStateParams Params;

    LdsState(const Params *params);

    // prevent copy construction
    LdsState(const LdsState&) = delete;

    ~LdsState()
    {
        parent = nullptr;
    }

    const Params *
    params() const
    {
        return dynamic_cast<const Params *>(_params);
    }

    bool
    isRetryResp() const
    {
        return retryResp;
    }

    void
    setRetryResp(const bool value)
    {
        retryResp = value;
    }

    // prevent assignment
    LdsState &
    operator=(const LdsState &) = delete;

    /**
     * use the dynamic wave id to create or just increase the reference count
     */
    int
    increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
    {
        int refCount = getRefCounter(dispatchId, wgId);
        fatal_if(refCount < 0,
                 "reference count should not be below zero");
        return ++refCounter[dispatchId][wgId];
    }

    /**
     * decrease the reference count after making sure it is in the list;
     * give back this chunk if the ref counter has reached 0
     */
    int
    decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
    {
        int refCount = getRefCounter(dispatchId, wgId);

        fatal_if(refCount <= 0,
                 "reference count should not be below zero or at zero to "
                 "decrement");

        refCounter[dispatchId][wgId]--;

        if (refCounter[dispatchId][wgId] == 0) {
            releaseSpace(dispatchId, wgId);
            return 0;
        } else {
            return refCounter[dispatchId][wgId];
        }
    }

    /**
     * return the current reference count for this workgroup id
     */
    int
    getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
    {
        auto dispatchIter = chunkMap.find(dispatchId);
        fatal_if(dispatchIter == chunkMap.end(),
                 "could not locate this dispatch id [%d]", dispatchId);

        auto workgroup = dispatchIter->second.find(wgId);
        fatal_if(workgroup == dispatchIter->second.end(),
                 "could not find this workgroup id within this dispatch id"
                 " did[%d] wgid[%d]", dispatchId, wgId);

        auto refCountIter = refCounter.find(dispatchId);
        if (refCountIter == refCounter.end()) {
            fatal("could not locate this dispatch id [%d]", dispatchId);
        } else {
            auto workgroup = refCountIter->second.find(wgId);
            if (workgroup == refCountIter->second.end()) {
                fatal("could not find this workgroup id within this dispatch"
                      " id did[%d] wgid[%d]", dispatchId, wgId);
            } else {
                return refCounter.at(dispatchId).at(wgId);
            }
        }

        fatal("should not reach this point");
        return 0;
    }

    /**
     * assign a parent and request this amount of space be set aside
     * for this wgid
     */
    LdsChunk *
    reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
                 const uint32_t size)
    {
        if (chunkMap.find(dispatchId) != chunkMap.end()) {
            fatal_if(
                chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
                "duplicate workgroup ID asking for space in the LDS "
                "did[%d] wgid[%d]", dispatchId, wgId);
        }

        fatal_if(bytesAllocated + size > maximumSize,
                 "request would ask for more space than is available");

        bytesAllocated += size;

        chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
        // make an entry for this workgroup
        refCounter[dispatchId][wgId] = 0;

        return &chunkMap[dispatchId][wgId];
    }
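
    /*
     * Illustrative sketch only (not part of this interface): the expected
     * LDS allocation lifecycle for one workgroup; identifiers such as "lds",
     * "dispId", "wgId" and "ldsBytes" are hypothetical.
     *
     *     if (lds->canReserve(ldsBytes)) {
     *         LdsChunk *chunk = lds->reserveSpace(dispId, wgId, ldsBytes);
     *         lds->increaseRefCounter(dispId, wgId); // once per wavefront
     *         // ... wavefronts access "chunk" while the workgroup runs ...
     *         lds->decreaseRefCounter(dispId, wgId); // once per wavefront;
     *                                                // the 1->0 transition
     *                                                // releases the chunk
     *     }
     */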

    bool
    returnQueuePush(std::pair<Tick, PacketPtr> thePair);

    Tick
    earliestReturnTime() const
    {
        // TODO set to max(lastCommand+1, curTick())
        return returnQueue.empty() ? curTick() : returnQueue.back().first;
    }

    void
    setParent(ComputeUnit *x_parent);

    void
    regStats();

    // accessors
    ComputeUnit *
    getParent() const
    {
        return parent;
    }

    std::string
    getName()
    {
        return _name;
    }

    int
    getBanks() const
    {
        return banks;
    }

    ComputeUnit *
    getComputeUnit() const
    {
        return parent;
    }

    int
    getBankConflictPenalty() const
    {
        return bankConflictPenalty;
    }

    /**
     * get the allocated size for this workgroup
     */
    std::size_t
    ldsSize(const uint32_t x_wgId)
    {
        return chunkMap[x_wgId].size();
    }

    AddrRange
    getAddrRange() const
    {
        return range;
    }

    virtual BaseSlavePort &
    getSlavePort(const std::string& if_name, PortID idx)
    {
        if (if_name == "cuPort") {
            // TODO need to set name dynamically at this point?
            return cuPort;
        } else {
            fatal("cannot resolve the port name " + if_name);
        }
    }

    /**
     * can this much space be reserved for a workgroup?
     */
    bool
    canReserve(uint32_t x_size) const
    {
        return bytesAllocated + x_size <= maximumSize;
    }

  private:
    /**
     * give back the space
     */
    bool
    releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
    {
        auto dispatchIter = chunkMap.find(x_dispatchId);

        if (dispatchIter == chunkMap.end()) {
            fatal("dispatch id not found [%d]", x_dispatchId);
        } else {
            auto workgroupIter = dispatchIter->second.find(x_wgId);
            if (workgroupIter == dispatchIter->second.end()) {
                fatal("workgroup id [%d] not found in dispatch id [%d]",
                      x_wgId, x_dispatchId);
            }
        }

        fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
                 "releasing more space than was allocated");

        bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
        chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
        return true;
    }

    // the port that connects this LDS to its owner CU
    CuSidePort cuPort;

    ComputeUnit* parent = nullptr;

    std::string _name;

    // the number of bytes currently reserved by all workgroups
    int bytesAllocated = 0;

    // the maximum size of the LDS in bytes
    int maximumSize;

    // Address range of this memory
    AddrRange range;

    // the penalty, in cycles, for each LDS bank conflict
    int bankConflictPenalty = 0;

    // the number of banks in the LDS underlying data store
    int banks = 0;
};

#endif // __LDS_STATE_HH__