/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"

// defined in code.cc, but not worth sucking in all of code.h for this
// at this point
extern const char *segmentNames[];

namespace HsailISA
{
    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
    {
        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
                                     DestDataType::label,
                                     this->dest.disassemble(),
                                     this->addr.disassemble());
    }

    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename DestDataType::CType CType M5_VAR_USED;
        const VectorMask &mask = w->get_pred();
        uint64_t addr_vec[VSZ];
        this->addr.calcVector(w, addr_vec);

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                this->dest.set(w, lane, addr_vec[lane]);
            }
        }
    }
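
    // lda only materializes each active lane's effective address in the
    // destination register via dest.set(); it builds no memory request,
    // so it never touches the global or local memory pipelines used by
    // the ld/st/atomic implementations below.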

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_dest_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->dest_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad ld register dest operand, num vector operands: %d\n",
                  num_dest_operands);
            break;
        }
    }

    static Addr
    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
    {
        // What is the size of the object we are accessing?
        // NOTE: the compiler doesn't generate enough information to
        // answer that yet, so for now we just have to line up all the
        // private work-item spaces back to back.
        /*
        StorageElement* se =
            i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
        assert(se);

        return w->wfSlotId * w->privSizePerItem * VSZ +
            se->offset * VSZ +
            lane * se->size;
        */

        // Addressing strategy: interleave the private spaces of the
        // work-items in a wavefront at 8-byte granularity. This won't be
        // perfect coalescing like the spill-space strategy, but it's
        // better than nothing. The spill-space strategy won't work for
        // private because the same address may be accessed by
        // different-sized loads/stores.

        // Note: this assumes the largest load/store to private is 8
        // bytes; if it is ever larger, the stride will have to increase.

        Addr addr_div8 = addr / 8;
        Addr addr_mod8 = addr % 8;

        Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase;

        assert(ret < w->privBase + (w->privSizePerItem * VSZ));

        return ret;
    }
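
    // A worked example of the interleaving above, assuming VSZ = 64 for
    // illustration: a private-space address of 0x1c gives addr_div8 = 3
    // and addr_mod8 = 4, so for lane 2
    //   ret = 3 * 8 * 64 + 2 * 8 + 4 + w->privBase = w->privBase + 1556.
    // Each aligned 8-byte slot of a work-item's private space is followed
    // by the corresponding slots of the other VSZ - 1 lanes, so lanes
    // hitting the same private address occupy adjacent slots and can be
    // coalesced by the global memory pipeline.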

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename MemDataType::CType MemCType;
        const VectorMask &mask = w->get_pred();

        // Kernarg references are handled uniquely for now (no memory
        // request is used), so special-case them up front. Someday we
        // should make this more realistic, at which point we should get
        // rid of this block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
            MemCType val;

            // I assume no vector ld for kernargs
            assert(num_dest_operands == 1);

            // assuming for the moment that we'll never do register
            // offsets into kernarg space... just to make life simpler
            uint64_t address = this->addr.calcUniform();

            val = *(MemCType*)&w->kernelArgs[address];

            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    this->dest.set(w, lane, val);
                }
            }

            return;
        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();
            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    MemCType val = w->readCallArgMem<MemCType>(lane, address);

                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                            (unsigned long long)val);

                    this->dest.set(w, lane, val);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        m->m_op = Enums::MO_LD;
        m->m_type = MemDataType::memType;
        m->v_type = DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = this->equivClass;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (num_dest_operands == 1) {
            m->dst_reg = this->dest.regIndex();
            m->n_reg = 1;
        } else {
            m->n_reg = num_dest_operands;
            for (int i = 0; i < num_dest_operands; ++i) {
                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
            }
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // This is a complete hack to get around a compiler bug: the
            // compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private
            // offset.
            for (int lane = 0; lane < VSZ; ++lane) {
                if (m->addr[lane] < w->privSizePerItem) {
                    if (mask[lane]) {
                        // what is the size of the object we are accessing?
                        // find the base for this wavefront

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_dest_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < VSZ; ++lane) {
                    // note: this calculation will NOT WORK if the compiler
                    // ever generates loads/stores to the same address with
                    // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(MemCType) + w->spillBase;

                        w->last_addr[lane] = m->addr[lane];
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_rd_lm++;
            w->rd_lm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_READONLY:
            m->s_type = SEG_READONLY;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                    m->addr[lane] += w->roBase;
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < VSZ; ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);

                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(MemCType) + w->privBase;
                    }
                }
            }
            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          default:
            fatal("Load to unsupported segment %d %llx\n", this->segment,
                  m->addr[0]);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }
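
    // Dispatch summary for the switch above: GLOBAL, SPILL, READONLY, and
    // PRIVATE requests all enter the global memory pipe (GLBMEM_PIPE) with
    // a 1-tick issue latency and differ only in how the per-lane addresses
    // are rebased; GROUP requests go to the LDS pipe (LDSMEM_PIPE) with a
    // 24-tick latency. KERNARG and ARG loads return early and never reach
    // the switch.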

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename OperationType::CType CType;

        const VectorMask &mask = w->get_pred();

        // Arg references are handled uniquely for now (no memory request
        // is used), so special-case them up front. Someday we should make
        // this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();

            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    CType data = this->src.template get<CType>(w, lane);
                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                    w->writeCallArgMem<CType>(lane, address, data);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        m->exec_mask = w->execMask();

        this->addr.calcVector(w, m->addr);

        if (num_src_operands == 1) {
            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[lane] =
                        this->src.template get<CType>(w, lane);
                }
            }
        } else {
            for (int k = 0; k < num_src_operands; ++k) {
                for (int lane = 0; lane < VSZ; ++lane) {
                    if (mask[lane]) {
                        ((CType*)m->d_data)[k * VSZ + lane] =
                            this->src_vect[k].template get<CType>(w, lane);
                    }
                }
            }
        }

        m->m_op = Enums::MO_ST;
        m->m_type = OperationType::memType;
        m->v_type = OperationType::vgprType;

        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        if (num_src_operands == 1) {
            m->n_reg = 1;
        } else {
            m->n_reg = num_src_operands;
        }

        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // This is a complete hack to get around a compiler bug: the
            // compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private
            // offset.
            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    if (m->addr[lane] < w->privSizePerItem) {

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(CType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_src_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < VSZ; ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(CType) + w->spillBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_wr_lm++;
            w->wr_lm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < VSZ; ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);
                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(CType) + w->privBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          default:
            fatal("Store to unsupported segment %d\n", this->segment);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }
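
    // Layout note for the operand gather above: m->d_data is indexed as
    // k * VSZ + lane, i.e. lane-major within each source operand, so a
    // vector store carries operand 0 for the whole wavefront first, then
    // operand 1, and so on.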

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_src_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->src_vect[2].disassemble(),
                                         this->src_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad st register src operand, num vector operands: %d\n",
                  num_src_operands);
            break;
        }
    }
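
    // For illustration only (hypothetical register names, not taken from
    // a real trace): with the "%s_%s_%s (%s,%s,%s,%s), %s" format above,
    // a four-operand store might disassemble as
    //   st_global_u32 ($s0,$s1,$s2,$s3), [$d0]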

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::execute(GPUDynInstPtr gpuDynInst)
    {
        typedef typename DataType::CType CType;

        Wavefront *w = gpuDynInst->wavefront();

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        for (int lane = 0; lane < VSZ; ++lane) {
            ((CType *)m->a_data)[lane] =
                this->src[0].template get<CType>(w, lane);
        }

        // load second source operand for CAS
        if (NumSrcOperands > 1) {
            for (int lane = 0; lane < VSZ; ++lane) {
                ((CType*)m->x_data)[lane] =
                    this->src[1].template get<CType>(w, lane);
            }
        }

        assert(NumSrcOperands <= 2);

        m->m_op = this->opType;
        m->m_type = DataType::memType;
        m->v_type = DataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (HasDst) {
            m->dst_reg = this->dest.regIndex();
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->latency.set(w->computeUnit->shader->ticks(64));
            m->pipeId = GLBMEM_PIPE;

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_wr_lm++;
            w->wr_lm_reqs_in_pipe--;
            w->outstanding_reqs_rd_lm++;
            w->rd_lm_reqs_in_pipe--;
            break;

          default:
            fatal("Atomic op to unsupported segment %d\n",
                  this->segment);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }
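
    // Bookkeeping note: an atomic is charged as both a write and a read
    // above (both the wr_* and rd_* counters are updated) because it
    // modifies memory and returns the old value. For a two-operand atomic
    // such as CAS, the first source operand is staged in m->a_data and
    // the second in m->x_data, matching the gather at the top of
    // execute().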

    const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::generateDisassembly()
    {
        if (HasDst) {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->dest.disassemble(),
                         this->addr.disassemble());
        } else {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->addr.disassemble());
        }

        for (int i = 0; i < NumSrcOperands; ++i) {
            this->disassembly += ",";
            this->disassembly += this->src[i].disassemble();
        }
    }
} // namespace HsailISA