mem_impl.hh (11645:44ca2fc730eb)
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"

// defined in code.cc, but not worth sucking in all of code.h for this
// at this point
extern const char *segmentNames[];

namespace HsailISA
{
    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
    {
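        // Illustrative output (hypothetical operands): an address-of
        // instruction with a 64-bit destination would print as something
        // like "lda_u64 $d1,addr", where both operand strings come from
        // the operands' disassemble() methods.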
        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
                                     DestDataType::label,
                                     this->dest.disassemble(),
                                     this->addr.disassemble());
    }

    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename DestDataType::CType CType M5_VAR_USED;
        const VectorMask &mask = w->getPred();
        std::vector<Addr> addr_vec;
        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
        this->addr.calcVector(w, addr_vec);

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                this->dest.set(w, lane, addr_vec[lane]);
            }
        }
        addr_vec.clear();
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
    {
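        // Illustrative output (hypothetical operands): a two-register vector
        // load prints its destinations as a parenthesized list, e.g.
        // "ld_global_u32 ($s0,$s1), addr".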
        switch (num_dest_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 3:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->dest_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad ld register dest operand, num vector operands: %d\n",
                  num_dest_operands);
            break;
        }
    }

    static Addr
    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
    {
        // what is the size of the object we are accessing??
        // NOTE: the compiler doesn't generate enough information
        // to do this yet... have to just line up all the private
        // work-item spaces back to back for now
        /*
          StorageElement* se =
              i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
          assert(se);

          return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
              se->offset * w->computeUnit->wfSize() +
              lane * se->size;
        */

        // addressing strategy: interleave the private spaces of
        // work-items in a wavefront on an 8-byte granularity.
        // this won't be perfect coalescing like the spill space
        // strategy, but it's better than nothing. The spill space
        // strategy won't work with private because the same address
        // may be accessed by different sized loads/stores.

        // Note: I'm assuming that the largest load/store to private
        // is 8 bytes. If it is larger, the stride will have to increase
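        // Worked example (illustrative numbers, assuming a 64-lane
        // wavefront): a private address of 0x12 gives addr_div8 == 2 and
        // addr_mod8 == 2, so lane 5 ends up at
        // privBase + 2*8*64 + 5*8 + 2. Lanes touching the same private
        // address are therefore spread 8 bytes apart in the backing store.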

        Addr addr_div8 = addr / 8;
        Addr addr_mod8 = addr % 8;

        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
            addr_mod8 + w->privBase;

        assert(ret < w->privBase +
               (w->privSizePerItem * w->computeUnit->wfSize()));

        return ret;
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename MemDataType::CType MemCType;
        const VectorMask &mask = w->getPred();

        // Kernarg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front. Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
            MemCType val;

            // I assume no vector ld for kernargs
            assert(num_dest_operands == 1);

            // assuming for the moment that we'll never do register
            // offsets into kernarg space... just to make life simpler
            uint64_t address = this->addr.calcUniform();

            val = *(MemCType*)&w->kernelArgs[address];

            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

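            // the kernarg value is uniform across the wavefront, so
            // broadcast the single loaded value to every active lane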
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    this->dest.set(w, lane, val);
                }
            }

            return;
        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    MemCType val = w->readCallArgMem<MemCType>(lane, address);

                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                            (unsigned long long)val);

                    this->dest.set(w, lane, val);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        m->m_op = Enums::MO_LD;
        m->m_type = MemDataType::memType;
        m->v_type = DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = this->equivClass;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (num_dest_operands == 1) {
            m->dst_reg = this->dest.regIndex();
            m->n_reg = 1;
        } else {
            m->n_reg = num_dest_operands;
            for (int i = 0; i < num_dest_operands; ++i) {
                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
            }
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug:
            // the compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private offset
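            // any address below privSizePerItem is assumed to really be a
            // private (per-work-item) address and is remapped through
            // calcPrivAddr() below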
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (m->addr[lane] < w->privSizePerItem) {
                    if (mask[lane]) {
                        // what is the size of the object we are accessing?
                        // find the base for this wavefront

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_dest_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    // note: this calculation will NOT WORK if the compiler
                    // ever generates loads/stores to the same address with
                    // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
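                    // Illustrative example (made-up numbers): for a 4-byte
                    // spill slot at per-work-item offset 8, lane 3 is sent to
                    // spillBase + 8 * spillWidth + 3 * 4, so the lanes of a
                    // wavefront land in consecutive 4-byte locations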
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                            lane * sizeof(MemCType) + w->spillBase;

                        w->lastAddr[lane] = m->addr[lane];
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_READONLY:
            m->s_type = SEG_READONLY;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                    m->addr[lane] += w->roBase;
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
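                // for the explicit private segment, each work-item's copy of
                // a given private location is simply placed sizeof(MemCType)
                // bytes apart starting at privBase (no 8-byte interleaving as
                // in calcPrivAddr() above)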
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);

                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(MemCType) + w->privBase;
                    }
                }
            }
            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          default:
            fatal("Load to unsupported segment %d %llx\n", this->segment,
                  m->addr[0]);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename OperationType::CType CType;

        const VectorMask &mask = w->getPred();

        // arg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front. Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    CType data = this->src.template get<CType>(w, lane);
                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                    w->writeCallArgMem<CType>(lane, address, data);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        m->exec_mask = w->execMask();

        this->addr.calcVector(w, m->addr);

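        // pack the store data into d_data: a scalar store occupies one CType
        // slot per lane, while a vector store lays its operands out back to
        // back, with operand k for a given lane going to index
        // k * wfSize() + lane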
        if (num_src_operands == 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[lane] =
                        this->src.template get<CType>(w, lane);
                }
            }
        } else {
            for (int k = 0; k < num_src_operands; ++k) {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                            this->src_vect[k].template get<CType>(w, lane);
                    }
                }
            }
        }

        m->m_op = Enums::MO_ST;
        m->m_type = OperationType::memType;
        m->v_type = OperationType::vgprType;

        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        if (num_src_operands == 1) {
            m->n_reg = 1;
        } else {
            m->n_reg = num_src_operands;
        }

        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug:
            // the compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private offset
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    if (m->addr[lane] < w->privSizePerItem) {

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(CType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_src_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                            lane * sizeof(CType) + w->spillBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);
                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(CType) + w->privBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          default:
            fatal("Store to unsupported segment %d\n", this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_src_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->src_vect[2].disassemble(),
                                         this->src_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad st register src operand, num vector operands: %d\n",
                  num_src_operands);
            break;
        }
    }

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::execute(GPUDynInstPtr gpuDynInst)
    {
        typedef typename DataType::CType CType;

        Wavefront *w = gpuDynInst->wavefront();

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            ((CType *)m->a_data)[lane] =
                this->src[0].template get<CType>(w, lane);
        }

        // load second source operand for CAS
        if (NumSrcOperands > 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                ((CType*)m->x_data)[lane] =
                    this->src[1].template get<CType>(w, lane);
            }
        }

        assert(NumSrcOperands <= 2);

        m->m_op = this->opType;
        m->m_type = DataType::memType;
        m->v_type = DataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (HasDst) {
            m->dst_reg = this->dest.regIndex();
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->latency.set(w->computeUnit->shader->ticks(64));
            m->pipeId = GLBMEM_PIPE;

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          default:
            fatal("Atomic op to unsupported segment %d\n",
                  this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::generateDisassembly()
    {
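        // Illustrative output (hypothetical operands): an atomic add to the
        // group segment with a destination might print as
        // "atomic_add_group_u32 $s2,addr,$s1"; the source operands are
        // appended after the address by the loop below.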
        if (HasDst) {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->dest.disassemble(),
                         this->addr.disassemble());
        } else {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->addr.disassemble());
        }

        for (int i = 0; i < NumSrcOperands; ++i) {
            this->disassembly += ",";
            this->disassembly += this->src[i].disassemble();
        }
    }
} // namespace HsailISA