mem_impl.hh revision 11639
1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Steve Reinhardt 34 */ 35 36#include "arch/hsail/generic_types.hh" 37#include "gpu-compute/hsail_code.hh" 38 39// defined in code.cc, but not worth sucking in all of code.h for this 40// at this point 41extern const char *segmentNames[]; 42 43namespace HsailISA 44{ 45 template<typename DestDataType, typename AddrRegOperandType> 46 void 47 LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly() 48 { 49 this->disassembly = csprintf("%s_%s %s,%s", this->opcode, 50 DestDataType::label, 51 this->dest.disassemble(), 52 this->addr.disassemble()); 53 } 54 55 template<typename DestDataType, typename AddrRegOperandType> 56 void 57 LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 58 { 59 Wavefront *w = gpuDynInst->wavefront(); 60 61 typedef typename DestDataType::CType CType M5_VAR_USED; 62 const VectorMask &mask = w->getPred(); 63 std::vector<Addr> addr_vec; 64 addr_vec.resize(w->computeUnit->wfSize(), (Addr)0); 65 this->addr.calcVector(w, addr_vec); 66 67 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 68 if (mask[lane]) { 69 this->dest.set(w, lane, addr_vec[lane]); 70 } 71 } 72 addr_vec.clear(); 73 } 74 75 template<typename MemDataType, typename DestDataType, 76 typename AddrRegOperandType> 77 void 78 LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly() 79 { 80 switch (num_dest_operands) { 81 case 1: 82 this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, 83 segmentNames[this->segment], 84 MemDataType::label, 85 this->dest.disassemble(), 86 this->addr.disassemble()); 87 break; 88 case 2: 89 this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, 90 segmentNames[this->segment], 91 MemDataType::label, 92 this->dest_vect[0].disassemble(), 93 this->dest_vect[1].disassemble(), 94 this->addr.disassemble()); 95 break; 96 case 4: 97 this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", 98 this->opcode, 99 segmentNames[this->segment], 100 MemDataType::label, 101 this->dest_vect[0].disassemble(), 102 this->dest_vect[1].disassemble(), 103 this->dest_vect[2].disassemble(), 104 this->dest_vect[3].disassemble(), 105 this->addr.disassemble()); 106 break; 107 default: 108 fatal("Bad ld register dest operand, num vector operands: %d \n", 109 num_dest_operands); 110 break; 111 } 112 } 113 114 static Addr 115 calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i) 116 { 117 // what is the size of the object we are accessing?? 118 // NOTE: the compiler doesn't generate enough information 119 // to do this yet..have to just line up all the private 120 // work-item spaces back to back for now 121 /* 122 StorageElement* se = 123 i->parent->findSymbol(Brig::BrigPrivateSpace, addr); 124 assert(se); 125 126 return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() + 127 se->offset * w->computeUnit->wfSize() + 128 lane * se->size; 129 */ 130 131 // addressing strategy: interleave the private spaces of 132 // work-items in a wave-front on 8 byte granularity. 133 // this won't be perfect coalescing like the spill space 134 // strategy, but it's better than nothing. The spill space 135 // strategy won't work with private because the same address 136 // may be accessed by different sized loads/stores. 137 138 // Note: I'm assuming that the largest load/store to private 139 // is 8 bytes. If it is larger, the stride will have to increase 140 141 Addr addr_div8 = addr / 8; 142 Addr addr_mod8 = addr % 8; 143 144 Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 + 145 addr_mod8 + w->privBase; 146 147 assert(ret < w->privBase + 148 (w->privSizePerItem * w->computeUnit->wfSize())); 149 150 return ret; 151 } 152 153 template<typename MemDataType, typename DestDataType, 154 typename AddrRegOperandType> 155 void 156 LdInst<MemDataType, DestDataType, 157 AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 158 { 159 Wavefront *w = gpuDynInst->wavefront(); 160 161 typedef typename MemDataType::CType MemCType; 162 const VectorMask &mask = w->getPred(); 163 164 // Kernarg references are handled uniquely for now (no Memory Request 165 // is used), so special-case them up front. Someday we should 166 // make this more realistic, at which we should get rid of this 167 // block and fold this case into the switch below. 168 if (this->segment == Brig::BRIG_SEGMENT_KERNARG) { 169 MemCType val; 170 171 // I assume no vector ld for kernargs 172 assert(num_dest_operands == 1); 173 174 // assuming for the moment that we'll never do register 175 // offsets into kernarg space... just to make life simpler 176 uint64_t address = this->addr.calcUniform(); 177 178 val = *(MemCType*)&w->kernelArgs[address]; 179 180 DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val); 181 182 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 183 if (mask[lane]) { 184 this->dest.set(w, lane, val); 185 } 186 } 187 188 return; 189 } else if (this->segment == Brig::BRIG_SEGMENT_ARG) { 190 uint64_t address = this->addr.calcUniform(); 191 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 192 if (mask[lane]) { 193 MemCType val = w->readCallArgMem<MemCType>(lane, address); 194 195 DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address, 196 (unsigned long long)val); 197 198 this->dest.set(w, lane, val); 199 } 200 } 201 202 return; 203 } 204 205 GPUDynInstPtr m = gpuDynInst; 206 207 this->addr.calcVector(w, m->addr); 208 209 m->m_op = Enums::MO_LD; 210 m->m_type = MemDataType::memType; 211 m->v_type = DestDataType::vgprType; 212 213 m->exec_mask = w->execMask(); 214 m->statusBitVector = 0; 215 m->equiv = this->equivClass; 216 m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); 217 218 m->scope = getGenericMemoryScope(this->memoryScope); 219 220 if (num_dest_operands == 1) { 221 m->dst_reg = this->dest.regIndex(); 222 m->n_reg = 1; 223 } else { 224 m->n_reg = num_dest_operands; 225 for (int i = 0; i < num_dest_operands; ++i) { 226 m->dst_reg_vec[i] = this->dest_vect[i].regIndex(); 227 } 228 } 229 230 m->simdId = w->simdId; 231 m->wfSlotId = w->wfSlotId; 232 m->wfDynId = w->wfDynId; 233 m->kern_id = w->kernId; 234 m->cu_id = w->computeUnit->cu_id; 235 m->latency.init(&w->computeUnit->shader->tick_cnt); 236 237 switch (this->segment) { 238 case Brig::BRIG_SEGMENT_GLOBAL: 239 m->s_type = SEG_GLOBAL; 240 m->pipeId = GLBMEM_PIPE; 241 m->latency.set(w->computeUnit->shader->ticks(1)); 242 243 // this is a complete hack to get around a compiler bug 244 // (the compiler currently generates global access for private 245 // addresses (starting from 0). We need to add the private offset) 246 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 247 if (m->addr[lane] < w->privSizePerItem) { 248 if (mask[lane]) { 249 // what is the size of the object we are accessing? 250 // find base for for this wavefront 251 252 // calcPrivAddr will fail if accesses are unaligned 253 assert(!((sizeof(MemCType) - 1) & m->addr[lane])); 254 255 Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, 256 this); 257 258 m->addr[lane] = privAddr; 259 } 260 } 261 } 262 263 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 264 w->outstandingReqsRdGm++; 265 w->rdGmReqsInPipe--; 266 break; 267 268 case Brig::BRIG_SEGMENT_SPILL: 269 assert(num_dest_operands == 1); 270 m->s_type = SEG_SPILL; 271 m->pipeId = GLBMEM_PIPE; 272 m->latency.set(w->computeUnit->shader->ticks(1)); 273 { 274 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 275 // note: this calculation will NOT WORK if the compiler 276 // ever generates loads/stores to the same address with 277 // different widths (e.g., a ld_u32 addr and a ld_u16 addr) 278 if (mask[lane]) { 279 assert(m->addr[lane] < w->spillSizePerItem); 280 281 m->addr[lane] = m->addr[lane] * w->spillWidth + 282 lane * sizeof(MemCType) + w->spillBase; 283 284 w->lastAddr[lane] = m->addr[lane]; 285 } 286 } 287 } 288 289 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 290 w->outstandingReqsRdGm++; 291 w->rdGmReqsInPipe--; 292 break; 293 294 case Brig::BRIG_SEGMENT_GROUP: 295 m->s_type = SEG_SHARED; 296 m->pipeId = LDSMEM_PIPE; 297 m->latency.set(w->computeUnit->shader->ticks(24)); 298 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 299 w->outstandingReqsRdLm++; 300 w->rdLmReqsInPipe--; 301 break; 302 303 case Brig::BRIG_SEGMENT_READONLY: 304 m->s_type = SEG_READONLY; 305 m->pipeId = GLBMEM_PIPE; 306 m->latency.set(w->computeUnit->shader->ticks(1)); 307 308 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 309 if (mask[lane]) { 310 assert(m->addr[lane] + sizeof(MemCType) <= w->roSize); 311 m->addr[lane] += w->roBase; 312 } 313 } 314 315 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 316 w->outstandingReqsRdGm++; 317 w->rdGmReqsInPipe--; 318 break; 319 320 case Brig::BRIG_SEGMENT_PRIVATE: 321 m->s_type = SEG_PRIVATE; 322 m->pipeId = GLBMEM_PIPE; 323 m->latency.set(w->computeUnit->shader->ticks(1)); 324 { 325 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 326 if (mask[lane]) { 327 assert(m->addr[lane] < w->privSizePerItem); 328 329 m->addr[lane] = m->addr[lane] + 330 lane * sizeof(MemCType) + w->privBase; 331 } 332 } 333 } 334 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 335 w->outstandingReqsRdGm++; 336 w->rdGmReqsInPipe--; 337 break; 338 339 default: 340 fatal("Load to unsupported segment %d %llxe\n", this->segment, 341 m->addr[0]); 342 } 343 344 w->outstandingReqs++; 345 w->memReqsInPipe--; 346 } 347 348 template<typename OperationType, typename SrcDataType, 349 typename AddrRegOperandType> 350 void 351 StInst<OperationType, SrcDataType, 352 AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 353 { 354 Wavefront *w = gpuDynInst->wavefront(); 355 356 typedef typename OperationType::CType CType; 357 358 const VectorMask &mask = w->getPred(); 359 360 // arg references are handled uniquely for now (no Memory Request 361 // is used), so special-case them up front. Someday we should 362 // make this more realistic, at which we should get rid of this 363 // block and fold this case into the switch below. 364 if (this->segment == Brig::BRIG_SEGMENT_ARG) { 365 uint64_t address = this->addr.calcUniform(); 366 367 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 368 if (mask[lane]) { 369 CType data = this->src.template get<CType>(w, lane); 370 DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data); 371 w->writeCallArgMem<CType>(lane, address, data); 372 } 373 } 374 375 return; 376 } 377 378 GPUDynInstPtr m = gpuDynInst; 379 380 m->exec_mask = w->execMask(); 381 382 this->addr.calcVector(w, m->addr); 383 384 if (num_src_operands == 1) { 385 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 386 if (mask[lane]) { 387 ((CType*)m->d_data)[lane] = 388 this->src.template get<CType>(w, lane); 389 } 390 } 391 } else { 392 for (int k= 0; k < num_src_operands; ++k) { 393 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 394 if (mask[lane]) { 395 ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] = 396 this->src_vect[k].template get<CType>(w, lane); 397 } 398 } 399 } 400 } 401 402 m->m_op = Enums::MO_ST; 403 m->m_type = OperationType::memType; 404 m->v_type = OperationType::vgprType; 405 406 m->statusBitVector = 0; 407 m->equiv = this->equivClass; 408 409 if (num_src_operands == 1) { 410 m->n_reg = 1; 411 } else { 412 m->n_reg = num_src_operands; 413 } 414 415 m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); 416 417 m->scope = getGenericMemoryScope(this->memoryScope); 418 419 m->simdId = w->simdId; 420 m->wfSlotId = w->wfSlotId; 421 m->wfDynId = w->wfDynId; 422 m->kern_id = w->kernId; 423 m->cu_id = w->computeUnit->cu_id; 424 m->latency.init(&w->computeUnit->shader->tick_cnt); 425 426 switch (this->segment) { 427 case Brig::BRIG_SEGMENT_GLOBAL: 428 m->s_type = SEG_GLOBAL; 429 m->pipeId = GLBMEM_PIPE; 430 m->latency.set(w->computeUnit->shader->ticks(1)); 431 432 // this is a complete hack to get around a compiler bug 433 // (the compiler currently generates global access for private 434 // addresses (starting from 0). We need to add the private offset) 435 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 436 if (mask[lane]) { 437 if (m->addr[lane] < w->privSizePerItem) { 438 439 // calcPrivAddr will fail if accesses are unaligned 440 assert(!((sizeof(CType)-1) & m->addr[lane])); 441 442 Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, 443 this); 444 445 m->addr[lane] = privAddr; 446 } 447 } 448 } 449 450 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 451 w->outstandingReqsWrGm++; 452 w->wrGmReqsInPipe--; 453 break; 454 455 case Brig::BRIG_SEGMENT_SPILL: 456 assert(num_src_operands == 1); 457 m->s_type = SEG_SPILL; 458 m->pipeId = GLBMEM_PIPE; 459 m->latency.set(w->computeUnit->shader->ticks(1)); 460 { 461 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 462 if (mask[lane]) { 463 assert(m->addr[lane] < w->spillSizePerItem); 464 465 m->addr[lane] = m->addr[lane] * w->spillWidth + 466 lane * sizeof(CType) + w->spillBase; 467 } 468 } 469 } 470 471 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 472 w->outstandingReqsWrGm++; 473 w->wrGmReqsInPipe--; 474 break; 475 476 case Brig::BRIG_SEGMENT_GROUP: 477 m->s_type = SEG_SHARED; 478 m->pipeId = LDSMEM_PIPE; 479 m->latency.set(w->computeUnit->shader->ticks(24)); 480 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 481 w->outstandingReqsWrLm++; 482 w->wrLmReqsInPipe--; 483 break; 484 485 case Brig::BRIG_SEGMENT_PRIVATE: 486 m->s_type = SEG_PRIVATE; 487 m->pipeId = GLBMEM_PIPE; 488 m->latency.set(w->computeUnit->shader->ticks(1)); 489 { 490 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 491 if (mask[lane]) { 492 assert(m->addr[lane] < w->privSizePerItem); 493 m->addr[lane] = m->addr[lane] + lane * 494 sizeof(CType)+w->privBase; 495 } 496 } 497 } 498 499 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 500 w->outstandingReqsWrGm++; 501 w->wrGmReqsInPipe--; 502 break; 503 504 default: 505 fatal("Store to unsupported segment %d\n", this->segment); 506 } 507 508 w->outstandingReqs++; 509 w->memReqsInPipe--; 510 } 511 512 template<typename OperationType, typename SrcDataType, 513 typename AddrRegOperandType> 514 void 515 StInst<OperationType, SrcDataType, 516 AddrRegOperandType>::generateDisassembly() 517 { 518 switch (num_src_operands) { 519 case 1: 520 this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, 521 segmentNames[this->segment], 522 OperationType::label, 523 this->src.disassemble(), 524 this->addr.disassemble()); 525 break; 526 case 2: 527 this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, 528 segmentNames[this->segment], 529 OperationType::label, 530 this->src_vect[0].disassemble(), 531 this->src_vect[1].disassemble(), 532 this->addr.disassemble()); 533 break; 534 case 4: 535 this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", 536 this->opcode, 537 segmentNames[this->segment], 538 OperationType::label, 539 this->src_vect[0].disassemble(), 540 this->src_vect[1].disassemble(), 541 this->src_vect[2].disassemble(), 542 this->src_vect[3].disassemble(), 543 this->addr.disassemble()); 544 break; 545 default: fatal("Bad ld register src operand, num vector operands: " 546 "%d \n", num_src_operands); 547 break; 548 } 549 } 550 551 template<typename DataType, typename AddrRegOperandType, int NumSrcOperands, 552 bool HasDst> 553 void 554 AtomicInst<DataType, AddrRegOperandType, NumSrcOperands, 555 HasDst>::execute(GPUDynInstPtr gpuDynInst) 556 { 557 typedef typename DataType::CType CType; 558 559 Wavefront *w = gpuDynInst->wavefront(); 560 561 GPUDynInstPtr m = gpuDynInst; 562 563 this->addr.calcVector(w, m->addr); 564 565 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 566 ((CType *)m->a_data)[lane] = 567 this->src[0].template get<CType>(w, lane); 568 } 569 570 // load second source operand for CAS 571 if (NumSrcOperands > 1) { 572 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 573 ((CType*)m->x_data)[lane] = 574 this->src[1].template get<CType>(w, lane); 575 } 576 } 577 578 assert(NumSrcOperands <= 2); 579 580 m->m_op = this->opType; 581 m->m_type = DataType::memType; 582 m->v_type = DataType::vgprType; 583 584 m->exec_mask = w->execMask(); 585 m->statusBitVector = 0; 586 m->equiv = 0; // atomics don't have an equivalence class operand 587 m->n_reg = 1; 588 m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); 589 590 m->scope = getGenericMemoryScope(this->memoryScope); 591 592 if (HasDst) { 593 m->dst_reg = this->dest.regIndex(); 594 } 595 596 m->simdId = w->simdId; 597 m->wfSlotId = w->wfSlotId; 598 m->wfDynId = w->wfDynId; 599 m->kern_id = w->kernId; 600 m->cu_id = w->computeUnit->cu_id; 601 m->latency.init(&w->computeUnit->shader->tick_cnt); 602 603 switch (this->segment) { 604 case Brig::BRIG_SEGMENT_GLOBAL: 605 m->s_type = SEG_GLOBAL; 606 m->latency.set(w->computeUnit->shader->ticks(64)); 607 m->pipeId = GLBMEM_PIPE; 608 609 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 610 w->outstandingReqsWrGm++; 611 w->wrGmReqsInPipe--; 612 w->outstandingReqsRdGm++; 613 w->rdGmReqsInPipe--; 614 break; 615 616 case Brig::BRIG_SEGMENT_GROUP: 617 m->s_type = SEG_SHARED; 618 m->pipeId = LDSMEM_PIPE; 619 m->latency.set(w->computeUnit->shader->ticks(24)); 620 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 621 w->outstandingReqsWrLm++; 622 w->wrLmReqsInPipe--; 623 w->outstandingReqsRdLm++; 624 w->rdLmReqsInPipe--; 625 break; 626 627 default: 628 fatal("Atomic op to unsupported segment %d\n", 629 this->segment); 630 } 631 632 w->outstandingReqs++; 633 w->memReqsInPipe--; 634 } 635 636 const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp); 637 638 template<typename DataType, typename AddrRegOperandType, int NumSrcOperands, 639 bool HasDst> 640 void 641 AtomicInst<DataType, AddrRegOperandType, NumSrcOperands, 642 HasDst>::generateDisassembly() 643 { 644 if (HasDst) { 645 this->disassembly = 646 csprintf("%s_%s_%s_%s %s,%s", this->opcode, 647 atomicOpToString(this->atomicOperation), 648 segmentNames[this->segment], 649 DataType::label, this->dest.disassemble(), 650 this->addr.disassemble()); 651 } else { 652 this->disassembly = 653 csprintf("%s_%s_%s_%s %s", this->opcode, 654 atomicOpToString(this->atomicOperation), 655 segmentNames[this->segment], 656 DataType::label, this->addr.disassemble()); 657 } 658 659 for (int i = 0; i < NumSrcOperands; ++i) { 660 this->disassembly += ","; 661 this->disassembly += this->src[i].disassemble(); 662 } 663 } 664} // namespace HsailISA 665