// mem_impl.hh revision 11308:7d8836fd043d
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
32 * 33 * Author: Steve Reinhardt 34 */ 35 36#include "arch/hsail/generic_types.hh" 37#include "gpu-compute/hsail_code.hh" 38 39// defined in code.cc, but not worth sucking in all of code.h for this 40// at this point 41extern const char *segmentNames[]; 42 43namespace HsailISA 44{ 45 template<typename DestDataType, typename AddrRegOperandType> 46 void 47 LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly() 48 { 49 this->disassembly = csprintf("%s_%s %s,%s", this->opcode, 50 DestDataType::label, 51 this->dest.disassemble(), 52 this->addr.disassemble()); 53 } 54 55 template<typename DestDataType, typename AddrRegOperandType> 56 void 57 LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 58 { 59 Wavefront *w = gpuDynInst->wavefront(); 60 61 typedef typename DestDataType::CType CType M5_VAR_USED; 62 const VectorMask &mask = w->get_pred(); 63 uint64_t addr_vec[VSZ]; 64 this->addr.calcVector(w, addr_vec); 65 66 for (int lane = 0; lane < VSZ; ++lane) { 67 if (mask[lane]) { 68 this->dest.set(w, lane, addr_vec[lane]); 69 } 70 } 71 } 72 73 template<typename MemDataType, typename DestDataType, 74 typename AddrRegOperandType> 75 void 76 LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly() 77 { 78 switch (num_dest_operands) { 79 case 1: 80 this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, 81 segmentNames[this->segment], 82 MemDataType::label, 83 this->dest.disassemble(), 84 this->addr.disassemble()); 85 break; 86 case 2: 87 this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, 88 segmentNames[this->segment], 89 MemDataType::label, 90 this->dest_vect[0].disassemble(), 91 this->dest_vect[1].disassemble(), 92 this->addr.disassemble()); 93 break; 94 case 4: 95 this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", 96 this->opcode, 97 segmentNames[this->segment], 98 MemDataType::label, 99 this->dest_vect[0].disassemble(), 100 this->dest_vect[1].disassemble(), 101 
this->dest_vect[2].disassemble(), 102 this->dest_vect[3].disassemble(), 103 this->addr.disassemble()); 104 break; 105 default: 106 fatal("Bad ld register dest operand, num vector operands: %d \n", 107 num_dest_operands); 108 break; 109 } 110 } 111 112 static Addr 113 calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i) 114 { 115 // what is the size of the object we are accessing?? 116 // NOTE: the compiler doesn't generate enough information 117 // to do this yet..have to just line up all the private 118 // work-item spaces back to back for now 119 /* 120 StorageElement* se = 121 i->parent->findSymbol(Brig::BrigPrivateSpace, addr); 122 assert(se); 123 124 return w->wfSlotId * w->privSizePerItem * VSZ + 125 se->offset * VSZ + 126 lane * se->size; 127 */ 128 129 // addressing strategy: interleave the private spaces of 130 // work-items in a wave-front on 8 byte granularity. 131 // this won't be perfect coalescing like the spill space 132 // strategy, but it's better than nothing. The spill space 133 // strategy won't work with private because the same address 134 // may be accessed by different sized loads/stores. 135 136 // Note: I'm assuming that the largest load/store to private 137 // is 8 bytes. If it is larger, the stride will have to increase 138 139 Addr addr_div8 = addr / 8; 140 Addr addr_mod8 = addr % 8; 141 142 Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase; 143 144 assert(ret < w->privBase + (w->privSizePerItem * VSZ)); 145 146 return ret; 147 } 148 149 template<typename MemDataType, typename DestDataType, 150 typename AddrRegOperandType> 151 void 152 LdInst<MemDataType, DestDataType, 153 AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 154 { 155 Wavefront *w = gpuDynInst->wavefront(); 156 157 typedef typename MemDataType::CType MemCType; 158 const VectorMask &mask = w->get_pred(); 159 160 // Kernarg references are handled uniquely for now (no Memory Request 161 // is used), so special-case them up front. 
Someday we should 162 // make this more realistic, at which we should get rid of this 163 // block and fold this case into the switch below. 164 if (this->segment == Brig::BRIG_SEGMENT_KERNARG) { 165 MemCType val; 166 167 // I assume no vector ld for kernargs 168 assert(num_dest_operands == 1); 169 170 // assuming for the moment that we'll never do register 171 // offsets into kernarg space... just to make life simpler 172 uint64_t address = this->addr.calcUniform(); 173 174 val = *(MemCType*)&w->kernelArgs[address]; 175 176 DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val); 177 178 for (int lane = 0; lane < VSZ; ++lane) { 179 if (mask[lane]) { 180 this->dest.set(w, lane, val); 181 } 182 } 183 184 return; 185 } else if (this->segment == Brig::BRIG_SEGMENT_ARG) { 186 uint64_t address = this->addr.calcUniform(); 187 for (int lane = 0; lane < VSZ; ++lane) { 188 if (mask[lane]) { 189 MemCType val = w->readCallArgMem<MemCType>(lane, address); 190 191 DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address, 192 (unsigned long long)val); 193 194 this->dest.set(w, lane, val); 195 } 196 } 197 198 return; 199 } 200 201 GPUDynInstPtr m = gpuDynInst; 202 203 this->addr.calcVector(w, m->addr); 204 205 m->m_op = Enums::MO_LD; 206 m->m_type = MemDataType::memType; 207 m->v_type = DestDataType::vgprType; 208 209 m->exec_mask = w->execMask(); 210 m->statusBitVector = 0; 211 m->equiv = this->equivClass; 212 m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); 213 214 m->scope = getGenericMemoryScope(this->memoryScope); 215 216 if (num_dest_operands == 1) { 217 m->dst_reg = this->dest.regIndex(); 218 m->n_reg = 1; 219 } else { 220 m->n_reg = num_dest_operands; 221 for (int i = 0; i < num_dest_operands; ++i) { 222 m->dst_reg_vec[i] = this->dest_vect[i].regIndex(); 223 } 224 } 225 226 m->simdId = w->simdId; 227 m->wfSlotId = w->wfSlotId; 228 m->wfDynId = w->wfDynId; 229 m->kern_id = w->kern_id; 230 m->cu_id = w->computeUnit->cu_id; 231 
m->latency.init(&w->computeUnit->shader->tick_cnt); 232 233 switch (this->segment) { 234 case Brig::BRIG_SEGMENT_GLOBAL: 235 m->s_type = SEG_GLOBAL; 236 m->pipeId = GLBMEM_PIPE; 237 m->latency.set(w->computeUnit->shader->ticks(1)); 238 239 // this is a complete hack to get around a compiler bug 240 // (the compiler currently generates global access for private 241 // addresses (starting from 0). We need to add the private offset) 242 for (int lane = 0; lane < VSZ; ++lane) { 243 if (m->addr[lane] < w->privSizePerItem) { 244 if (mask[lane]) { 245 // what is the size of the object we are accessing? 246 // find base for for this wavefront 247 248 // calcPrivAddr will fail if accesses are unaligned 249 assert(!((sizeof(MemCType) - 1) & m->addr[lane])); 250 251 Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, 252 this); 253 254 m->addr[lane] = privAddr; 255 } 256 } 257 } 258 259 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 260 w->outstanding_reqs_rd_gm++; 261 w->rd_gm_reqs_in_pipe--; 262 break; 263 264 case Brig::BRIG_SEGMENT_SPILL: 265 assert(num_dest_operands == 1); 266 m->s_type = SEG_SPILL; 267 m->pipeId = GLBMEM_PIPE; 268 m->latency.set(w->computeUnit->shader->ticks(1)); 269 { 270 for (int lane = 0; lane < VSZ; ++lane) { 271 // note: this calculation will NOT WORK if the compiler 272 // ever generates loads/stores to the same address with 273 // different widths (e.g., a ld_u32 addr and a ld_u16 addr) 274 if (mask[lane]) { 275 assert(m->addr[lane] < w->spillSizePerItem); 276 277 m->addr[lane] = m->addr[lane] * w->spillWidth + 278 lane * sizeof(MemCType) + w->spillBase; 279 280 w->last_addr[lane] = m->addr[lane]; 281 } 282 } 283 } 284 285 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 286 w->outstanding_reqs_rd_gm++; 287 w->rd_gm_reqs_in_pipe--; 288 break; 289 290 case Brig::BRIG_SEGMENT_GROUP: 291 m->s_type = SEG_SHARED; 292 m->pipeId = LDSMEM_PIPE; 293 m->latency.set(w->computeUnit->shader->ticks(24)); 294 
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 295 w->outstanding_reqs_rd_lm++; 296 w->rd_lm_reqs_in_pipe--; 297 break; 298 299 case Brig::BRIG_SEGMENT_READONLY: 300 m->s_type = SEG_READONLY; 301 m->pipeId = GLBMEM_PIPE; 302 m->latency.set(w->computeUnit->shader->ticks(1)); 303 304 for (int lane = 0; lane < VSZ; ++lane) { 305 if (mask[lane]) { 306 assert(m->addr[lane] + sizeof(MemCType) <= w->roSize); 307 m->addr[lane] += w->roBase; 308 } 309 } 310 311 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 312 w->outstanding_reqs_rd_gm++; 313 w->rd_gm_reqs_in_pipe--; 314 break; 315 316 case Brig::BRIG_SEGMENT_PRIVATE: 317 m->s_type = SEG_PRIVATE; 318 m->pipeId = GLBMEM_PIPE; 319 m->latency.set(w->computeUnit->shader->ticks(1)); 320 { 321 for (int lane = 0; lane < VSZ; ++lane) { 322 if (mask[lane]) { 323 assert(m->addr[lane] < w->privSizePerItem); 324 325 m->addr[lane] = m->addr[lane] + 326 lane * sizeof(MemCType) + w->privBase; 327 } 328 } 329 } 330 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 331 w->outstanding_reqs_rd_gm++; 332 w->rd_gm_reqs_in_pipe--; 333 break; 334 335 default: 336 fatal("Load to unsupported segment %d %llxe\n", this->segment, 337 m->addr[0]); 338 } 339 340 w->outstanding_reqs++; 341 w->mem_reqs_in_pipe--; 342 } 343 344 template<typename OperationType, typename SrcDataType, 345 typename AddrRegOperandType> 346 void 347 StInst<OperationType, SrcDataType, 348 AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 349 { 350 Wavefront *w = gpuDynInst->wavefront(); 351 352 typedef typename OperationType::CType CType; 353 354 const VectorMask &mask = w->get_pred(); 355 356 // arg references are handled uniquely for now (no Memory Request 357 // is used), so special-case them up front. Someday we should 358 // make this more realistic, at which we should get rid of this 359 // block and fold this case into the switch below. 
360 if (this->segment == Brig::BRIG_SEGMENT_ARG) { 361 uint64_t address = this->addr.calcUniform(); 362 363 for (int lane = 0; lane < VSZ; ++lane) { 364 if (mask[lane]) { 365 CType data = this->src.template get<CType>(w, lane); 366 DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data); 367 w->writeCallArgMem<CType>(lane, address, data); 368 } 369 } 370 371 return; 372 } 373 374 GPUDynInstPtr m = gpuDynInst; 375 376 m->exec_mask = w->execMask(); 377 378 this->addr.calcVector(w, m->addr); 379 380 if (num_src_operands == 1) { 381 for (int lane = 0; lane < VSZ; ++lane) { 382 if (mask[lane]) { 383 ((CType*)m->d_data)[lane] = 384 this->src.template get<CType>(w, lane); 385 } 386 } 387 } else { 388 for (int k= 0; k < num_src_operands; ++k) { 389 for (int lane = 0; lane < VSZ; ++lane) { 390 if (mask[lane]) { 391 ((CType*)m->d_data)[k * VSZ + lane] = 392 this->src_vect[k].template get<CType>(w, lane); 393 } 394 } 395 } 396 } 397 398 m->m_op = Enums::MO_ST; 399 m->m_type = OperationType::memType; 400 m->v_type = OperationType::vgprType; 401 402 m->statusBitVector = 0; 403 m->equiv = this->equivClass; 404 405 if (num_src_operands == 1) { 406 m->n_reg = 1; 407 } else { 408 m->n_reg = num_src_operands; 409 } 410 411 m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); 412 413 m->scope = getGenericMemoryScope(this->memoryScope); 414 415 m->simdId = w->simdId; 416 m->wfSlotId = w->wfSlotId; 417 m->wfDynId = w->wfDynId; 418 m->kern_id = w->kern_id; 419 m->cu_id = w->computeUnit->cu_id; 420 m->latency.init(&w->computeUnit->shader->tick_cnt); 421 422 switch (this->segment) { 423 case Brig::BRIG_SEGMENT_GLOBAL: 424 m->s_type = SEG_GLOBAL; 425 m->pipeId = GLBMEM_PIPE; 426 m->latency.set(w->computeUnit->shader->ticks(1)); 427 428 // this is a complete hack to get around a compiler bug 429 // (the compiler currently generates global access for private 430 // addresses (starting from 0). 
We need to add the private offset) 431 for (int lane = 0; lane < VSZ; ++lane) { 432 if (mask[lane]) { 433 if (m->addr[lane] < w->privSizePerItem) { 434 435 // calcPrivAddr will fail if accesses are unaligned 436 assert(!((sizeof(CType)-1) & m->addr[lane])); 437 438 Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, 439 this); 440 441 m->addr[lane] = privAddr; 442 } 443 } 444 } 445 446 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 447 w->outstanding_reqs_wr_gm++; 448 w->wr_gm_reqs_in_pipe--; 449 break; 450 451 case Brig::BRIG_SEGMENT_SPILL: 452 assert(num_src_operands == 1); 453 m->s_type = SEG_SPILL; 454 m->pipeId = GLBMEM_PIPE; 455 m->latency.set(w->computeUnit->shader->ticks(1)); 456 { 457 for (int lane = 0; lane < VSZ; ++lane) { 458 if (mask[lane]) { 459 assert(m->addr[lane] < w->spillSizePerItem); 460 461 m->addr[lane] = m->addr[lane] * w->spillWidth + 462 lane * sizeof(CType) + w->spillBase; 463 } 464 } 465 } 466 467 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 468 w->outstanding_reqs_wr_gm++; 469 w->wr_gm_reqs_in_pipe--; 470 break; 471 472 case Brig::BRIG_SEGMENT_GROUP: 473 m->s_type = SEG_SHARED; 474 m->pipeId = LDSMEM_PIPE; 475 m->latency.set(w->computeUnit->shader->ticks(24)); 476 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 477 w->outstanding_reqs_wr_lm++; 478 w->wr_lm_reqs_in_pipe--; 479 break; 480 481 case Brig::BRIG_SEGMENT_PRIVATE: 482 m->s_type = SEG_PRIVATE; 483 m->pipeId = GLBMEM_PIPE; 484 m->latency.set(w->computeUnit->shader->ticks(1)); 485 { 486 for (int lane = 0; lane < VSZ; ++lane) { 487 if (mask[lane]) { 488 assert(m->addr[lane] < w->privSizePerItem); 489 m->addr[lane] = m->addr[lane] + lane * 490 sizeof(CType)+w->privBase; 491 } 492 } 493 } 494 495 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 496 w->outstanding_reqs_wr_gm++; 497 w->wr_gm_reqs_in_pipe--; 498 break; 499 500 default: 501 fatal("Store to unsupported segment %d\n", this->segment); 502 } 503 504 w->outstanding_reqs++; 505 
w->mem_reqs_in_pipe--; 506 } 507 508 template<typename OperationType, typename SrcDataType, 509 typename AddrRegOperandType> 510 void 511 StInst<OperationType, SrcDataType, 512 AddrRegOperandType>::generateDisassembly() 513 { 514 switch (num_src_operands) { 515 case 1: 516 this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, 517 segmentNames[this->segment], 518 OperationType::label, 519 this->src.disassemble(), 520 this->addr.disassemble()); 521 break; 522 case 2: 523 this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, 524 segmentNames[this->segment], 525 OperationType::label, 526 this->src_vect[0].disassemble(), 527 this->src_vect[1].disassemble(), 528 this->addr.disassemble()); 529 break; 530 case 4: 531 this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", 532 this->opcode, 533 segmentNames[this->segment], 534 OperationType::label, 535 this->src_vect[0].disassemble(), 536 this->src_vect[1].disassemble(), 537 this->src_vect[2].disassemble(), 538 this->src_vect[3].disassemble(), 539 this->addr.disassemble()); 540 break; 541 default: fatal("Bad ld register src operand, num vector operands: " 542 "%d \n", num_src_operands); 543 break; 544 } 545 } 546 547 template<typename DataType, typename AddrRegOperandType, int NumSrcOperands, 548 bool HasDst> 549 void 550 AtomicInst<DataType, AddrRegOperandType, NumSrcOperands, 551 HasDst>::execute(GPUDynInstPtr gpuDynInst) 552 { 553 typedef typename DataType::CType CType; 554 555 Wavefront *w = gpuDynInst->wavefront(); 556 557 GPUDynInstPtr m = gpuDynInst; 558 559 this->addr.calcVector(w, m->addr); 560 561 for (int lane = 0; lane < VSZ; ++lane) { 562 ((CType *)m->a_data)[lane] = 563 this->src[0].template get<CType>(w, lane); 564 } 565 566 // load second source operand for CAS 567 if (NumSrcOperands > 1) { 568 for (int lane = 0; lane < VSZ; ++lane) { 569 ((CType*)m->x_data)[lane] = 570 this->src[1].template get<CType>(w, lane); 571 } 572 } 573 574 assert(NumSrcOperands <= 2); 575 576 m->m_op = 
this->opType; 577 m->m_type = DataType::memType; 578 m->v_type = DataType::vgprType; 579 580 m->exec_mask = w->execMask(); 581 m->statusBitVector = 0; 582 m->equiv = 0; // atomics don't have an equivalence class operand 583 m->n_reg = 1; 584 m->memoryOrder = getGenericMemoryOrder(this->memoryOrder); 585 586 m->scope = getGenericMemoryScope(this->memoryScope); 587 588 if (HasDst) { 589 m->dst_reg = this->dest.regIndex(); 590 } 591 592 m->simdId = w->simdId; 593 m->wfSlotId = w->wfSlotId; 594 m->wfDynId = w->wfDynId; 595 m->kern_id = w->kern_id; 596 m->cu_id = w->computeUnit->cu_id; 597 m->latency.init(&w->computeUnit->shader->tick_cnt); 598 599 switch (this->segment) { 600 case Brig::BRIG_SEGMENT_GLOBAL: 601 m->s_type = SEG_GLOBAL; 602 m->latency.set(w->computeUnit->shader->ticks(64)); 603 m->pipeId = GLBMEM_PIPE; 604 605 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m); 606 w->outstanding_reqs_wr_gm++; 607 w->wr_gm_reqs_in_pipe--; 608 w->outstanding_reqs_rd_gm++; 609 w->rd_gm_reqs_in_pipe--; 610 break; 611 612 case Brig::BRIG_SEGMENT_GROUP: 613 m->s_type = SEG_SHARED; 614 m->pipeId = LDSMEM_PIPE; 615 m->latency.set(w->computeUnit->shader->ticks(24)); 616 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 617 w->outstanding_reqs_wr_lm++; 618 w->wr_lm_reqs_in_pipe--; 619 w->outstanding_reqs_rd_lm++; 620 w->rd_lm_reqs_in_pipe--; 621 break; 622 623 default: 624 fatal("Atomic op to unsupported segment %d\n", 625 this->segment); 626 } 627 628 w->outstanding_reqs++; 629 w->mem_reqs_in_pipe--; 630 } 631 632 const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp); 633 634 template<typename DataType, typename AddrRegOperandType, int NumSrcOperands, 635 bool HasDst> 636 void 637 AtomicInst<DataType, AddrRegOperandType, NumSrcOperands, 638 HasDst>::generateDisassembly() 639 { 640 if (HasDst) { 641 this->disassembly = 642 csprintf("%s_%s_%s_%s %s,%s", this->opcode, 643 atomicOpToString(this->atomicOperation), 644 segmentNames[this->segment], 645 
DataType::label, this->dest.disassemble(), 646 this->addr.disassemble()); 647 } else { 648 this->disassembly = 649 csprintf("%s_%s_%s_%s %s", this->opcode, 650 atomicOpToString(this->atomicOperation), 651 segmentNames[this->segment], 652 DataType::label, this->addr.disassemble()); 653 } 654 655 for (int i = 0; i < NumSrcOperands; ++i) { 656 this->disassembly += ","; 657 this->disassembly += this->src[i].disassemble(); 658 } 659 } 660} // namespace HsailISA 661