mem_impl.hh revision 11700
111308Santhony.gutierrez@amd.com/* 211308Santhony.gutierrez@amd.com * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 311308Santhony.gutierrez@amd.com * All rights reserved. 411308Santhony.gutierrez@amd.com * 511308Santhony.gutierrez@amd.com * For use for simulation and test purposes only 611308Santhony.gutierrez@amd.com * 711308Santhony.gutierrez@amd.com * Redistribution and use in source and binary forms, with or without 811308Santhony.gutierrez@amd.com * modification, are permitted provided that the following conditions are met: 911308Santhony.gutierrez@amd.com * 1011308Santhony.gutierrez@amd.com * 1. Redistributions of source code must retain the above copyright notice, 1111308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer. 1211308Santhony.gutierrez@amd.com * 1311308Santhony.gutierrez@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice, 1411308Santhony.gutierrez@amd.com * this list of conditions and the following disclaimer in the documentation 1511308Santhony.gutierrez@amd.com * and/or other materials provided with the distribution. 1611308Santhony.gutierrez@amd.com * 1711308Santhony.gutierrez@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors 1811308Santhony.gutierrez@amd.com * may be used to endorse or promote products derived from this software 1911308Santhony.gutierrez@amd.com * without specific prior written permission. 2011308Santhony.gutierrez@amd.com * 2111308Santhony.gutierrez@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 2211308Santhony.gutierrez@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2311308Santhony.gutierrez@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2411308Santhony.gutierrez@amd.com * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 2511308Santhony.gutierrez@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2611308Santhony.gutierrez@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2711308Santhony.gutierrez@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2811308Santhony.gutierrez@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2911308Santhony.gutierrez@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 3011308Santhony.gutierrez@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 3111308Santhony.gutierrez@amd.com * POSSIBILITY OF SUCH DAMAGE. 3211308Santhony.gutierrez@amd.com * 3311308Santhony.gutierrez@amd.com * Author: Steve Reinhardt 3411308Santhony.gutierrez@amd.com */ 3511308Santhony.gutierrez@amd.com 3611308Santhony.gutierrez@amd.com#include "gpu-compute/hsail_code.hh" 3711308Santhony.gutierrez@amd.com 3811308Santhony.gutierrez@amd.com// defined in code.cc, but not worth sucking in all of code.h for this 3911308Santhony.gutierrez@amd.com// at this point 4011308Santhony.gutierrez@amd.comextern const char *segmentNames[]; 4111308Santhony.gutierrez@amd.com 4211308Santhony.gutierrez@amd.comnamespace HsailISA 4311308Santhony.gutierrez@amd.com{ 4411308Santhony.gutierrez@amd.com template<typename DestDataType, typename AddrRegOperandType> 4511308Santhony.gutierrez@amd.com void 4611308Santhony.gutierrez@amd.com LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly() 4711308Santhony.gutierrez@amd.com { 4811308Santhony.gutierrez@amd.com this->disassembly = csprintf("%s_%s %s,%s", this->opcode, 4911308Santhony.gutierrez@amd.com DestDataType::label, 5011308Santhony.gutierrez@amd.com this->dest.disassemble(), 5111308Santhony.gutierrez@amd.com this->addr.disassemble()); 5211308Santhony.gutierrez@amd.com } 5311308Santhony.gutierrez@amd.com 5411308Santhony.gutierrez@amd.com template<typename DestDataType, typename AddrRegOperandType> 5511308Santhony.gutierrez@amd.com void 5611308Santhony.gutierrez@amd.com LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 5711308Santhony.gutierrez@amd.com { 5811308Santhony.gutierrez@amd.com Wavefront *w = gpuDynInst->wavefront(); 5911308Santhony.gutierrez@amd.com 6011308Santhony.gutierrez@amd.com typedef typename DestDataType::CType CType M5_VAR_USED; 6111639Salexandru.dutu@amd.com const VectorMask &mask = w->getPred(); 6211534Sjohn.kalamatianos@amd.com std::vector<Addr> addr_vec; 6311534Sjohn.kalamatianos@amd.com addr_vec.resize(w->computeUnit->wfSize(), (Addr)0); 6411308Santhony.gutierrez@amd.com this->addr.calcVector(w, addr_vec); 6511308Santhony.gutierrez@amd.com 6611534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 6711308Santhony.gutierrez@amd.com if (mask[lane]) { 6811308Santhony.gutierrez@amd.com this->dest.set(w, lane, addr_vec[lane]); 6911308Santhony.gutierrez@amd.com } 7011308Santhony.gutierrez@amd.com } 7111534Sjohn.kalamatianos@amd.com addr_vec.clear(); 7211308Santhony.gutierrez@amd.com } 7311308Santhony.gutierrez@amd.com 7411308Santhony.gutierrez@amd.com template<typename MemDataType, typename DestDataType, 7511308Santhony.gutierrez@amd.com typename AddrRegOperandType> 7611308Santhony.gutierrez@amd.com void 7711308Santhony.gutierrez@amd.com LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly() 7811308Santhony.gutierrez@amd.com { 7911308Santhony.gutierrez@amd.com switch (num_dest_operands) { 8011308Santhony.gutierrez@amd.com case 1: 8111308Santhony.gutierrez@amd.com this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, 8211308Santhony.gutierrez@amd.com segmentNames[this->segment], 8311308Santhony.gutierrez@amd.com MemDataType::label, 8411308Santhony.gutierrez@amd.com this->dest.disassemble(), 8511308Santhony.gutierrez@amd.com this->addr.disassemble()); 8611308Santhony.gutierrez@amd.com break; 8711308Santhony.gutierrez@amd.com case 2: 8811308Santhony.gutierrez@amd.com this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, 8911308Santhony.gutierrez@amd.com segmentNames[this->segment], 9011308Santhony.gutierrez@amd.com MemDataType::label, 9111308Santhony.gutierrez@amd.com this->dest_vect[0].disassemble(), 9211308Santhony.gutierrez@amd.com this->dest_vect[1].disassemble(), 9311308Santhony.gutierrez@amd.com this->addr.disassemble()); 9411308Santhony.gutierrez@amd.com break; 9511645Salexandru.dutu@amd.com case 3: 9611645Salexandru.dutu@amd.com this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode, 9711645Salexandru.dutu@amd.com segmentNames[this->segment], 9811645Salexandru.dutu@amd.com MemDataType::label, 9911645Salexandru.dutu@amd.com this->dest_vect[0].disassemble(), 10011645Salexandru.dutu@amd.com this->dest_vect[1].disassemble(), 10111645Salexandru.dutu@amd.com this->dest_vect[2].disassemble(), 10211645Salexandru.dutu@amd.com this->addr.disassemble()); 10311645Salexandru.dutu@amd.com break; 10411308Santhony.gutierrez@amd.com case 4: 10511308Santhony.gutierrez@amd.com this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", 10611308Santhony.gutierrez@amd.com this->opcode, 10711308Santhony.gutierrez@amd.com segmentNames[this->segment], 10811308Santhony.gutierrez@amd.com MemDataType::label, 10911308Santhony.gutierrez@amd.com this->dest_vect[0].disassemble(), 11011308Santhony.gutierrez@amd.com this->dest_vect[1].disassemble(), 11111308Santhony.gutierrez@amd.com this->dest_vect[2].disassemble(), 11211308Santhony.gutierrez@amd.com this->dest_vect[3].disassemble(), 11311308Santhony.gutierrez@amd.com this->addr.disassemble()); 11411308Santhony.gutierrez@amd.com break; 11511308Santhony.gutierrez@amd.com default: 11611308Santhony.gutierrez@amd.com fatal("Bad ld register dest operand, num vector operands: %d \n", 11711308Santhony.gutierrez@amd.com num_dest_operands); 11811308Santhony.gutierrez@amd.com break; 11911308Santhony.gutierrez@amd.com } 12011308Santhony.gutierrez@amd.com } 12111308Santhony.gutierrez@amd.com 12211308Santhony.gutierrez@amd.com static Addr 12311308Santhony.gutierrez@amd.com calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i) 12411308Santhony.gutierrez@amd.com { 12511308Santhony.gutierrez@amd.com // what is the size of the object we are accessing?? 12611308Santhony.gutierrez@amd.com // NOTE: the compiler doesn't generate enough information 12711308Santhony.gutierrez@amd.com // to do this yet..have to just line up all the private 12811308Santhony.gutierrez@amd.com // work-item spaces back to back for now 12911308Santhony.gutierrez@amd.com /* 13011308Santhony.gutierrez@amd.com StorageElement* se = 13111308Santhony.gutierrez@amd.com i->parent->findSymbol(Brig::BrigPrivateSpace, addr); 13211308Santhony.gutierrez@amd.com assert(se); 13311308Santhony.gutierrez@amd.com 13411534Sjohn.kalamatianos@amd.com return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() + 13511534Sjohn.kalamatianos@amd.com se->offset * w->computeUnit->wfSize() + 13611308Santhony.gutierrez@amd.com lane * se->size; 13711308Santhony.gutierrez@amd.com */ 13811308Santhony.gutierrez@amd.com 13911308Santhony.gutierrez@amd.com // addressing strategy: interleave the private spaces of 14011308Santhony.gutierrez@amd.com // work-items in a wave-front on 8 byte granularity. 14111308Santhony.gutierrez@amd.com // this won't be perfect coalescing like the spill space 14211308Santhony.gutierrez@amd.com // strategy, but it's better than nothing. The spill space 14311308Santhony.gutierrez@amd.com // strategy won't work with private because the same address 14411308Santhony.gutierrez@amd.com // may be accessed by different sized loads/stores. 14511308Santhony.gutierrez@amd.com 14611308Santhony.gutierrez@amd.com // Note: I'm assuming that the largest load/store to private 14711308Santhony.gutierrez@amd.com // is 8 bytes. If it is larger, the stride will have to increase 14811308Santhony.gutierrez@amd.com 14911308Santhony.gutierrez@amd.com Addr addr_div8 = addr / 8; 15011308Santhony.gutierrez@amd.com Addr addr_mod8 = addr % 8; 15111308Santhony.gutierrez@amd.com 15211534Sjohn.kalamatianos@amd.com Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 + 15311534Sjohn.kalamatianos@amd.com addr_mod8 + w->privBase; 15411308Santhony.gutierrez@amd.com 15511534Sjohn.kalamatianos@amd.com assert(ret < w->privBase + 15611534Sjohn.kalamatianos@amd.com (w->privSizePerItem * w->computeUnit->wfSize())); 15711308Santhony.gutierrez@amd.com 15811308Santhony.gutierrez@amd.com return ret; 15911308Santhony.gutierrez@amd.com } 16011308Santhony.gutierrez@amd.com 16111308Santhony.gutierrez@amd.com template<typename MemDataType, typename DestDataType, 16211308Santhony.gutierrez@amd.com typename AddrRegOperandType> 16311308Santhony.gutierrez@amd.com void 16411308Santhony.gutierrez@amd.com LdInst<MemDataType, DestDataType, 16511308Santhony.gutierrez@amd.com AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 16611308Santhony.gutierrez@amd.com { 16711308Santhony.gutierrez@amd.com Wavefront *w = gpuDynInst->wavefront(); 16811308Santhony.gutierrez@amd.com 16911308Santhony.gutierrez@amd.com typedef typename MemDataType::CType MemCType; 17011639Salexandru.dutu@amd.com const VectorMask &mask = w->getPred(); 17111308Santhony.gutierrez@amd.com 17211308Santhony.gutierrez@amd.com // Kernarg references are handled uniquely for now (no Memory Request 17311308Santhony.gutierrez@amd.com // is used), so special-case them up front. Someday we should 17411308Santhony.gutierrez@amd.com // make this more realistic, at which we should get rid of this 17511308Santhony.gutierrez@amd.com // block and fold this case into the switch below. 17611308Santhony.gutierrez@amd.com if (this->segment == Brig::BRIG_SEGMENT_KERNARG) { 17711308Santhony.gutierrez@amd.com MemCType val; 17811308Santhony.gutierrez@amd.com 17911308Santhony.gutierrez@amd.com // I assume no vector ld for kernargs 18011308Santhony.gutierrez@amd.com assert(num_dest_operands == 1); 18111308Santhony.gutierrez@amd.com 18211308Santhony.gutierrez@amd.com // assuming for the moment that we'll never do register 18311308Santhony.gutierrez@amd.com // offsets into kernarg space... just to make life simpler 18411308Santhony.gutierrez@amd.com uint64_t address = this->addr.calcUniform(); 18511308Santhony.gutierrez@amd.com 18611308Santhony.gutierrez@amd.com val = *(MemCType*)&w->kernelArgs[address]; 18711308Santhony.gutierrez@amd.com 18811308Santhony.gutierrez@amd.com DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val); 18911308Santhony.gutierrez@amd.com 19011534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 19111308Santhony.gutierrez@amd.com if (mask[lane]) { 19211308Santhony.gutierrez@amd.com this->dest.set(w, lane, val); 19311308Santhony.gutierrez@amd.com } 19411308Santhony.gutierrez@amd.com } 19511308Santhony.gutierrez@amd.com 19611308Santhony.gutierrez@amd.com return; 19711308Santhony.gutierrez@amd.com } else if (this->segment == Brig::BRIG_SEGMENT_ARG) { 19811308Santhony.gutierrez@amd.com uint64_t address = this->addr.calcUniform(); 19911534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 20011308Santhony.gutierrez@amd.com if (mask[lane]) { 20111308Santhony.gutierrez@amd.com MemCType val = w->readCallArgMem<MemCType>(lane, address); 20211308Santhony.gutierrez@amd.com 20311308Santhony.gutierrez@amd.com DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address, 20411308Santhony.gutierrez@amd.com (unsigned long long)val); 20511308Santhony.gutierrez@amd.com 20611308Santhony.gutierrez@amd.com this->dest.set(w, lane, val); 20711308Santhony.gutierrez@amd.com } 20811308Santhony.gutierrez@amd.com } 20911308Santhony.gutierrez@amd.com 21011308Santhony.gutierrez@amd.com return; 21111308Santhony.gutierrez@amd.com } 21211308Santhony.gutierrez@amd.com 21311308Santhony.gutierrez@amd.com GPUDynInstPtr m = gpuDynInst; 21411308Santhony.gutierrez@amd.com 21511308Santhony.gutierrez@amd.com this->addr.calcVector(w, m->addr); 21611308Santhony.gutierrez@amd.com 21711308Santhony.gutierrez@amd.com m->m_type = MemDataType::memType; 21811308Santhony.gutierrez@amd.com m->v_type = DestDataType::vgprType; 21911308Santhony.gutierrez@amd.com 22011308Santhony.gutierrez@amd.com m->exec_mask = w->execMask(); 22111308Santhony.gutierrez@amd.com m->statusBitVector = 0; 22211308Santhony.gutierrez@amd.com m->equiv = this->equivClass; 22311308Santhony.gutierrez@amd.com 22411308Santhony.gutierrez@amd.com if (num_dest_operands == 1) { 22511308Santhony.gutierrez@amd.com m->dst_reg = this->dest.regIndex(); 22611308Santhony.gutierrez@amd.com m->n_reg = 1; 22711308Santhony.gutierrez@amd.com } else { 22811308Santhony.gutierrez@amd.com m->n_reg = num_dest_operands; 22911308Santhony.gutierrez@amd.com for (int i = 0; i < num_dest_operands; ++i) { 23011308Santhony.gutierrez@amd.com m->dst_reg_vec[i] = this->dest_vect[i].regIndex(); 23111308Santhony.gutierrez@amd.com } 23211308Santhony.gutierrez@amd.com } 23311308Santhony.gutierrez@amd.com 23411308Santhony.gutierrez@amd.com m->simdId = w->simdId; 23511308Santhony.gutierrez@amd.com m->wfSlotId = w->wfSlotId; 23611308Santhony.gutierrez@amd.com m->wfDynId = w->wfDynId; 23711639Salexandru.dutu@amd.com m->kern_id = w->kernId; 23811308Santhony.gutierrez@amd.com m->cu_id = w->computeUnit->cu_id; 23911308Santhony.gutierrez@amd.com m->latency.init(&w->computeUnit->shader->tick_cnt); 24011308Santhony.gutierrez@amd.com 24111308Santhony.gutierrez@amd.com switch (this->segment) { 24211308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_GLOBAL: 24311308Santhony.gutierrez@amd.com m->pipeId = GLBMEM_PIPE; 24411308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(1)); 24511308Santhony.gutierrez@amd.com 24611308Santhony.gutierrez@amd.com // this is a complete hack to get around a compiler bug 24711308Santhony.gutierrez@amd.com // (the compiler currently generates global access for private 24811308Santhony.gutierrez@amd.com // addresses (starting from 0). We need to add the private offset) 24911534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 25011308Santhony.gutierrez@amd.com if (m->addr[lane] < w->privSizePerItem) { 25111308Santhony.gutierrez@amd.com if (mask[lane]) { 25211308Santhony.gutierrez@amd.com // what is the size of the object we are accessing? 25311308Santhony.gutierrez@amd.com // find base for for this wavefront 25411308Santhony.gutierrez@amd.com 25511308Santhony.gutierrez@amd.com // calcPrivAddr will fail if accesses are unaligned 25611308Santhony.gutierrez@amd.com assert(!((sizeof(MemCType) - 1) & m->addr[lane])); 25711308Santhony.gutierrez@amd.com 25811308Santhony.gutierrez@amd.com Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, 25911308Santhony.gutierrez@amd.com this); 26011308Santhony.gutierrez@amd.com 26111308Santhony.gutierrez@amd.com m->addr[lane] = privAddr; 26211308Santhony.gutierrez@amd.com } 26311308Santhony.gutierrez@amd.com } 26411308Santhony.gutierrez@amd.com } 26511308Santhony.gutierrez@amd.com 26611700Santhony.gutierrez@amd.com w->computeUnit->globalMemoryPipe.issueRequest(m); 26711639Salexandru.dutu@amd.com w->outstandingReqsRdGm++; 26811639Salexandru.dutu@amd.com w->rdGmReqsInPipe--; 26911308Santhony.gutierrez@amd.com break; 27011308Santhony.gutierrez@amd.com 27111308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_SPILL: 27211308Santhony.gutierrez@amd.com assert(num_dest_operands == 1); 27311308Santhony.gutierrez@amd.com m->pipeId = GLBMEM_PIPE; 27411308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(1)); 27511308Santhony.gutierrez@amd.com { 27611534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 27711308Santhony.gutierrez@amd.com // note: this calculation will NOT WORK if the compiler 27811308Santhony.gutierrez@amd.com // ever generates loads/stores to the same address with 27911308Santhony.gutierrez@amd.com // different widths (e.g., a ld_u32 addr and a ld_u16 addr) 28011308Santhony.gutierrez@amd.com if (mask[lane]) { 28111308Santhony.gutierrez@amd.com assert(m->addr[lane] < w->spillSizePerItem); 28211308Santhony.gutierrez@amd.com 28311308Santhony.gutierrez@amd.com m->addr[lane] = m->addr[lane] * w->spillWidth + 28411308Santhony.gutierrez@amd.com lane * sizeof(MemCType) + w->spillBase; 28511308Santhony.gutierrez@amd.com 28611639Salexandru.dutu@amd.com w->lastAddr[lane] = m->addr[lane]; 28711308Santhony.gutierrez@amd.com } 28811308Santhony.gutierrez@amd.com } 28911308Santhony.gutierrez@amd.com } 29011308Santhony.gutierrez@amd.com 29111700Santhony.gutierrez@amd.com w->computeUnit->globalMemoryPipe.issueRequest(m); 29211639Salexandru.dutu@amd.com w->outstandingReqsRdGm++; 29311639Salexandru.dutu@amd.com w->rdGmReqsInPipe--; 29411308Santhony.gutierrez@amd.com break; 29511308Santhony.gutierrez@amd.com 29611308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_GROUP: 29711308Santhony.gutierrez@amd.com m->pipeId = LDSMEM_PIPE; 29811308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(24)); 29911308Santhony.gutierrez@amd.com w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 30011639Salexandru.dutu@amd.com w->outstandingReqsRdLm++; 30111639Salexandru.dutu@amd.com w->rdLmReqsInPipe--; 30211308Santhony.gutierrez@amd.com break; 30311308Santhony.gutierrez@amd.com 30411308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_READONLY: 30511308Santhony.gutierrez@amd.com m->pipeId = GLBMEM_PIPE; 30611308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(1)); 30711308Santhony.gutierrez@amd.com 30811534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 30911308Santhony.gutierrez@amd.com if (mask[lane]) { 31011308Santhony.gutierrez@amd.com assert(m->addr[lane] + sizeof(MemCType) <= w->roSize); 31111308Santhony.gutierrez@amd.com m->addr[lane] += w->roBase; 31211308Santhony.gutierrez@amd.com } 31311308Santhony.gutierrez@amd.com } 31411308Santhony.gutierrez@amd.com 31511700Santhony.gutierrez@amd.com w->computeUnit->globalMemoryPipe.issueRequest(m); 31611639Salexandru.dutu@amd.com w->outstandingReqsRdGm++; 31711639Salexandru.dutu@amd.com w->rdGmReqsInPipe--; 31811308Santhony.gutierrez@amd.com break; 31911308Santhony.gutierrez@amd.com 32011308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_PRIVATE: 32111308Santhony.gutierrez@amd.com m->pipeId = GLBMEM_PIPE; 32211308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(1)); 32311308Santhony.gutierrez@amd.com { 32411534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 32511308Santhony.gutierrez@amd.com if (mask[lane]) { 32611308Santhony.gutierrez@amd.com assert(m->addr[lane] < w->privSizePerItem); 32711308Santhony.gutierrez@amd.com 32811308Santhony.gutierrez@amd.com m->addr[lane] = m->addr[lane] + 32911308Santhony.gutierrez@amd.com lane * sizeof(MemCType) + w->privBase; 33011308Santhony.gutierrez@amd.com } 33111308Santhony.gutierrez@amd.com } 33211308Santhony.gutierrez@amd.com } 33311700Santhony.gutierrez@amd.com w->computeUnit->globalMemoryPipe.issueRequest(m); 33411639Salexandru.dutu@amd.com w->outstandingReqsRdGm++; 33511639Salexandru.dutu@amd.com w->rdGmReqsInPipe--; 33611308Santhony.gutierrez@amd.com break; 33711308Santhony.gutierrez@amd.com 33811308Santhony.gutierrez@amd.com default: 33911308Santhony.gutierrez@amd.com fatal("Load to unsupported segment %d %llxe\n", this->segment, 34011308Santhony.gutierrez@amd.com m->addr[0]); 34111308Santhony.gutierrez@amd.com } 34211308Santhony.gutierrez@amd.com 34311639Salexandru.dutu@amd.com w->outstandingReqs++; 34411639Salexandru.dutu@amd.com w->memReqsInPipe--; 34511308Santhony.gutierrez@amd.com } 34611308Santhony.gutierrez@amd.com 34711308Santhony.gutierrez@amd.com template<typename OperationType, typename SrcDataType, 34811308Santhony.gutierrez@amd.com typename AddrRegOperandType> 34911308Santhony.gutierrez@amd.com void 35011308Santhony.gutierrez@amd.com StInst<OperationType, SrcDataType, 35111308Santhony.gutierrez@amd.com AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst) 35211308Santhony.gutierrez@amd.com { 35311308Santhony.gutierrez@amd.com Wavefront *w = gpuDynInst->wavefront(); 35411308Santhony.gutierrez@amd.com 35511308Santhony.gutierrez@amd.com typedef typename OperationType::CType CType; 35611308Santhony.gutierrez@amd.com 35711639Salexandru.dutu@amd.com const VectorMask &mask = w->getPred(); 35811308Santhony.gutierrez@amd.com 35911308Santhony.gutierrez@amd.com // arg references are handled uniquely for now (no Memory Request 36011308Santhony.gutierrez@amd.com // is used), so special-case them up front. Someday we should 36111308Santhony.gutierrez@amd.com // make this more realistic, at which we should get rid of this 36211308Santhony.gutierrez@amd.com // block and fold this case into the switch below. 36311308Santhony.gutierrez@amd.com if (this->segment == Brig::BRIG_SEGMENT_ARG) { 36411308Santhony.gutierrez@amd.com uint64_t address = this->addr.calcUniform(); 36511308Santhony.gutierrez@amd.com 36611534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 36711308Santhony.gutierrez@amd.com if (mask[lane]) { 36811308Santhony.gutierrez@amd.com CType data = this->src.template get<CType>(w, lane); 36911308Santhony.gutierrez@amd.com DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data); 37011308Santhony.gutierrez@amd.com w->writeCallArgMem<CType>(lane, address, data); 37111308Santhony.gutierrez@amd.com } 37211308Santhony.gutierrez@amd.com } 37311308Santhony.gutierrez@amd.com 37411308Santhony.gutierrez@amd.com return; 37511308Santhony.gutierrez@amd.com } 37611308Santhony.gutierrez@amd.com 37711308Santhony.gutierrez@amd.com GPUDynInstPtr m = gpuDynInst; 37811308Santhony.gutierrez@amd.com 37911308Santhony.gutierrez@amd.com m->exec_mask = w->execMask(); 38011308Santhony.gutierrez@amd.com 38111308Santhony.gutierrez@amd.com this->addr.calcVector(w, m->addr); 38211308Santhony.gutierrez@amd.com 38311308Santhony.gutierrez@amd.com if (num_src_operands == 1) { 38411534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 38511308Santhony.gutierrez@amd.com if (mask[lane]) { 38611308Santhony.gutierrez@amd.com ((CType*)m->d_data)[lane] = 38711308Santhony.gutierrez@amd.com this->src.template get<CType>(w, lane); 38811308Santhony.gutierrez@amd.com } 38911308Santhony.gutierrez@amd.com } 39011308Santhony.gutierrez@amd.com } else { 39111308Santhony.gutierrez@amd.com for (int k= 0; k < num_src_operands; ++k) { 39211534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 39311308Santhony.gutierrez@amd.com if (mask[lane]) { 39411534Sjohn.kalamatianos@amd.com ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] = 39511308Santhony.gutierrez@amd.com this->src_vect[k].template get<CType>(w, lane); 39611308Santhony.gutierrez@amd.com } 39711308Santhony.gutierrez@amd.com } 39811308Santhony.gutierrez@amd.com } 39911308Santhony.gutierrez@amd.com } 40011308Santhony.gutierrez@amd.com 40111308Santhony.gutierrez@amd.com m->m_type = OperationType::memType; 40211308Santhony.gutierrez@amd.com m->v_type = OperationType::vgprType; 40311308Santhony.gutierrez@amd.com 40411308Santhony.gutierrez@amd.com m->statusBitVector = 0; 40511308Santhony.gutierrez@amd.com m->equiv = this->equivClass; 40611308Santhony.gutierrez@amd.com 40711308Santhony.gutierrez@amd.com if (num_src_operands == 1) { 40811308Santhony.gutierrez@amd.com m->n_reg = 1; 40911308Santhony.gutierrez@amd.com } else { 41011308Santhony.gutierrez@amd.com m->n_reg = num_src_operands; 41111308Santhony.gutierrez@amd.com } 41211308Santhony.gutierrez@amd.com 41311308Santhony.gutierrez@amd.com m->simdId = w->simdId; 41411308Santhony.gutierrez@amd.com m->wfSlotId = w->wfSlotId; 41511308Santhony.gutierrez@amd.com m->wfDynId = w->wfDynId; 41611639Salexandru.dutu@amd.com m->kern_id = w->kernId; 41711308Santhony.gutierrez@amd.com m->cu_id = w->computeUnit->cu_id; 41811308Santhony.gutierrez@amd.com m->latency.init(&w->computeUnit->shader->tick_cnt); 41911308Santhony.gutierrez@amd.com 42011308Santhony.gutierrez@amd.com switch (this->segment) { 42111308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_GLOBAL: 42211308Santhony.gutierrez@amd.com m->pipeId = GLBMEM_PIPE; 42311308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(1)); 42411308Santhony.gutierrez@amd.com 42511308Santhony.gutierrez@amd.com // this is a complete hack to get around a compiler bug 42611308Santhony.gutierrez@amd.com // (the compiler currently generates global access for private 42711308Santhony.gutierrez@amd.com // addresses (starting from 0). We need to add the private offset) 42811534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 42911308Santhony.gutierrez@amd.com if (mask[lane]) { 43011308Santhony.gutierrez@amd.com if (m->addr[lane] < w->privSizePerItem) { 43111308Santhony.gutierrez@amd.com 43211308Santhony.gutierrez@amd.com // calcPrivAddr will fail if accesses are unaligned 43311308Santhony.gutierrez@amd.com assert(!((sizeof(CType)-1) & m->addr[lane])); 43411308Santhony.gutierrez@amd.com 43511308Santhony.gutierrez@amd.com Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, 43611308Santhony.gutierrez@amd.com this); 43711308Santhony.gutierrez@amd.com 43811308Santhony.gutierrez@amd.com m->addr[lane] = privAddr; 43911308Santhony.gutierrez@amd.com } 44011308Santhony.gutierrez@amd.com } 44111308Santhony.gutierrez@amd.com } 44211308Santhony.gutierrez@amd.com 44311700Santhony.gutierrez@amd.com w->computeUnit->globalMemoryPipe.issueRequest(m); 44411639Salexandru.dutu@amd.com w->outstandingReqsWrGm++; 44511639Salexandru.dutu@amd.com w->wrGmReqsInPipe--; 44611308Santhony.gutierrez@amd.com break; 44711308Santhony.gutierrez@amd.com 44811308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_SPILL: 44911308Santhony.gutierrez@amd.com assert(num_src_operands == 1); 45011308Santhony.gutierrez@amd.com m->pipeId = GLBMEM_PIPE; 45111308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(1)); 45211308Santhony.gutierrez@amd.com { 45311534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 45411308Santhony.gutierrez@amd.com if (mask[lane]) { 45511308Santhony.gutierrez@amd.com assert(m->addr[lane] < w->spillSizePerItem); 45611308Santhony.gutierrez@amd.com 45711308Santhony.gutierrez@amd.com m->addr[lane] = m->addr[lane] * w->spillWidth + 45811308Santhony.gutierrez@amd.com lane * sizeof(CType) + w->spillBase; 45911308Santhony.gutierrez@amd.com } 46011308Santhony.gutierrez@amd.com } 46111308Santhony.gutierrez@amd.com } 46211308Santhony.gutierrez@amd.com 46311700Santhony.gutierrez@amd.com w->computeUnit->globalMemoryPipe.issueRequest(m); 46411639Salexandru.dutu@amd.com w->outstandingReqsWrGm++; 46511639Salexandru.dutu@amd.com w->wrGmReqsInPipe--; 46611308Santhony.gutierrez@amd.com break; 46711308Santhony.gutierrez@amd.com 46811308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_GROUP: 46911308Santhony.gutierrez@amd.com m->pipeId = LDSMEM_PIPE; 47011308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(24)); 47111308Santhony.gutierrez@amd.com w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 47211639Salexandru.dutu@amd.com w->outstandingReqsWrLm++; 47311639Salexandru.dutu@amd.com w->wrLmReqsInPipe--; 47411308Santhony.gutierrez@amd.com break; 47511308Santhony.gutierrez@amd.com 47611308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_PRIVATE: 47711308Santhony.gutierrez@amd.com m->pipeId = GLBMEM_PIPE; 47811308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(1)); 47911308Santhony.gutierrez@amd.com { 48011534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 48111308Santhony.gutierrez@amd.com if (mask[lane]) { 48211308Santhony.gutierrez@amd.com assert(m->addr[lane] < w->privSizePerItem); 48311308Santhony.gutierrez@amd.com m->addr[lane] = m->addr[lane] + lane * 48411308Santhony.gutierrez@amd.com sizeof(CType)+w->privBase; 48511308Santhony.gutierrez@amd.com } 48611308Santhony.gutierrez@amd.com } 48711308Santhony.gutierrez@amd.com } 48811308Santhony.gutierrez@amd.com 48911700Santhony.gutierrez@amd.com w->computeUnit->globalMemoryPipe.issueRequest(m); 49011639Salexandru.dutu@amd.com w->outstandingReqsWrGm++; 49111639Salexandru.dutu@amd.com w->wrGmReqsInPipe--; 49211308Santhony.gutierrez@amd.com break; 49311308Santhony.gutierrez@amd.com 49411308Santhony.gutierrez@amd.com default: 49511308Santhony.gutierrez@amd.com fatal("Store to unsupported segment %d\n", this->segment); 49611308Santhony.gutierrez@amd.com } 49711308Santhony.gutierrez@amd.com 49811639Salexandru.dutu@amd.com w->outstandingReqs++; 49911639Salexandru.dutu@amd.com w->memReqsInPipe--; 50011308Santhony.gutierrez@amd.com } 50111308Santhony.gutierrez@amd.com 50211308Santhony.gutierrez@amd.com template<typename OperationType, typename SrcDataType, 50311308Santhony.gutierrez@amd.com typename AddrRegOperandType> 50411308Santhony.gutierrez@amd.com void 50511308Santhony.gutierrez@amd.com StInst<OperationType, SrcDataType, 50611308Santhony.gutierrez@amd.com AddrRegOperandType>::generateDisassembly() 50711308Santhony.gutierrez@amd.com { 50811308Santhony.gutierrez@amd.com switch (num_src_operands) { 50911308Santhony.gutierrez@amd.com case 1: 51011308Santhony.gutierrez@amd.com this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, 51111308Santhony.gutierrez@amd.com segmentNames[this->segment], 51211308Santhony.gutierrez@amd.com OperationType::label, 51311308Santhony.gutierrez@amd.com this->src.disassemble(), 51411308Santhony.gutierrez@amd.com this->addr.disassemble()); 51511308Santhony.gutierrez@amd.com break; 51611308Santhony.gutierrez@amd.com case 2: 51711308Santhony.gutierrez@amd.com this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, 51811308Santhony.gutierrez@amd.com segmentNames[this->segment], 51911308Santhony.gutierrez@amd.com OperationType::label, 52011308Santhony.gutierrez@amd.com this->src_vect[0].disassemble(), 52111308Santhony.gutierrez@amd.com this->src_vect[1].disassemble(), 52211308Santhony.gutierrez@amd.com this->addr.disassemble()); 52311308Santhony.gutierrez@amd.com break; 52411308Santhony.gutierrez@amd.com case 4: 52511308Santhony.gutierrez@amd.com this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", 52611308Santhony.gutierrez@amd.com this->opcode, 52711308Santhony.gutierrez@amd.com segmentNames[this->segment], 52811308Santhony.gutierrez@amd.com OperationType::label, 52911308Santhony.gutierrez@amd.com this->src_vect[0].disassemble(), 53011308Santhony.gutierrez@amd.com this->src_vect[1].disassemble(), 53111308Santhony.gutierrez@amd.com this->src_vect[2].disassemble(), 53211308Santhony.gutierrez@amd.com this->src_vect[3].disassemble(), 53311308Santhony.gutierrez@amd.com this->addr.disassemble()); 53411308Santhony.gutierrez@amd.com break; 53511308Santhony.gutierrez@amd.com default: fatal("Bad ld register src operand, num vector operands: " 53611308Santhony.gutierrez@amd.com "%d \n", num_src_operands); 53711308Santhony.gutierrez@amd.com break; 53811308Santhony.gutierrez@amd.com } 53911308Santhony.gutierrez@amd.com } 54011308Santhony.gutierrez@amd.com 54111308Santhony.gutierrez@amd.com template<typename DataType, typename AddrRegOperandType, int NumSrcOperands, 54211308Santhony.gutierrez@amd.com bool HasDst> 54311308Santhony.gutierrez@amd.com void 54411308Santhony.gutierrez@amd.com AtomicInst<DataType, AddrRegOperandType, NumSrcOperands, 54511308Santhony.gutierrez@amd.com HasDst>::execute(GPUDynInstPtr gpuDynInst) 54611308Santhony.gutierrez@amd.com { 54711308Santhony.gutierrez@amd.com typedef typename DataType::CType CType; 54811308Santhony.gutierrez@amd.com 54911308Santhony.gutierrez@amd.com Wavefront *w = gpuDynInst->wavefront(); 55011308Santhony.gutierrez@amd.com 55111308Santhony.gutierrez@amd.com GPUDynInstPtr m = gpuDynInst; 55211308Santhony.gutierrez@amd.com 55311308Santhony.gutierrez@amd.com this->addr.calcVector(w, m->addr); 55411308Santhony.gutierrez@amd.com 55511534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 55611308Santhony.gutierrez@amd.com ((CType *)m->a_data)[lane] = 55711308Santhony.gutierrez@amd.com this->src[0].template get<CType>(w, lane); 55811308Santhony.gutierrez@amd.com } 55911308Santhony.gutierrez@amd.com 56011308Santhony.gutierrez@amd.com // load second source operand for CAS 56111308Santhony.gutierrez@amd.com if (NumSrcOperands > 1) { 56211534Sjohn.kalamatianos@amd.com for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 56311308Santhony.gutierrez@amd.com ((CType*)m->x_data)[lane] = 56411308Santhony.gutierrez@amd.com this->src[1].template get<CType>(w, lane); 56511308Santhony.gutierrez@amd.com } 56611308Santhony.gutierrez@amd.com } 56711308Santhony.gutierrez@amd.com 56811308Santhony.gutierrez@amd.com assert(NumSrcOperands <= 2); 56911308Santhony.gutierrez@amd.com 57011308Santhony.gutierrez@amd.com m->m_type = DataType::memType; 57111308Santhony.gutierrez@amd.com m->v_type = DataType::vgprType; 57211308Santhony.gutierrez@amd.com 57311308Santhony.gutierrez@amd.com m->exec_mask = w->execMask(); 57411308Santhony.gutierrez@amd.com m->statusBitVector = 0; 57511308Santhony.gutierrez@amd.com m->equiv = 0; // atomics don't have an equivalence class operand 57611308Santhony.gutierrez@amd.com m->n_reg = 1; 57711308Santhony.gutierrez@amd.com 57811308Santhony.gutierrez@amd.com if (HasDst) { 57911308Santhony.gutierrez@amd.com m->dst_reg = this->dest.regIndex(); 58011308Santhony.gutierrez@amd.com } 58111308Santhony.gutierrez@amd.com 58211308Santhony.gutierrez@amd.com m->simdId = w->simdId; 58311308Santhony.gutierrez@amd.com m->wfSlotId = w->wfSlotId; 58411308Santhony.gutierrez@amd.com m->wfDynId = w->wfDynId; 58511639Salexandru.dutu@amd.com m->kern_id = w->kernId; 58611308Santhony.gutierrez@amd.com m->cu_id = w->computeUnit->cu_id; 58711308Santhony.gutierrez@amd.com m->latency.init(&w->computeUnit->shader->tick_cnt); 58811308Santhony.gutierrez@amd.com 58911308Santhony.gutierrez@amd.com switch (this->segment) { 59011308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_GLOBAL: 59111308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(64)); 59211308Santhony.gutierrez@amd.com m->pipeId = GLBMEM_PIPE; 59311308Santhony.gutierrez@amd.com 59411700Santhony.gutierrez@amd.com w->computeUnit->globalMemoryPipe.issueRequest(m); 59511639Salexandru.dutu@amd.com w->outstandingReqsWrGm++; 59611639Salexandru.dutu@amd.com w->wrGmReqsInPipe--; 59711639Salexandru.dutu@amd.com w->outstandingReqsRdGm++; 59811639Salexandru.dutu@amd.com w->rdGmReqsInPipe--; 59911308Santhony.gutierrez@amd.com break; 60011308Santhony.gutierrez@amd.com 60111308Santhony.gutierrez@amd.com case Brig::BRIG_SEGMENT_GROUP: 60211308Santhony.gutierrez@amd.com m->pipeId = LDSMEM_PIPE; 60311308Santhony.gutierrez@amd.com m->latency.set(w->computeUnit->shader->ticks(24)); 60411308Santhony.gutierrez@amd.com w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); 60511639Salexandru.dutu@amd.com w->outstandingReqsWrLm++; 60611639Salexandru.dutu@amd.com w->wrLmReqsInPipe--; 60711639Salexandru.dutu@amd.com w->outstandingReqsRdLm++; 60811639Salexandru.dutu@amd.com w->rdLmReqsInPipe--; 60911308Santhony.gutierrez@amd.com break; 61011308Santhony.gutierrez@amd.com 61111308Santhony.gutierrez@amd.com default: 61211308Santhony.gutierrez@amd.com fatal("Atomic op to unsupported segment %d\n", 61311308Santhony.gutierrez@amd.com this->segment); 61411308Santhony.gutierrez@amd.com } 61511308Santhony.gutierrez@amd.com 61611639Salexandru.dutu@amd.com w->outstandingReqs++; 61711639Salexandru.dutu@amd.com w->memReqsInPipe--; 61811308Santhony.gutierrez@amd.com } 61911308Santhony.gutierrez@amd.com 62011308Santhony.gutierrez@amd.com const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp); 62111308Santhony.gutierrez@amd.com 62211308Santhony.gutierrez@amd.com template<typename DataType, typename AddrRegOperandType, int NumSrcOperands, 62311308Santhony.gutierrez@amd.com bool HasDst> 62411308Santhony.gutierrez@amd.com void 62511308Santhony.gutierrez@amd.com AtomicInst<DataType, AddrRegOperandType, NumSrcOperands, 62611308Santhony.gutierrez@amd.com HasDst>::generateDisassembly() 62711308Santhony.gutierrez@amd.com { 62811308Santhony.gutierrez@amd.com if (HasDst) { 62911308Santhony.gutierrez@amd.com this->disassembly = 63011308Santhony.gutierrez@amd.com csprintf("%s_%s_%s_%s %s,%s", this->opcode, 63111308Santhony.gutierrez@amd.com atomicOpToString(this->atomicOperation), 63211308Santhony.gutierrez@amd.com segmentNames[this->segment], 63311308Santhony.gutierrez@amd.com DataType::label, this->dest.disassemble(), 63411308Santhony.gutierrez@amd.com this->addr.disassemble()); 63511308Santhony.gutierrez@amd.com } else { 63611308Santhony.gutierrez@amd.com this->disassembly = 63711308Santhony.gutierrez@amd.com csprintf("%s_%s_%s_%s %s", this->opcode, 63811308Santhony.gutierrez@amd.com atomicOpToString(this->atomicOperation), 63911308Santhony.gutierrez@amd.com segmentNames[this->segment], 64011308Santhony.gutierrez@amd.com DataType::label, this->addr.disassemble()); 64111308Santhony.gutierrez@amd.com } 64211308Santhony.gutierrez@amd.com 64311308Santhony.gutierrez@amd.com for (int i = 0; i < NumSrcOperands; ++i) { 64411308Santhony.gutierrez@amd.com this->disassembly += ","; 64511308Santhony.gutierrez@amd.com this->disassembly += this->src[i].disassemble(); 64611308Santhony.gutierrez@amd.com } 64711308Santhony.gutierrez@amd.com } 64811308Santhony.gutierrez@amd.com} // namespace HsailISA 649