mem.hh (11536:fdfc2455b091) | mem.hh (11692:e772fdcd3809)
---|---
1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 82 unchanged lines hidden (view full) --- 91 AddrOperandType addr; 92 93 LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 94 const char *_opcode) 95 : HsailGPUStaticInst(obj, _opcode) 96 { 97 using namespace Brig; 98 | 1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 82 unchanged lines hidden (view full) --- 91 AddrOperandType addr; 92 93 LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 94 const char *_opcode) 95 : HsailGPUStaticInst(obj, _opcode) 96 { 97 using namespace Brig; 98 |
 | 99 setFlag(ALU); 100
99 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 100 dest.init(op_offs, obj); 101 op_offs = obj->getOperandPtr(ib->operands, 1); 102 addr.init(op_offs, obj); 103 } 104 105 int numSrcRegOperands() override 106 { return(this->addr.isVectorRegister()); } --- 99 unchanged lines hidden (view full) --- 206 Brig::BrigWidth8_t width; 207 typename DestOperandType::DestOperand dest; 208 AddrOperandType addr; 209 210 Brig::BrigSegment segment; 211 Brig::BrigMemoryOrder memoryOrder; 212 Brig::BrigMemoryScope memoryScope; 213 unsigned int equivClass; | 101 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 102 dest.init(op_offs, obj); 103 op_offs = obj->getOperandPtr(ib->operands, 1); 104 addr.init(op_offs, obj); 105 } 106 107 int numSrcRegOperands() override 108 { return(this->addr.isVectorRegister()); } --- 99 unchanged lines hidden (view full) --- 208 Brig::BrigWidth8_t width; 209 typename DestOperandType::DestOperand dest; 210 AddrOperandType addr; 211 212 Brig::BrigSegment segment; 213 Brig::BrigMemoryOrder memoryOrder; 214 Brig::BrigMemoryScope memoryScope; 215 unsigned int equivClass; |
214 bool isArgLoad() | 216 217 LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 218 const char *_opcode) 219 : HsailGPUStaticInst(obj, _opcode) |
215 { | 220 { |
216 return segment == Brig::BRIG_SEGMENT_KERNARG || 217 segment == Brig::BRIG_SEGMENT_ARG; 218 } 219 void 220 initLd(const Brig::BrigInstBase *ib, const BrigObject *obj, 221 const char *_opcode) 222 { | |
223 using namespace Brig; 224 | 221 using namespace Brig; 222 |
225 const BrigInstMem *ldst = (const BrigInstMem*)ib; | 223 setFlag(MemoryRef); 224 setFlag(Load); |
226 | 225 |
227 segment = (BrigSegment)ldst->segment; 228 memoryOrder = BRIG_MEMORY_ORDER_NONE; 229 memoryScope = BRIG_MEMORY_SCOPE_NONE; 230 equivClass = ldst->equivClass; | 226 if (ib->opcode == BRIG_OPCODE_LD) { 227 const BrigInstMem *ldst = (const BrigInstMem*)ib; |
231 | 228 |
232 switch (segment) { 233 case BRIG_SEGMENT_GLOBAL: 234 o_type = Enums::OT_GLOBAL_READ; 235 break; | 229 segment = (BrigSegment)ldst->segment; 230 memoryOrder = BRIG_MEMORY_ORDER_NONE; 231 memoryScope = BRIG_MEMORY_SCOPE_NONE; 232 equivClass = ldst->equivClass; |
236 | 233 |
237 case BRIG_SEGMENT_GROUP: 238 o_type = Enums::OT_SHARED_READ; 239 break; | 234 width = ldst->width; 235 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 236 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); 237 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) 238 dest.init(op_offs, obj); |
240 | 239 |
241 case BRIG_SEGMENT_PRIVATE: 242 o_type = Enums::OT_PRIVATE_READ; 243 break; | 240 op_offs = obj->getOperandPtr(ib->operands, 1); 241 addr.init(op_offs, obj); 242 } else { 243 const BrigInstAtomic *at = (const BrigInstAtomic*)ib; |
244 | 244 |
245 case BRIG_SEGMENT_READONLY: 246 o_type = Enums::OT_READONLY_READ; 247 break; | 245 segment = (BrigSegment)at->segment; 246 memoryOrder = (BrigMemoryOrder)at->memoryOrder; 247 memoryScope = (BrigMemoryScope)at->memoryScope; 248 equivClass = 0; |
248 | 249 |
249 case BRIG_SEGMENT_SPILL: 250 o_type = Enums::OT_SPILL_READ; 251 break; | 250 width = BRIG_WIDTH_1; 251 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 252 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); |
252 | 253 |
253 case BRIG_SEGMENT_FLAT: 254 o_type = Enums::OT_FLAT_READ; 255 break; | 254 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) 255 dest.init(op_offs, obj); |
256 | 256 |
257 case BRIG_SEGMENT_KERNARG: 258 o_type = Enums::OT_KERN_READ; 259 break; | 257 op_offs = obj->getOperandPtr(ib->operands,1); 258 addr.init(op_offs, obj); 259 } |
260 | 260 |
261 case BRIG_SEGMENT_ARG: 262 o_type = Enums::OT_ARG; | 261 switch (memoryOrder) { 262 case BRIG_MEMORY_ORDER_NONE: 263 setFlag(NoOrder); |
263 break; | 264 break; |
 | 265 case BRIG_MEMORY_ORDER_RELAXED: 266 setFlag(RelaxedOrder); 267 break;
 | 268 case BRIG_MEMORY_ORDER_SC_ACQUIRE: 269 setFlag(Acquire); 270 break;
 | 271 case BRIG_MEMORY_ORDER_SC_RELEASE: 272 setFlag(Release); 273 break;
 | 274 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: 275 setFlag(AcquireRelease); 276 break;
 | 277 default: 278 fatal("LdInst has bad memory order type\n"); 279 }
264 | 280 |
 | 281 switch (memoryScope) { 282 case BRIG_MEMORY_SCOPE_NONE: 283 setFlag(NoScope); 284 break;
 | 285 case BRIG_MEMORY_SCOPE_WORKITEM: 286 setFlag(WorkitemScope); 287 break;
 | 288 case BRIG_MEMORY_SCOPE_WORKGROUP: 289 setFlag(WorkgroupScope); 290 break;
 | 291 case BRIG_MEMORY_SCOPE_AGENT: 292 setFlag(DeviceScope); 293 break;
 | 294 case BRIG_MEMORY_SCOPE_SYSTEM: 295 setFlag(SystemScope); 296 break;
265 default: | 297 default: |
266 panic("Ld: segment %d not supported\n", segment); | 298 fatal("LdInst has bad memory scope type\n"); |
267 } 268 | 299 } 300 |
269 width = ldst->width; 270 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 271 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); 272 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) 273 dest.init(op_offs, obj); 274 275 op_offs = obj->getOperandPtr(ib->operands, 1); 276 addr.init(op_offs, obj); 277 } 278 279 void 280 initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj, 281 const char *_opcode) 282 { 283 using namespace Brig; 284 285 const BrigInstAtomic *at = (const BrigInstAtomic*)ib; 286 287 segment = (BrigSegment)at->segment; 288 memoryOrder = (BrigMemoryOrder)at->memoryOrder; 289 memoryScope = (BrigMemoryScope)at->memoryScope; 290 equivClass = 0; 291 | |
292 switch (segment) { 293 case BRIG_SEGMENT_GLOBAL: | 301 switch (segment) { 302 case BRIG_SEGMENT_GLOBAL: |
294 o_type = Enums::OT_GLOBAL_READ; | 303 setFlag(GlobalSegment); |
295 break; | 304 break; |
296 | |
297 case BRIG_SEGMENT_GROUP: | 305 case BRIG_SEGMENT_GROUP: |
298 o_type = Enums::OT_SHARED_READ; | 306 setFlag(GroupSegment); |
299 break; | 307 break; |
300 | |
301 case BRIG_SEGMENT_PRIVATE: | 308 case BRIG_SEGMENT_PRIVATE: |
302 o_type = Enums::OT_PRIVATE_READ; | 309 setFlag(PrivateSegment); |
303 break; | 310 break; |
304 | |
305 case BRIG_SEGMENT_READONLY: | 311 case BRIG_SEGMENT_READONLY: |
306 o_type = Enums::OT_READONLY_READ; | 312 setFlag(ReadOnlySegment); |
307 break; | 313 break; |
308 | |
309 case BRIG_SEGMENT_SPILL: | 314 case BRIG_SEGMENT_SPILL: |
310 o_type = Enums::OT_SPILL_READ; | 315 setFlag(SpillSegment); |
311 break; | 316 break; |
312 | |
313 case BRIG_SEGMENT_FLAT: | 317 case BRIG_SEGMENT_FLAT: |
314 o_type = Enums::OT_FLAT_READ; | 318 setFlag(Flat); |
315 break; | 319 break; |
316 | |
317 case BRIG_SEGMENT_KERNARG: | 320 case BRIG_SEGMENT_KERNARG: |
318 o_type = Enums::OT_KERN_READ; | 321 setFlag(KernArgSegment); |
319 break; | 322 break; |
320 | |
321 case BRIG_SEGMENT_ARG: | 323 case BRIG_SEGMENT_ARG: |
322 o_type = Enums::OT_ARG; | 324 setFlag(ArgSegment); |
323 break; | 325 break; |
324 | |
325 default: 326 panic("Ld: segment %d not supported\n", segment); 327 } | 326 default: 327 panic("Ld: segment %d not supported\n", segment); 328 } |
328 329 width = BRIG_WIDTH_1; 330 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 331 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); 332 333 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) 334 dest.init(op_offs, obj); 335 336 op_offs = obj->getOperandPtr(ib->operands,1); 337 addr.init(op_offs, obj); | |
338 } 339 | 329 } 330 |
340 LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 341 const char *_opcode) 342 : HsailGPUStaticInst(obj, _opcode) 343 { 344 using namespace Brig; 345 346 if (ib->opcode == BRIG_OPCODE_LD) { 347 initLd(ib, obj, _opcode); 348 } else { 349 initAtomicLd(ib, obj, _opcode); 350 } 351 } 352 | |
353 int numSrcRegOperands() override 354 { return(this->addr.isVectorRegister()); } 355 int numDstRegOperands() override { return dest.isVectorRegister(); } 356 int getNumOperands() override 357 { 358 if (this->addr.isVectorRegister()) 359 return 2; 360 else --- 107 unchanged lines hidden (view full) --- 468 469 c0 *d = &((c0*)gpuDynInst->d_data) 470 [k * gpuDynInst->computeUnit()->wfSize()]; 471 472 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { 473 if (gpuDynInst->exec_mask[i]) { 474 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); 475 | 331 int numSrcRegOperands() override 332 { return(this->addr.isVectorRegister()); } 333 int numDstRegOperands() override { return dest.isVectorRegister(); } 334 int getNumOperands() override 335 { 336 if (this->addr.isVectorRegister()) 337 return 2; 338 else --- 107 unchanged lines hidden (view full) --- 446 447 c0 *d = &((c0*)gpuDynInst->d_data) 448 [k * gpuDynInst->computeUnit()->wfSize()]; 449 450 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { 451 if (gpuDynInst->exec_mask[i]) { 452 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); 453 |
476 if (isLocalMem()) { | 454 if (this->isLocalMem()) { |
477 // load from shared memory 478 *d = gpuDynInst->wavefront()->ldsChunk-> 479 read<c0>(vaddr); 480 } else { 481 Request *req = new Request(0, vaddr, sizeof(c0), 0, 482 gpuDynInst->computeUnit()->masterId(), 483 0, gpuDynInst->wfDynId); 484 485 gpuDynInst->setRequestFlags(req); 486 PacketPtr pkt = new Packet(req, MemCmd::ReadReq); 487 pkt->dataStatic(d); 488 489 if (gpuDynInst->computeUnit()->shader-> 490 separate_acquire_release && | 455 // load from shared memory 456 *d = gpuDynInst->wavefront()->ldsChunk-> 457 read<c0>(vaddr); 458 } else { 459 Request *req = new Request(0, vaddr, sizeof(c0), 0, 460 gpuDynInst->computeUnit()->masterId(), 461 0, gpuDynInst->wfDynId); 462 463 gpuDynInst->setRequestFlags(req); 464 PacketPtr pkt = new Packet(req, MemCmd::ReadReq); 465 pkt->dataStatic(d); 466 467 if (gpuDynInst->computeUnit()->shader-> 468 separate_acquire_release && |
491 gpuDynInst->memoryOrder == 492 Enums::MEMORY_ORDER_SC_ACQUIRE) { | 469 gpuDynInst->isAcquire()) { |
493 // if this load has acquire semantics, 494 // set the response continuation function 495 // to perform an Acquire request 496 gpuDynInst->execContinuation = 497 &GPUStaticInst::execLdAcq; 498 499 gpuDynInst->useContinuation = true; 500 } else { --- 14 unchanged lines hidden (view full) --- 515 } 516 517 private: 518 void 519 execLdAcq(GPUDynInstPtr gpuDynInst) override 520 { 521 // after the load has complete and if the load has acquire 522 // semantics, issue an acquire request. | 470 // if this load has acquire semantics, 471 // set the response continuation function 472 // to perform an Acquire request 473 gpuDynInst->execContinuation = 474 &GPUStaticInst::execLdAcq; 475 476 gpuDynInst->useContinuation = true; 477 } else { --- 14 unchanged lines hidden (view full) --- 492 } 493 494 private: 495 void 496 execLdAcq(GPUDynInstPtr gpuDynInst) override 497 { 498 // after the load has complete and if the load has acquire 499 // semantics, issue an acquire request. |
523 if (!isLocalMem()) { | 500 if (!this->isLocalMem()) { |
524 if (gpuDynInst->computeUnit()->shader->separate_acquire_release | 501 if (gpuDynInst->computeUnit()->shader->separate_acquire_release |
525 && gpuDynInst->memoryOrder == 526 Enums::MEMORY_ORDER_SC_ACQUIRE) { | 502 && gpuDynInst->isAcquire()) { |
527 gpuDynInst->statusBitVector = VectorMask(1); 528 gpuDynInst->useContinuation = false; 529 // create request 530 Request *req = new Request(0, 0, 0, 0, 531 gpuDynInst->computeUnit()->masterId(), 532 0, gpuDynInst->wfDynId); 533 req->setFlags(Request::ACQUIRE); 534 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); 535 } 536 } 537 } 538 539 public: | 503 gpuDynInst->statusBitVector = VectorMask(1); 504 gpuDynInst->useContinuation = false; 505 // create request 506 Request *req = new Request(0, 0, 0, 0, 507 gpuDynInst->computeUnit()->masterId(), 508 0, gpuDynInst->wfDynId); 509 req->setFlags(Request::ACQUIRE); 510 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); 511 } 512 } 513 } 514 515 public: |
540 bool 541 isLocalMem() const override 542 { 543 return this->segment == Brig::BRIG_SEGMENT_GROUP; 544 } 545 | |
546 bool isVectorRegister(int operandIndex) override 547 { 548 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 549 if ((num_dest_operands != getNumOperands()) && 550 (operandIndex == (getNumOperands()-1))) 551 return(this->addr.isVectorRegister()); 552 if (num_dest_operands > 1) { 553 return dest_vect[operandIndex].isVectorRegister(); --- 172 unchanged lines hidden (view full) --- 726 typename SrcOperandType::SrcOperand src; 727 AddrOperandType addr; 728 729 Brig::BrigSegment segment; 730 Brig::BrigMemoryScope memoryScope; 731 Brig::BrigMemoryOrder memoryOrder; 732 unsigned int equivClass; 733 | 516 bool isVectorRegister(int operandIndex) override 517 { 518 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 519 if ((num_dest_operands != getNumOperands()) && 520 (operandIndex == (getNumOperands()-1))) 521 return(this->addr.isVectorRegister()); 522 if (num_dest_operands > 1) { 523 return dest_vect[operandIndex].isVectorRegister(); --- 172 unchanged lines hidden (view full) --- 696 typename SrcOperandType::SrcOperand src; 697 AddrOperandType addr; 698 699 Brig::BrigSegment segment; 700 Brig::BrigMemoryScope memoryScope; 701 Brig::BrigMemoryOrder memoryOrder; 702 unsigned int equivClass; 703 |
734 void 735 initSt(const Brig::BrigInstBase *ib, const BrigObject *obj, 736 const char *_opcode) | 704 StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 705 const char *_opcode) 706 : HsailGPUStaticInst(obj, _opcode) |
737 { 738 using namespace Brig; 739 | 707 { 708 using namespace Brig; 709 |
740 const BrigInstMem *ldst = (const BrigInstMem*)ib; | 710 setFlag(MemoryRef); 711 setFlag(Store); |
741 | 712 |
742 segment = (BrigSegment)ldst->segment; 743 memoryOrder = BRIG_MEMORY_ORDER_NONE; 744 memoryScope = BRIG_MEMORY_SCOPE_NONE; 745 equivClass = ldst->equivClass; | 713 if (ib->opcode == BRIG_OPCODE_ST) { 714 const BrigInstMem *ldst = (const BrigInstMem*)ib; |
746 | 715 |
747 switch (segment) { 748 case BRIG_SEGMENT_GLOBAL: 749 o_type = Enums::OT_GLOBAL_WRITE; 750 break; | 716 segment = (BrigSegment)ldst->segment; 717 memoryOrder = BRIG_MEMORY_ORDER_NONE; 718 memoryScope = BRIG_MEMORY_SCOPE_NONE; 719 equivClass = ldst->equivClass; |
751 | 720 |
752 case BRIG_SEGMENT_GROUP: 753 o_type = Enums::OT_SHARED_WRITE; 754 break; | 721 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 722 const BrigOperand *baseOp = obj->getOperand(op_offs); |
755 | 723 |
756 case BRIG_SEGMENT_PRIVATE: 757 o_type = Enums::OT_PRIVATE_WRITE; 758 break; | 724 if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) || 725 (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) { 726 src.init(op_offs, obj); 727 } |
759 | 728 |
760 case BRIG_SEGMENT_READONLY: 761 o_type = Enums::OT_READONLY_WRITE; 762 break; | 729 op_offs = obj->getOperandPtr(ib->operands, 1); 730 addr.init(op_offs, obj); 731 } else { 732 const BrigInstAtomic *at = (const BrigInstAtomic*)ib; |
763 | 733 |
764 case BRIG_SEGMENT_SPILL: 765 o_type = Enums::OT_SPILL_WRITE; 766 break; | 734 segment = (BrigSegment)at->segment; 735 memoryScope = (BrigMemoryScope)at->memoryScope; 736 memoryOrder = (BrigMemoryOrder)at->memoryOrder; 737 equivClass = 0; |
767 | 738 |
768 case BRIG_SEGMENT_FLAT: 769 o_type = Enums::OT_FLAT_WRITE; 770 break; | 739 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 740 addr.init(op_offs, obj); |
771 | 741 |
772 case BRIG_SEGMENT_ARG: 773 o_type = Enums::OT_ARG; 774 break; | 742 op_offs = obj->getOperandPtr(ib->operands, 1); 743 src.init(op_offs, obj); 744 } |
775 | 745 |
 | 746 switch (memoryOrder) { 747 case BRIG_MEMORY_ORDER_NONE: 748 setFlag(NoOrder); 749 break;
 | 750 case BRIG_MEMORY_ORDER_RELAXED: 751 setFlag(RelaxedOrder); 752 break;
 | 753 case BRIG_MEMORY_ORDER_SC_ACQUIRE: 754 setFlag(Acquire); 755 break;
 | 756 case BRIG_MEMORY_ORDER_SC_RELEASE: 757 setFlag(Release); 758 break;
 | 759 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: 760 setFlag(AcquireRelease); 761 break;
776 default: | 762 default: |
777 panic("St: segment %d not supported\n", segment); | 763 fatal("StInst has bad memory order type\n"); |
778 } 779 | 764 } 765 |
780 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 781 const BrigOperand *baseOp = obj->getOperand(op_offs); 782 783 if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) || 784 (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) { 785 src.init(op_offs, obj); | 766 switch (memoryScope) { 767 case BRIG_MEMORY_SCOPE_NONE: 768 setFlag(NoScope); 769 break; 770 case BRIG_MEMORY_SCOPE_WORKITEM: 771 setFlag(WorkitemScope); 772 break; 773 case BRIG_MEMORY_SCOPE_WORKGROUP: 774 setFlag(WorkgroupScope); 775 break; 776 case BRIG_MEMORY_SCOPE_AGENT: 777 setFlag(DeviceScope); 778 break; 779 case BRIG_MEMORY_SCOPE_SYSTEM: 780 setFlag(SystemScope); 781 break; 782 default: 783 fatal("StInst has bad memory scope type\n"); |
786 } 787 | 784 } 785 |
788 op_offs = obj->getOperandPtr(ib->operands, 1); 789 addr.init(op_offs, obj); 790 } 791 792 void 793 initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj, 794 const char *_opcode) 795 { 796 using namespace Brig; 797 798 const BrigInstAtomic *at = (const BrigInstAtomic*)ib; 799 800 segment = (BrigSegment)at->segment; 801 memoryScope = (BrigMemoryScope)at->memoryScope; 802 memoryOrder = (BrigMemoryOrder)at->memoryOrder; 803 equivClass = 0; 804 | |
805 switch (segment) { 806 case BRIG_SEGMENT_GLOBAL: | 786 switch (segment) { 787 case BRIG_SEGMENT_GLOBAL: |
807 o_type = Enums::OT_GLOBAL_WRITE; | 788 setFlag(GlobalSegment); |
808 break; | 789 break; |
809 | |
810 case BRIG_SEGMENT_GROUP: | 790 case BRIG_SEGMENT_GROUP: |
811 o_type = Enums::OT_SHARED_WRITE; | 791 setFlag(GroupSegment); |
812 break; | 792 break; |
813 | |
814 case BRIG_SEGMENT_PRIVATE: | 793 case BRIG_SEGMENT_PRIVATE: |
815 o_type = Enums::OT_PRIVATE_WRITE; | 794 setFlag(PrivateSegment); |
816 break; | 795 break; |
817 | |
818 case BRIG_SEGMENT_READONLY: | 796 case BRIG_SEGMENT_READONLY: |
819 o_type = Enums::OT_READONLY_WRITE; | 797 setFlag(ReadOnlySegment); |
820 break; | 798 break; |
821 | |
822 case BRIG_SEGMENT_SPILL: | 799 case BRIG_SEGMENT_SPILL: |
823 o_type = Enums::OT_SPILL_WRITE; | 800 setFlag(SpillSegment); |
824 break; | 801 break; |
825 | |
826 case BRIG_SEGMENT_FLAT: | 802 case BRIG_SEGMENT_FLAT: |
827 o_type = Enums::OT_FLAT_WRITE; | 803 setFlag(Flat); |
828 break; | 804 break; |
829 | |
830 case BRIG_SEGMENT_ARG: | 805 case BRIG_SEGMENT_ARG: |
831 o_type = Enums::OT_ARG; | 806 setFlag(ArgSegment); |
832 break; | 807 break; |
833 | |
834 default: 835 panic("St: segment %d not supported\n", segment); 836 } | 808 default: 809 panic("St: segment %d not supported\n", segment); 810 } |
837 838 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 839 addr.init(op_offs, obj); 840 841 op_offs = obj->getOperandPtr(ib->operands, 1); 842 src.init(op_offs, obj); | |
843 } 844 | 811 } 812 |
845 StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 846 const char *_opcode) 847 : HsailGPUStaticInst(obj, _opcode) 848 { 849 using namespace Brig; 850 851 if (ib->opcode == BRIG_OPCODE_ST) { 852 initSt(ib, obj, _opcode); 853 } else { 854 initAtomicSt(ib, obj, _opcode); 855 } 856 } 857 | |
858 int numDstRegOperands() override { return 0; } 859 int numSrcRegOperands() override 860 { 861 return src.isVectorRegister() + this->addr.isVectorRegister(); 862 } 863 int getNumOperands() override 864 { 865 if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) --- 93 unchanged lines hidden (view full) --- 959 } 960 } 961 962 void 963 initiateAcc(GPUDynInstPtr gpuDynInst) override 964 { 965 // before performing a store, check if this store has 966 // release semantics, and if so issue a release first | 813 int numDstRegOperands() override { return 0; } 814 int numSrcRegOperands() override 815 { 816 return src.isVectorRegister() + this->addr.isVectorRegister(); 817 } 818 int getNumOperands() override 819 { 820 if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) --- 93 unchanged lines hidden (view full) --- 914 } 915 } 916 917 void 918 initiateAcc(GPUDynInstPtr gpuDynInst) override 919 { 920 // before performing a store, check if this store has 921 // release semantics, and if so issue a release first |
967 if (!isLocalMem()) { | 922 if (!this->isLocalMem()) { |
968 if (gpuDynInst->computeUnit()->shader->separate_acquire_release | 923 if (gpuDynInst->computeUnit()->shader->separate_acquire_release |
969 && gpuDynInst->memoryOrder == 970 Enums::MEMORY_ORDER_SC_RELEASE) { | 924 && gpuDynInst->isRelease()) { |
971 972 gpuDynInst->statusBitVector = VectorMask(1); 973 gpuDynInst->execContinuation = &GPUStaticInst::execSt; 974 gpuDynInst->useContinuation = true; 975 // create request 976 Request *req = new Request(0, 0, 0, 0, 977 gpuDynInst->computeUnit()->masterId(), 978 0, gpuDynInst->wfDynId); 979 req->setFlags(Request::RELEASE); 980 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); 981 982 return; 983 } 984 } 985 986 // if there is no release semantic, perform stores immediately 987 execSt(gpuDynInst); 988 } 989 | 925 926 gpuDynInst->statusBitVector = VectorMask(1); 927 gpuDynInst->execContinuation = &GPUStaticInst::execSt; 928 gpuDynInst->useContinuation = true; 929 // create request 930 Request *req = new Request(0, 0, 0, 0, 931 gpuDynInst->computeUnit()->masterId(), 932 0, gpuDynInst->wfDynId); 933 req->setFlags(Request::RELEASE); 934 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); 935 936 return; 937 } 938 } 939 940 // if there is no release semantic, perform stores immediately 941 execSt(gpuDynInst); 942 } 943 |
990 bool 991 isLocalMem() const override 992 { 993 return this->segment == Brig::BRIG_SEGMENT_GROUP; 994 } 995 | |
996 private: 997 // execSt may be called through a continuation 998 // if the store had release semantics. see comment for 999 // execSt in gpu_static_inst.hh 1000 void 1001 execSt(GPUDynInstPtr gpuDynInst) override 1002 { 1003 typedef typename MemDataType::CType c0; --- 11 unchanged lines hidden (view full) --- 1015 for (int k = 0; k < num_src_operands; ++k) { 1016 c0 *d = &((c0*)gpuDynInst->d_data) 1017 [k * gpuDynInst->computeUnit()->wfSize()]; 1018 1019 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { 1020 if (gpuDynInst->exec_mask[i]) { 1021 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); 1022 | 944 private: 945 // execSt may be called through a continuation 946 // if the store had release semantics. see comment for 947 // execSt in gpu_static_inst.hh 948 void 949 execSt(GPUDynInstPtr gpuDynInst) override 950 { 951 typedef typename MemDataType::CType c0; --- 11 unchanged lines hidden (view full) --- 963 for (int k = 0; k < num_src_operands; ++k) { 964 c0 *d = &((c0*)gpuDynInst->d_data) 965 [k * gpuDynInst->computeUnit()->wfSize()]; 966 967 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { 968 if (gpuDynInst->exec_mask[i]) { 969 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); 970 |
1023 if (isLocalMem()) { | 971 if (this->isLocalMem()) { |
1024 //store to shared memory 1025 gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr, 1026 *d); 1027 } else { 1028 Request *req = 1029 new Request(0, vaddr, sizeof(c0), 0, 1030 gpuDynInst->computeUnit()->masterId(), 1031 0, gpuDynInst->wfDynId); --- 129 unchanged lines hidden (view full) --- 1161 default: 1162 fatal("Bad st register operand type %d\n", tmp.type); 1163 } 1164 } else { 1165 fatal("Bad st register operand kind %d\n", tmp.kind); 1166 } 1167 } 1168 | 972 //store to shared memory 973 gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr, 974 *d); 975 } else { 976 Request *req = 977 new Request(0, vaddr, sizeof(c0), 0, 978 gpuDynInst->computeUnit()->masterId(), 979 0, gpuDynInst->wfDynId); --- 129 unchanged lines hidden (view full) --- 1109 default: 1110 fatal("Bad st register operand type %d\n", tmp.type); 1111 } 1112 } else { 1113 fatal("Bad st register operand kind %d\n", tmp.kind); 1114 } 1115 } 1116 |
1169 Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode, 1170 Brig::BrigAtomicOperation brigOp); 1171 | |
1172 template<typename OperandType, typename AddrOperandType, int NumSrcOperands, 1173 bool HasDst> 1174 class AtomicInstBase : public HsailGPUStaticInst 1175 { 1176 public: 1177 typename OperandType::DestOperand dest; 1178 typename OperandType::SrcOperand src[NumSrcOperands]; 1179 AddrOperandType addr; 1180 1181 Brig::BrigSegment segment; 1182 Brig::BrigMemoryOrder memoryOrder; 1183 Brig::BrigAtomicOperation atomicOperation; 1184 Brig::BrigMemoryScope memoryScope; 1185 Brig::BrigOpcode opcode; | 1117 template<typename OperandType, typename AddrOperandType, int NumSrcOperands, 1118 bool HasDst> 1119 class AtomicInstBase : public HsailGPUStaticInst 1120 { 1121 public: 1122 typename OperandType::DestOperand dest; 1123 typename OperandType::SrcOperand src[NumSrcOperands]; 1124 AddrOperandType addr; 1125 1126 Brig::BrigSegment segment; 1127 Brig::BrigMemoryOrder memoryOrder; 1128 Brig::BrigAtomicOperation atomicOperation; 1129 Brig::BrigMemoryScope memoryScope; 1130 Brig::BrigOpcode opcode; |
1186 Enums::MemOpType opType; | |
1187 1188 AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 1189 const char *_opcode) 1190 : HsailGPUStaticInst(obj, _opcode) 1191 { 1192 using namespace Brig; 1193 1194 const BrigInstAtomic *at = (const BrigInstAtomic*)ib; 1195 1196 segment = (BrigSegment)at->segment; 1197 memoryScope = (BrigMemoryScope)at->memoryScope; 1198 memoryOrder = (BrigMemoryOrder)at->memoryOrder; 1199 atomicOperation = (BrigAtomicOperation)at->atomicOperation; 1200 opcode = (BrigOpcode)ib->opcode; | 1131 1132 AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 1133 const char *_opcode) 1134 : HsailGPUStaticInst(obj, _opcode) 1135 { 1136 using namespace Brig; 1137 1138 const BrigInstAtomic *at = (const BrigInstAtomic*)ib; 1139 1140 segment = (BrigSegment)at->segment; 1141 memoryScope = (BrigMemoryScope)at->memoryScope; 1142 memoryOrder = (BrigMemoryOrder)at->memoryOrder; 1143 atomicOperation = (BrigAtomicOperation)at->atomicOperation; 1144 opcode = (BrigOpcode)ib->opcode; |
1201 opType = brigAtomicToMemOpType(opcode, atomicOperation); | |
1202 | 1145 |
 | 1146 assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET || 1147 opcode == Brig::BRIG_OPCODE_ATOMIC); 1148
 | 1149 setFlag(MemoryRef); 1150
 | 1151 if (opcode == Brig::BRIG_OPCODE_ATOMIC) { 1152 setFlag(AtomicReturn); 1153 } else { 1154 setFlag(AtomicNoReturn); 1155 } 1156
 | 1157 switch (memoryOrder) { 1158 case BRIG_MEMORY_ORDER_NONE: 1159 setFlag(NoOrder); 1160 break;
 | 1161 case BRIG_MEMORY_ORDER_RELAXED: 1162 setFlag(RelaxedOrder); 1163 break;
 | 1164 case BRIG_MEMORY_ORDER_SC_ACQUIRE: 1165 setFlag(Acquire); 1166 break;
 | 1167 case BRIG_MEMORY_ORDER_SC_RELEASE: 1168 setFlag(Release); 1169 break;
 | 1170 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: 1171 setFlag(AcquireRelease); 1172 break;
 | 1173 default: 1174 fatal("AtomicInst has bad memory order type\n"); 1175 } 1176
 | 1177 switch (memoryScope) { 1178 case BRIG_MEMORY_SCOPE_NONE: 1179 setFlag(NoScope); 1180 break;
 | 1181 case BRIG_MEMORY_SCOPE_WORKITEM: 1182 setFlag(WorkitemScope); 1183 break;
 | 1184 case BRIG_MEMORY_SCOPE_WORKGROUP: 1185 setFlag(WorkgroupScope); 1186 break;
 | 1187 case BRIG_MEMORY_SCOPE_AGENT: 1188 setFlag(DeviceScope); 1189 break;
 | 1190 case BRIG_MEMORY_SCOPE_SYSTEM: 1191 setFlag(SystemScope); 1192 break;
 | 1193 default: 1194 fatal("AtomicInst has bad memory scope type\n"); 1195 } 1196
 | 1197 switch (atomicOperation) { 1198 case Brig::BRIG_ATOMIC_AND: 1199 setFlag(AtomicAnd); 1200 break;
 | 1201 case Brig::BRIG_ATOMIC_OR: 1202 setFlag(AtomicOr); 1203 break;
 | 1204 case Brig::BRIG_ATOMIC_XOR: 1205 setFlag(AtomicXor); 1206 break;
 | 1207 case Brig::BRIG_ATOMIC_CAS: 1208 setFlag(AtomicCAS); 1209 break;
 | 1210 case Brig::BRIG_ATOMIC_EXCH: 1211 setFlag(AtomicExch); 1212 break;
 | 1213 case Brig::BRIG_ATOMIC_ADD: 1214 setFlag(AtomicAdd); 1215 break;
 | 1216 case Brig::BRIG_ATOMIC_WRAPINC: 1217 setFlag(AtomicInc); 1218 break;
 | 1219 case Brig::BRIG_ATOMIC_WRAPDEC: 1220 setFlag(AtomicDec); 1221 break;
 | 1222 case Brig::BRIG_ATOMIC_MIN: 1223 setFlag(AtomicMin); 1224 break;
 | 1225 case Brig::BRIG_ATOMIC_MAX: 1226 setFlag(AtomicMax); 1227 break;
 | 1228 case Brig::BRIG_ATOMIC_SUB: 1229 setFlag(AtomicSub); 1230 break;
 | 1231 default: 1232 fatal("Bad BrigAtomicOperation code %d\n", atomicOperation); 1233 } 1234
1203 switch (segment) { 1204 case BRIG_SEGMENT_GLOBAL: | 1235 switch (segment) { 1236 case BRIG_SEGMENT_GLOBAL: |
1205 o_type = Enums::OT_GLOBAL_ATOMIC; | 1237 setFlag(GlobalSegment); |
1206 break; | 1238 break; |
1207 | |
1208 case BRIG_SEGMENT_GROUP: | 1239 case BRIG_SEGMENT_GROUP: |
1209 o_type = Enums::OT_SHARED_ATOMIC; | 1240 setFlag(GroupSegment); |
1210 break; | 1241 break; |
1211 | |
1212 case BRIG_SEGMENT_FLAT: | 1242 case BRIG_SEGMENT_FLAT: |
1213 o_type = Enums::OT_FLAT_ATOMIC; | 1243 setFlag(Flat); |
1214 break; | 1244 break; |
1215 | |
1216 default: 1217 panic("Atomic: segment %d not supported\n", segment); 1218 } 1219 1220 if (HasDst) { 1221 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 1222 dest.init(op_offs, obj); 1223 --- 125 unchanged lines hidden (view full) --- 1349 init_addr(&this->addr); 1350 } 1351 1352 void 1353 initiateAcc(GPUDynInstPtr gpuDynInst) override 1354 { 1355 // before doing the RMW, check if this atomic has 1356 // release semantics, and if so issue a release first | 1245 default: 1246 panic("Atomic: segment %d not supported\n", segment); 1247 } 1248 1249 if (HasDst) { 1250 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 1251 dest.init(op_offs, obj); 1252 --- 125 unchanged lines hidden (view full) --- 1378 init_addr(&this->addr); 1379 } 1380 1381 void 1382 initiateAcc(GPUDynInstPtr gpuDynInst) override 1383 { 1384 // before doing the RMW, check if this atomic has 1385 // release semantics, and if so issue a release first |
1357 if (!isLocalMem()) { | 1386 if (!this->isLocalMem()) { |
1358 if (gpuDynInst->computeUnit()->shader->separate_acquire_release | 1387 if (gpuDynInst->computeUnit()->shader->separate_acquire_release |
1359 && (gpuDynInst->memoryOrder == 1360 Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder == 1361 Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) { | 1388 && (gpuDynInst->isRelease() 1389 || gpuDynInst->isAcquireRelease())) { |
1362 1363 gpuDynInst->statusBitVector = VectorMask(1); 1364 1365 gpuDynInst->execContinuation = &GPUStaticInst::execAtomic; 1366 gpuDynInst->useContinuation = true; 1367 1368 // create request 1369 Request *req = new Request(0, 0, 0, 0, --- 8 unchanged lines hidden (view full) --- 1378 1379 // if there is no release semantic, execute the RMW immediately 1380 execAtomic(gpuDynInst); 1381 1382 } 1383 1384 void execute(GPUDynInstPtr gpuDynInst) override; 1385 | 1390 1391 gpuDynInst->statusBitVector = VectorMask(1); 1392 1393 gpuDynInst->execContinuation = &GPUStaticInst::execAtomic; 1394 gpuDynInst->useContinuation = true; 1395 1396 // create request 1397 Request *req = new Request(0, 0, 0, 0, --- 8 unchanged lines hidden (view full) --- 1406 1407 // if there is no release semantic, execute the RMW immediately 1408 execAtomic(gpuDynInst); 1409 1410 } 1411 1412 void execute(GPUDynInstPtr gpuDynInst) override; 1413 |
1386 bool 1387 isLocalMem() const override 1388 { 1389 return this->segment == Brig::BRIG_SEGMENT_GROUP; 1390 } 1391 | |
1392 private: 1393 // execAtomic may be called through a continuation 1394 // if the RMW had release semantics. see comment for 1395 // execContinuation in gpu_dyn_inst.hh 1396 void 1397 execAtomic(GPUDynInstPtr gpuDynInst) override 1398 { 1399 gpuDynInst->statusBitVector = gpuDynInst->exec_mask; 1400 1401 typedef typename MemDataType::CType c0; 1402 1403 c0 *d = &((c0*) gpuDynInst->d_data)[0]; 1404 c0 *e = &((c0*) gpuDynInst->a_data)[0]; 1405 c0 *f = &((c0*) gpuDynInst->x_data)[0]; 1406 1407 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { 1408 if (gpuDynInst->exec_mask[i]) { 1409 Addr vaddr = gpuDynInst->addr[i]; 1410 | 1414 private: 1415 // execAtomic may be called through a continuation 1416 // if the RMW had release semantics. see comment for 1417 // execContinuation in gpu_dyn_inst.hh 1418 void 1419 execAtomic(GPUDynInstPtr gpuDynInst) override 1420 { 1421 gpuDynInst->statusBitVector = gpuDynInst->exec_mask; 1422 1423 typedef typename MemDataType::CType c0; 1424 1425 c0 *d = &((c0*) gpuDynInst->d_data)[0]; 1426 c0 *e = &((c0*) gpuDynInst->a_data)[0]; 1427 c0 *f = &((c0*) gpuDynInst->x_data)[0]; 1428 1429 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { 1430 if (gpuDynInst->exec_mask[i]) { 1431 Addr vaddr = gpuDynInst->addr[i]; 1432 |
1411 if (isLocalMem()) { | 1433 if (this->isLocalMem()) { |
1412 Wavefront *wavefront = gpuDynInst->wavefront(); 1413 *d = wavefront->ldsChunk->read<c0>(vaddr); 1414 | 1434 Wavefront *wavefront = gpuDynInst->wavefront(); 1435 *d = wavefront->ldsChunk->read<c0>(vaddr); 1436 |
1415 switch (this->opType) { 1416 case Enums::MO_AADD: 1417 case Enums::MO_ANRADD: | 1437 if (this->isAtomicAdd()) { |
1418 wavefront->ldsChunk->write<c0>(vaddr, 1419 wavefront->ldsChunk->read<c0>(vaddr) + (*e)); | 1438 wavefront->ldsChunk->write<c0>(vaddr, 1439 wavefront->ldsChunk->read<c0>(vaddr) + (*e)); |
1420 break; 1421 case Enums::MO_ASUB: 1422 case Enums::MO_ANRSUB: | 1440 } else if (this->isAtomicSub()) { |
1423 wavefront->ldsChunk->write<c0>(vaddr, 1424 wavefront->ldsChunk->read<c0>(vaddr) - (*e)); | 1441 wavefront->ldsChunk->write<c0>(vaddr, 1442 wavefront->ldsChunk->read<c0>(vaddr) - (*e)); |
1425 break; 1426 case Enums::MO_AMAX: 1427 case Enums::MO_ANRMAX: | 1443 } else if (this->isAtomicMax()) { |
1428 wavefront->ldsChunk->write<c0>(vaddr, 1429 std::max(wavefront->ldsChunk->read<c0>(vaddr), 1430 (*e))); | 1444 wavefront->ldsChunk->write<c0>(vaddr, 1445 std::max(wavefront->ldsChunk->read<c0>(vaddr), 1446 (*e))); |
1431 break; 1432 case Enums::MO_AMIN: 1433 case Enums::MO_ANRMIN: | 1447 } else if (this->isAtomicMin()) { |
1434 wavefront->ldsChunk->write<c0>(vaddr, 1435 std::min(wavefront->ldsChunk->read<c0>(vaddr), 1436 (*e))); | 1448 wavefront->ldsChunk->write<c0>(vaddr, 1449 std::min(wavefront->ldsChunk->read<c0>(vaddr), 1450 (*e))); |
1437 break; 1438 case Enums::MO_AAND: 1439 case Enums::MO_ANRAND: | 1451 } else if (this->isAtomicAnd()) { |
1440 wavefront->ldsChunk->write<c0>(vaddr, 1441 wavefront->ldsChunk->read<c0>(vaddr) & (*e)); | 1452 wavefront->ldsChunk->write<c0>(vaddr, 1453 wavefront->ldsChunk->read<c0>(vaddr) & (*e)); |
1442 break; 1443 case Enums::MO_AOR: 1444 case Enums::MO_ANROR: | 1454 } else if (this->isAtomicOr()) { |
1445 wavefront->ldsChunk->write<c0>(vaddr, 1446 wavefront->ldsChunk->read<c0>(vaddr) | (*e)); | 1455 wavefront->ldsChunk->write<c0>(vaddr, 1456 wavefront->ldsChunk->read<c0>(vaddr) | (*e)); |
1447 break; 1448 case Enums::MO_AXOR: 1449 case Enums::MO_ANRXOR: | 1457 } else if (this->isAtomicXor()) { |
1450 wavefront->ldsChunk->write<c0>(vaddr, 1451 wavefront->ldsChunk->read<c0>(vaddr) ^ (*e)); | 1458 wavefront->ldsChunk->write<c0>(vaddr, 1459 wavefront->ldsChunk->read<c0>(vaddr) ^ (*e)); |
1452 break; 1453 case Enums::MO_AINC: 1454 case Enums::MO_ANRINC: | 1460 } else if (this->isAtomicInc()) { |
1455 wavefront->ldsChunk->write<c0>(vaddr, 1456 wavefront->ldsChunk->read<c0>(vaddr) + 1); | 1461 wavefront->ldsChunk->write<c0>(vaddr, 1462 wavefront->ldsChunk->read<c0>(vaddr) + 1); |
1457 break; 1458 case Enums::MO_ADEC: 1459 case Enums::MO_ANRDEC: | 1463 } else if (this->isAtomicDec()) { |
1460 wavefront->ldsChunk->write<c0>(vaddr, 1461 wavefront->ldsChunk->read<c0>(vaddr) - 1); | 1464 wavefront->ldsChunk->write<c0>(vaddr, 1465 wavefront->ldsChunk->read<c0>(vaddr) - 1); |
1462 break; 1463 case Enums::MO_AEXCH: 1464 case Enums::MO_ANREXCH: | 1466 } else if (this->isAtomicExch()) { |
1465 wavefront->ldsChunk->write<c0>(vaddr, (*e)); | 1467 wavefront->ldsChunk->write<c0>(vaddr, (*e)); |
1466 break; 1467 case Enums::MO_ACAS: 1468 case Enums::MO_ANRCAS: | 1468 } else if (this->isAtomicCAS()) { |
1469 wavefront->ldsChunk->write<c0>(vaddr, 1470 (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ? 1471 (*f) : wavefront->ldsChunk->read<c0>(vaddr)); | 1469 wavefront->ldsChunk->write<c0>(vaddr, 1470 (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ? 1471 (*f) : wavefront->ldsChunk->read<c0>(vaddr)); |
1472 break; 1473 default: | 1472 } else { |
1474 fatal("Unrecognized or invalid HSAIL atomic op " 1475 "type.\n"); | 1473 fatal("Unrecognized or invalid HSAIL atomic op " 1474 "type.\n"); |
1476 break; | |
1477 } 1478 } else { 1479 Request *req = 1480 new Request(0, vaddr, sizeof(c0), 0, 1481 gpuDynInst->computeUnit()->masterId(), 1482 0, gpuDynInst->wfDynId, 1483 gpuDynInst->makeAtomicOpFunctor<c0>(e, | 1475 } 1476 } else { 1477 Request *req = 1478 new Request(0, vaddr, sizeof(c0), 0, 1479 gpuDynInst->computeUnit()->masterId(), 1480 0, gpuDynInst->wfDynId, 1481 gpuDynInst->makeAtomicOpFunctor<c0>(e, |
1484 f, this->opType)); | 1482 f)); |
1485 1486 gpuDynInst->setRequestFlags(req); 1487 PacketPtr pkt = new Packet(req, MemCmd::SwapReq); 1488 pkt->dataStatic(d); 1489 1490 if (gpuDynInst->computeUnit()->shader-> 1491 separate_acquire_release && | 1483 1484 gpuDynInst->setRequestFlags(req); 1485 PacketPtr pkt = new Packet(req, MemCmd::SwapReq); 1486 pkt->dataStatic(d); 1487 1488 if (gpuDynInst->computeUnit()->shader-> 1489 separate_acquire_release && |
1492 (gpuDynInst->memoryOrder == 1493 Enums::MEMORY_ORDER_SC_ACQUIRE)) { | 1490 (gpuDynInst->isAcquire())) { |
1494 // if this atomic has acquire semantics, 1495 // schedule the continuation to perform an 1496 // acquire after the RMW completes 1497 gpuDynInst->execContinuation = 1498 &GPUStaticInst::execAtomicAcq; 1499 1500 gpuDynInst->useContinuation = true; 1501 } else { --- 16 unchanged lines hidden (view full) --- 1518 1519 // execAtomicACq will always be called through a continuation. 1520 // see comment for execContinuation in gpu_dyn_inst.hh 1521 void 1522 execAtomicAcq(GPUDynInstPtr gpuDynInst) override 1523 { 1524 // after performing the RMW, check to see if this instruction 1525 // has acquire semantics, and if so, issue an acquire | 1491 // if this atomic has acquire semantics, 1492 // schedule the continuation to perform an 1493 // acquire after the RMW completes 1494 gpuDynInst->execContinuation = 1495 &GPUStaticInst::execAtomicAcq; 1496 1497 gpuDynInst->useContinuation = true; 1498 } else { --- 16 unchanged lines hidden (view full) --- 1515 1516 // execAtomicACq will always be called through a continuation. 1517 // see comment for execContinuation in gpu_dyn_inst.hh 1518 void 1519 execAtomicAcq(GPUDynInstPtr gpuDynInst) override 1520 { 1521 // after performing the RMW, check to see if this instruction 1522 // has acquire semantics, and if so, issue an acquire |
1526 if (!isLocalMem()) { | 1523 if (!this->isLocalMem()) { |
1527 if (gpuDynInst->computeUnit()->shader->separate_acquire_release | 1524 if (gpuDynInst->computeUnit()->shader->separate_acquire_release |
1528 && gpuDynInst->memoryOrder == 1529 Enums::MEMORY_ORDER_SC_ACQUIRE) { | 1525 && gpuDynInst->isAcquire()) { |
1530 gpuDynInst->statusBitVector = VectorMask(1); 1531 1532 // the request will be finished when 1533 // the acquire completes 1534 gpuDynInst->useContinuation = false; 1535 // create request 1536 Request *req = new Request(0, 0, 0, 0, 1537 gpuDynInst->computeUnit()->masterId(), --- 97 unchanged lines hidden --- | 1526 gpuDynInst->statusBitVector = VectorMask(1); 1527 1528 // the request will be finished when 1529 // the acquire completes 1530 gpuDynInst->useContinuation = false; 1531 // create request 1532 Request *req = new Request(0, 0, 0, 0, 1533 gpuDynInst->computeUnit()->masterId(), --- 97 unchanged lines hidden --- |
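The changeset above replaces the old per-instruction Enums::MemOpType / o_type bookkeeping with flags set once at decode (setFlag(...)) and queried later (isLocalMem(), isAcquire(), isAtomicAdd(), ...). As a minimal, standalone sketch of that pattern only: nothing below is gem5 code, and every name except the setFlag/is* vocabulary echoed from the diff is invented for illustration.

```cpp
// Standalone illustration only -- not gem5 code.  It mirrors the shape of the
// new interface (setFlag at decode, is*() queries at execute) without any of
// the real Brig/HSAIL machinery.
#include <bitset>
#include <cstdio>

enum Flag { MemoryRef, Load, Store, AtomicAdd, GlobalSegment, GroupSegment,
            Acquire, Release, NumFlags };

class Inst
{
  public:
    void setFlag(Flag f) { flags.set(f); }
    bool isLoad() const { return flags[Load]; }
    bool isAtomicAdd() const { return flags[AtomicAdd]; }
    bool isLocalMem() const { return flags[GroupSegment]; }  // group segment == LDS
    bool isAcquire() const { return flags[Acquire]; }
  private:
    std::bitset<NumFlags> flags;
};

int main()
{
    Inst ld;
    // Roughly what the new LdInstBase constructor does for an acquire load
    // from the group segment, in miniature:
    ld.setFlag(MemoryRef);
    ld.setFlag(Load);
    ld.setFlag(GroupSegment);
    ld.setFlag(Acquire);

    // Consumers branch on flags instead of switching on an o_type enum:
    if (ld.isLocalMem())
        std::printf("issue to the LDS path\n");
    if (ld.isAcquire())
        std::printf("schedule an acquire after the access completes\n");
    return 0;
}
```

A likely motivation, judging from the diff, is composability: one atomic can carry AtomicAdd, GlobalSegment, and Acquire at once, where the old scheme needed both an o_type value (e.g. OT_GLOBAL_ATOMIC) and a separate MemOpType obtained from brigAtomicToMemOpType().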