--- mem.hh (11536:fdfc2455b091)
+++ mem.hh (11692:e772fdcd3809)
 /*
  * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
  * For use for simulation and test purposes only
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:

--- 82 unchanged lines hidden ---

         AddrOperandType addr;

         LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
             : HsailGPUStaticInst(obj, _opcode)
         {
             using namespace Brig;

+            setFlag(ALU);
+
             unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
             dest.init(op_offs, obj);
             op_offs = obj->getOperandPtr(ib->operands, 1);
             addr.init(op_offs, obj);
         }

         int numSrcRegOperands() override
         { return(this->addr.isVectorRegister()); }

--- 99 unchanged lines hidden ---

         Brig::BrigWidth8_t width;
         typename DestOperandType::DestOperand dest;
         AddrOperandType addr;

         Brig::BrigSegment segment;
         Brig::BrigMemoryOrder memoryOrder;
         Brig::BrigMemoryScope memoryScope;
         unsigned int equivClass;
-        bool isArgLoad()
-        {
-            return segment == Brig::BRIG_SEGMENT_KERNARG ||
-                   segment == Brig::BRIG_SEGMENT_ARG;
-        }
-        void
-        initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
-               const char *_opcode)
-        {
-            using namespace Brig;
-
-            const BrigInstMem *ldst = (const BrigInstMem*)ib;
-
-            segment = (BrigSegment)ldst->segment;
-            memoryOrder = BRIG_MEMORY_ORDER_NONE;
-            memoryScope = BRIG_MEMORY_SCOPE_NONE;
-            equivClass = ldst->equivClass;
-
-            switch (segment) {
-            case BRIG_SEGMENT_GLOBAL:
-                o_type = Enums::OT_GLOBAL_READ;
-                break;
-
-            case BRIG_SEGMENT_GROUP:
-                o_type = Enums::OT_SHARED_READ;
-                break;
-
-            case BRIG_SEGMENT_PRIVATE:
-                o_type = Enums::OT_PRIVATE_READ;
-                break;
-
-            case BRIG_SEGMENT_READONLY:
-                o_type = Enums::OT_READONLY_READ;
-                break;
-
-            case BRIG_SEGMENT_SPILL:
-                o_type = Enums::OT_SPILL_READ;
-                break;
-
-            case BRIG_SEGMENT_FLAT:
-                o_type = Enums::OT_FLAT_READ;
-                break;
-
-            case BRIG_SEGMENT_KERNARG:
-                o_type = Enums::OT_KERN_READ;
-                break;
-
-            case BRIG_SEGMENT_ARG:
-                o_type = Enums::OT_ARG;
-                break;
-
-            default:
-                panic("Ld: segment %d not supported\n", segment);
-            }
-
-            width = ldst->width;
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
-            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
-                dest.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            addr.init(op_offs, obj);
-        }
-
-        void
-        initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                     const char *_opcode)
-        {
-            using namespace Brig;
-
-            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
-            segment = (BrigSegment)at->segment;
-            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
-            memoryScope = (BrigMemoryScope)at->memoryScope;
-            equivClass = 0;
-
-            switch (segment) {
-            case BRIG_SEGMENT_GLOBAL:
-                o_type = Enums::OT_GLOBAL_READ;
-                break;
-
-            case BRIG_SEGMENT_GROUP:
-                o_type = Enums::OT_SHARED_READ;
-                break;
-
-            case BRIG_SEGMENT_PRIVATE:
-                o_type = Enums::OT_PRIVATE_READ;
-                break;
-
-            case BRIG_SEGMENT_READONLY:
-                o_type = Enums::OT_READONLY_READ;
-                break;
-
-            case BRIG_SEGMENT_SPILL:
-                o_type = Enums::OT_SPILL_READ;
-                break;
-
-            case BRIG_SEGMENT_FLAT:
-                o_type = Enums::OT_FLAT_READ;
-                break;
-
-            case BRIG_SEGMENT_KERNARG:
-                o_type = Enums::OT_KERN_READ;
-                break;
-
-            case BRIG_SEGMENT_ARG:
-                o_type = Enums::OT_ARG;
-                break;
-
-            default:
-                panic("Ld: segment %d not supported\n", segment);
-            }
-
-            width = BRIG_WIDTH_1;
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
-
-            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
-                dest.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands,1);
-            addr.init(op_offs, obj);
-        }
-
-        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                   const char *_opcode)
-            : HsailGPUStaticInst(obj, _opcode)
-        {
-            using namespace Brig;
-
-            if (ib->opcode == BRIG_OPCODE_LD) {
-                initLd(ib, obj, _opcode);
-            } else {
-                initAtomicLd(ib, obj, _opcode);
-            }
-        }
-
+
+        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+                   const char *_opcode)
+            : HsailGPUStaticInst(obj, _opcode)
+        {
+            using namespace Brig;
+
+            setFlag(MemoryRef);
+            setFlag(Load);
+
+            if (ib->opcode == BRIG_OPCODE_LD) {
+                const BrigInstMem *ldst = (const BrigInstMem*)ib;
+
+                segment = (BrigSegment)ldst->segment;
+                memoryOrder = BRIG_MEMORY_ORDER_NONE;
+                memoryScope = BRIG_MEMORY_SCOPE_NONE;
+                equivClass = ldst->equivClass;
+
+                width = ldst->width;
+                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+                const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+                if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
+                    dest.init(op_offs, obj);
+
+                op_offs = obj->getOperandPtr(ib->operands, 1);
+                addr.init(op_offs, obj);
+            } else {
+                const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
+
+                segment = (BrigSegment)at->segment;
+                memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+                memoryScope = (BrigMemoryScope)at->memoryScope;
+                equivClass = 0;
+
+                width = BRIG_WIDTH_1;
+                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+                const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
+
+                if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
+                    dest.init(op_offs, obj);
+
+                op_offs = obj->getOperandPtr(ib->operands,1);
+                addr.init(op_offs, obj);
+            }
+
+            switch (memoryOrder) {
+            case BRIG_MEMORY_ORDER_NONE:
+                setFlag(NoOrder);
+                break;
+            case BRIG_MEMORY_ORDER_RELAXED:
+                setFlag(RelaxedOrder);
+                break;
+            case BRIG_MEMORY_ORDER_SC_ACQUIRE:
+                setFlag(Acquire);
+                break;
+            case BRIG_MEMORY_ORDER_SC_RELEASE:
+                setFlag(Release);
+                break;
+            case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+                setFlag(AcquireRelease);
+                break;
+            default:
+                fatal("LdInst has bad memory order type\n");
+            }
+
+            switch (memoryScope) {
+            case BRIG_MEMORY_SCOPE_NONE:
+                setFlag(NoScope);
+                break;
+            case BRIG_MEMORY_SCOPE_WORKITEM:
+                setFlag(WorkitemScope);
+                break;
+            case BRIG_MEMORY_SCOPE_WORKGROUP:
+                setFlag(WorkgroupScope);
+                break;
+            case BRIG_MEMORY_SCOPE_AGENT:
+                setFlag(DeviceScope);
+                break;
+            case BRIG_MEMORY_SCOPE_SYSTEM:
+                setFlag(SystemScope);
+                break;
+            default:
+                fatal("LdInst has bad memory scope type\n");
+            }
+
+            switch (segment) {
+            case BRIG_SEGMENT_GLOBAL:
+                setFlag(GlobalSegment);
+                break;
+            case BRIG_SEGMENT_GROUP:
+                setFlag(GroupSegment);
+                break;
+            case BRIG_SEGMENT_PRIVATE:
+                setFlag(PrivateSegment);
+                break;
+            case BRIG_SEGMENT_READONLY:
+                setFlag(ReadOnlySegment);
+                break;
+            case BRIG_SEGMENT_SPILL:
+                setFlag(SpillSegment);
+                break;
+            case BRIG_SEGMENT_FLAT:
+                setFlag(Flat);
+                break;
+            case BRIG_SEGMENT_KERNARG:
+                setFlag(KernArgSegment);
+                break;
+            case BRIG_SEGMENT_ARG:
+                setFlag(ArgSegment);
+                break;
+            default:
+                panic("Ld: segment %d not supported\n", segment);
+            }
+        }
+
         int numSrcRegOperands() override
         { return(this->addr.isVectorRegister()); }
         int numDstRegOperands() override { return dest.isVectorRegister(); }
         int getNumOperands() override
         {
             if (this->addr.isVectorRegister())
                 return 2;
             else

--- 107 unchanged lines hidden ---

                 c0 *d = &((c0*)gpuDynInst->d_data)
                     [k * gpuDynInst->computeUnit()->wfSize()];

                 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                     if (gpuDynInst->exec_mask[i]) {
                         Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

-                        if (isLocalMem()) {
+                        if (this->isLocalMem()) {
                             // load from shared memory
                             *d = gpuDynInst->wavefront()->ldsChunk->
                                 read<c0>(vaddr);
                         } else {
                             Request *req = new Request(0, vaddr, sizeof(c0), 0,
                                 gpuDynInst->computeUnit()->masterId(),
                                 0, gpuDynInst->wfDynId);

                             gpuDynInst->setRequestFlags(req);
                             PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                             pkt->dataStatic(d);

                             if (gpuDynInst->computeUnit()->shader->
                                 separate_acquire_release &&
-                                gpuDynInst->memoryOrder ==
-                                Enums::MEMORY_ORDER_SC_ACQUIRE) {
+                                gpuDynInst->isAcquire()) {
                                 // if this load has acquire semantics,
                                 // set the response continuation function
                                 // to perform an Acquire request
                                 gpuDynInst->execContinuation =
                                     &GPUStaticInst::execLdAcq;

                                 gpuDynInst->useContinuation = true;
                             } else {

--- 14 unchanged lines hidden ---

         }

      private:
         void
         execLdAcq(GPUDynInstPtr gpuDynInst) override
         {
             // after the load has complete and if the load has acquire
             // semantics, issue an acquire request.
-            if (!isLocalMem()) {
+            if (!this->isLocalMem()) {
                 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
-                    && gpuDynInst->memoryOrder ==
-                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
+                    && gpuDynInst->isAcquire()) {
                     gpuDynInst->statusBitVector = VectorMask(1);
                     gpuDynInst->useContinuation = false;
                     // create request
                     Request *req = new Request(0, 0, 0, 0,
                         gpuDynInst->computeUnit()->masterId(),
                         0, gpuDynInst->wfDynId);
                     req->setFlags(Request::ACQUIRE);
                     gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
                 }
             }
         }

      public:
-        bool
-        isLocalMem() const override
-        {
-            return this->segment == Brig::BRIG_SEGMENT_GROUP;
-        }
-
         bool isVectorRegister(int operandIndex) override
         {
             assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
             if ((num_dest_operands != getNumOperands()) &&
                 (operandIndex == (getNumOperands()-1)))
                 return(this->addr.isVectorRegister());
             if (num_dest_operands > 1) {
                 return dest_vect[operandIndex].isVectorRegister();

--- 172 unchanged lines hidden ---

         typename SrcOperandType::SrcOperand src;
         AddrOperandType addr;

         Brig::BrigSegment segment;
         Brig::BrigMemoryScope memoryScope;
         Brig::BrigMemoryOrder memoryOrder;
         unsigned int equivClass;

-        void
-        initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
-               const char *_opcode)
-        {
-            using namespace Brig;
-
-            const BrigInstMem *ldst = (const BrigInstMem*)ib;
-
-            segment = (BrigSegment)ldst->segment;
-            memoryOrder = BRIG_MEMORY_ORDER_NONE;
-            memoryScope = BRIG_MEMORY_SCOPE_NONE;
-            equivClass = ldst->equivClass;
-
-            switch (segment) {
-            case BRIG_SEGMENT_GLOBAL:
-                o_type = Enums::OT_GLOBAL_WRITE;
-                break;
-
-            case BRIG_SEGMENT_GROUP:
-                o_type = Enums::OT_SHARED_WRITE;
-                break;
-
-            case BRIG_SEGMENT_PRIVATE:
-                o_type = Enums::OT_PRIVATE_WRITE;
-                break;
-
-            case BRIG_SEGMENT_READONLY:
-                o_type = Enums::OT_READONLY_WRITE;
-                break;
-
-            case BRIG_SEGMENT_SPILL:
-                o_type = Enums::OT_SPILL_WRITE;
-                break;
-
-            case BRIG_SEGMENT_FLAT:
-                o_type = Enums::OT_FLAT_WRITE;
-                break;
-
-            case BRIG_SEGMENT_ARG:
-                o_type = Enums::OT_ARG;
-                break;
-
-            default:
-                panic("St: segment %d not supported\n", segment);
-            }
-
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            const BrigOperand *baseOp = obj->getOperand(op_offs);
-
-            if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
-                (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
-                src.init(op_offs, obj);
-            }
-
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            addr.init(op_offs, obj);
-        }
-
-        void
-        initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                     const char *_opcode)
-        {
-            using namespace Brig;
-
-            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
-            segment = (BrigSegment)at->segment;
-            memoryScope = (BrigMemoryScope)at->memoryScope;
-            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
-            equivClass = 0;
-
-            switch (segment) {
-            case BRIG_SEGMENT_GLOBAL:
-                o_type = Enums::OT_GLOBAL_WRITE;
-                break;
-
-            case BRIG_SEGMENT_GROUP:
-                o_type = Enums::OT_SHARED_WRITE;
-                break;
-
-            case BRIG_SEGMENT_PRIVATE:
-                o_type = Enums::OT_PRIVATE_WRITE;
-                break;
-
-            case BRIG_SEGMENT_READONLY:
-                o_type = Enums::OT_READONLY_WRITE;
-                break;
-
-            case BRIG_SEGMENT_SPILL:
-                o_type = Enums::OT_SPILL_WRITE;
-                break;
-
-            case BRIG_SEGMENT_FLAT:
-                o_type = Enums::OT_FLAT_WRITE;
-                break;
-
-            case BRIG_SEGMENT_ARG:
-                o_type = Enums::OT_ARG;
-                break;
-
-            default:
-                panic("St: segment %d not supported\n", segment);
-            }
-
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            addr.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            src.init(op_offs, obj);
-        }
-
-        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                   const char *_opcode)
-            : HsailGPUStaticInst(obj, _opcode)
-        {
-            using namespace Brig;
-
-            if (ib->opcode == BRIG_OPCODE_ST) {
-                initSt(ib, obj, _opcode);
-            } else {
-                initAtomicSt(ib, obj, _opcode);
-            }
-        }
-
+        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
+                   const char *_opcode)
+            : HsailGPUStaticInst(obj, _opcode)
+        {
+            using namespace Brig;
+
+            setFlag(MemoryRef);
+            setFlag(Store);
+
+            if (ib->opcode == BRIG_OPCODE_ST) {
+                const BrigInstMem *ldst = (const BrigInstMem*)ib;
+
+                segment = (BrigSegment)ldst->segment;
+                memoryOrder = BRIG_MEMORY_ORDER_NONE;
+                memoryScope = BRIG_MEMORY_SCOPE_NONE;
+                equivClass = ldst->equivClass;
+
+                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+                const BrigOperand *baseOp = obj->getOperand(op_offs);
+
+                if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
+                    (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
+                    src.init(op_offs, obj);
+                }
+
+                op_offs = obj->getOperandPtr(ib->operands, 1);
+                addr.init(op_offs, obj);
+            } else {
+                const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
+
+                segment = (BrigSegment)at->segment;
+                memoryScope = (BrigMemoryScope)at->memoryScope;
+                memoryOrder = (BrigMemoryOrder)at->memoryOrder;
+                equivClass = 0;
+
+                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
+                addr.init(op_offs, obj);
+
+                op_offs = obj->getOperandPtr(ib->operands, 1);
+                src.init(op_offs, obj);
+            }
+
+            switch (memoryOrder) {
+            case BRIG_MEMORY_ORDER_NONE:
+                setFlag(NoOrder);
+                break;
+            case BRIG_MEMORY_ORDER_RELAXED:
+                setFlag(RelaxedOrder);
+                break;
+            case BRIG_MEMORY_ORDER_SC_ACQUIRE:
+                setFlag(Acquire);
+                break;
+            case BRIG_MEMORY_ORDER_SC_RELEASE:
+                setFlag(Release);
+                break;
+            case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+                setFlag(AcquireRelease);
+                break;
+            default:
+                fatal("StInst has bad memory order type\n");
+            }
+
+            switch (memoryScope) {
+            case BRIG_MEMORY_SCOPE_NONE:
+                setFlag(NoScope);
+                break;
+            case BRIG_MEMORY_SCOPE_WORKITEM:
+                setFlag(WorkitemScope);
+                break;
+            case BRIG_MEMORY_SCOPE_WORKGROUP:
+                setFlag(WorkgroupScope);
+                break;
+            case BRIG_MEMORY_SCOPE_AGENT:
+                setFlag(DeviceScope);
+                break;
+            case BRIG_MEMORY_SCOPE_SYSTEM:
+                setFlag(SystemScope);
+                break;
+            default:
+                fatal("StInst has bad memory scope type\n");
+            }
+
+            switch (segment) {
+            case BRIG_SEGMENT_GLOBAL:
+                setFlag(GlobalSegment);
+                break;
+            case BRIG_SEGMENT_GROUP:
+                setFlag(GroupSegment);
+                break;
+            case BRIG_SEGMENT_PRIVATE:
+                setFlag(PrivateSegment);
+                break;
+            case BRIG_SEGMENT_READONLY:
+                setFlag(ReadOnlySegment);
+                break;
+            case BRIG_SEGMENT_SPILL:
+                setFlag(SpillSegment);
+                break;
+            case BRIG_SEGMENT_FLAT:
+                setFlag(Flat);
+                break;
+            case BRIG_SEGMENT_ARG:
+                setFlag(ArgSegment);
+                break;
+            default:
+                panic("St: segment %d not supported\n", segment);
+            }
+        }
+
         int numDstRegOperands() override { return 0; }
         int numSrcRegOperands() override
         {
             return src.isVectorRegister() + this->addr.isVectorRegister();
         }
         int getNumOperands() override
         {
             if (this->addr.isVectorRegister() || this->addr.isScalarRegister())

--- 93 unchanged lines hidden ---

             }
         }

         void
         initiateAcc(GPUDynInstPtr gpuDynInst) override
         {
             // before performing a store, check if this store has
             // release semantics, and if so issue a release first
-            if (!isLocalMem()) {
+            if (!this->isLocalMem()) {
                 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
-                    && gpuDynInst->memoryOrder ==
-                    Enums::MEMORY_ORDER_SC_RELEASE) {
+                    && gpuDynInst->isRelease()) {

                     gpuDynInst->statusBitVector = VectorMask(1);
                     gpuDynInst->execContinuation = &GPUStaticInst::execSt;
                     gpuDynInst->useContinuation = true;
                     // create request
                     Request *req = new Request(0, 0, 0, 0,
                         gpuDynInst->computeUnit()->masterId(),
                         0, gpuDynInst->wfDynId);
                     req->setFlags(Request::RELEASE);
                     gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);

                     return;
                 }
             }

             // if there is no release semantic, perform stores immediately
             execSt(gpuDynInst);
         }

-        bool
-        isLocalMem() const override
-        {
-            return this->segment == Brig::BRIG_SEGMENT_GROUP;
-        }
-
      private:
         // execSt may be called through a continuation
         // if the store had release semantics. see comment for
         // execSt in gpu_static_inst.hh
         void
         execSt(GPUDynInstPtr gpuDynInst) override
         {
             typedef typename MemDataType::CType c0;

--- 11 unchanged lines hidden ---

             for (int k = 0; k < num_src_operands; ++k) {
                 c0 *d = &((c0*)gpuDynInst->d_data)
                     [k * gpuDynInst->computeUnit()->wfSize()];

                 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                     if (gpuDynInst->exec_mask[i]) {
                         Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

-                        if (isLocalMem()) {
+                        if (this->isLocalMem()) {
                             //store to shared memory
                             gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
                                                                          *d);
                         } else {
                             Request *req =
                                 new Request(0, vaddr, sizeof(c0), 0,
                                             gpuDynInst->computeUnit()->masterId(),
                                             0, gpuDynInst->wfDynId);

--- 129 unchanged lines hidden ---

               default:
                 fatal("Bad st register operand type %d\n", tmp.type);
             }
         } else {
             fatal("Bad st register operand kind %d\n", tmp.kind);
         }
     }

-    Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
-                                           Brig::BrigAtomicOperation brigOp);
-
     template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
              bool HasDst>
     class AtomicInstBase : public HsailGPUStaticInst
     {
       public:
         typename OperandType::DestOperand dest;
         typename OperandType::SrcOperand src[NumSrcOperands];
         AddrOperandType addr;

         Brig::BrigSegment segment;
         Brig::BrigMemoryOrder memoryOrder;
         Brig::BrigAtomicOperation atomicOperation;
         Brig::BrigMemoryScope memoryScope;
         Brig::BrigOpcode opcode;
-        Enums::MemOpType opType;

         AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                        const char *_opcode)
             : HsailGPUStaticInst(obj, _opcode)
         {
             using namespace Brig;

             const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

             segment = (BrigSegment)at->segment;
             memoryScope = (BrigMemoryScope)at->memoryScope;
             memoryOrder = (BrigMemoryOrder)at->memoryOrder;
             atomicOperation = (BrigAtomicOperation)at->atomicOperation;
             opcode = (BrigOpcode)ib->opcode;
-            opType = brigAtomicToMemOpType(opcode, atomicOperation);

+            assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
+                   opcode == Brig::BRIG_OPCODE_ATOMIC);
+
+            setFlag(MemoryRef);
+
+            if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
+                setFlag(AtomicReturn);
+            } else {
+                setFlag(AtomicNoReturn);
+            }
+
+            switch (memoryOrder) {
+            case BRIG_MEMORY_ORDER_NONE:
+                setFlag(NoOrder);
+                break;
+            case BRIG_MEMORY_ORDER_RELAXED:
+                setFlag(RelaxedOrder);
+                break;
+            case BRIG_MEMORY_ORDER_SC_ACQUIRE:
+                setFlag(Acquire);
+                break;
+            case BRIG_MEMORY_ORDER_SC_RELEASE:
+                setFlag(Release);
+                break;
+            case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
+                setFlag(AcquireRelease);
+                break;
+            default:
+                fatal("AtomicInst has bad memory order type\n");
+            }
+
+            switch (memoryScope) {
+            case BRIG_MEMORY_SCOPE_NONE:
+                setFlag(NoScope);
+                break;
+            case BRIG_MEMORY_SCOPE_WORKITEM:
+                setFlag(WorkitemScope);
+                break;
+            case BRIG_MEMORY_SCOPE_WORKGROUP:
+                setFlag(WorkgroupScope);
+                break;
+            case BRIG_MEMORY_SCOPE_AGENT:
+                setFlag(DeviceScope);
+                break;
+            case BRIG_MEMORY_SCOPE_SYSTEM:
+                setFlag(SystemScope);
+                break;
+            default:
+                fatal("AtomicInst has bad memory scope type\n");
+            }
+
+            switch (atomicOperation) {
+            case Brig::BRIG_ATOMIC_AND:
+                setFlag(AtomicAnd);
+                break;
+            case Brig::BRIG_ATOMIC_OR:
+                setFlag(AtomicOr);
+                break;
+            case Brig::BRIG_ATOMIC_XOR:
+                setFlag(AtomicXor);
+                break;
+            case Brig::BRIG_ATOMIC_CAS:
+                setFlag(AtomicCAS);
+                break;
+            case Brig::BRIG_ATOMIC_EXCH:
+                setFlag(AtomicExch);
+                break;
+            case Brig::BRIG_ATOMIC_ADD:
+                setFlag(AtomicAdd);
+                break;
+            case Brig::BRIG_ATOMIC_WRAPINC:
+                setFlag(AtomicInc);
+                break;
+            case Brig::BRIG_ATOMIC_WRAPDEC:
+                setFlag(AtomicDec);
+                break;
+            case Brig::BRIG_ATOMIC_MIN:
+                setFlag(AtomicMin);
+                break;
+            case Brig::BRIG_ATOMIC_MAX:
+                setFlag(AtomicMax);
+                break;
+            case Brig::BRIG_ATOMIC_SUB:
+                setFlag(AtomicSub);
+                break;
+            default:
+                fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
+            }
+
             switch (segment) {
             case BRIG_SEGMENT_GLOBAL:
-                o_type = Enums::OT_GLOBAL_ATOMIC;
+                setFlag(GlobalSegment);
                 break;
-
             case BRIG_SEGMENT_GROUP:
-                o_type = Enums::OT_SHARED_ATOMIC;
+                setFlag(GroupSegment);
                 break;
-
             case BRIG_SEGMENT_FLAT:
-                o_type = Enums::OT_FLAT_ATOMIC;
+                setFlag(Flat);
                 break;
-
             default:
                 panic("Atomic: segment %d not supported\n", segment);
             }

             if (HasDst) {
                 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                 dest.init(op_offs, obj);

--- 125 unchanged lines hidden ---

             init_addr(&this->addr);
         }

         void
         initiateAcc(GPUDynInstPtr gpuDynInst) override
         {
             // before doing the RMW, check if this atomic has
             // release semantics, and if so issue a release first
-            if (!isLocalMem()) {
+            if (!this->isLocalMem()) {
                 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
-                    && (gpuDynInst->memoryOrder ==
-                    Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
-                    Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
+                    && (gpuDynInst->isRelease()
+                    || gpuDynInst->isAcquireRelease())) {

                     gpuDynInst->statusBitVector = VectorMask(1);

                     gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
                     gpuDynInst->useContinuation = true;

                     // create request
                     Request *req = new Request(0, 0, 0, 0,

--- 8 unchanged lines hidden ---

             // if there is no release semantic, execute the RMW immediately
             execAtomic(gpuDynInst);

         }

         void execute(GPUDynInstPtr gpuDynInst) override

-        bool
-        isLocalMem() const override
-        {
-            return this->segment == Brig::BRIG_SEGMENT_GROUP;
-        }
-
       private:
         // execAtomic may be called through a continuation
         // if the RMW had release semantics. see comment for
         // execContinuation in gpu_dyn_inst.hh
         void
         execAtomic(GPUDynInstPtr gpuDynInst) override
         {
             gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

             typedef typename MemDataType::CType c0;

             c0 *d = &((c0*) gpuDynInst->d_data)[0];
             c0 *e = &((c0*) gpuDynInst->a_data)[0];
             c0 *f = &((c0*) gpuDynInst->x_data)[0];

             for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                 if (gpuDynInst->exec_mask[i]) {
                     Addr vaddr = gpuDynInst->addr[i];

-                    if (isLocalMem()) {
+                    if (this->isLocalMem()) {
                         Wavefront *wavefront = gpuDynInst->wavefront();
                         *d = wavefront->ldsChunk->read<c0>(vaddr);

-                        switch (this->opType) {
-                          case Enums::MO_AADD:
-                          case Enums::MO_ANRADD:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                wavefront->ldsChunk->read<c0>(vaddr) + (*e));
-                            break;
-                          case Enums::MO_ASUB:
-                          case Enums::MO_ANRSUB:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                wavefront->ldsChunk->read<c0>(vaddr) - (*e));
-                            break;
-                          case Enums::MO_AMAX:
-                          case Enums::MO_ANRMAX:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                std::max(wavefront->ldsChunk->read<c0>(vaddr),
-                                (*e)));
-                            break;
-                          case Enums::MO_AMIN:
-                          case Enums::MO_ANRMIN:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                std::min(wavefront->ldsChunk->read<c0>(vaddr),
-                                (*e)));
-                            break;
-                          case Enums::MO_AAND:
-                          case Enums::MO_ANRAND:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                wavefront->ldsChunk->read<c0>(vaddr) & (*e));
-                            break;
-                          case Enums::MO_AOR:
-                          case Enums::MO_ANROR:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                wavefront->ldsChunk->read<c0>(vaddr) | (*e));
-                            break;
-                          case Enums::MO_AXOR:
-                          case Enums::MO_ANRXOR:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
-                            break;
-                          case Enums::MO_AINC:
-                          case Enums::MO_ANRINC:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                wavefront->ldsChunk->read<c0>(vaddr) + 1);
-                            break;
-                          case Enums::MO_ADEC:
-                          case Enums::MO_ANRDEC:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                wavefront->ldsChunk->read<c0>(vaddr) - 1);
-                            break;
-                          case Enums::MO_AEXCH:
-                          case Enums::MO_ANREXCH:
-                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
-                            break;
-                          case Enums::MO_ACAS:
-                          case Enums::MO_ANRCAS:
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                                (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
-                                (*f) : wavefront->ldsChunk->read<c0>(vaddr));
-                            break;
-                          default:
-                            fatal("Unrecognized or invalid HSAIL atomic op "
-                                  "type.\n");
-                            break;
-                        }
+                        if (this->isAtomicAdd()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                wavefront->ldsChunk->read<c0>(vaddr) + (*e));
+                        } else if (this->isAtomicSub()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                wavefront->ldsChunk->read<c0>(vaddr) - (*e));
+                        } else if (this->isAtomicMax()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                std::max(wavefront->ldsChunk->read<c0>(vaddr),
+                                (*e)));
+                        } else if (this->isAtomicMin()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                std::min(wavefront->ldsChunk->read<c0>(vaddr),
+                                (*e)));
+                        } else if (this->isAtomicAnd()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                wavefront->ldsChunk->read<c0>(vaddr) & (*e));
+                        } else if (this->isAtomicOr()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                wavefront->ldsChunk->read<c0>(vaddr) | (*e));
+                        } else if (this->isAtomicXor()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
+                        } else if (this->isAtomicInc()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                wavefront->ldsChunk->read<c0>(vaddr) + 1);
+                        } else if (this->isAtomicDec()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                wavefront->ldsChunk->read<c0>(vaddr) - 1);
+                        } else if (this->isAtomicExch()) {
+                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
+                        } else if (this->isAtomicCAS()) {
+                            wavefront->ldsChunk->write<c0>(vaddr,
+                                (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
+                                (*f) : wavefront->ldsChunk->read<c0>(vaddr));
+                        } else {
+                            fatal("Unrecognized or invalid HSAIL atomic op "
+                                  "type.\n");
+                        }
                     } else {
                         Request *req =
                             new Request(0, vaddr, sizeof(c0), 0,
                                         gpuDynInst->computeUnit()->masterId(),
                                         0, gpuDynInst->wfDynId,
                                         gpuDynInst->makeAtomicOpFunctor<c0>(e,
-                                        f, this->opType));
+                                        f));

                         gpuDynInst->setRequestFlags(req);
                         PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                         pkt->dataStatic(d);

                         if (gpuDynInst->computeUnit()->shader->
                             separate_acquire_release &&
-                            (gpuDynInst->memoryOrder ==
-                            Enums::MEMORY_ORDER_SC_ACQUIRE)) {
+                            (gpuDynInst->isAcquire())) {
                             // if this atomic has acquire semantics,
                             // schedule the continuation to perform an
                             // acquire after the RMW completes
                             gpuDynInst->execContinuation =
                                 &GPUStaticInst::execAtomicAcq;

                             gpuDynInst->useContinuation = true;
                         } else {

--- 16 unchanged lines hidden ---

         // execAtomicACq will always be called through a continuation.
         // see comment for execContinuation in gpu_dyn_inst.hh
         void
         execAtomicAcq(GPUDynInstPtr gpuDynInst) override
         {
             // after performing the RMW, check to see if this instruction
             // has acquire semantics, and if so, issue an acquire
-            if (!isLocalMem()) {
+            if (!this->isLocalMem()) {
                 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
-                    && gpuDynInst->memoryOrder ==
-                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
+                    && gpuDynInst->isAcquire()) {
                     gpuDynInst->statusBitVector = VectorMask(1);

                     // the request will be finished when
                     // the acquire completes
                     gpuDynInst->useContinuation = false;
                     // create request
                     Request *req = new Request(0, 0, 0, 0,
                         gpuDynInst->computeUnit()->masterId(),

--- 97 unchanged lines hidden ---
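
Every hunk above follows one recurring pattern: revision 11536 classified each memory instruction through Enums::OT_* / Enums::MemOpType values (o_type, opType) and compared gpuDynInst->memoryOrder against Enums::MEMORY_ORDER_* at execution time, while revision 11692 records the same information once, at decode, as per-instruction flags (setFlag(Load), setFlag(GlobalSegment), setFlag(Acquire), ...) and queries it through predicates such as isAcquire(), isRelease(), isAtomicAdd(), or isLocalMem(). The sketch below is only a minimal illustration of that flag-based style under assumed, simplified names; it is not the actual gem5 GPUStaticInst interface, which defines many more flags and helpers.

// Illustrative sketch only: a toy flag scheme in the spirit of the new
// revision. All class and enum names here are hypothetical stand-ins.
#include <bitset>
#include <cassert>

enum Flag { Load, Store, Acquire, Release, GroupSegment, GlobalSegment, NumFlags };

class ToyStaticInst
{
  public:
    // decode-time: remember a property by setting its bit
    void setFlag(Flag f) { flags.set(f); }

    // execute-time: query properties instead of comparing enum codes
    bool isLoad() const { return flags[Load]; }
    bool isAcquire() const { return flags[Acquire]; }
    bool isRelease() const { return flags[Release]; }
    // group-segment (LDS) accesses are what the removed per-class
    // "segment == BRIG_SEGMENT_GROUP" overrides used to detect
    bool isLocalMem() const { return flags[GroupSegment]; }

  private:
    std::bitset<NumFlags> flags;
};

int main()
{
    ToyStaticInst ld;
    ld.setFlag(Load);
    ld.setFlag(Acquire);
    ld.setFlag(GlobalSegment);

    // checks that used to be enum comparisons become simple flag queries
    assert(ld.isLoad() && ld.isAcquire() && !ld.isLocalMem());
    return 0;
}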