mem_impl.hh: 11534:7106f550afad → 11639:2e8d4bd8108d (changed lines shown as -/+)
1 /*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 45 unchanged lines hidden ---

54
55 template<typename DestDataType, typename AddrRegOperandType>
56 void
57 LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
58 {
59 Wavefront *w = gpuDynInst->wavefront();
60
61 typedef typename DestDataType::CType CType M5_VAR_USED;
- 62 const VectorMask &mask = w->get_pred();
+ 62 const VectorMask &mask = w->getPred();
63 std::vector<Addr> addr_vec;
64 addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
65 this->addr.calcVector(w, addr_vec);
66
67 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
68 if (mask[lane]) {
69 this->dest.set(w, lane, addr_vec[lane]);
70 }

--- 83 unchanged lines hidden ---
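
Note: the per-lane loop above (lines 67-70) is the predicated-execution pattern used by every memory instruction in this file: an operation only takes effect in lanes whose predicate bit is set. A minimal standalone sketch, with std::bitset standing in for gem5's VectorMask and a fixed wavefront width (both assumptions for illustration only):

    #include <bitset>
    #include <cstdint>
    #include <vector>

    constexpr int kWfSize = 64;               // assumed wavefront width
    using VectorMask = std::bitset<kWfSize>;  // stand-in for gem5's VectorMask

    // Write addr_vec[lane] into dest[lane] only for active lanes;
    // inactive lanes keep their previous contents.
    void setDestPerLane(const VectorMask &mask,
                        const std::vector<uint64_t> &addr_vec,
                        std::vector<uint64_t> &dest)
    {
        for (int lane = 0; lane < kWfSize; ++lane) {
            if (mask[lane])
                dest[lane] = addr_vec[lane];
        }
    }
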

154 typename AddrRegOperandType>
155 void
156 LdInst<MemDataType, DestDataType,
157 AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
158 {
159 Wavefront *w = gpuDynInst->wavefront();
160
161 typedef typename MemDataType::CType MemCType;
- 162 const VectorMask &mask = w->get_pred();
+ 162 const VectorMask &mask = w->getPred();
163
164 // Kernarg references are handled uniquely for now (no Memory Request
165 // is used), so special-case them up front. Someday we should
166 // make this more realistic, at which we should get rid of this
167 // block and fold this case into the switch below.
168 if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
169 MemCType val;
170

--- 54 unchanged lines hidden ---

225 for (int i = 0; i < num_dest_operands; ++i) {
226 m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
227 }
228 }
229
230 m->simdId = w->simdId;
231 m->wfSlotId = w->wfSlotId;
232 m->wfDynId = w->wfDynId;
- 233 m->kern_id = w->kern_id;
+ 233 m->kern_id = w->kernId;
234 m->cu_id = w->computeUnit->cu_id;
235 m->latency.init(&w->computeUnit->shader->tick_cnt);
236
237 switch (this->segment) {
238 case Brig::BRIG_SEGMENT_GLOBAL:
239 m->s_type = SEG_GLOBAL;
240 m->pipeId = GLBMEM_PIPE;
241 m->latency.set(w->computeUnit->shader->ticks(1));

--- 14 unchanged lines hidden ---

256 this);
257
258 m->addr[lane] = privAddr;
259 }
260 }
261 }
262
263 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 264 w->outstanding_reqs_rd_gm++;
- 265 w->rd_gm_reqs_in_pipe--;
+ 264 w->outstandingReqsRdGm++;
+ 265 w->rdGmReqsInPipe--;
266 break;
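
Note: each dispatched load moves through two counter pairs: the segment-specific pair updated in its case arm (here outstandingReqsRdGm / rdGmReqsInPipe) and the aggregate pair updated once after the switch (outstandingReqs / memReqsInPipe, lines 344-345). A hedged sketch of that discipline, using a hypothetical struct rather than gem5's Wavefront class:

    // Hypothetical condensation of the Wavefront counter discipline in
    // this file; field names follow the new camelCase scheme.
    struct ReqCounters {
        int rdGmReqsInPipe = 0;      // global loads decoded, not yet issued
        int outstandingReqsRdGm = 0; // global loads issued, awaiting data
        int memReqsInPipe = 0;       // all memory ops decoded, not yet issued
        int outstandingReqs = 0;     // all memory ops in flight

        void issueGlobalLoad()
        {
            // A request leaves the "in pipe" stage and becomes outstanding.
            ++outstandingReqsRdGm; --rdGmReqsInPipe;
            ++outstandingReqs;     --memReqsInPipe;
        }
    };
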
267
268 case Brig::BRIG_SEGMENT_SPILL:
269 assert(num_dest_operands == 1);
270 m->s_type = SEG_SPILL;
271 m->pipeId = GLBMEM_PIPE;
272 m->latency.set(w->computeUnit->shader->ticks(1));
273 {
274 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
275 // note: this calculation will NOT WORK if the compiler
276 // ever generates loads/stores to the same address with
277 // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
278 if (mask[lane]) {
279 assert(m->addr[lane] < w->spillSizePerItem);
280
281 m->addr[lane] = m->addr[lane] * w->spillWidth +
282 lane * sizeof(MemCType) + w->spillBase;
283
- 284 w->last_addr[lane] = m->addr[lane];
+ 284 w->lastAddr[lane] = m->addr[lane];
285 }
286 }
287 }
288
289 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 290 w->outstanding_reqs_rd_gm++;
- 291 w->rd_gm_reqs_in_pipe--;
+ 290 w->outstandingReqsRdGm++;
+ 291 w->rdGmReqsInPipe--;
292 break;
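
Note: the spill mapping above (lines 281-282) scales the per-work-item slot offset by spillWidth and interleaves lanes at element granularity, which is why the comment warns against mixed-width accesses to the same slot: a different sizeof(MemCType) shifts every lane's byte offset, so the same slot would no longer map to the same bytes. A worked sketch with made-up values (spillWidth, spillBase, and the slot offset are illustrative assumptions, not gem5 defaults):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint64_t spillWidth = 256;     // assumed bytes per spill row
        const uint64_t spillBase  = 0x10000; // assumed segment base
        const uint64_t slot       = 2;       // per-lane offset from the ISA
        using MemCType = uint32_t;           // e.g., a ld_u32 spill access

        // Each lane's copy of the slot lands at a distinct address.
        for (int lane = 0; lane < 4; ++lane) {
            uint64_t addr = slot * spillWidth
                          + lane * sizeof(MemCType) + spillBase;
            std::printf("lane %d -> %#llx\n", lane,
                        (unsigned long long)addr);
        }
        return 0;
    }
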
293
294 case Brig::BRIG_SEGMENT_GROUP:
295 m->s_type = SEG_SHARED;
296 m->pipeId = LDSMEM_PIPE;
297 m->latency.set(w->computeUnit->shader->ticks(24));
298 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- 299 w->outstanding_reqs_rd_lm++;
- 300 w->rd_lm_reqs_in_pipe--;
+ 299 w->outstandingReqsRdLm++;
+ 300 w->rdLmReqsInPipe--;
301 break;
302
303 case Brig::BRIG_SEGMENT_READONLY:
304 m->s_type = SEG_READONLY;
305 m->pipeId = GLBMEM_PIPE;
306 m->latency.set(w->computeUnit->shader->ticks(1));
307
308 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
309 if (mask[lane]) {
310 assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
311 m->addr[lane] += w->roBase;
312 }
313 }
314
315 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 316 w->outstanding_reqs_rd_gm++;
- 317 w->rd_gm_reqs_in_pipe--;
+ 316 w->outstandingReqsRdGm++;
+ 317 w->rdGmReqsInPipe--;
318 break;
319
320 case Brig::BRIG_SEGMENT_PRIVATE:
321 m->s_type = SEG_PRIVATE;
322 m->pipeId = GLBMEM_PIPE;
323 m->latency.set(w->computeUnit->shader->ticks(1));
324 {
325 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
326 if (mask[lane]) {
327 assert(m->addr[lane] < w->privSizePerItem);
328
329 m->addr[lane] = m->addr[lane] +
330 lane * sizeof(MemCType) + w->privBase;
331 }
332 }
333 }
334 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 335 w->outstanding_reqs_rd_gm++;
- 336 w->rd_gm_reqs_in_pipe--;
+ 335 w->outstandingReqsRdGm++;
+ 336 w->rdGmReqsInPipe--;
337 break;
338
339 default:
340 fatal("Load to unsupported segment %d %llxe\n", this->segment,
341 m->addr[0]);
342 }
343
- 344 w->outstanding_reqs++;
- 345 w->mem_reqs_in_pipe--;
+ 344 w->outstandingReqs++;
+ 345 w->memReqsInPipe--;
346 }
347
348 template<typename OperationType, typename SrcDataType,
349 typename AddrRegOperandType>
350 void
351 StInst<OperationType, SrcDataType,
352 AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
353 {
354 Wavefront *w = gpuDynInst->wavefront();
355
356 typedef typename OperationType::CType CType;
357
- 358 const VectorMask &mask = w->get_pred();
+ 358 const VectorMask &mask = w->getPred();
359
360 // arg references are handled uniquely for now (no Memory Request
361 // is used), so special-case them up front. Someday we should
362 // make this more realistic, at which we should get rid of this
363 // block and fold this case into the switch below.
364 if (this->segment == Brig::BRIG_SEGMENT_ARG) {
365 uint64_t address = this->addr.calcUniform();
366

--- 47 unchanged lines hidden ---

414
415 m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
416
417 m->scope = getGenericMemoryScope(this->memoryScope);
418
419 m->simdId = w->simdId;
420 m->wfSlotId = w->wfSlotId;
421 m->wfDynId = w->wfDynId;
- 422 m->kern_id = w->kern_id;
+ 422 m->kern_id = w->kernId;
423 m->cu_id = w->computeUnit->cu_id;
424 m->latency.init(&w->computeUnit->shader->tick_cnt);
425
426 switch (this->segment) {
427 case Brig::BRIG_SEGMENT_GLOBAL:
428 m->s_type = SEG_GLOBAL;
429 m->pipeId = GLBMEM_PIPE;
430 m->latency.set(w->computeUnit->shader->ticks(1));

--- 12 unchanged lines hidden ---

443 this);
444
445 m->addr[lane] = privAddr;
446 }
447 }
448 }
449
450 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 451 w->outstanding_reqs_wr_gm++;
- 452 w->wr_gm_reqs_in_pipe--;
+ 451 w->outstandingReqsWrGm++;
+ 452 w->wrGmReqsInPipe--;
453 break;
454
455 case Brig::BRIG_SEGMENT_SPILL:
456 assert(num_src_operands == 1);
457 m->s_type = SEG_SPILL;
458 m->pipeId = GLBMEM_PIPE;
459 m->latency.set(w->computeUnit->shader->ticks(1));
460 {
461 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
462 if (mask[lane]) {
463 assert(m->addr[lane] < w->spillSizePerItem);
464
465 m->addr[lane] = m->addr[lane] * w->spillWidth +
466 lane * sizeof(CType) + w->spillBase;
467 }
468 }
469 }
470
471 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 472 w->outstanding_reqs_wr_gm++;
- 473 w->wr_gm_reqs_in_pipe--;
+ 472 w->outstandingReqsWrGm++;
+ 473 w->wrGmReqsInPipe--;
474 break;
475
476 case Brig::BRIG_SEGMENT_GROUP:
477 m->s_type = SEG_SHARED;
478 m->pipeId = LDSMEM_PIPE;
479 m->latency.set(w->computeUnit->shader->ticks(24));
480 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- 481 w->outstanding_reqs_wr_lm++;
- 482 w->wr_lm_reqs_in_pipe--;
+ 481 w->outstandingReqsWrLm++;
+ 482 w->wrLmReqsInPipe--;
483 break;
484
485 case Brig::BRIG_SEGMENT_PRIVATE:
486 m->s_type = SEG_PRIVATE;
487 m->pipeId = GLBMEM_PIPE;
488 m->latency.set(w->computeUnit->shader->ticks(1));
489 {
490 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
491 if (mask[lane]) {
492 assert(m->addr[lane] < w->privSizePerItem);
493 m->addr[lane] = m->addr[lane] + lane *
494 sizeof(CType)+w->privBase;
495 }
496 }
497 }
498
499 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 500 w->outstanding_reqs_wr_gm++;
- 501 w->wr_gm_reqs_in_pipe--;
+ 500 w->outstandingReqsWrGm++;
+ 501 w->wrGmReqsInPipe--;
502 break;
503
504 default:
505 fatal("Store to unsupported segment %d\n", this->segment);
506 }
507
- 508 w->outstanding_reqs++;
- 509 w->mem_reqs_in_pipe--;
+ 508 w->outstandingReqs++;
+ 509 w->memReqsInPipe--;
510 }
511
512 template<typename OperationType, typename SrcDataType,
513 typename AddrRegOperandType>
514 void
515 StInst<OperationType, SrcDataType,
516 AddrRegOperandType>::generateDisassembly()
517 {

--- 73 unchanged lines hidden ---

591
592 if (HasDst) {
593 m->dst_reg = this->dest.regIndex();
594 }
595
596 m->simdId = w->simdId;
597 m->wfSlotId = w->wfSlotId;
598 m->wfDynId = w->wfDynId;
- 599 m->kern_id = w->kern_id;
+ 599 m->kern_id = w->kernId;
600 m->cu_id = w->computeUnit->cu_id;
601 m->latency.init(&w->computeUnit->shader->tick_cnt);
602
603 switch (this->segment) {
604 case Brig::BRIG_SEGMENT_GLOBAL:
605 m->s_type = SEG_GLOBAL;
606 m->latency.set(w->computeUnit->shader->ticks(64));
607 m->pipeId = GLBMEM_PIPE;
608
609 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 610 w->outstanding_reqs_wr_gm++;
- 611 w->wr_gm_reqs_in_pipe--;
- 612 w->outstanding_reqs_rd_gm++;
- 613 w->rd_gm_reqs_in_pipe--;
+ 610 w->outstandingReqsWrGm++;
+ 611 w->wrGmReqsInPipe--;
+ 612 w->outstandingReqsRdGm++;
+ 613 w->rdGmReqsInPipe--;
614 break;
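
Note: an atomic is a read-modify-write, so the global case above (lines 610-613) charges it to both the read and the write counter pairs, unlike a plain load or store. A brief sketch under the same hypothetical-struct assumption as the earlier ReqCounters example:

    // Illustrative only: an atomic consumes a read slot and a write
    // slot in the same issue step.
    struct AtomicReqCounters {
        int outstandingReqsRdGm = 0, rdGmReqsInPipe = 0;
        int outstandingReqsWrGm = 0, wrGmReqsInPipe = 0;

        void issueGlobalAtomic()
        {
            ++outstandingReqsWrGm; --wrGmReqsInPipe; // write half of the RMW
            ++outstandingReqsRdGm; --rdGmReqsInPipe; // read half of the RMW
        }
    };
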
615
616 case Brig::BRIG_SEGMENT_GROUP:
617 m->s_type = SEG_SHARED;
618 m->pipeId = LDSMEM_PIPE;
619 m->latency.set(w->computeUnit->shader->ticks(24));
620 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- 621 w->outstanding_reqs_wr_lm++;
- 622 w->wr_lm_reqs_in_pipe--;
- 623 w->outstanding_reqs_rd_lm++;
- 624 w->rd_lm_reqs_in_pipe--;
+ 621 w->outstandingReqsWrLm++;
+ 622 w->wrLmReqsInPipe--;
+ 623 w->outstandingReqsRdLm++;
+ 624 w->rdLmReqsInPipe--;
625 break;
626
627 default:
628 fatal("Atomic op to unsupported segment %d\n",
629 this->segment);
630 }
631
- 632 w->outstanding_reqs++;
- 633 w->mem_reqs_in_pipe--;
+ 632 w->outstandingReqs++;
+ 633 w->memReqsInPipe--;
634 }
635
636 const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
637
638 template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
639 bool HasDst>
640 void
641 AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,

--- 23 unchanged lines hidden ---