mem_impl.hh: 11534:7106f550afad → 11639:2e8d4bd8108d (changed lines shown as -/+)
1 /*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 45 unchanged lines hidden ---

54
55 template<typename DestDataType, typename AddrRegOperandType>
56 void
57 LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
58 {
59 Wavefront *w = gpuDynInst->wavefront();
60
61 typedef typename DestDataType::CType CType M5_VAR_USED;
- 62 const VectorMask &mask = w->get_pred();
+ 62 const VectorMask &mask = w->getPred();
63 std::vector<Addr> addr_vec;
64 addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
65 this->addr.calcVector(w, addr_vec);
66
67 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
68 if (mask[lane]) {
69 this->dest.set(w, lane, addr_vec[lane]);
70 }

--- 83 unchanged lines hidden ---
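
Note: the per-lane loop above (lines 67-70) is the predicated-execution pattern used by every memory instruction in this file: an operation only takes effect in lanes whose predicate bit is set. A minimal standalone sketch, with std::bitset standing in for gem5's VectorMask and a fixed wavefront width (both assumptions for illustration only):

    #include <bitset>
    #include <cstdint>
    #include <vector>

    constexpr int kWfSize = 64;               // assumed wavefront width
    using VectorMask = std::bitset<kWfSize>;  // stand-in for gem5's VectorMask

    // Write addr_vec[lane] into dest[lane] only for active lanes;
    // inactive lanes keep their previous contents.
    void setDestPerLane(const VectorMask &mask,
                        const std::vector<uint64_t> &addr_vec,
                        std::vector<uint64_t> &dest)
    {
        for (int lane = 0; lane < kWfSize; ++lane) {
            if (mask[lane])
                dest[lane] = addr_vec[lane];
        }
    }
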

154 typename AddrRegOperandType>
155 void
156 LdInst<MemDataType, DestDataType,
157 AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
158 {
159 Wavefront *w = gpuDynInst->wavefront();
160
161 typedef typename MemDataType::CType MemCType;
- 162 const VectorMask &mask = w->get_pred();
+ 162 const VectorMask &mask = w->getPred();
163
164 // Kernarg references are handled uniquely for now (no Memory Request
165 // is used), so special-case them up front. Someday we should
166 // make this more realistic, at which we should get rid of this
167 // block and fold this case into the switch below.
168 if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
169 MemCType val;
170

--- 54 unchanged lines hidden ---

225 for (int i = 0; i < num_dest_operands; ++i) {
226 m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
227 }
228 }
229
230 m->simdId = w->simdId;
231 m->wfSlotId = w->wfSlotId;
232 m->wfDynId = w->wfDynId;
- 233 m->kern_id = w->kern_id;
+ 233 m->kern_id = w->kernId;
234 m->cu_id = w->computeUnit->cu_id;
235 m->latency.init(&w->computeUnit->shader->tick_cnt);
236
237 switch (this->segment) {
238 case Brig::BRIG_SEGMENT_GLOBAL:
239 m->s_type = SEG_GLOBAL;
240 m->pipeId = GLBMEM_PIPE;
241 m->latency.set(w->computeUnit->shader->ticks(1));

--- 14 unchanged lines hidden ---

256 this);
257
258 m->addr[lane] = privAddr;
259 }
260 }
261 }
262
263 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 264 w->outstanding_reqs_rd_gm++;
- 265 w->rd_gm_reqs_in_pipe--;
+ 264 w->outstandingReqsRdGm++;
+ 265 w->rdGmReqsInPipe--;
266 break;
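
Note: each dispatched load moves through two counter pairs: the segment-specific pair updated in its case arm (here outstandingReqsRdGm / rdGmReqsInPipe) and the aggregate pair updated once after the switch (outstandingReqs / memReqsInPipe, lines 344-345). A hedged sketch of that discipline, using a hypothetical struct rather than gem5's Wavefront class:

    // Hypothetical condensation of the Wavefront counter discipline in
    // this file; field names follow the new camelCase scheme.
    struct ReqCounters {
        int rdGmReqsInPipe = 0;      // global loads decoded, not yet issued
        int outstandingReqsRdGm = 0; // global loads issued, awaiting data
        int memReqsInPipe = 0;       // all memory ops decoded, not yet issued
        int outstandingReqs = 0;     // all memory ops in flight

        void issueGlobalLoad()
        {
            // A request leaves the "in pipe" stage and becomes outstanding.
            ++outstandingReqsRdGm; --rdGmReqsInPipe;
            ++outstandingReqs;     --memReqsInPipe;
        }
    };
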
267
268 case Brig::BRIG_SEGMENT_SPILL:
269 assert(num_dest_operands == 1);
270 m->s_type = SEG_SPILL;
271 m->pipeId = GLBMEM_PIPE;
272 m->latency.set(w->computeUnit->shader->ticks(1));
273 {
274 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
275 // note: this calculation will NOT WORK if the compiler
276 // ever generates loads/stores to the same address with
277 // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
278 if (mask[lane]) {
279 assert(m->addr[lane] < w->spillSizePerItem);
280
281 m->addr[lane] = m->addr[lane] * w->spillWidth +
282 lane * sizeof(MemCType) + w->spillBase;
283
- 284 w->last_addr[lane] = m->addr[lane];
+ 284 w->lastAddr[lane] = m->addr[lane];
285 }
286 }
287 }
288
289 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 290 w->outstanding_reqs_rd_gm++;
- 291 w->rd_gm_reqs_in_pipe--;
+ 290 w->outstandingReqsRdGm++;
+ 291 w->rdGmReqsInPipe--;
292 break;
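
Note: the spill mapping above (lines 281-282) scales the per-work-item slot offset by spillWidth and interleaves lanes at element granularity, which is why the comment warns against mixed-width accesses to the same slot: a different sizeof(MemCType) shifts every lane's byte offset, so the same slot would no longer map to the same bytes. A worked sketch with made-up values (spillWidth, spillBase, and the slot offset are illustrative assumptions, not gem5 defaults):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint64_t spillWidth = 256;     // assumed bytes per spill row
        const uint64_t spillBase  = 0x10000; // assumed segment base
        const uint64_t slot       = 2;       // per-lane offset from the ISA
        using MemCType = uint32_t;           // e.g., a ld_u32 spill access

        // Each lane's copy of the slot lands at a distinct address.
        for (int lane = 0; lane < 4; ++lane) {
            uint64_t addr = slot * spillWidth
                          + lane * sizeof(MemCType) + spillBase;
            std::printf("lane %d -> %#llx\n", lane,
                        (unsigned long long)addr);
        }
        return 0;
    }
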
293
294 case Brig::BRIG_SEGMENT_GROUP:
295 m->s_type = SEG_SHARED;
296 m->pipeId = LDSMEM_PIPE;
297 m->latency.set(w->computeUnit->shader->ticks(24));
298 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- 299 w->outstanding_reqs_rd_lm++;
- 300 w->rd_lm_reqs_in_pipe--;
+ 299 w->outstandingReqsRdLm++;
+ 300 w->rdLmReqsInPipe--;
301 break;
302
303 case Brig::BRIG_SEGMENT_READONLY:
304 m->s_type = SEG_READONLY;
305 m->pipeId = GLBMEM_PIPE;
306 m->latency.set(w->computeUnit->shader->ticks(1));
307
308 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
309 if (mask[lane]) {
310 assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
311 m->addr[lane] += w->roBase;
312 }
313 }
314
315 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 316 w->outstanding_reqs_rd_gm++;
- 317 w->rd_gm_reqs_in_pipe--;
+ 316 w->outstandingReqsRdGm++;
+ 317 w->rdGmReqsInPipe--;
318 break;
319
320 case Brig::BRIG_SEGMENT_PRIVATE:
321 m->s_type = SEG_PRIVATE;
322 m->pipeId = GLBMEM_PIPE;
323 m->latency.set(w->computeUnit->shader->ticks(1));
324 {
325 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
326 if (mask[lane]) {
327 assert(m->addr[lane] < w->privSizePerItem);
328
329 m->addr[lane] = m->addr[lane] +
330 lane * sizeof(MemCType) + w->privBase;
331 }
332 }
333 }
334 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 335 w->outstanding_reqs_rd_gm++;
- 336 w->rd_gm_reqs_in_pipe--;
+ 335 w->outstandingReqsRdGm++;
+ 336 w->rdGmReqsInPipe--;
337 break;
338
339 default:
340 fatal("Load to unsupported segment %d %llxe\n", this->segment,
341 m->addr[0]);
342 }
343
- 344 w->outstanding_reqs++;
- 345 w->mem_reqs_in_pipe--;
+ 344 w->outstandingReqs++;
+ 345 w->memReqsInPipe--;
346 }
347
348 template<typename OperationType, typename SrcDataType,
349 typename AddrRegOperandType>
350 void
351 StInst<OperationType, SrcDataType,
352 AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
353 {
354 Wavefront *w = gpuDynInst->wavefront();
355
356 typedef typename OperationType::CType CType;
357
- 358 const VectorMask &mask = w->get_pred();
+ 358 const VectorMask &mask = w->getPred();
359
360 // arg references are handled uniquely for now (no Memory Request
361 // is used), so special-case them up front. Someday we should
362 // make this more realistic, at which we should get rid of this
363 // block and fold this case into the switch below.
364 if (this->segment == Brig::BRIG_SEGMENT_ARG) {
365 uint64_t address = this->addr.calcUniform();
366

--- 47 unchanged lines hidden ---

414
415 m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);
416
417 m->scope = getGenericMemoryScope(this->memoryScope);
418
419 m->simdId = w->simdId;
420 m->wfSlotId = w->wfSlotId;
421 m->wfDynId = w->wfDynId;
- 422 m->kern_id = w->kern_id;
+ 422 m->kern_id = w->kernId;
423 m->cu_id = w->computeUnit->cu_id;
424 m->latency.init(&w->computeUnit->shader->tick_cnt);
425
426 switch (this->segment) {
427 case Brig::BRIG_SEGMENT_GLOBAL:
428 m->s_type = SEG_GLOBAL;
429 m->pipeId = GLBMEM_PIPE;
430 m->latency.set(w->computeUnit->shader->ticks(1));

--- 12 unchanged lines hidden ---

443 this);
444
445 m->addr[lane] = privAddr;
446 }
447 }
448 }
449
450 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 451 w->outstanding_reqs_wr_gm++;
- 452 w->wr_gm_reqs_in_pipe--;
+ 451 w->outstandingReqsWrGm++;
+ 452 w->wrGmReqsInPipe--;
453 break;
454
455 case Brig::BRIG_SEGMENT_SPILL:
456 assert(num_src_operands == 1);
457 m->s_type = SEG_SPILL;
458 m->pipeId = GLBMEM_PIPE;
459 m->latency.set(w->computeUnit->shader->ticks(1));
460 {
461 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
462 if (mask[lane]) {
463 assert(m->addr[lane] < w->spillSizePerItem);
464
465 m->addr[lane] = m->addr[lane] * w->spillWidth +
466 lane * sizeof(CType) + w->spillBase;
467 }
468 }
469 }
470
471 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 472 w->outstanding_reqs_wr_gm++;
- 473 w->wr_gm_reqs_in_pipe--;
+ 472 w->outstandingReqsWrGm++;
+ 473 w->wrGmReqsInPipe--;
474 break;
475
476 case Brig::BRIG_SEGMENT_GROUP:
477 m->s_type = SEG_SHARED;
478 m->pipeId = LDSMEM_PIPE;
479 m->latency.set(w->computeUnit->shader->ticks(24));
480 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- 481 w->outstanding_reqs_wr_lm++;
- 482 w->wr_lm_reqs_in_pipe--;
+ 481 w->outstandingReqsWrLm++;
+ 482 w->wrLmReqsInPipe--;
483 break;
484
485 case Brig::BRIG_SEGMENT_PRIVATE:
486 m->s_type = SEG_PRIVATE;
487 m->pipeId = GLBMEM_PIPE;
488 m->latency.set(w->computeUnit->shader->ticks(1));
489 {
490 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
491 if (mask[lane]) {
492 assert(m->addr[lane] < w->privSizePerItem);
493 m->addr[lane] = m->addr[lane] + lane *
494 sizeof(CType)+w->privBase;
495 }
496 }
497 }
498
499 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 500 w->outstanding_reqs_wr_gm++;
- 501 w->wr_gm_reqs_in_pipe--;
+ 500 w->outstandingReqsWrGm++;
+ 501 w->wrGmReqsInPipe--;
502 break;
503
504 default:
505 fatal("Store to unsupported segment %d\n", this->segment);
506 }
507
- 508 w->outstanding_reqs++;
- 509 w->mem_reqs_in_pipe--;
+ 508 w->outstandingReqs++;
+ 509 w->memReqsInPipe--;
510 }
511
512 template<typename OperationType, typename SrcDataType,
513 typename AddrRegOperandType>
514 void
515 StInst<OperationType, SrcDataType,
516 AddrRegOperandType>::generateDisassembly()
517 {

--- 73 unchanged lines hidden ---

591
592 if (HasDst) {
593 m->dst_reg = this->dest.regIndex();
594 }
595
596 m->simdId = w->simdId;
597 m->wfSlotId = w->wfSlotId;
598 m->wfDynId = w->wfDynId;
- 599 m->kern_id = w->kern_id;
+ 599 m->kern_id = w->kernId;
600 m->cu_id = w->computeUnit->cu_id;
601 m->latency.init(&w->computeUnit->shader->tick_cnt);
602
603 switch (this->segment) {
604 case Brig::BRIG_SEGMENT_GLOBAL:
605 m->s_type = SEG_GLOBAL;
606 m->latency.set(w->computeUnit->shader->ticks(64));
607 m->pipeId = GLBMEM_PIPE;
608
609 w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
- 610 w->outstanding_reqs_wr_gm++;
- 611 w->wr_gm_reqs_in_pipe--;
- 612 w->outstanding_reqs_rd_gm++;
- 613 w->rd_gm_reqs_in_pipe--;
+ 610 w->outstandingReqsWrGm++;
+ 611 w->wrGmReqsInPipe--;
+ 612 w->outstandingReqsRdGm++;
+ 613 w->rdGmReqsInPipe--;
614 break;
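
Note: an atomic is a read-modify-write, so the global case above (lines 610-613) charges it to both the read and the write counter pairs, unlike a plain load or store. A brief sketch under the same hypothetical-struct assumption as the earlier ReqCounters example:

    // Illustrative only: an atomic consumes a read slot and a write
    // slot in the same issue step.
    struct AtomicReqCounters {
        int outstandingReqsRdGm = 0, rdGmReqsInPipe = 0;
        int outstandingReqsWrGm = 0, wrGmReqsInPipe = 0;

        void issueGlobalAtomic()
        {
            ++outstandingReqsWrGm; --wrGmReqsInPipe; // write half of the RMW
            ++outstandingReqsRdGm; --rdGmReqsInPipe; // read half of the RMW
        }
    };
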
615
616 case Brig::BRIG_SEGMENT_GROUP:
617 m->s_type = SEG_SHARED;
618 m->pipeId = LDSMEM_PIPE;
619 m->latency.set(w->computeUnit->shader->ticks(24));
620 w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- 621 w->outstanding_reqs_wr_lm++;
- 622 w->wr_lm_reqs_in_pipe--;
- 623 w->outstanding_reqs_rd_lm++;
- 624 w->rd_lm_reqs_in_pipe--;
+ 621 w->outstandingReqsWrLm++;
+ 622 w->wrLmReqsInPipe--;
+ 623 w->outstandingReqsRdLm++;
+ 624 w->rdLmReqsInPipe--;
625 break;
626
627 default:
628 fatal("Atomic op to unsupported segment %d\n",
629 this->segment);
630 }
631
- 632 w->outstanding_reqs++;
- 633 w->mem_reqs_in_pipe--;
+ 632 w->outstandingReqs++;
+ 633 w->memReqsInPipe--;
634 }
635
636 const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
637
638 template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
639 bool HasDst>
640 void
641 AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,

--- 23 unchanged lines hidden ---