compute_unit.cc (11692:e772fdcd3809) compute_unit.cc (11695:0a65922d564d)
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 1394 unchanged lines hidden (view full) ---

1403 }
1404}
1405
1406void
1407ComputeUnit::regStats()
1408{
1409 MemObject::regStats();
1410
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:

--- 1394 unchanged lines hidden (view full) ---

1403 }
1404}
1405
1406void
1407ComputeUnit::regStats()
1408{
1409 MemObject::regStats();
1410
1411 vALUInsts
1412 .name(name() + ".valu_insts")
1413 .desc("Number of vector ALU insts issued.")
1414 ;
1415 vALUInstsPerWF
1416 .name(name() + ".valu_insts_per_wf")
1417 .desc("The avg. number of vector ALU insts issued per-wavefront.")
1418 ;
1419 sALUInsts
1420 .name(name() + ".salu_insts")
1421 .desc("Number of scalar ALU insts issued.")
1422 ;
1423 sALUInstsPerWF
1424 .name(name() + ".salu_insts_per_wf")
1425 .desc("The avg. number of scalar ALU insts issued per-wavefront.")
1426 ;
1427 instCyclesVALU
1428 .name(name() + ".inst_cycles_valu")
1429 .desc("Number of cycles needed to execute VALU insts.")
1430 ;
1431 instCyclesSALU
1432 .name(name() + ".inst_cycles_salu")
1433 .desc("Number of cycles needed to execute SALU insts.")
1434 ;
1435 threadCyclesVALU
1436 .name(name() + ".thread_cycles_valu")
1437 .desc("Number of thread cycles used to execute vector ALU ops. "
1438 "Similar to instCyclesVALU but multiplied by the number of "
1439 "active threads.")
1440 ;
1441 vALUUtilization
1442 .name(name() + ".valu_utilization")
1443 .desc("Percentage of active vector ALU threads in a wave.")
1444 ;
1445 ldsNoFlatInsts
1446 .name(name() + ".lds_no_flat_insts")
1447 .desc("Number of LDS insts issued, not including FLAT "
1448 "accesses that resolve to LDS.")
1449 ;
1450 ldsNoFlatInstsPerWF
1451 .name(name() + ".lds_no_flat_insts_per_wf")
1452 .desc("The avg. number of LDS insts (not including FLAT "
1453 "accesses that resolve to LDS) per-wavefront.")
1454 ;
1455 flatVMemInsts
1456 .name(name() + ".flat_vmem_insts")
1457 .desc("The number of FLAT insts that resolve to vmem issued.")
1458 ;
1459 flatVMemInstsPerWF
1460 .name(name() + ".flat_vmem_insts_per_wf")
1461 .desc("The average number of FLAT insts that resolve to vmem "
1462 "issued per-wavefront.")
1463 ;
1464 flatLDSInsts
1465 .name(name() + ".flat_lds_insts")
1466 .desc("The number of FLAT insts that resolve to LDS issued.")
1467 ;
1468 flatLDSInstsPerWF
1469 .name(name() + ".flat_lds_insts_per_wf")
1470 .desc("The average number of FLAT insts that resolve to LDS "
1471 "issued per-wavefront.")
1472 ;
1473 vectorMemWrites
1474 .name(name() + ".vector_mem_writes")
1475 .desc("Number of vector mem write insts (excluding FLAT insts).")
1476 ;
1477 vectorMemWritesPerWF
1478 .name(name() + ".vector_mem_writes_per_wf")
1479 .desc("The average number of vector mem write insts "
1480 "(excluding FLAT insts) per-wavefront.")
1481 ;
1482 vectorMemReads
1483 .name(name() + ".vector_mem_reads")
1484 .desc("Number of vector mem read insts (excluding FLAT insts).")
1485 ;
1486 vectorMemReadsPerWF
1487 .name(name() + ".vector_mem_reads_per_wf")
1488 .desc("The avg. number of vector mem read insts (excluding "
1489 "FLAT insts) per-wavefront.")
1490 ;
1491 scalarMemWrites
1492 .name(name() + ".scalar_mem_writes")
1493 .desc("Number of scalar mem write insts.")
1494 ;
1495 scalarMemWritesPerWF
1496 .name(name() + ".scalar_mem_writes_per_wf")
1497 .desc("The average number of scalar mem write insts per-wavefront.")
1498 ;
1499 scalarMemReads
1500 .name(name() + ".scalar_mem_reads")
1501 .desc("Number of scalar mem read insts.")
1502 ;
1503 scalarMemReadsPerWF
1504 .name(name() + ".scalar_mem_reads_per_wf")
1505 .desc("The average number of scalar mem read insts per-wavefront.")
1506 ;
1507
1508 vALUInstsPerWF = vALUInsts / completedWfs;
1509 sALUInstsPerWF = sALUInsts / completedWfs;
1510 vALUUtilization = (threadCyclesVALU / (64 * instCyclesVALU)) * 100;
1511 ldsNoFlatInstsPerWF = ldsNoFlatInsts / completedWfs;
1512 flatVMemInstsPerWF = flatVMemInsts / completedWfs;
1513 flatLDSInstsPerWF = flatLDSInsts / completedWfs;
1514 vectorMemWritesPerWF = vectorMemWrites / completedWfs;
1515 vectorMemReadsPerWF = vectorMemReads / completedWfs;
1516 scalarMemWritesPerWF = scalarMemWrites / completedWfs;
1517 scalarMemReadsPerWF = scalarMemReads / completedWfs;
1518
1411 tlbCycles
1412 .name(name() + ".tlb_cycles")
1413 .desc("total number of cycles for all uncoalesced requests")
1414 ;
1415
1416 tlbRequests
1417 .name(name() + ".tlb_requests")
1418 .desc("number of uncoalesced requests")

--- 143 unchanged lines hidden (view full) ---

1562 execStage.regStats();
1563
1564 // register stats of memory pipeline
1565 globalMemoryPipe.regStats();
1566 localMemoryPipe.regStats();
1567}
1568
1569void
1519 tlbCycles
1520 .name(name() + ".tlb_cycles")
1521 .desc("total number of cycles for all uncoalesced requests")
1522 ;
1523
1524 tlbRequests
1525 .name(name() + ".tlb_requests")
1526 .desc("number of uncoalesced requests")

--- 143 unchanged lines hidden (view full) ---

1670 execStage.regStats();
1671
1672 // register stats of memory pipeline
1673 globalMemoryPipe.regStats();
1674 localMemoryPipe.regStats();
1675}
1676
1677void
1678ComputeUnit::updateInstStats(GPUDynInstPtr gpuDynInst)
1679{
1680 if (gpuDynInst->isScalar()) {
1681 if (gpuDynInst->isALU() && !gpuDynInst->isWaitcnt()) {
1682 sALUInsts++;
1683 instCyclesSALU++;
1684 } else if (gpuDynInst->isLoad()) {
1685 scalarMemReads++;
1686 } else if (gpuDynInst->isStore()) {
1687 scalarMemWrites++;
1688 }
1689 } else {
1690 if (gpuDynInst->isALU()) {
1691 vALUInsts++;
1692 instCyclesVALU++;
1693 threadCyclesVALU += gpuDynInst->wavefront()->execMask().count();
1694 } else if (gpuDynInst->isFlat()) {
1695 if (gpuDynInst->isLocalMem()) {
1696 flatLDSInsts++;
1697 } else {
1698 flatVMemInsts++;
1699 }
1700 } else if (gpuDynInst->isLocalMem()) {
1701 ldsNoFlatInsts++;
1702 } else if (gpuDynInst->isLoad()) {
1703 vectorMemReads++;
1704 } else if (gpuDynInst->isStore()) {
1705 vectorMemWrites++;
1706 }
1707 }
1708}
1709
1710void
1570ComputeUnit::updatePageDivergenceDist(Addr addr)
1571{
1572 Addr virt_page_addr = roundDown(addr, TheISA::PageBytes);
1573
1574 if (!pagesTouched.count(virt_page_addr))
1575 pagesTouched[virt_page_addr] = 1;
1576 else
1577 pagesTouched[virt_page_addr]++;

--- 202 unchanged lines hidden ---
1711ComputeUnit::updatePageDivergenceDist(Addr addr)
1712{
1713 Addr virt_page_addr = roundDown(addr, TheISA::PageBytes);
1714
1715 if (!pagesTouched.count(virt_page_addr))
1716 pagesTouched[virt_page_addr] = 1;
1717 else
1718 pagesTouched[virt_page_addr]++;

--- 202 unchanged lines hidden ---