compute_unit.cc (11692:e772fdcd3809) | compute_unit.cc (11695:0a65922d564d) |
---|---|
1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 1394 unchanged lines hidden (view full) --- 1403 } 1404} 1405 1406void 1407ComputeUnit::regStats() 1408{ 1409 MemObject::regStats(); 1410 | 1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 1394 unchanged lines hidden (view full) --- 1403 } 1404} 1405 1406void 1407ComputeUnit::regStats() 1408{ 1409 MemObject::regStats(); 1410 |
1411 vALUInsts 1412 .name(name() + ".valu_insts") 1413 .desc("Number of vector ALU insts issued.") 1414 ; 1415 vALUInstsPerWF 1416 .name(name() + ".valu_insts_per_wf") 1417 .desc("The avg. number of vector ALU insts issued per-wavefront.") 1418 ; 1419 sALUInsts 1420 .name(name() + ".salu_insts") 1421 .desc("Number of scalar ALU insts issued.") 1422 ; 1423 sALUInstsPerWF 1424 .name(name() + ".salu_insts_per_wf") 1425 .desc("The avg. number of scalar ALU insts issued per-wavefront.") 1426 ; 1427 instCyclesVALU 1428 .name(name() + ".inst_cycles_valu") 1429 .desc("Number of cycles needed to execute VALU insts.") 1430 ; 1431 instCyclesSALU 1432 .name(name() + ".inst_cycles_salu") 1433 .desc("Number of cycles needed to execute SALU insts.") 1434 ; 1435 threadCyclesVALU 1436 .name(name() + ".thread_cycles_valu") 1437 .desc("Number of thread cycles used to execute vector ALU ops. " 1438 "Similar to instCyclesVALU but multiplied by the number of " 1439 "active threads.") 1440 ; 1441 vALUUtilization 1442 .name(name() + ".valu_utilization") 1443 .desc("Percentage of active vector ALU threads in a wave.") 1444 ; 1445 ldsNoFlatInsts 1446 .name(name() + ".lds_no_flat_insts") 1447 .desc("Number of LDS insts issued, not including FLAT " 1448 "accesses that resolve to LDS.") 1449 ; 1450 ldsNoFlatInstsPerWF 1451 .name(name() + ".lds_no_flat_insts_per_wf") 1452 .desc("The avg. 
number of LDS insts (not including FLAT " 1453 "accesses that resolve to LDS) per-wavefront.") 1454 ; 1455 flatVMemInsts 1456 .name(name() + ".flat_vmem_insts") 1457 .desc("The number of FLAT insts that resolve to vmem issued.") 1458 ; 1459 flatVMemInstsPerWF 1460 .name(name() + ".flat_vmem_insts_per_wf") 1461 .desc("The average number of FLAT insts that resolve to vmem " 1462 "issued per-wavefront.") 1463 ; 1464 flatLDSInsts 1465 .name(name() + ".flat_lds_insts") 1466 .desc("The number of FLAT insts that resolve to LDS issued.") 1467 ; 1468 flatLDSInstsPerWF 1469 .name(name() + ".flat_lds_insts_per_wf") 1470 .desc("The average number of FLAT insts that resolve to LDS " 1471 "issued per-wavefront.") 1472 ; 1473 vectorMemWrites 1474 .name(name() + ".vector_mem_writes") 1475 .desc("Number of vector mem write insts (excluding FLAT insts).") 1476 ; 1477 vectorMemWritesPerWF 1478 .name(name() + ".vector_mem_writes_per_wf") 1479 .desc("The average number of vector mem write insts " 1480 "(excluding FLAT insts) per-wavefront.") 1481 ; 1482 vectorMemReads 1483 .name(name() + ".vector_mem_reads") 1484 .desc("Number of vector mem read insts (excluding FLAT insts).") 1485 ; 1486 vectorMemReadsPerWF 1487 .name(name() + ".vector_mem_reads_per_wf") 1488 .desc("The avg. 
number of vector mem read insts (excluding " 1489 "FLAT insts) per-wavefront.") 1490 ; 1491 scalarMemWrites 1492 .name(name() + ".scalar_mem_writes") 1493 .desc("Number of scalar mem write insts.") 1494 ; 1495 scalarMemWritesPerWF 1496 .name(name() + ".scalar_mem_writes_per_wf") 1497 .desc("The average number of scalar mem write insts per-wavefront.") 1498 ; 1499 scalarMemReads 1500 .name(name() + ".scalar_mem_reads") 1501 .desc("Number of scalar mem read insts.") 1502 ; 1503 scalarMemReadsPerWF 1504 .name(name() + ".scalar_mem_reads_per_wf") 1505 .desc("The average number of scalar mem read insts per-wavefront.") 1506 ; 1507 1508 vALUInstsPerWF = vALUInsts / completedWfs; 1509 sALUInstsPerWF = sALUInsts / completedWfs; 1510 vALUUtilization = (threadCyclesVALU / (64 * instCyclesVALU)) * 100; 1511 ldsNoFlatInstsPerWF = ldsNoFlatInsts / completedWfs; 1512 flatVMemInstsPerWF = flatVMemInsts / completedWfs; 1513 flatLDSInstsPerWF = flatLDSInsts / completedWfs; 1514 vectorMemWritesPerWF = vectorMemWrites / completedWfs; 1515 vectorMemReadsPerWF = vectorMemReads / completedWfs; 1516 scalarMemWritesPerWF = scalarMemWrites / completedWfs; 1517 scalarMemReadsPerWF = scalarMemReads / completedWfs; 1518 |
|
1411 tlbCycles 1412 .name(name() + ".tlb_cycles") 1413 .desc("total number of cycles for all uncoalesced requests") 1414 ; 1415 1416 tlbRequests 1417 .name(name() + ".tlb_requests") 1418 .desc("number of uncoalesced requests") --- 143 unchanged lines hidden (view full) --- 1562 execStage.regStats(); 1563 1564 // register stats of memory pipeline 1565 globalMemoryPipe.regStats(); 1566 localMemoryPipe.regStats(); 1567} 1568 1569void | 1519 tlbCycles 1520 .name(name() + ".tlb_cycles") 1521 .desc("total number of cycles for all uncoalesced requests") 1522 ; 1523 1524 tlbRequests 1525 .name(name() + ".tlb_requests") 1526 .desc("number of uncoalesced requests") --- 143 unchanged lines hidden (view full) --- 1670 execStage.regStats(); 1671 1672 // register stats of memory pipeline 1673 globalMemoryPipe.regStats(); 1674 localMemoryPipe.regStats(); 1675} 1676 1677void |
1678ComputeUnit::updateInstStats(GPUDynInstPtr gpuDynInst) 1679{ /* Classify gpuDynInst and bump the matching instruction-mix counters. The tests form if / else-if chains, so one call updates at most one category, and an instruction that matches none of them is not counted here. */ 1680 if (gpuDynInst->isScalar()) { /* Scalar side: an instruction for which isWaitcnt() is true is kept out of the scalar ALU count and falls through to the load/store tests. */ 1681 if (gpuDynInst->isALU() && !gpuDynInst->isWaitcnt()) { 1682 sALUInsts++; 1683 instCyclesSALU++; 1684 } else if (gpuDynInst->isLoad()) { 1685 scalarMemReads++; 1686 } else if (gpuDynInst->isStore()) { 1687 scalarMemWrites++; 1688 } 1689 } else { /* Non-scalar side. */ 1690 if (gpuDynInst->isALU()) { 1691 vALUInsts++; 1692 instCyclesVALU++; /* One cycle is charged per vector ALU instruction; thread cycles grow by execMask().count(), presumably the number of active lanes (the stat description says "multiplied by the number of active threads") - confirm against the execMask type. */ 1693 threadCyclesVALU += gpuDynInst->wavefront()->execMask().count(); 1694 } else if (gpuDynInst->isFlat()) { /* isFlat() is tested before isLocalMem(), so a FLAT access that resolves to LDS lands in flatLDSInsts and never in ldsNoFlatInsts. */ 1695 if (gpuDynInst->isLocalMem()) { 1696 flatLDSInsts++; 1697 } else { 1698 flatVMemInsts++; 1699 } 1700 } else if (gpuDynInst->isLocalMem()) { 1701 ldsNoFlatInsts++; 1702 } else if (gpuDynInst->isLoad()) { /* Reached only by instructions that are neither FLAT nor local-memory, matching the "excluding FLAT insts" wording of the vector mem stats. */ 1703 vectorMemReads++; 1704 } else if (gpuDynInst->isStore()) { 1705 vectorMemWrites++; 1706 } 1707 } 1708} 1709 1710void |
|
1570ComputeUnit::updatePageDivergenceDist(Addr addr) 1571{ 1572 Addr virt_page_addr = roundDown(addr, TheISA::PageBytes); 1573 1574 if (!pagesTouched.count(virt_page_addr)) 1575 pagesTouched[virt_page_addr] = 1; 1576 else 1577 pagesTouched[virt_page_addr]++; --- 202 unchanged lines hidden --- | 1711ComputeUnit::updatePageDivergenceDist(Addr addr) 1712{ 1713 Addr virt_page_addr = roundDown(addr, TheISA::PageBytes); 1714 1715 if (!pagesTouched.count(virt_page_addr)) 1716 pagesTouched[virt_page_addr] = 1; 1717 else 1718 pagesTouched[virt_page_addr]++; --- 202 unchanged lines hidden --- |