Profiler.hh revision 6145:15cca6ab723a
1/* 2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45/* 46 * Profiler.h 47 * 48 * Description: 49 * 50 * $Id$ 51 * 52 */ 53 54#ifndef PROFILER_H 55#define PROFILER_H 56 57#include "Global.hh" 58#include "GenericMachineType.hh" 59#include "RubyConfig.hh" 60#include "Histogram.hh" 61#include "Consumer.hh" 62#include "AccessModeType.hh" 63#include "AccessType.hh" 64#include "NodeID.hh" 65#include "MachineID.hh" 66#include "PrefetchBit.hh" 67#include "Address.hh" 68#include "Set.hh" 69#include "CacheRequestType.hh" 70#include "GenericRequestType.hh" 71//#include "XactProfiler.hh" //gem5:Arka for decomissioning og log_tm 72 73class CacheMsg; 74class CacheProfiler; 75class AddressProfiler; 76 77template <class KEY_TYPE, class VALUE_TYPE> class Map; 78 79class Profiler : public Consumer { 80public: 81 // Constructors 82 Profiler(); 83 84 // Destructor 85 ~Profiler(); 86 87 // Public Methods 88 void wakeup(); 89 90 void setPeriodicStatsFile(const string& filename); 91 void setPeriodicStatsInterval(integer_t period); 92 93 void setXactVisualizerFile(char* filename); 94 95 void printStats(ostream& out, bool short_stats=false); 96 void printShortStats(ostream& out) { printStats(out, true); } 97 void printTraceStats(ostream& out) const; 98 void clearStats(); 99 void printConfig(ostream& out) const; 100 void printResourceUsage(ostream& out) const; 101 102 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } 103 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } 104 //XactProfiler* getXactProfiler() { return m_xact_profiler_ptr;} //gem5:Arka for decomissioning og log_tm 105 106 void addPrimaryStatSample(const CacheMsg& msg, NodeID id); 107 void addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 108 void addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 109 void addAddressTraceSample(const CacheMsg& msg, NodeID id); 110 111 void profileRequest(const string& requestStr); 112 void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner); 113 114 void profileMulticastRetry(const Address& addr, int count); 115 116 void profileFilterAction(int action); 117 118 void profileConflictingRequests(const Address& addr); 119 void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); } 120 void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } 121 void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } 122 123 void countBAUnicast() { m_num_BA_unicasts++; } 124 void countBABroadcast() { m_num_BA_broadcasts++; } 125 126 void recordPrediction(bool wasGood, bool wasPredicted); 127 128 void startTransaction(int cpu); 129 void endTransaction(int cpu); 130 void profilePFWait(Time waitTime); 131 132 void controllerBusy(MachineID machID); 133 void bankBusy(); 134 void missLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); 135 void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); 136 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); } 137 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); } 138 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); } 139 void sequencerRequests(int num) { m_sequencer_requests.add(num); } 140 void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);} 141 142 void profileGetXMaskPrediction(const Set& pred_set); 143 void profileGetSMaskPrediction(const Set& pred_set); 144 void profileTrainingMask(const Set& pred_set); 145 void profileTransition(const string& component, NodeID id, NodeID version, Address addr, 146 const string& state, const string& event, 147 const string& next_state, const string& note); 148 void profileMsgDelay(int virtualNetwork, int delayCycles); 149 150 void print(ostream& out) const; 151 152 int64 getTotalInstructionsExecuted() const; 153 int64 getTotalTransactionsExecuted() const; 154 155 //---- begin Transactional Memory CODE 156 #if 0 //gem5:Arka for decomissioning og log_tm 157 void profileTransCycles(int proc, int cycles) { getXactProfiler()->profileTransCycles(proc, cycles);} 158 void profileNonTransCycles(int proc, int cycles) { getXactProfiler()->profileNonTransCycles(proc, cycles);} 159 void profileStallTransCycles(int proc, int cycles) { getXactProfiler()->profileStallTransCycles(proc, cycles); } 160 void profileStallNonTransCycles(int proc, int cycles) { getXactProfiler()->profileStallNonTransCycles(proc, cycles); } 161 void profileAbortingTransCycles(int proc, int cycles) { getXactProfiler()->profileAbortingTransCycles(proc, cycles); } 162 void profileCommitingTransCycles(int proc, int cycles) { getXactProfiler()->profileCommitingTransCycles(proc, cycles); } 163 void profileBarrierCycles(int proc, int cycles) { getXactProfiler()->profileBarrierCycles(proc, cycles);} 164 void profileBackoffTransCycles(int proc, int cycles) { getXactProfiler()->profileBackoffTransCycles(proc, cycles); } 165 void profileGoodTransCycles(int proc, int cycles) {getXactProfiler()->profileGoodTransCycles(proc, cycles); } 166 167 #endif //gem5:Arka TODO clean up the rest of this functions as well 168 void profileTransaction(int size, int logSize, int readS, int writeS, int overflow_readS, int overflow_writeS, int retries, int cycles, bool nacked, int loadMisses, int storeMisses, int instrCount, int xid); 169 void profileBeginTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen); 170 void profileCommitTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen); 171 void profileLoadTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc); 172 void profileLoad(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc); 173 void profileStoreTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc); 174 void profileStore(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc); 175 void profileLoadOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow); 176 void profileStoreOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow); 177 void profileNack(NodeID id, int tid, int xid, int thread, int nacking_thread, NodeID nackedBy, Address addr, Address logicalAddress, Address pc, uint64 seq_ts, uint64 nack_ts, bool possibleCycle); 178 void profileExposedConflict(NodeID id, int xid, int thread, Address addr, Address pc); 179 void profileTransWB(); 180 void profileExtraWB(); 181 void profileInferredAbort(); 182 void profileAbortTransaction(NodeID id, int tid, int xid, int thread, int delay, int abortingThread, int abortingProc, Address addr, Address pc); 183 void profileExceptionStart(bool xact, NodeID proc_no, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc); 184 void profileExceptionDone(bool xact, NodeID proc_no, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc, uinteger_t tpc, uinteger_t tnpc); 185 void profileTimerInterrupt(NodeID id, 186 uinteger_t tick, uinteger_t tick_cmpr, 187 uinteger_t stick, uinteger_t stick_cmpr, 188 int trap_level, 189 uinteger_t pc, uinteger_t npc, 190 uinteger_t pstate, int pil); 191 192 void profileAbortDelayConstants(int handlerStartupDelay, int handlerPerBlockDelay); 193 void profileXactChange(int procs, int cycles); 194 void profileReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread); 195 void profileWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread); 196 void profileRemoteReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread); 197 void profileRemoteWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread); 198 199 200 void profileReadFilterBitsSet(int xid, int bits, bool isCommit); 201 void profileWriteFilterBitsSet(int xid, int bits, bool isCommit); 202 203 void printTransactionState(bool can_skip); 204 205 void watchpointsFalsePositiveTrigger(); 206 void watchpointsTrueTrigger(); 207 208 void profileTransactionLogOverflow(NodeID id, Address addr, Address pc); 209 void profileTransactionCacheOverflow(NodeID id, Address addr, Address pc); 210 void profileGetCPS(NodeID id, uint32 cps, Address pc); 211 void profileTransactionTCC(NodeID id, Address pc); 212 void profileTransactionUnsupInst(NodeID id, Address pc); 213 void profileTransactionSaveInst(NodeID id, Address pc); 214 void profileTransactionRestoreInst(NodeID id, Address pc); 215 216 //---- end Transactional Memory CODE 217 218 void rubyWatch(int proc); 219 bool watchAddress(Address addr); 220 221 // return Ruby's start time 222 Time getRubyStartTime(){ 223 return m_ruby_start; 224 } 225 226 // added for MemoryControl: 227 void profileMemReq(int bank); 228 void profileMemBankBusy(); 229 void profileMemBusBusy(); 230 void profileMemTfawBusy(); 231 void profileMemReadWriteBusy(); 232 void profileMemDataBusBusy(); 233 void profileMemRefresh(); 234 void profileMemRead(); 235 void profileMemWrite(); 236 void profileMemWaitCycles(int cycles); 237 void profileMemInputQ(int cycles); 238 void profileMemBankQ(int cycles); 239 void profileMemArbWait(int cycles); 240 void profileMemRandBusy(); 241 void profileMemNotOld(); 242 243private: 244 // Private Methods 245 void addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 246 void addL1DStatSample(const CacheMsg& msg, NodeID id); 247 void addL1IStatSample(const CacheMsg& msg, NodeID id); 248 249 GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type); 250 251 // Private copy constructor and assignment operator 252 Profiler(const Profiler& obj); 253 Profiler& operator=(const Profiler& obj); 254 255 // Data Members (m_ prefix) 256 CacheProfiler* m_L1D_cache_profiler_ptr; 257 CacheProfiler* m_L1I_cache_profiler_ptr; 258 CacheProfiler* m_L2_cache_profiler_ptr; 259 AddressProfiler* m_address_profiler_ptr; 260 AddressProfiler* m_inst_profiler_ptr; 261 262// XactProfiler* m_xact_profiler_ptr; // gem5:Arka for decomissioning of log_tm 263 264 Vector<int64> m_instructions_executed_at_start; 265 Vector<int64> m_cycles_executed_at_start; 266 267 ostream* m_periodic_output_file_ptr; 268 integer_t m_stats_period; 269 std::fstream m_xact_visualizer; 270 std::ostream *m_xact_visualizer_ptr; 271 272 Time m_ruby_start; 273 time_t m_real_time_start_time; 274 275 int m_num_BA_unicasts; 276 int m_num_BA_broadcasts; 277 278 Vector<integer_t> m_perProcTotalMisses; 279 Vector<integer_t> m_perProcUserMisses; 280 Vector<integer_t> m_perProcSupervisorMisses; 281 Vector<integer_t> m_perProcStartTransaction; 282 Vector<integer_t> m_perProcEndTransaction; 283 Vector < Vector < integer_t > > m_busyControllerCount; 284 integer_t m_busyBankCount; 285 Histogram m_multicast_retry_histogram; 286 287 Histogram m_L1tbeProfile; 288 Histogram m_L2tbeProfile; 289 Histogram m_stopTableProfile; 290 291 Histogram m_filter_action_histogram; 292 Histogram m_tbeProfile; 293 294 Histogram m_sequencer_requests; 295 Histogram m_store_buffer_size; 296 Histogram m_store_buffer_blocks; 297 Histogram m_read_sharing_histogram; 298 Histogram m_write_sharing_histogram; 299 Histogram m_all_sharing_histogram; 300 int64 m_cache_to_cache; 301 int64 m_memory_to_cache; 302 303 Histogram m_prefetchWaitHistogram; 304 305 Vector<Histogram> m_missLatencyHistograms; 306 Vector<Histogram> m_machLatencyHistograms; 307 Histogram m_L2MissLatencyHistogram; 308 Histogram m_allMissLatencyHistogram; 309 310 Histogram m_allSWPrefetchLatencyHistogram; 311 Histogram m_SWPrefetchL2MissLatencyHistogram; 312 Vector<Histogram> m_SWPrefetchLatencyHistograms; 313 Vector<Histogram> m_SWPrefetchMachLatencyHistograms; 314 315 Histogram m_delayedCyclesHistogram; 316 Histogram m_delayedCyclesNonPFHistogram; 317 Vector<Histogram> m_delayedCyclesVCHistograms; 318 319 int m_predictions; 320 int m_predictionOpportunities; 321 int m_goodPredictions; 322 323 Histogram m_gets_mask_prediction; 324 Histogram m_getx_mask_prediction; 325 Histogram m_explicit_training_mask; 326 327 // For profiling possibly conflicting requests 328 Map<Address, Time>* m_conflicting_map_ptr; 329 Histogram m_conflicting_histogram; 330 331 Histogram m_outstanding_requests; 332 Histogram m_outstanding_persistent_requests; 333 334 Histogram m_average_latency_estimate; 335 336 //---- begin Transactional Memory CODE 337 Map <int, int>* m_procsInXactMap_ptr; 338 339 Histogram m_xactCycles; 340 Histogram m_xactLogs; 341 Histogram m_xactReads; 342 Histogram m_xactWrites; 343 Histogram m_xactOverflowReads; 344 Histogram m_xactOverflowWrites; 345 Histogram m_xactOverflowTotalReads; 346 Histogram m_xactOverflowTotalWrites; 347 Histogram m_xactSizes; 348 Histogram m_xactRetries; 349 Histogram m_abortDelays; 350 Histogram m_xactLoadMisses; 351 Histogram m_xactStoreMisses; 352 Histogram m_xactInstrCount; 353 int m_xactNacked; 354 int m_transactionAborts; 355 int m_transWBs; 356 int m_extraWBs; 357 int m_abortStarupDelay; 358 int m_abortPerBlockDelay; 359 int m_inferredAborts; 360 Map <int, int>* m_nackXIDMap_ptr; 361 // pairs of XIDs involved in NACKs 362 Map<int, Map<int, int> * > * m_nackXIDPairMap_ptr; 363 Map <Address, int>* m_nackPCMap_ptr; 364 Map <int, int>* m_xactExceptionMap_ptr; 365 Map <int, int>* m_abortIDMap_ptr; 366 Map <int, int>* m_commitIDMap_ptr; 367 Map <int, int>* m_xactRetryIDMap_ptr; 368 Map <int, int>* m_xactCyclesIDMap_ptr; 369 Map <int, int>* m_xactReadSetIDMap_ptr; 370 Map <int, int>* m_xactWriteSetIDMap_ptr; 371 Map <int, int>* m_xactLoadMissIDMap_ptr; 372 Map <int, int>* m_xactStoreMissIDMap_ptr; 373 Map <int, integer_t> *m_xactInstrCountIDMap_ptr; 374 Map <Address, int>* m_abortPCMap_ptr; 375 Map <Address, int>* m_abortAddressMap_ptr; 376 Map <Address, int>* m_readSetMatch_ptr; 377 Map <Address, int>* m_readSetNoMatch_ptr; 378 Map <Address, int>* m_writeSetMatch_ptr; 379 Map <Address, int>* m_writeSetNoMatch_ptr; 380 Map <Address, int>* m_remoteReadSetMatch_ptr; 381 Map <Address, int>* m_remoteReadSetNoMatch_ptr; 382 Map <Address, int>* m_remoteWriteSetMatch_ptr; 383 Map <Address, int>* m_remoteWriteSetNoMatch_ptr; 384 long long int m_readSetEmptyChecks; 385 long long int m_readSetMatch; 386 long long int m_readSetNoMatch; 387 long long int m_writeSetEmptyChecks; 388 long long int m_writeSetMatch; 389 long long int m_writeSetNoMatch; 390 Map<int, Histogram> * m_xactReadFilterBitsSetOnCommit; 391 Map<int, Histogram> * m_xactReadFilterBitsSetOnAbort; 392 Map<int, Histogram> * m_xactWriteFilterBitsSetOnCommit; 393 Map<int, Histogram> * m_xactWriteFilterBitsSetOnAbort; 394 395 unsigned int m_watchpointsFalsePositiveTrigger; 396 unsigned int m_watchpointsTrueTrigger; 397 398 int m_transactionUnsupInsts; 399 int m_transactionSaveRestAborts; 400 401 int m_transactionLogOverflows; 402 int m_transactionCacheOverflows; 403 404 //---- end Transactional Memory CODE 405 406 Map<Address, int>* m_watch_address_list_ptr; 407 // counts all initiated cache request including PUTs 408 int m_requests; 409 Map <string, int>* m_requestProfileMap_ptr; 410 411 Time m_xact_visualizer_last; 412 413 // added for MemoryControl: 414 long long int m_memReq; 415 long long int m_memBankBusy; 416 long long int m_memBusBusy; 417 long long int m_memTfawBusy; 418 long long int m_memReadWriteBusy; 419 long long int m_memDataBusBusy; 420 long long int m_memRefresh; 421 long long int m_memRead; 422 long long int m_memWrite; 423 long long int m_memWaitCycles; 424 long long int m_memInputQ; 425 long long int m_memBankQ; 426 long long int m_memArbWait; 427 long long int m_memRandBusy; 428 long long int m_memNotOld; 429 Vector<long long int> m_memBankCount; 430 431}; 432 433// Output operator declaration 434ostream& operator<<(ostream& out, const Profiler& obj); 435 436// ******************* Definitions ******************* 437 438// Output operator definition 439extern inline 440ostream& operator<<(ostream& out, const Profiler& obj) 441{ 442 obj.print(out); 443 out << flush; 444 return out; 445} 446 447#endif //PROFILER_H 448 449 450