// Profiler.hh revision 6145:15cca6ab723a
/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).

   Please send email to atmtp-interest@sun.com with feedback, questions, or
   to request future announcements about ATMTP.

   ----------------------------------------------------------------------

   File modification date: 2008-02-23

   ----------------------------------------------------------------------
*/

/*
 * Profiler.hh
 *
 * Description: Declares the Profiler class and its stream output operator.
 *
 * $Id$
 *
 */

#ifndef PROFILER_H
#define PROFILER_H

#include <ostream>

#include "Global.hh"
#include "GenericMachineType.hh"
#include "RubyConfig.hh"
#include "Histogram.hh"
#include "Consumer.hh"
#include "AccessModeType.hh"
#include "AccessType.hh"
#include "NodeID.hh"
#include "MachineID.hh"
#include "PrefetchBit.hh"
#include "Address.hh"
#include "Set.hh"
#include "CacheRequestType.hh"
#include "GenericRequestType.hh"
//#include "XactProfiler.hh" //gem5:Arka for decommissioning of log_tm

// Forward declarations: this header only uses these types through
// pointers and references, so their full definitions are not needed here.
class CacheMsg;
class CacheProfiler;
class AddressProfiler;

template <class KEY_TYPE, class VALUE_TYPE> class Map;

79class Profiler : public Consumer {
80public:
81  // Constructors
82  Profiler();
83
84  // Destructor
85  ~Profiler();
86
87  // Public Methods
88  void wakeup();
89
90  void setPeriodicStatsFile(const string& filename);
91  void setPeriodicStatsInterval(integer_t period);
92
93  void setXactVisualizerFile(char* filename);
94
95  void printStats(ostream& out, bool short_stats=false);
96  void printShortStats(ostream& out) { printStats(out, true); }
97  void printTraceStats(ostream& out) const;
98  void clearStats();
99  void printConfig(ostream& out) const;
100  void printResourceUsage(ostream& out) const;
101
102  AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
103  AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
104  //XactProfiler*    getXactProfiler() { return m_xact_profiler_ptr;} //gem5:Arka for decomissioning og log_tm
105
106  void addPrimaryStatSample(const CacheMsg& msg, NodeID id);
107  void addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
108  void addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
109  void addAddressTraceSample(const CacheMsg& msg, NodeID id);
110
111  void profileRequest(const string& requestStr);
112  void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner);
113
114  void profileMulticastRetry(const Address& addr, int count);
115
116  void profileFilterAction(int action);
117
118  void profileConflictingRequests(const Address& addr);
119  void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); }
120  void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
121  void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }
122
123  void countBAUnicast() { m_num_BA_unicasts++; }
124  void countBABroadcast() { m_num_BA_broadcasts++; }
125
126  void recordPrediction(bool wasGood, bool wasPredicted);
127
128  void startTransaction(int cpu);
129  void endTransaction(int cpu);
130  void profilePFWait(Time waitTime);
131
132  void controllerBusy(MachineID machID);
133  void bankBusy();
134  void missLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
135  void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
136  void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
137  void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
138  void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
139  void sequencerRequests(int num) { m_sequencer_requests.add(num); }
140  void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);}
141
142  void profileGetXMaskPrediction(const Set& pred_set);
143  void profileGetSMaskPrediction(const Set& pred_set);
144  void profileTrainingMask(const Set& pred_set);
145  void profileTransition(const string& component, NodeID id, NodeID version, Address addr,
146                         const string& state, const string& event,
147                         const string& next_state, const string& note);
148  void profileMsgDelay(int virtualNetwork, int delayCycles);
149
150  void print(ostream& out) const;
151
152  int64 getTotalInstructionsExecuted() const;
153  int64 getTotalTransactionsExecuted() const;
154
155  //---- begin Transactional Memory CODE
156  #if 0  //gem5:Arka for decomissioning og log_tm
157  void profileTransCycles(int proc, int cycles) { getXactProfiler()->profileTransCycles(proc, cycles);}
158  void profileNonTransCycles(int proc, int cycles) { getXactProfiler()->profileNonTransCycles(proc, cycles);}
159  void profileStallTransCycles(int proc, int cycles) { getXactProfiler()->profileStallTransCycles(proc, cycles); }
160  void profileStallNonTransCycles(int proc, int cycles) { getXactProfiler()->profileStallNonTransCycles(proc, cycles); }
161  void profileAbortingTransCycles(int proc, int cycles) { getXactProfiler()->profileAbortingTransCycles(proc, cycles); }
162  void profileCommitingTransCycles(int proc, int cycles) { getXactProfiler()->profileCommitingTransCycles(proc, cycles); }
163  void profileBarrierCycles(int proc, int cycles) { getXactProfiler()->profileBarrierCycles(proc, cycles);}
164  void profileBackoffTransCycles(int proc, int cycles) { getXactProfiler()->profileBackoffTransCycles(proc, cycles); }
165  void profileGoodTransCycles(int proc, int cycles) {getXactProfiler()->profileGoodTransCycles(proc, cycles); }
166
167  #endif //gem5:Arka TODO clean up the rest of this functions as well
168  void profileTransaction(int size, int logSize, int readS, int writeS, int overflow_readS, int overflow_writeS, int retries, int cycles, bool nacked, int loadMisses, int storeMisses, int instrCount, int xid);
169  void profileBeginTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen);
170  void profileCommitTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen);
171  void profileLoadTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc);
172  void profileLoad(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc);
173  void profileStoreTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc);
174  void profileStore(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc);
175  void profileLoadOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow);
176  void profileStoreOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow);
177  void profileNack(NodeID id, int tid, int xid, int thread, int nacking_thread, NodeID nackedBy, Address addr, Address logicalAddress, Address pc, uint64 seq_ts, uint64 nack_ts, bool possibleCycle);
178  void profileExposedConflict(NodeID id, int xid, int thread, Address addr, Address pc);
179  void profileTransWB();
180  void profileExtraWB();
181  void profileInferredAbort();
182  void profileAbortTransaction(NodeID id, int tid, int xid, int thread, int delay, int abortingThread, int abortingProc, Address addr, Address pc);
183  void profileExceptionStart(bool xact, NodeID proc_no, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc);
184  void profileExceptionDone(bool xact, NodeID proc_no, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc, uinteger_t tpc, uinteger_t tnpc);
185  void profileTimerInterrupt(NodeID id,
186                             uinteger_t tick, uinteger_t tick_cmpr,
187                             uinteger_t stick, uinteger_t stick_cmpr,
188                             int trap_level,
189                             uinteger_t pc, uinteger_t npc,
190                             uinteger_t pstate, int pil);
191
192  void profileAbortDelayConstants(int handlerStartupDelay, int handlerPerBlockDelay);
193  void profileXactChange(int procs, int cycles);
194  void profileReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread);
195  void profileWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread);
196  void profileRemoteReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread);
197  void profileRemoteWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread);
198
199
200  void profileReadFilterBitsSet(int xid, int bits, bool isCommit);
201  void profileWriteFilterBitsSet(int xid, int bits, bool isCommit);
202
203  void printTransactionState(bool can_skip);
204
205  void watchpointsFalsePositiveTrigger();
206  void watchpointsTrueTrigger();
207
208  void profileTransactionLogOverflow(NodeID id, Address addr, Address pc);
209  void profileTransactionCacheOverflow(NodeID id, Address addr, Address pc);
210  void profileGetCPS(NodeID id, uint32 cps, Address pc);
211  void profileTransactionTCC(NodeID id, Address pc);
212  void profileTransactionUnsupInst(NodeID id, Address pc);
213  void profileTransactionSaveInst(NodeID id, Address pc);
214  void profileTransactionRestoreInst(NodeID id, Address pc);
215
216  //---- end Transactional Memory CODE
217
218  void rubyWatch(int proc);
219  bool watchAddress(Address addr);
220
221  // return Ruby's start time
222  Time getRubyStartTime(){
223    return m_ruby_start;
224  }
225
226  // added for MemoryControl:
227  void profileMemReq(int bank);
228  void profileMemBankBusy();
229  void profileMemBusBusy();
230  void profileMemTfawBusy();
231  void profileMemReadWriteBusy();
232  void profileMemDataBusBusy();
233  void profileMemRefresh();
234  void profileMemRead();
235  void profileMemWrite();
236  void profileMemWaitCycles(int cycles);
237  void profileMemInputQ(int cycles);
238  void profileMemBankQ(int cycles);
239  void profileMemArbWait(int cycles);
240  void profileMemRandBusy();
241  void profileMemNotOld();
242
243private:
244  // Private Methods
245  void addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
246  void addL1DStatSample(const CacheMsg& msg, NodeID id);
247  void addL1IStatSample(const CacheMsg& msg, NodeID id);
248
249  GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type);
250
251  // Private copy constructor and assignment operator
252  Profiler(const Profiler& obj);
253  Profiler& operator=(const Profiler& obj);
254
255  // Data Members (m_ prefix)
256  CacheProfiler* m_L1D_cache_profiler_ptr;
257  CacheProfiler* m_L1I_cache_profiler_ptr;
258  CacheProfiler* m_L2_cache_profiler_ptr;
259  AddressProfiler* m_address_profiler_ptr;
260  AddressProfiler* m_inst_profiler_ptr;
261
262//  XactProfiler*   m_xact_profiler_ptr; // gem5:Arka for decomissioning of log_tm
263
264  Vector<int64> m_instructions_executed_at_start;
265  Vector<int64> m_cycles_executed_at_start;
266
267  ostream* m_periodic_output_file_ptr;
268  integer_t m_stats_period;
269  std::fstream m_xact_visualizer;
270  std::ostream *m_xact_visualizer_ptr;
271
272  Time m_ruby_start;
273  time_t m_real_time_start_time;
274
275  int m_num_BA_unicasts;
276  int m_num_BA_broadcasts;
277
278  Vector<integer_t> m_perProcTotalMisses;
279  Vector<integer_t> m_perProcUserMisses;
280  Vector<integer_t> m_perProcSupervisorMisses;
281  Vector<integer_t> m_perProcStartTransaction;
282  Vector<integer_t> m_perProcEndTransaction;
283  Vector < Vector < integer_t > > m_busyControllerCount;
284  integer_t m_busyBankCount;
285  Histogram m_multicast_retry_histogram;
286
287  Histogram m_L1tbeProfile;
288  Histogram m_L2tbeProfile;
289  Histogram m_stopTableProfile;
290
291  Histogram m_filter_action_histogram;
292  Histogram m_tbeProfile;
293
294  Histogram m_sequencer_requests;
295  Histogram m_store_buffer_size;
296  Histogram m_store_buffer_blocks;
297  Histogram m_read_sharing_histogram;
298  Histogram m_write_sharing_histogram;
299  Histogram m_all_sharing_histogram;
300  int64 m_cache_to_cache;
301  int64 m_memory_to_cache;
302
303  Histogram m_prefetchWaitHistogram;
304
305  Vector<Histogram> m_missLatencyHistograms;
306  Vector<Histogram> m_machLatencyHistograms;
307  Histogram m_L2MissLatencyHistogram;
308  Histogram m_allMissLatencyHistogram;
309
310  Histogram  m_allSWPrefetchLatencyHistogram;
311  Histogram  m_SWPrefetchL2MissLatencyHistogram;
312  Vector<Histogram> m_SWPrefetchLatencyHistograms;
313  Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
314
315  Histogram m_delayedCyclesHistogram;
316  Histogram m_delayedCyclesNonPFHistogram;
317  Vector<Histogram> m_delayedCyclesVCHistograms;
318
319  int m_predictions;
320  int m_predictionOpportunities;
321  int m_goodPredictions;
322
323  Histogram m_gets_mask_prediction;
324  Histogram m_getx_mask_prediction;
325  Histogram m_explicit_training_mask;
326
327  // For profiling possibly conflicting requests
328  Map<Address, Time>* m_conflicting_map_ptr;
329  Histogram m_conflicting_histogram;
330
331  Histogram m_outstanding_requests;
332  Histogram m_outstanding_persistent_requests;
333
334  Histogram m_average_latency_estimate;
335
336  //---- begin Transactional Memory CODE
337  Map <int, int>* m_procsInXactMap_ptr;
338
339  Histogram m_xactCycles;
340  Histogram m_xactLogs;
341  Histogram m_xactReads;
342  Histogram m_xactWrites;
343  Histogram m_xactOverflowReads;
344  Histogram m_xactOverflowWrites;
345  Histogram m_xactOverflowTotalReads;
346  Histogram m_xactOverflowTotalWrites;
347  Histogram m_xactSizes;
348  Histogram m_xactRetries;
349  Histogram m_abortDelays;
350  Histogram m_xactLoadMisses;
351  Histogram m_xactStoreMisses;
352  Histogram m_xactInstrCount;
353  int m_xactNacked;
354  int m_transactionAborts;
355  int m_transWBs;
356  int m_extraWBs;
357  int m_abortStarupDelay;
358  int m_abortPerBlockDelay;
359  int m_inferredAborts;
360  Map <int, int>* m_nackXIDMap_ptr;
361  // pairs of XIDs involved in NACKs
362  Map<int, Map<int, int> * > * m_nackXIDPairMap_ptr;
363  Map <Address, int>* m_nackPCMap_ptr;
364  Map <int, int>* m_xactExceptionMap_ptr;
365  Map <int, int>* m_abortIDMap_ptr;
366  Map <int, int>* m_commitIDMap_ptr;
367  Map <int, int>* m_xactRetryIDMap_ptr;
368  Map <int, int>* m_xactCyclesIDMap_ptr;
369  Map <int, int>* m_xactReadSetIDMap_ptr;
370  Map <int, int>* m_xactWriteSetIDMap_ptr;
371  Map <int, int>* m_xactLoadMissIDMap_ptr;
372  Map <int, int>* m_xactStoreMissIDMap_ptr;
373  Map <int, integer_t> *m_xactInstrCountIDMap_ptr;
374  Map <Address, int>* m_abortPCMap_ptr;
375  Map <Address, int>* m_abortAddressMap_ptr;
376  Map <Address, int>* m_readSetMatch_ptr;
377  Map <Address, int>* m_readSetNoMatch_ptr;
378  Map <Address, int>* m_writeSetMatch_ptr;
379  Map <Address, int>* m_writeSetNoMatch_ptr;
380  Map <Address, int>* m_remoteReadSetMatch_ptr;
381  Map <Address, int>* m_remoteReadSetNoMatch_ptr;
382  Map <Address, int>* m_remoteWriteSetMatch_ptr;
383  Map <Address, int>* m_remoteWriteSetNoMatch_ptr;
384  long long int m_readSetEmptyChecks;
385  long long int m_readSetMatch;
386  long long int m_readSetNoMatch;
387  long long int m_writeSetEmptyChecks;
388  long long int m_writeSetMatch;
389  long long int m_writeSetNoMatch;
390  Map<int, Histogram> * m_xactReadFilterBitsSetOnCommit;
391  Map<int, Histogram> * m_xactReadFilterBitsSetOnAbort;
392  Map<int, Histogram> * m_xactWriteFilterBitsSetOnCommit;
393  Map<int, Histogram> * m_xactWriteFilterBitsSetOnAbort;
394
395  unsigned int m_watchpointsFalsePositiveTrigger;
396  unsigned int m_watchpointsTrueTrigger;
397
398  int m_transactionUnsupInsts;
399  int m_transactionSaveRestAborts;
400
401  int m_transactionLogOverflows;
402  int m_transactionCacheOverflows;
403
404  //---- end Transactional Memory CODE
405
406  Map<Address, int>* m_watch_address_list_ptr;
407  // counts all initiated cache request including PUTs
408  int m_requests;
409  Map <string, int>* m_requestProfileMap_ptr;
410
411  Time m_xact_visualizer_last;
412
413  // added for MemoryControl:
414  long long int m_memReq;
415  long long int m_memBankBusy;
416  long long int m_memBusBusy;
417  long long int m_memTfawBusy;
418  long long int m_memReadWriteBusy;
419  long long int m_memDataBusBusy;
420  long long int m_memRefresh;
421  long long int m_memRead;
422  long long int m_memWrite;
423  long long int m_memWaitCycles;
424  long long int m_memInputQ;
425  long long int m_memBankQ;
426  long long int m_memArbWait;
427  long long int m_memRandBusy;
428  long long int m_memNotOld;
429  Vector<long long int> m_memBankCount;
430
431};
432
433// Output operator declaration
434ostream& operator<<(ostream& out, const Profiler& obj);
435
436// ******************* Definitions *******************
437
438// Output operator definition
439extern inline
440ostream& operator<<(ostream& out, const Profiler& obj)
441{
442  obj.print(out);
443  out << flush;
444  return out;
445}
446
447#endif //PROFILER_H
448
449
450