Profiler.hh revision 6285:ce086eca1ede
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30   This file has been modified by Kevin Moore and Dan Nussbaum of the
31   Scalable Systems Research Group at Sun Microsystems Laboratories
32   (http://research.sun.com/scalable/) to support the Adaptive
33   Transactional Memory Test Platform (ATMTP).
34
35   Please send email to atmtp-interest@sun.com with feedback, questions, or
36   to request future announcements about ATMTP.
37
38   ----------------------------------------------------------------------
39
40   File modification date: 2008-02-23
41
42   ----------------------------------------------------------------------
43*/
44
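/*
 * Profiler.hh
 *
 * Description: Declares the Profiler class (a Consumer), the
 * memory_control_profiler counter struct, and the stream output
 * operator for Profiler.
 *
 * $Id$
 *
 */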
53
54#ifndef PROFILER_H
55#define PROFILER_H
56
57#include "mem/ruby/libruby.hh"
58
59#include "mem/ruby/common/Global.hh"
60#include "mem/protocol/GenericMachineType.hh"
61#include "mem/ruby/config/RubyConfig.hh"
62#include "mem/ruby/common/Histogram.hh"
63#include "mem/ruby/common/Consumer.hh"
64#include "mem/protocol/AccessModeType.hh"
65#include "mem/protocol/AccessType.hh"
66#include "mem/ruby/system/NodeID.hh"
67#include "mem/ruby/system/MachineID.hh"
68#include "mem/protocol/PrefetchBit.hh"
69#include "mem/ruby/common/Address.hh"
70#include "mem/ruby/common/Set.hh"
71#include "mem/protocol/CacheRequestType.hh"
72#include "mem/protocol/GenericRequestType.hh"
73#include "mem/ruby/system/MemoryControl.hh"
74
75class CacheMsg;
76class CacheProfiler;
77class AddressProfiler;
78
79template <class KEY_TYPE, class VALUE_TYPE> class Map;
80
81struct memory_control_profiler {
82  long long int m_memReq;
83  long long int m_memBankBusy;
84  long long int m_memBusBusy;
85  long long int m_memTfawBusy;
86  long long int m_memReadWriteBusy;
87  long long int m_memDataBusBusy;
88  long long int m_memRefresh;
89  long long int m_memRead;
90  long long int m_memWrite;
91  long long int m_memWaitCycles;
92  long long int m_memInputQ;
93  long long int m_memBankQ;
94  long long int m_memArbWait;
95  long long int m_memRandBusy;
96  long long int m_memNotOld;
97  Vector<long long int> m_memBankCount;
98  int m_banks_per_rank;
99  int m_ranks_per_dimm;
100  int m_dimms_per_channel;
101};
102
103
104class Profiler : public Consumer {
105public:
106  // Constructors
107  Profiler(const string & name);
108
109  void init(const vector<string> & argv, vector<string> memory_control_names);
110
111  // Destructor
112  ~Profiler();
113
114  // Public Methods
115  void wakeup();
116
117  void setPeriodicStatsFile(const string& filename);
118  void setPeriodicStatsInterval(integer_t period);
119
120  void printStats(ostream& out, bool short_stats=false);
121  void printShortStats(ostream& out) { printStats(out, true); }
122  void printTraceStats(ostream& out) const;
123  void clearStats();
124  void printConfig(ostream& out) const;
125  void printResourceUsage(ostream& out) const;
126
127  AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
128  AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
129
130  void addPrimaryStatSample(const CacheMsg& msg, NodeID id);
131  void addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
132  void addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
133  void addAddressTraceSample(const CacheMsg& msg, NodeID id);
134
135  void profileRequest(const string& requestStr);
136  void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner);
137
138  void profileMulticastRetry(const Address& addr, int count);
139
140  void profileFilterAction(int action);
141
142  void profileConflictingRequests(const Address& addr);
143  void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); }
144  void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
145  void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }
146
147  void countBAUnicast() { m_num_BA_unicasts++; }
148  void countBABroadcast() { m_num_BA_broadcasts++; }
149
150  void recordPrediction(bool wasGood, bool wasPredicted);
151
152  void startTransaction(int cpu);
153  void endTransaction(int cpu);
154  void profilePFWait(Time waitTime);
155
156  void controllerBusy(MachineID machID);
157  void bankBusy();
158  void missLatency(Time t, RubyRequestType type);
159  void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
160  void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
161  void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
162  void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
163  void sequencerRequests(int num) { m_sequencer_requests.add(num); }
164  void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);}
165
166  void profileGetXMaskPrediction(const Set& pred_set);
167  void profileGetSMaskPrediction(const Set& pred_set);
168  void profileTrainingMask(const Set& pred_set);
169  void profileTransition(const string& component, NodeID version, Address addr,
170                         const string& state, const string& event,
171                         const string& next_state, const string& note);
172  void profileMsgDelay(int virtualNetwork, int delayCycles);
173
174  void print(ostream& out) const;
175
176  int64 getTotalInstructionsExecuted() const;
177  int64 getTotalTransactionsExecuted() const;
178
179  void rubyWatch(int proc);
180  bool watchAddress(Address addr);
181
182  // return Ruby's start time
183  Time getRubyStartTime(){
184    return m_ruby_start;
185  }
186
187  // added for MemoryControl:
188  void profileMemReq(string name, int bank);
189  void profileMemBankBusy(string name);
190  void profileMemBusBusy(string name);
191  void profileMemTfawBusy(string name);
192  void profileMemReadWriteBusy(string name);
193  void profileMemDataBusBusy(string name);
194  void profileMemRefresh(string name);
195  void profileMemRead(string name);
196  void profileMemWrite(string name);
197  void profileMemWaitCycles(string name, int cycles);
198  void profileMemInputQ(string name, int cycles);
199  void profileMemBankQ(string name, int cycles);
200  void profileMemArbWait(string name, int cycles);
201  void profileMemRandBusy(string name);
202  void profileMemNotOld(string name);
203  //added by SS
204  bool getHotLines() { return m_hot_lines; }
205  bool getAllInstructions() { return m_all_instructions; }
206
207private:
208  //added by SS
209  vector<string> m_memory_control_names;
210  // Private Methods
211  void addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
212  void addL1DStatSample(const CacheMsg& msg, NodeID id);
213  void addL1IStatSample(const CacheMsg& msg, NodeID id);
214
215  GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type);
216
217  // Private copy constructor and assignment operator
218  Profiler(const Profiler& obj);
219  Profiler& operator=(const Profiler& obj);
220
221  // Data Members (m_ prefix)
222  CacheProfiler* m_L1D_cache_profiler_ptr;
223  CacheProfiler* m_L1I_cache_profiler_ptr;
224  CacheProfiler* m_L2_cache_profiler_ptr;
225  AddressProfiler* m_address_profiler_ptr;
226  AddressProfiler* m_inst_profiler_ptr;
227
228  Vector<int64> m_instructions_executed_at_start;
229  Vector<int64> m_cycles_executed_at_start;
230
231  ostream* m_periodic_output_file_ptr;
232  integer_t m_stats_period;
233
234  Time m_ruby_start;
235  time_t m_real_time_start_time;
236
237  int m_num_BA_unicasts;
238  int m_num_BA_broadcasts;
239
240  Vector<integer_t> m_perProcTotalMisses;
241  Vector<integer_t> m_perProcUserMisses;
242  Vector<integer_t> m_perProcSupervisorMisses;
243  Vector<integer_t> m_perProcStartTransaction;
244  Vector<integer_t> m_perProcEndTransaction;
245  Vector < Vector < integer_t > > m_busyControllerCount;
246  integer_t m_busyBankCount;
247  Histogram m_multicast_retry_histogram;
248
249  Histogram m_L1tbeProfile;
250  Histogram m_L2tbeProfile;
251  Histogram m_stopTableProfile;
252
253  Histogram m_filter_action_histogram;
254  Histogram m_tbeProfile;
255
256  Histogram m_sequencer_requests;
257  Histogram m_store_buffer_size;
258  Histogram m_store_buffer_blocks;
259  Histogram m_read_sharing_histogram;
260  Histogram m_write_sharing_histogram;
261  Histogram m_all_sharing_histogram;
262  int64 m_cache_to_cache;
263  int64 m_memory_to_cache;
264
265  Histogram m_prefetchWaitHistogram;
266
267  Vector<Histogram> m_missLatencyHistograms;
268  Vector<Histogram> m_machLatencyHistograms;
269  Histogram m_L2MissLatencyHistogram;
270  Histogram m_allMissLatencyHistogram;
271
272  Histogram  m_allSWPrefetchLatencyHistogram;
273  Histogram  m_SWPrefetchL2MissLatencyHistogram;
274  Vector<Histogram> m_SWPrefetchLatencyHistograms;
275  Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
276
277  Histogram m_delayedCyclesHistogram;
278  Histogram m_delayedCyclesNonPFHistogram;
279  Vector<Histogram> m_delayedCyclesVCHistograms;
280
281  int m_predictions;
282  int m_predictionOpportunities;
283  int m_goodPredictions;
284
285  Histogram m_gets_mask_prediction;
286  Histogram m_getx_mask_prediction;
287  Histogram m_explicit_training_mask;
288
289  // For profiling possibly conflicting requests
290  Map<Address, Time>* m_conflicting_map_ptr;
291  Histogram m_conflicting_histogram;
292
293  Histogram m_outstanding_requests;
294  Histogram m_outstanding_persistent_requests;
295
296  Histogram m_average_latency_estimate;
297
298  Map<Address, int>* m_watch_address_list_ptr;
299  // counts all initiated cache request including PUTs
300  int m_requests;
301  Map <string, int>* m_requestProfileMap_ptr;
302
303  // added for MemoryControl:
304  //added by SS
305  map< string, memory_control_profiler* > m_memory_control_profilers;
306
307  //added by SS
308  bool m_hot_lines;
309  bool m_all_instructions;
310  string m_name;
311
312};
313
314// Output operator declaration
315ostream& operator<<(ostream& out, const Profiler& obj);
316
317// ******************* Definitions *******************
318
319// Output operator definition
320extern inline
321ostream& operator<<(ostream& out, const Profiler& obj)
322{
323  obj.print(out);
324  out << flush;
325  return out;
326}
327
328#endif //PROFILER_H
329
330
331