Profiler.hh (6284:a63d1dc4c820) Profiler.hh (6285:ce086eca1ede)
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;

--- 40 unchanged lines hidden (view full) ---

49 *
50 * $Id$
51 *
52 */
53
54#ifndef PROFILER_H
55#define PROFILER_H
56
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;

--- 40 unchanged lines hidden (view full) ---

49 *
50 * $Id$
51 *
52 */
53
54#ifndef PROFILER_H
55#define PROFILER_H
56
57#include "mem/ruby/libruby.hh"
58
57#include "mem/ruby/common/Global.hh"
58#include "mem/protocol/GenericMachineType.hh"
59#include "mem/ruby/config/RubyConfig.hh"
60#include "mem/ruby/common/Histogram.hh"
61#include "mem/ruby/common/Consumer.hh"
62#include "mem/protocol/AccessModeType.hh"
63#include "mem/protocol/AccessType.hh"
64#include "mem/ruby/system/NodeID.hh"
65#include "mem/ruby/system/MachineID.hh"
66#include "mem/protocol/PrefetchBit.hh"
67#include "mem/ruby/common/Address.hh"
68#include "mem/ruby/common/Set.hh"
69#include "mem/protocol/CacheRequestType.hh"
70#include "mem/protocol/GenericRequestType.hh"
59#include "mem/ruby/common/Global.hh"
60#include "mem/protocol/GenericMachineType.hh"
61#include "mem/ruby/config/RubyConfig.hh"
62#include "mem/ruby/common/Histogram.hh"
63#include "mem/ruby/common/Consumer.hh"
64#include "mem/protocol/AccessModeType.hh"
65#include "mem/protocol/AccessType.hh"
66#include "mem/ruby/system/NodeID.hh"
67#include "mem/ruby/system/MachineID.hh"
68#include "mem/protocol/PrefetchBit.hh"
69#include "mem/ruby/common/Address.hh"
70#include "mem/ruby/common/Set.hh"
71#include "mem/protocol/CacheRequestType.hh"
72#include "mem/protocol/GenericRequestType.hh"
73#include "mem/ruby/system/MemoryControl.hh"
71
72class CacheMsg;
73class CacheProfiler;
74class AddressProfiler;
75
76template <class KEY_TYPE, class VALUE_TYPE> class Map;
77
74
75class CacheMsg;
76class CacheProfiler;
77class AddressProfiler;
78
79template <class KEY_TYPE, class VALUE_TYPE> class Map;
80
// Plain bundle of memory-controller statistics.  Profiler stores pointers to
// these in a map keyed by string (m_memory_control_profilers).
// NOTE(review): the key is presumably the memory controller's name, matching
// the `string name` argument of Profiler::profileMem*() -- confirm in the .cc.
81struct memory_control_profiler {
// Scalar counters.  NOTE(review): each one appears to pair with the
// Profiler::profileMem*() method of the same suffix -- confirm in the .cc.
82 long long int m_memReq;
83 long long int m_memBankBusy;
84 long long int m_memBusBusy;
85 long long int m_memTfawBusy;
86 long long int m_memReadWriteBusy;
87 long long int m_memDataBusBusy;
88 long long int m_memRefresh;
89 long long int m_memRead;
90 long long int m_memWrite;
91 long long int m_memWaitCycles;
92 long long int m_memInputQ;
93 long long int m_memBankQ;
94 long long int m_memArbWait;
95 long long int m_memRandBusy;
96 long long int m_memNotOld;
// A vector of counters; presumably one entry per bank -- TODO confirm sizing.
97 Vector<long long int> m_memBankCount;
// Geometry values; nothing in this header shows where they are filled in.
98 int m_banks_per_rank;
99 int m_ranks_per_dimm;
100 int m_dimms_per_channel;
101};
102
103
102
103
// Profiler: a Consumer that records samples into histograms and counters and
// prints them through printStats() / printConfig() / print().
//
// NOTE(review): this listing interleaves two revisions of the class; every
// line keeps the line-number prefix of the revision it came from, so most
// declarations appear twice (lower-numbered and higher-numbered copy).
// NOTE(review): in the higher-numbered copy several Vector / Map members had
// lost their template arguments (bare `Vector`, bare `Map*`), which is not a
// valid use of a class template.  They are restored below from the
// lower-numbered declaration of the same member.
78class Profiler : public Consumer {
79public:
104class Profiler : public Consumer {
105public:
80 // Constructors
81 Profiler();
106 // Constructors
107 Profiler(const string & name);
82
108
83 // Destructor
84 ~Profiler();
109 void init(const vector<string> & argv, vector<string> memory_control_names);
85
110
86 // Public Methods
87 void wakeup();
111 // Destructor
112 ~Profiler();
88
113
89 void setPeriodicStatsFile(const string& filename);
90 void setPeriodicStatsInterval(integer_t period);
114 // Public Methods
115 void wakeup();
91
116
92 void printStats(ostream& out, bool short_stats=false);
93 void printShortStats(ostream& out) { printStats(out, true); }
94 void printTraceStats(ostream& out) const;
95 void clearStats();
96 void printConfig(ostream& out) const;
97 void printResourceUsage(ostream& out) const;
117 void setPeriodicStatsFile(const string& filename);
118 void setPeriodicStatsInterval(integer_t period);
98
119
99 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
100 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
// printShortStats() is printStats() with short_stats forced to true.
120 void printStats(ostream& out, bool short_stats=false);
121 void printShortStats(ostream& out) { printStats(out, true); }
122 void printTraceStats(ostream& out) const;
123 void clearStats();
124 void printConfig(ostream& out) const;
125 void printResourceUsage(ostream& out) const;
101
126
102 void addPrimaryStatSample(const CacheMsg& msg, NodeID id);
103 void addSecondaryStatSample(GenericRequestType requestType,
104 AccessModeType type, int msgSize,
105 PrefetchBit pfBit, NodeID id);
106 void addSecondaryStatSample(CacheRequestType requestType,
107 AccessModeType type, int msgSize,
108 PrefetchBit pfBit, NodeID id);
109 void addAddressTraceSample(const CacheMsg& msg, NodeID id);
127 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
128 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
110
129
111 void profileRequest(const string& requestStr);
112 void profileSharing(const Address& addr, AccessType type,
113 NodeID requestor, const Set& sharers,
114 const Set& owner);
130 void addPrimaryStatSample(const CacheMsg& msg, NodeID id);
131 void addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
132 void addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
133 void addAddressTraceSample(const CacheMsg& msg, NodeID id);
115
134
116 void profileMulticastRetry(const Address& addr, int count);
135 void profileRequest(const string& requestStr);
136 void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner);
117
137
118 void profileFilterAction(int action);
138 void profileMulticastRetry(const Address& addr, int count);
119
139
120 void profileConflictingRequests(const Address& addr);
121 void profileOutstandingRequest(int outstanding) {
122 m_outstanding_requests.add(outstanding);
123 }
140 void profileFilterAction(int action);
124
141
125 void profileOutstandingPersistentRequest(int outstanding) {
126 m_outstanding_persistent_requests.add(outstanding);
127 }
128 void profileAverageLatencyEstimate(int latency) {
129 m_average_latency_estimate.add(latency);
130 }
// The inline profile*() helpers below each add one sample to the histogram
// member named in their body.
142 void profileConflictingRequests(const Address& addr);
143 void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); }
144 void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
145 void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }
131
146
132 void countBAUnicast() { m_num_BA_unicasts++; }
133 void countBABroadcast() { m_num_BA_broadcasts++; }
147 void countBAUnicast() { m_num_BA_unicasts++; }
148 void countBABroadcast() { m_num_BA_broadcasts++; }
134
149
135 void recordPrediction(bool wasGood, bool wasPredicted);
150 void recordPrediction(bool wasGood, bool wasPredicted);
136
151
137 void startTransaction(int cpu);
138 void endTransaction(int cpu);
139 void profilePFWait(Time waitTime);
152 void startTransaction(int cpu);
153 void endTransaction(int cpu);
154 void profilePFWait(Time waitTime);
140
155
141 void controllerBusy(MachineID machID);
142 void bankBusy();
143 void missLatency(Time t, CacheRequestType type,
144 GenericMachineType respondingMach);
145 void swPrefetchLatency(Time t, CacheRequestType type,
146 GenericMachineType respondingMach);
147 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
148 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
149 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
150 void sequencerRequests(int num) { m_sequencer_requests.add(num); }
151 void storeBuffer(int size, int blocks) {
152 m_store_buffer_size.add(size);
153 m_store_buffer_blocks.add(blocks);
154 }
// Note: the higher-numbered missLatency() takes a RubyRequestType and no
// responding machine, unlike the lower-numbered declaration above.
156 void controllerBusy(MachineID machID);
157 void bankBusy();
158 void missLatency(Time t, RubyRequestType type);
159 void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
160 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
161 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
162 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
163 void sequencerRequests(int num) { m_sequencer_requests.add(num); }
164 void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);}
155
165
156 void profileGetXMaskPrediction(const Set& pred_set);
157 void profileGetSMaskPrediction(const Set& pred_set);
158 void profileTrainingMask(const Set& pred_set);
159 void profileTransition(const string& component, NodeID id, NodeID version,
160 Address addr, const string& state,
161 const string& event, const string& next_state,
162 const string& note);
163 void profileMsgDelay(int virtualNetwork, int delayCycles);
166 void profileGetXMaskPrediction(const Set& pred_set);
167 void profileGetSMaskPrediction(const Set& pred_set);
168 void profileTrainingMask(const Set& pred_set);
169 void profileTransition(const string& component, NodeID version, Address addr,
170 const string& state, const string& event,
171 const string& next_state, const string& note);
172 void profileMsgDelay(int virtualNetwork, int delayCycles);
164
173
165 void print(ostream& out) const;
174 void print(ostream& out) const;
166
175
167 int64 getTotalInstructionsExecuted() const;
168 int64 getTotalTransactionsExecuted() const;
176 int64 getTotalInstructionsExecuted() const;
177 int64 getTotalTransactionsExecuted() const;
169
178
170 Time getRubyStartTime(){
171 return m_ruby_start;
172 }
179 void rubyWatch(int proc);
180 bool watchAddress(Address addr);
173
181
174 // added for MemoryControl:
175 void profileMemReq(int bank);
176 void profileMemBankBusy();
177 void profileMemBusBusy();
178 void profileMemTfawBusy();
179 void profileMemReadWriteBusy();
180 void profileMemDataBusBusy();
181 void profileMemRefresh();
182 void profileMemRead();
183 void profileMemWrite();
184 void profileMemWaitCycles(int cycles);
185 void profileMemInputQ(int cycles);
186 void profileMemBankQ(int cycles);
187 void profileMemArbWait(int cycles);
188 void profileMemRandBusy();
189 void profileMemNotOld();
182 // return Ruby's start time
183 Time getRubyStartTime(){
184 return m_ruby_start;
185 }
190
186
// NOTE(review): `name` presumably selects an entry of
// m_memory_control_profilers -- confirm against the implementation.
187 // added for MemoryControl:
188 void profileMemReq(string name, int bank);
189 void profileMemBankBusy(string name);
190 void profileMemBusBusy(string name);
191 void profileMemTfawBusy(string name);
192 void profileMemReadWriteBusy(string name);
193 void profileMemDataBusBusy(string name);
194 void profileMemRefresh(string name);
195 void profileMemRead(string name);
196 void profileMemWrite(string name);
197 void profileMemWaitCycles(string name, int cycles);
198 void profileMemInputQ(string name, int cycles);
199 void profileMemBankQ(string name, int cycles);
200 void profileMemArbWait(string name, int cycles);
201 void profileMemRandBusy(string name);
202 void profileMemNotOld(string name);
203 //added by SS
204 bool getHotLines() { return m_hot_lines; }
205 bool getAllInstructions() { return m_all_instructions; }
206
191private:
207private:
192 // Private Methods
193 void addL2StatSample(GenericRequestType requestType, AccessModeType type,
194 int msgSize, PrefetchBit pfBit, NodeID id);
195 void addL1DStatSample(const CacheMsg& msg, NodeID id);
196 void addL1IStatSample(const CacheMsg& msg, NodeID id);
208 //added by SS
209 vector<string> m_memory_control_names;
210 // Private Methods
211 void addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
212 void addL1DStatSample(const CacheMsg& msg, NodeID id);
213 void addL1IStatSample(const CacheMsg& msg, NodeID id);
197
214
198 GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type);
215 GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type);
199
216
// Declared private so that Profiler objects cannot be copied or assigned
// from outside the class.
200 // Private copy constructor and assignment operator
201 Profiler(const Profiler& obj);
202 Profiler& operator=(const Profiler& obj);
217 // Private copy constructor and assignment operator
218 Profiler(const Profiler& obj);
219 Profiler& operator=(const Profiler& obj);
203
220
204 // Data Members (m_ prefix)
205 CacheProfiler* m_L1D_cache_profiler_ptr;
206 CacheProfiler* m_L1I_cache_profiler_ptr;
207 CacheProfiler* m_L2_cache_profiler_ptr;
208 AddressProfiler* m_address_profiler_ptr;
209 AddressProfiler* m_inst_profiler_ptr;
221 // Data Members (m_ prefix)
222 CacheProfiler* m_L1D_cache_profiler_ptr;
223 CacheProfiler* m_L1I_cache_profiler_ptr;
224 CacheProfiler* m_L2_cache_profiler_ptr;
225 AddressProfiler* m_address_profiler_ptr;
226 AddressProfiler* m_inst_profiler_ptr;
210
227
211 Vector<int64> m_instructions_executed_at_start;
212 Vector<int64> m_cycles_executed_at_start;
228 Vector<int64> m_instructions_executed_at_start;
229 Vector<int64> m_cycles_executed_at_start;
213
230
214 ostream* m_periodic_output_file_ptr;
215 integer_t m_stats_period;
231 ostream* m_periodic_output_file_ptr;
232 integer_t m_stats_period;
216
233
217 Time m_ruby_start;
218 time_t m_real_time_start_time;
234 Time m_ruby_start;
235 time_t m_real_time_start_time;
219
236
220 int m_num_BA_unicasts;
221 int m_num_BA_broadcasts;
237 int m_num_BA_unicasts;
238 int m_num_BA_broadcasts;
222
239
223 Vector<integer_t> m_perProcTotalMisses;
224 Vector<integer_t> m_perProcUserMisses;
225 Vector<integer_t> m_perProcSupervisorMisses;
226 Vector<integer_t> m_perProcStartTransaction;
227 Vector<integer_t> m_perProcEndTransaction;
228 Vector < Vector < integer_t > > m_busyControllerCount;
229 integer_t m_busyBankCount;
230 Histogram m_multicast_retry_histogram;
240 Vector<integer_t> m_perProcTotalMisses;
241 Vector<integer_t> m_perProcUserMisses;
242 Vector<integer_t> m_perProcSupervisorMisses;
243 Vector<integer_t> m_perProcStartTransaction;
244 Vector<integer_t> m_perProcEndTransaction;
245 Vector < Vector < integer_t > > m_busyControllerCount;
246 integer_t m_busyBankCount;
247 Histogram m_multicast_retry_histogram;
231
248
232 Histogram m_L1tbeProfile;
233 Histogram m_L2tbeProfile;
234 Histogram m_stopTableProfile;
249 Histogram m_L1tbeProfile;
250 Histogram m_L2tbeProfile;
251 Histogram m_stopTableProfile;
235
252
236 Histogram m_filter_action_histogram;
237 Histogram m_tbeProfile;
253 Histogram m_filter_action_histogram;
254 Histogram m_tbeProfile;
238
255
239 Histogram m_sequencer_requests;
240 Histogram m_store_buffer_size;
241 Histogram m_store_buffer_blocks;
242 Histogram m_read_sharing_histogram;
243 Histogram m_write_sharing_histogram;
244 Histogram m_all_sharing_histogram;
245 int64 m_cache_to_cache;
246 int64 m_memory_to_cache;
256 Histogram m_sequencer_requests;
257 Histogram m_store_buffer_size;
258 Histogram m_store_buffer_blocks;
259 Histogram m_read_sharing_histogram;
260 Histogram m_write_sharing_histogram;
261 Histogram m_all_sharing_histogram;
262 int64 m_cache_to_cache;
263 int64 m_memory_to_cache;
247
264
248 Histogram m_prefetchWaitHistogram;
265 Histogram m_prefetchWaitHistogram;
249
266
250 Vector<Histogram> m_missLatencyHistograms;
251 Vector<Histogram> m_machLatencyHistograms;
252 Histogram m_L2MissLatencyHistogram;
253 Histogram m_allMissLatencyHistogram;
267 Vector<Histogram> m_missLatencyHistograms;
268 Vector<Histogram> m_machLatencyHistograms;
269 Histogram m_L2MissLatencyHistogram;
270 Histogram m_allMissLatencyHistogram;
254
271
255 Histogram m_allSWPrefetchLatencyHistogram;
256 Histogram m_SWPrefetchL2MissLatencyHistogram;
257 Vector<Histogram> m_SWPrefetchLatencyHistograms;
258 Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
272 Histogram m_allSWPrefetchLatencyHistogram;
273 Histogram m_SWPrefetchL2MissLatencyHistogram;
274 Vector<Histogram> m_SWPrefetchLatencyHistograms;
275 Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
259
276
260 Histogram m_delayedCyclesHistogram;
261 Histogram m_delayedCyclesNonPFHistogram;
262 Vector<Histogram> m_delayedCyclesVCHistograms;
277 Histogram m_delayedCyclesHistogram;
278 Histogram m_delayedCyclesNonPFHistogram;
279 Vector<Histogram> m_delayedCyclesVCHistograms;
263
280
264 int m_predictions;
265 int m_predictionOpportunities;
266 int m_goodPredictions;
281 int m_predictions;
282 int m_predictionOpportunities;
283 int m_goodPredictions;
267
284
268 Histogram m_gets_mask_prediction;
269 Histogram m_getx_mask_prediction;
270 Histogram m_explicit_training_mask;
285 Histogram m_gets_mask_prediction;
286 Histogram m_getx_mask_prediction;
287 Histogram m_explicit_training_mask;
271
288
272 // For profiling possibly conflicting requests
273 Map<Address, Time>* m_conflicting_map_ptr;
274 Histogram m_conflicting_histogram;
289 // For profiling possibly conflicting requests
290 Map<Address, Time>* m_conflicting_map_ptr;
291 Histogram m_conflicting_histogram;
275
292
276 Histogram m_outstanding_requests;
277 Histogram m_outstanding_persistent_requests;
293 Histogram m_outstanding_requests;
294 Histogram m_outstanding_persistent_requests;
278
295
279 Histogram m_average_latency_estimate;
296 Histogram m_average_latency_estimate;
280
297
281 Map<Address, int>* m_watch_address_list_ptr;
282 // counts all initiated cache request including PUTs
283 int m_requests;
284 Map <string, int>* m_requestProfileMap_ptr;
298 Map<Address, int>* m_watch_address_list_ptr;
299 // counts all initiated cache request including PUTs
300 int m_requests;
301 Map <string, int>* m_requestProfileMap_ptr;
285
302
286 // added for MemoryControl:
287 long long int m_memReq;
288 long long int m_memBankBusy;
289 long long int m_memBusBusy;
290 long long int m_memTfawBusy;
291 long long int m_memReadWriteBusy;
292 long long int m_memDataBusBusy;
293 long long int m_memRefresh;
294 long long int m_memRead;
295 long long int m_memWrite;
296 long long int m_memWaitCycles;
297 long long int m_memInputQ;
298 long long int m_memBankQ;
299 long long int m_memArbWait;
300 long long int m_memRandBusy;
301 long long int m_memNotOld;
302 Vector<long long int> m_memBankCount;
// In the higher-numbered copy the individual m_mem* counters above are
// replaced by this map of memory_control_profiler pointers keyed by string.
303 // added for MemoryControl:
304 //added by SS
305 map< string, memory_control_profiler* > m_memory_control_profilers;
303
306
307 //added by SS
308 bool m_hot_lines;
309 bool m_all_instructions;
310 string m_name;
311
304};
305
306// Output operator declaration
307ostream& operator<<(ostream& out, const Profiler& obj);
308
309// ******************* Definitions *******************
310
311// Output operator definition
312extern inline
313ostream& operator<<(ostream& out, const Profiler& obj)
314{
312};
313
314// Output operator declaration
315ostream& operator<<(ostream& out, const Profiler& obj);
316
317// ******************* Definitions *******************
318
// Streams a Profiler by calling obj.print(out), flushes the stream, and
// returns it so insertions can be chained.
319// Output operator definition
320extern inline
321ostream& operator<<(ostream& out, const Profiler& obj)
322{
315 obj.print(out);
316 out << flush;
317 return out;
323 obj.print(out);
324 out << flush;
325 return out;
318}
319
320#endif //PROFILER_H
321
322
326}
327
328#endif //PROFILER_H
329
330