Profiler.hh (6374:11423b4639c0) Profiler.hh (6433:0f0f0fbef977)
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
34
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
37
38 ----------------------------------------------------------------------
39
40 File modification date: 2008-02-23
41
42 ----------------------------------------------------------------------
43*/
44
45/*
46 * Profiler.hh
47 *
48 * Description:
49 *
50 * $Id$
51 *
52 */
53
54#ifndef PROFILER_H
55#define PROFILER_H
56
57#include "mem/ruby/libruby.hh"
58
59#include "mem/ruby/common/Global.hh"
60#include "mem/protocol/GenericMachineType.hh"
61#include "mem/ruby/common/Histogram.hh"
62#include "mem/ruby/common/Consumer.hh"
63#include "mem/protocol/AccessModeType.hh"
64#include "mem/protocol/AccessType.hh"
65#include "mem/ruby/system/NodeID.hh"
66#include "mem/ruby/system/MachineID.hh"
67#include "mem/protocol/PrefetchBit.hh"
68#include "mem/ruby/common/Address.hh"
69#include "mem/ruby/common/Set.hh"
70#include "mem/protocol/CacheRequestType.hh"
71#include "mem/protocol/GenericRequestType.hh"
72#include "mem/ruby/system/MemoryControl.hh"
73
74class CacheMsg;
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
34
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
37
38 ----------------------------------------------------------------------
39
40 File modification date: 2008-02-23
41
42 ----------------------------------------------------------------------
43*/
44
45/*
46 * Profiler.hh
47 *
48 * Description:
49 *
50 * $Id$
51 *
52 */
53
54#ifndef PROFILER_H
55#define PROFILER_H
56
57#include "mem/ruby/libruby.hh"
58
59#include "mem/ruby/common/Global.hh"
60#include "mem/protocol/GenericMachineType.hh"
61#include "mem/ruby/common/Histogram.hh"
62#include "mem/ruby/common/Consumer.hh"
63#include "mem/protocol/AccessModeType.hh"
64#include "mem/protocol/AccessType.hh"
65#include "mem/ruby/system/NodeID.hh"
66#include "mem/ruby/system/MachineID.hh"
67#include "mem/protocol/PrefetchBit.hh"
68#include "mem/ruby/common/Address.hh"
69#include "mem/ruby/common/Set.hh"
70#include "mem/protocol/CacheRequestType.hh"
71#include "mem/protocol/GenericRequestType.hh"
72#include "mem/ruby/system/MemoryControl.hh"
73
74class CacheMsg;
75class CacheProfiler;
76class AddressProfiler;
77
78template <class KEY_TYPE, class VALUE_TYPE> class Map;
79
// Plain aggregate of counters and configuration values for one memory
// controller. Profiler stores pointers to these in
// m_memory_control_profilers, a map keyed by string.
// NOTE(review): the leading number on each line is a diff-viewer line
// number that was fused onto the text; it is not part of the C++ source.
80struct memory_control_profiler {
// Scalar event/cycle counters. Presumably each is updated by the
// like-named Profiler::profileMem*() method -- those definitions are not
// in this header; TODO confirm.
81 long long int m_memReq;
82 long long int m_memBankBusy;
83 long long int m_memBusBusy;
84 long long int m_memTfawBusy;
85 long long int m_memReadWriteBusy;
86 long long int m_memDataBusBusy;
87 long long int m_memRefresh;
88 long long int m_memRead;
89 long long int m_memWrite;
90 long long int m_memWaitCycles;
91 long long int m_memInputQ;
92 long long int m_memBankQ;
93 long long int m_memArbWait;
94 long long int m_memRandBusy;
95 long long int m_memNotOld;
// Vector of counts; presumably one entry per bank, indexed by the `bank`
// argument of profileMemReq() -- TODO confirm.
96 Vector<long long int> m_memBankCount;
// Integer configuration values; by their names these look like the memory
// geometry of the controller -- TODO confirm where they are set.
97 int m_banks_per_rank;
98 int m_ranks_per_dimm;
99 int m_dimms_per_channel;
100};
101
102
103class Profiler : public Consumer {
104public:
105 // Constructors
106 Profiler(const string & name);
107
108 void init(const vector<string> & argv, vector<string> memory_control_names);
109
110 // Destructor
111 ~Profiler();
112
113 // Public Methods
114 void wakeup();
115
116 void setPeriodicStatsFile(const string& filename);
117 void setPeriodicStatsInterval(integer_t period);
118
119 void printStats(ostream& out, bool short_stats=false);
// Shorthand for printStats(out, true), i.e. with short_stats set.
120 void printShortStats(ostream& out) { printStats(out, true); }
121 void printTraceStats(ostream& out) const;
122 void clearStats();
123 void printConfig(ostream& out) const;
124 void printResourceUsage(ostream& out) const;
125
// Returns the raw pointer held in m_address_profiler_ptr.
126 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
// Returns the raw pointer held in m_inst_profiler_ptr.
127 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
128
129 void addAddressTraceSample(const CacheMsg& msg, NodeID id);
130
131 void profileRequest(const string& requestStr);
132 void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner);
133
134 void profileMulticastRetry(const Address& addr, int count);
135
136 void profileFilterAction(int action);
137
138 void profileConflictingRequests(const Address& addr);
// Passes `outstanding` to add() on the m_outstanding_requests Histogram.
139 void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); }
// Passes `outstanding` to add() on the m_outstanding_persistent_requests
// Histogram.
140 void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
// Passes `latency` to add() on the m_average_latency_estimate Histogram.
141 void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }
142
75class AddressProfiler;
76
77template <class KEY_TYPE, class VALUE_TYPE> class Map;
78
// Plain aggregate of counters and configuration values for one memory
// controller. Profiler stores pointers to these in
// m_memory_control_profilers, a map keyed by string.
// NOTE(review): the leading number on each line is a diff-viewer line
// number that was fused onto the text; it is not part of the C++ source.
79struct memory_control_profiler {
// Scalar event/cycle counters. Presumably each is updated by the
// like-named Profiler::profileMem*() method -- those definitions are not
// in this header; TODO confirm.
80 long long int m_memReq;
81 long long int m_memBankBusy;
82 long long int m_memBusBusy;
83 long long int m_memTfawBusy;
84 long long int m_memReadWriteBusy;
85 long long int m_memDataBusBusy;
86 long long int m_memRefresh;
87 long long int m_memRead;
88 long long int m_memWrite;
89 long long int m_memWaitCycles;
90 long long int m_memInputQ;
91 long long int m_memBankQ;
92 long long int m_memArbWait;
93 long long int m_memRandBusy;
94 long long int m_memNotOld;
// Vector of counts; presumably one entry per bank, indexed by the `bank`
// argument of profileMemReq() -- TODO confirm.
95 Vector<long long int> m_memBankCount;
// Integer configuration values; by their names these look like the memory
// geometry of the controller -- TODO confirm where they are set.
96 int m_banks_per_rank;
97 int m_ranks_per_dimm;
98 int m_dimms_per_channel;
99};
100
101
102class Profiler : public Consumer {
103public:
104 // Constructors
105 Profiler(const string & name);
106
107 void init(const vector<string> & argv, vector<string> memory_control_names);
108
109 // Destructor
110 ~Profiler();
111
112 // Public Methods
113 void wakeup();
114
115 void setPeriodicStatsFile(const string& filename);
116 void setPeriodicStatsInterval(integer_t period);
117
118 void printStats(ostream& out, bool short_stats=false);
// Shorthand for printStats(out, true), i.e. with short_stats set.
119 void printShortStats(ostream& out) { printStats(out, true); }
120 void printTraceStats(ostream& out) const;
121 void clearStats();
122 void printConfig(ostream& out) const;
123 void printResourceUsage(ostream& out) const;
124
// Returns the raw pointer held in m_address_profiler_ptr.
125 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
// Returns the raw pointer held in m_inst_profiler_ptr.
126 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
127
128 void addAddressTraceSample(const CacheMsg& msg, NodeID id);
129
130 void profileRequest(const string& requestStr);
131 void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner);
132
133 void profileMulticastRetry(const Address& addr, int count);
134
135 void profileFilterAction(int action);
136
137 void profileConflictingRequests(const Address& addr);
// Passes `outstanding` to add() on the m_outstanding_requests Histogram.
138 void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); }
// Passes `outstanding` to add() on the m_outstanding_persistent_requests
// Histogram.
139 void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
// Passes `latency` to add() on the m_average_latency_estimate Histogram.
140 void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }
141
// Increments the m_num_BA_unicasts counter by one.
143 void countBAUnicast() { m_num_BA_unicasts++; }
// Increments the m_num_BA_broadcasts counter by one.
144 void countBABroadcast() { m_num_BA_broadcasts++; }
145
146 void recordPrediction(bool wasGood, bool wasPredicted);
147
148 void startTransaction(int cpu);
149 void endTransaction(int cpu);
150 void profilePFWait(Time waitTime);
151
152 void controllerBusy(MachineID machID);
153 void bankBusy();
154 void missLatency(Time t, RubyRequestType type);
155 void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
142 void recordPrediction(bool wasGood, bool wasPredicted);
143
144 void startTransaction(int cpu);
145 void endTransaction(int cpu);
146 void profilePFWait(Time waitTime);
147
148 void controllerBusy(MachineID machID);
149 void bankBusy();
150 void missLatency(Time t, RubyRequestType type);
151 void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
// Passes `num` to add() on the m_stopTableProfile Histogram.
156 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
// Passes `num` to add() on the m_L1tbeProfile Histogram.
157 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
// Passes `num` to add() on the m_L2tbeProfile Histogram.
158 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
// Passes `num` to add() on the m_sequencer_requests Histogram.
159 void sequencerRequests(int num) { m_sequencer_requests.add(num); }
// Passes `num` to add() on the m_sequencer_requests Histogram.
152 void sequencerRequests(int num) { m_sequencer_requests.add(num); }
// Records two values in one call: `size` goes to m_store_buffer_size.add()
// and `blocks` goes to m_store_buffer_blocks.add() (both Histograms).
160 void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);}
161
153
162 void profileGetXMaskPrediction(const Set& pred_set);
163 void profileGetSMaskPrediction(const Set& pred_set);
164 void profileTrainingMask(const Set& pred_set);
165 void profileTransition(const string& component, NodeID version, Address addr,
166 const string& state, const string& event,
167 const string& next_state, const string& note);
168 void profileMsgDelay(int virtualNetwork, int delayCycles);
169
170 void print(ostream& out) const;
171
154 void profileTransition(const string& component, NodeID version, Address addr,
155 const string& state, const string& event,
156 const string& next_state, const string& note);
157 void profileMsgDelay(int virtualNetwork, int delayCycles);
158
159 void print(ostream& out) const;
160
172 int64 getTotalInstructionsExecuted() const;
173 int64 getTotalTransactionsExecuted() const;
174
175 void rubyWatch(int proc);
176 bool watchAddress(Address addr);
177
178 // Returns m_ruby_start, the stored Ruby start time (type Time).
179 Time getRubyStartTime(){
180 return m_ruby_start;
181 }
182
183 // added for MemoryControl:
184 void profileMemReq(string name, int bank);
185 void profileMemBankBusy(string name);
186 void profileMemBusBusy(string name);
187 void profileMemTfawBusy(string name);
188 void profileMemReadWriteBusy(string name);
189 void profileMemDataBusBusy(string name);
190 void profileMemRefresh(string name);
191 void profileMemRead(string name);
192 void profileMemWrite(string name);
193 void profileMemWaitCycles(string name, int cycles);
194 void profileMemInputQ(string name, int cycles);
195 void profileMemBankQ(string name, int cycles);
196 void profileMemArbWait(string name, int cycles);
197 void profileMemRandBusy(string name);
198 void profileMemNotOld(string name);
199 //added by SS
// Returns the m_hot_lines flag.
200 bool getHotLines() { return m_hot_lines; }
// Returns the m_all_instructions flag.
201 bool getAllInstructions() { return m_all_instructions; }
202
203private:
204 //added by SS
205 vector<string> m_memory_control_names;
206
207 // Private copy constructor and assignment operator
208 Profiler(const Profiler& obj);
209 Profiler& operator=(const Profiler& obj);
210
211 // Data Members (m_ prefix)
161 int64 getTotalTransactionsExecuted() const;
162
163 void rubyWatch(int proc);
164 bool watchAddress(Address addr);
165
166 // Returns m_ruby_start, the stored Ruby start time (type Time).
167 Time getRubyStartTime(){
168 return m_ruby_start;
169 }
170
171 // added for MemoryControl:
172 void profileMemReq(string name, int bank);
173 void profileMemBankBusy(string name);
174 void profileMemBusBusy(string name);
175 void profileMemTfawBusy(string name);
176 void profileMemReadWriteBusy(string name);
177 void profileMemDataBusBusy(string name);
178 void profileMemRefresh(string name);
179 void profileMemRead(string name);
180 void profileMemWrite(string name);
181 void profileMemWaitCycles(string name, int cycles);
182 void profileMemInputQ(string name, int cycles);
183 void profileMemBankQ(string name, int cycles);
184 void profileMemArbWait(string name, int cycles);
185 void profileMemRandBusy(string name);
186 void profileMemNotOld(string name);
187 //added by SS
// Returns the m_hot_lines flag.
188 bool getHotLines() { return m_hot_lines; }
// Returns the m_all_instructions flag.
189 bool getAllInstructions() { return m_all_instructions; }
190
191private:
192 //added by SS
193 vector<string> m_memory_control_names;
194
195 // Private copy constructor and assignment operator
196 Profiler(const Profiler& obj);
197 Profiler& operator=(const Profiler& obj);
198
199 // Data Members (m_ prefix)
212 CacheProfiler* m_L1D_cache_profiler_ptr;
213 CacheProfiler* m_L1I_cache_profiler_ptr;
214 CacheProfiler* m_L2_cache_profiler_ptr;
215 AddressProfiler* m_address_profiler_ptr;
216 AddressProfiler* m_inst_profiler_ptr;
217
218 Vector<int64> m_instructions_executed_at_start;
219 Vector<int64> m_cycles_executed_at_start;
220
221 ostream* m_periodic_output_file_ptr;
222 integer_t m_stats_period;
223
224 Time m_ruby_start;
225 time_t m_real_time_start_time;
226
200 AddressProfiler* m_address_profiler_ptr;
201 AddressProfiler* m_inst_profiler_ptr;
202
203 Vector<int64> m_instructions_executed_at_start;
204 Vector<int64> m_cycles_executed_at_start;
205
206 ostream* m_periodic_output_file_ptr;
207 integer_t m_stats_period;
208
209 Time m_ruby_start;
210 time_t m_real_time_start_time;
211
227 int m_num_BA_unicasts;
228 int m_num_BA_broadcasts;
229
230 Vector<integer_t> m_perProcTotalMisses;
231 Vector<integer_t> m_perProcUserMisses;
232 Vector<integer_t> m_perProcSupervisorMisses;
233 Vector<integer_t> m_perProcStartTransaction;
234 Vector<integer_t> m_perProcEndTransaction;
235 Vector < Vector < integer_t > > m_busyControllerCount;
236 integer_t m_busyBankCount;
237 Histogram m_multicast_retry_histogram;
238
212 Vector<integer_t> m_perProcTotalMisses;
213 Vector<integer_t> m_perProcUserMisses;
214 Vector<integer_t> m_perProcSupervisorMisses;
215 Vector<integer_t> m_perProcStartTransaction;
216 Vector<integer_t> m_perProcEndTransaction;
217 Vector < Vector < integer_t > > m_busyControllerCount;
218 integer_t m_busyBankCount;
219 Histogram m_multicast_retry_histogram;
220
239 Histogram m_L1tbeProfile;
240 Histogram m_L2tbeProfile;
241 Histogram m_stopTableProfile;
242
243 Histogram m_filter_action_histogram;
244 Histogram m_tbeProfile;
245
246 Histogram m_sequencer_requests;
221 Histogram m_filter_action_histogram;
222 Histogram m_tbeProfile;
223
224 Histogram m_sequencer_requests;
247 Histogram m_store_buffer_size;
248 Histogram m_store_buffer_blocks;
249 Histogram m_read_sharing_histogram;
250 Histogram m_write_sharing_histogram;
251 Histogram m_all_sharing_histogram;
252 int64 m_cache_to_cache;
253 int64 m_memory_to_cache;
254
255 Histogram m_prefetchWaitHistogram;
256
257 Vector<Histogram> m_missLatencyHistograms;
258 Vector<Histogram> m_machLatencyHistograms;
225 Histogram m_read_sharing_histogram;
226 Histogram m_write_sharing_histogram;
227 Histogram m_all_sharing_histogram;
228 int64 m_cache_to_cache;
229 int64 m_memory_to_cache;
230
231 Histogram m_prefetchWaitHistogram;
232
233 Vector<Histogram> m_missLatencyHistograms;
234 Vector<Histogram> m_machLatencyHistograms;
259 Histogram m_L2MissLatencyHistogram;
260 Histogram m_allMissLatencyHistogram;
261
262 Histogram m_allSWPrefetchLatencyHistogram;
263 Histogram m_SWPrefetchL2MissLatencyHistogram;
264 Vector<Histogram> m_SWPrefetchLatencyHistograms;
265 Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
266
267 Histogram m_delayedCyclesHistogram;
268 Histogram m_delayedCyclesNonPFHistogram;
269 Vector<Histogram> m_delayedCyclesVCHistograms;
270
235 Histogram m_allMissLatencyHistogram;
236
237 Histogram m_allSWPrefetchLatencyHistogram;
238 Histogram m_SWPrefetchL2MissLatencyHistogram;
239 Vector<Histogram> m_SWPrefetchLatencyHistograms;
240 Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
241
242 Histogram m_delayedCyclesHistogram;
243 Histogram m_delayedCyclesNonPFHistogram;
244 Vector<Histogram> m_delayedCyclesVCHistograms;
245
271 int m_predictions;
272 int m_predictionOpportunities;
273 int m_goodPredictions;
274
275 Histogram m_gets_mask_prediction;
276 Histogram m_getx_mask_prediction;
277 Histogram m_explicit_training_mask;
278
279 // For profiling possibly conflicting requests
280 Map<Address, Time>* m_conflicting_map_ptr;
281 Histogram m_conflicting_histogram;
282
283 Histogram m_outstanding_requests;
284 Histogram m_outstanding_persistent_requests;
285
286 Histogram m_average_latency_estimate;
287
288 Map<Address, int>* m_watch_address_list_ptr;
289 // counts all initiated cache requests, including PUTs
290 int m_requests;
291 Map <string, int>* m_requestProfileMap_ptr;
292
293 // added for MemoryControl:
294 //added by SS
295 map< string, memory_control_profiler* > m_memory_control_profilers;
296
297 //added by SS
298 bool m_hot_lines;
299 bool m_all_instructions;
300 string m_name;
301
302};
303
304// Output operator declaration
305ostream& operator<<(ostream& out, const Profiler& obj);
306
307// ******************* Definitions *******************
308
309// Output operator definition
// Stream-insertion operator for Profiler: delegates the formatting to
// obj.print(out), then inserts `flush` into the stream and returns it so
// that insertions can be chained.
// NOTE(review): declared `extern inline` in a header; the meaning of that
// combination varies between compilers and language modes -- confirm it
// still links as intended with the toolchain in use.
310extern inline
311ostream& operator<<(ostream& out, const Profiler& obj)
312{
313 obj.print(out);
314 out << flush;
315 return out;
316}
317
318#endif //PROFILER_H
319
320
246 Histogram m_outstanding_requests;
247 Histogram m_outstanding_persistent_requests;
248
249 Histogram m_average_latency_estimate;
250
251 Map<Address, int>* m_watch_address_list_ptr;
252 // counts all initiated cache requests, including PUTs
253 int m_requests;
254 Map <string, int>* m_requestProfileMap_ptr;
255
256 // added for MemoryControl:
257 //added by SS
258 map< string, memory_control_profiler* > m_memory_control_profilers;
259
260 //added by SS
261 bool m_hot_lines;
262 bool m_all_instructions;
263 string m_name;
264
265};
266
267// Output operator declaration
268ostream& operator<<(ostream& out, const Profiler& obj);
269
270// ******************* Definitions *******************
271
272// Output operator definition
// Stream-insertion operator for Profiler: delegates the formatting to
// obj.print(out), then inserts `flush` into the stream and returns it so
// that insertions can be chained.
// NOTE(review): declared `extern inline` in a header; the meaning of that
// combination varies between compilers and language modes -- confirm it
// still links as intended with the toolchain in use.
273extern inline
274ostream& operator<<(ostream& out, const Profiler& obj)
275{
276 obj.print(out);
277 out << flush;
278 return out;
279}
280
281#endif //PROFILER_H
282
283