1/* 2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45/* 46 * Profiler.hh 47 * 48 * Description: 49 * 50 * $Id$ 51 * 52 */ 53 54#ifndef PROFILER_H 55#define PROFILER_H 56
|
57#include "mem/ruby/libruby.hh" 58 |
59#include "mem/ruby/common/Global.hh" 60#include "mem/protocol/GenericMachineType.hh" 61#include "mem/ruby/config/RubyConfig.hh" 62#include "mem/ruby/common/Histogram.hh" 63#include "mem/ruby/common/Consumer.hh" 64#include "mem/protocol/AccessModeType.hh" 65#include "mem/protocol/AccessType.hh" 66#include "mem/ruby/system/NodeID.hh" 67#include "mem/ruby/system/MachineID.hh" 68#include "mem/protocol/PrefetchBit.hh" 69#include "mem/ruby/common/Address.hh" 70#include "mem/ruby/common/Set.hh" 71#include "mem/protocol/CacheRequestType.hh" 72#include "mem/protocol/GenericRequestType.hh"
|
73#include "mem/ruby/system/MemoryControl.hh" |
74 75class CacheMsg; 76class CacheProfiler; 77class AddressProfiler; 78 79template <class KEY_TYPE, class VALUE_TYPE> class Map; 80
|
81struct memory_control_profiler { 82 long long int m_memReq; 83 long long int m_memBankBusy; 84 long long int m_memBusBusy; 85 long long int m_memTfawBusy; 86 long long int m_memReadWriteBusy; 87 long long int m_memDataBusBusy; 88 long long int m_memRefresh; 89 long long int m_memRead; 90 long long int m_memWrite; 91 long long int m_memWaitCycles; 92 long long int m_memInputQ; 93 long long int m_memBankQ; 94 long long int m_memArbWait; 95 long long int m_memRandBusy; 96 long long int m_memNotOld; 97 Vector<long long int> m_memBankCount; 98 int m_banks_per_rank; 99 int m_ranks_per_dimm; 100 int m_dimms_per_channel; 101}; 102 103 |
104class Profiler : public Consumer { 105public:
|
80 // Constructors
81 Profiler();
|
106 // Constructors 107 Profiler(const string & name); |
108
|
83 // Destructor
84 ~Profiler();
|
109 void init(const vector<string> & argv, vector<string> memory_control_names); |
110
|
86 // Public Methods
87 void wakeup();
|
111 // Destructor 112 ~Profiler(); |
113
|
89 void setPeriodicStatsFile(const string& filename);
90 void setPeriodicStatsInterval(integer_t period);
|
114 // Public Methods 115 void wakeup(); |
116
|
92 void printStats(ostream& out, bool short_stats=false);
93 void printShortStats(ostream& out) { printStats(out, true); }
94 void printTraceStats(ostream& out) const;
95 void clearStats();
96 void printConfig(ostream& out) const;
97 void printResourceUsage(ostream& out) const;
|
117 void setPeriodicStatsFile(const string& filename); 118 void setPeriodicStatsInterval(integer_t period); |
119
|
99 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
100 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
|
120 void printStats(ostream& out, bool short_stats=false); 121 void printShortStats(ostream& out) { printStats(out, true); } 122 void printTraceStats(ostream& out) const; 123 void clearStats(); 124 void printConfig(ostream& out) const; 125 void printResourceUsage(ostream& out) const; |
126
|
102 void addPrimaryStatSample(const CacheMsg& msg, NodeID id);
103 void addSecondaryStatSample(GenericRequestType requestType,
104 AccessModeType type, int msgSize,
105 PrefetchBit pfBit, NodeID id);
106 void addSecondaryStatSample(CacheRequestType requestType,
107 AccessModeType type, int msgSize,
108 PrefetchBit pfBit, NodeID id);
109 void addAddressTraceSample(const CacheMsg& msg, NodeID id);
|
127 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } 128 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } |
129
|
111 void profileRequest(const string& requestStr);
112 void profileSharing(const Address& addr, AccessType type,
113 NodeID requestor, const Set& sharers,
114 const Set& owner);
|
130 void addPrimaryStatSample(const CacheMsg& msg, NodeID id); 131 void addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 132 void addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 133 void addAddressTraceSample(const CacheMsg& msg, NodeID id); |
134
|
116 void profileMulticastRetry(const Address& addr, int count);
|
135 void profileRequest(const string& requestStr); 136 void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner); |
137
|
118 void profileFilterAction(int action);
|
138 void profileMulticastRetry(const Address& addr, int count); |
139
|
120 void profileConflictingRequests(const Address& addr);
121 void profileOutstandingRequest(int outstanding) {
122 m_outstanding_requests.add(outstanding);
123 }
|
140 void profileFilterAction(int action); |
141
|
125 void profileOutstandingPersistentRequest(int outstanding) {
126 m_outstanding_persistent_requests.add(outstanding);
127 }
128 void profileAverageLatencyEstimate(int latency) {
129 m_average_latency_estimate.add(latency);
130 }
|
142 void profileConflictingRequests(const Address& addr); 143 void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); } 144 void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } 145 void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } |
146
|
132 void countBAUnicast() { m_num_BA_unicasts++; }
133 void countBABroadcast() { m_num_BA_broadcasts++; }
|
147 void countBAUnicast() { m_num_BA_unicasts++; } 148 void countBABroadcast() { m_num_BA_broadcasts++; } |
149
|
135 void recordPrediction(bool wasGood, bool wasPredicted);
|
150 void recordPrediction(bool wasGood, bool wasPredicted); |
151
|
137 void startTransaction(int cpu);
138 void endTransaction(int cpu);
139 void profilePFWait(Time waitTime);
|
152 void startTransaction(int cpu); 153 void endTransaction(int cpu); 154 void profilePFWait(Time waitTime); |
155
|
141 void controllerBusy(MachineID machID);
142 void bankBusy();
143 void missLatency(Time t, CacheRequestType type,
144 GenericMachineType respondingMach);
145 void swPrefetchLatency(Time t, CacheRequestType type,
146 GenericMachineType respondingMach);
147 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
148 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
149 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
150 void sequencerRequests(int num) { m_sequencer_requests.add(num); }
151 void storeBuffer(int size, int blocks) {
152 m_store_buffer_size.add(size);
153 m_store_buffer_blocks.add(blocks);
154 }
|
156 void controllerBusy(MachineID machID); 157 void bankBusy(); 158 void missLatency(Time t, RubyRequestType type); 159 void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); 160 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); } 161 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); } 162 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); } 163 void sequencerRequests(int num) { m_sequencer_requests.add(num); } 164 void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);} |
165
|
156 void profileGetXMaskPrediction(const Set& pred_set);
157 void profileGetSMaskPrediction(const Set& pred_set);
158 void profileTrainingMask(const Set& pred_set);
159 void profileTransition(const string& component, NodeID id, NodeID version,
160 Address addr, const string& state,
161 const string& event, const string& next_state,
162 const string& note);
163 void profileMsgDelay(int virtualNetwork, int delayCycles);
|
166 void profileGetXMaskPrediction(const Set& pred_set); 167 void profileGetSMaskPrediction(const Set& pred_set); 168 void profileTrainingMask(const Set& pred_set); 169 void profileTransition(const string& component, NodeID version, Address addr, 170 const string& state, const string& event, 171 const string& next_state, const string& note); 172 void profileMsgDelay(int virtualNetwork, int delayCycles); |
173
|
165 void print(ostream& out) const;
|
174 void print(ostream& out) const; |
175
|
167 int64 getTotalInstructionsExecuted() const;
168 int64 getTotalTransactionsExecuted() const;
|
176 int64 getTotalInstructionsExecuted() const; 177 int64 getTotalTransactionsExecuted() const; |
178
|
170 Time getRubyStartTime(){
171 return m_ruby_start;
172 }
|
179 void rubyWatch(int proc); 180 bool watchAddress(Address addr); |
181
|
174 // added for MemoryControl:
175 void profileMemReq(int bank);
176 void profileMemBankBusy();
177 void profileMemBusBusy();
178 void profileMemTfawBusy();
179 void profileMemReadWriteBusy();
180 void profileMemDataBusBusy();
181 void profileMemRefresh();
182 void profileMemRead();
183 void profileMemWrite();
184 void profileMemWaitCycles(int cycles);
185 void profileMemInputQ(int cycles);
186 void profileMemBankQ(int cycles);
187 void profileMemArbWait(int cycles);
188 void profileMemRandBusy();
189 void profileMemNotOld();
|
182 // return Ruby's start time 183 Time getRubyStartTime(){ 184 return m_ruby_start; 185 } |
186
|
187 // added for MemoryControl: 188 void profileMemReq(string name, int bank); 189 void profileMemBankBusy(string name); 190 void profileMemBusBusy(string name); 191 void profileMemTfawBusy(string name); 192 void profileMemReadWriteBusy(string name); 193 void profileMemDataBusBusy(string name); 194 void profileMemRefresh(string name); 195 void profileMemRead(string name); 196 void profileMemWrite(string name); 197 void profileMemWaitCycles(string name, int cycles); 198 void profileMemInputQ(string name, int cycles); 199 void profileMemBankQ(string name, int cycles); 200 void profileMemArbWait(string name, int cycles); 201 void profileMemRandBusy(string name); 202 void profileMemNotOld(string name); 203 //added by SS 204 bool getHotLines() { return m_hot_lines; } 205 bool getAllInstructions() { return m_all_instructions; } 206 |
207private:
|
192 // Private Methods
193 void addL2StatSample(GenericRequestType requestType, AccessModeType type,
194 int msgSize, PrefetchBit pfBit, NodeID id);
195 void addL1DStatSample(const CacheMsg& msg, NodeID id);
196 void addL1IStatSample(const CacheMsg& msg, NodeID id);
|
208 //added by SS 209 vector<string> m_memory_control_names; 210 // Private Methods 211 void addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 212 void addL1DStatSample(const CacheMsg& msg, NodeID id); 213 void addL1IStatSample(const CacheMsg& msg, NodeID id); |
214
|
198 GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type);
|
215 GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type); |
216
|
200 // Private copy constructor and assignment operator
201 Profiler(const Profiler& obj);
202 Profiler& operator=(const Profiler& obj);
|
217 // Private copy constructor and assignment operator 218 Profiler(const Profiler& obj); 219 Profiler& operator=(const Profiler& obj); |
220
|
204 // Data Members (m_ prefix)
205 CacheProfiler* m_L1D_cache_profiler_ptr;
206 CacheProfiler* m_L1I_cache_profiler_ptr;
207 CacheProfiler* m_L2_cache_profiler_ptr;
208 AddressProfiler* m_address_profiler_ptr;
209 AddressProfiler* m_inst_profiler_ptr;
|
221 // Data Members (m_ prefix) 222 CacheProfiler* m_L1D_cache_profiler_ptr; 223 CacheProfiler* m_L1I_cache_profiler_ptr; 224 CacheProfiler* m_L2_cache_profiler_ptr; 225 AddressProfiler* m_address_profiler_ptr; 226 AddressProfiler* m_inst_profiler_ptr; |
227
|
211 Vector<int64> m_instructions_executed_at_start;
212 Vector<int64> m_cycles_executed_at_start;
|
228 Vector m_instructions_executed_at_start; 229 Vector m_cycles_executed_at_start; |
230
|
214 ostream* m_periodic_output_file_ptr;
215 integer_t m_stats_period;
|
231 ostream* m_periodic_output_file_ptr; 232 integer_t m_stats_period; |
233
|
217 Time m_ruby_start;
218 time_t m_real_time_start_time;
|
234 Time m_ruby_start; 235 time_t m_real_time_start_time; |
236
|
220 int m_num_BA_unicasts;
221 int m_num_BA_broadcasts;
|
237 int m_num_BA_unicasts; 238 int m_num_BA_broadcasts; |
239
|
223 Vector<integer_t> m_perProcTotalMisses;
224 Vector<integer_t> m_perProcUserMisses;
225 Vector<integer_t> m_perProcSupervisorMisses;
226 Vector<integer_t> m_perProcStartTransaction;
227 Vector<integer_t> m_perProcEndTransaction;
228 Vector < Vector < integer_t > > m_busyControllerCount;
229 integer_t m_busyBankCount;
230 Histogram m_multicast_retry_histogram;
|
240 Vector m_perProcTotalMisses; 241 Vector m_perProcUserMisses; 242 Vector m_perProcSupervisorMisses; 243 Vector m_perProcStartTransaction; 244 Vector m_perProcEndTransaction; 245 Vector < Vector < integer_t > > m_busyControllerCount; 246 integer_t m_busyBankCount; 247 Histogram m_multicast_retry_histogram; |
248
|
232 Histogram m_L1tbeProfile;
233 Histogram m_L2tbeProfile;
234 Histogram m_stopTableProfile;
|
249 Histogram m_L1tbeProfile; 250 Histogram m_L2tbeProfile; 251 Histogram m_stopTableProfile; |
252
|
236 Histogram m_filter_action_histogram;
237 Histogram m_tbeProfile;
|
253 Histogram m_filter_action_histogram; 254 Histogram m_tbeProfile; |
255
|
239 Histogram m_sequencer_requests;
240 Histogram m_store_buffer_size;
241 Histogram m_store_buffer_blocks;
242 Histogram m_read_sharing_histogram;
243 Histogram m_write_sharing_histogram;
244 Histogram m_all_sharing_histogram;
245 int64 m_cache_to_cache;
246 int64 m_memory_to_cache;
|
256 Histogram m_sequencer_requests; 257 Histogram m_store_buffer_size; 258 Histogram m_store_buffer_blocks; 259 Histogram m_read_sharing_histogram; 260 Histogram m_write_sharing_histogram; 261 Histogram m_all_sharing_histogram; 262 int64 m_cache_to_cache; 263 int64 m_memory_to_cache; |
264
|
248 Histogram m_prefetchWaitHistogram;
|
265 Histogram m_prefetchWaitHistogram; |
266
|
250 Vector<Histogram> m_missLatencyHistograms;
251 Vector<Histogram> m_machLatencyHistograms;
252 Histogram m_L2MissLatencyHistogram;
253 Histogram m_allMissLatencyHistogram;
|
267 Vector m_missLatencyHistograms; 268 Vector m_machLatencyHistograms; 269 Histogram m_L2MissLatencyHistogram; 270 Histogram m_allMissLatencyHistogram; |
271
|
255 Histogram m_allSWPrefetchLatencyHistogram;
256 Histogram m_SWPrefetchL2MissLatencyHistogram;
257 Vector<Histogram> m_SWPrefetchLatencyHistograms;
258 Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
|
272 Histogram m_allSWPrefetchLatencyHistogram; 273 Histogram m_SWPrefetchL2MissLatencyHistogram; 274 Vector m_SWPrefetchLatencyHistograms; 275 Vector m_SWPrefetchMachLatencyHistograms; |
276
|
260 Histogram m_delayedCyclesHistogram;
261 Histogram m_delayedCyclesNonPFHistogram;
262 Vector<Histogram> m_delayedCyclesVCHistograms;
|
277 Histogram m_delayedCyclesHistogram; 278 Histogram m_delayedCyclesNonPFHistogram; 279 Vector m_delayedCyclesVCHistograms; |
280
|
264 int m_predictions;
265 int m_predictionOpportunities;
266 int m_goodPredictions;
|
281 int m_predictions; 282 int m_predictionOpportunities; 283 int m_goodPredictions; |
284
|
268 Histogram m_gets_mask_prediction;
269 Histogram m_getx_mask_prediction;
270 Histogram m_explicit_training_mask;
|
285 Histogram m_gets_mask_prediction; 286 Histogram m_getx_mask_prediction; 287 Histogram m_explicit_training_mask; |
288
|
272 // For profiling possibly conflicting requests
273 Map<Address, Time>* m_conflicting_map_ptr;
274 Histogram m_conflicting_histogram;
|
289 // For profiling possibly conflicting requests 290 Map* m_conflicting_map_ptr; 291 Histogram m_conflicting_histogram; |
292
|
276 Histogram m_outstanding_requests;
277 Histogram m_outstanding_persistent_requests;
|
293 Histogram m_outstanding_requests; 294 Histogram m_outstanding_persistent_requests; |
295
|
279 Histogram m_average_latency_estimate;
|
296 Histogram m_average_latency_estimate; |
297
|
281 Map<Address, int>* m_watch_address_list_ptr;
282 // counts all initiated cache request including PUTs
283 int m_requests;
284 Map <string, int>* m_requestProfileMap_ptr;
|
298 Map* m_watch_address_list_ptr; 299 // counts all initiated cache request including PUTs 300 int m_requests; 301 Map * m_requestProfileMap_ptr; |
302
|
286 // added for MemoryControl:
287 long long int m_memReq;
288 long long int m_memBankBusy;
289 long long int m_memBusBusy;
290 long long int m_memTfawBusy;
291 long long int m_memReadWriteBusy;
292 long long int m_memDataBusBusy;
293 long long int m_memRefresh;
294 long long int m_memRead;
295 long long int m_memWrite;
296 long long int m_memWaitCycles;
297 long long int m_memInputQ;
298 long long int m_memBankQ;
299 long long int m_memArbWait;
300 long long int m_memRandBusy;
301 long long int m_memNotOld;
302 Vector<long long int> m_memBankCount;
|
303 // added for MemoryControl: 304 //added by SS 305 map< string, memory_control_profiler* > m_memory_control_profilers; |
306
|
307 //added by SS 308 bool m_hot_lines; 309 bool m_all_instructions; 310 string m_name; 311 |
312}; 313 314// Output operator declaration 315ostream& operator<<(ostream& out, const Profiler& obj); 316 317// ******************* Definitions ******************* 318 319// Output operator definition 320extern inline 321ostream& operator<<(ostream& out, const Profiler& obj) 322{
|
315 obj.print(out);
316 out << flush;
317 return out;
|
323 obj.print(out); 324 out << flush; 325 return out; |
326} 327 328#endif //PROFILER_H 329 330
|