Profiler.hh revision 6284:a63d1dc4c820
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30   This file has been modified by Kevin Moore and Dan Nussbaum of the
31   Scalable Systems Research Group at Sun Microsystems Laboratories
32   (http://research.sun.com/scalable/) to support the Adaptive
33   Transactional Memory Test Platform (ATMTP).
34
35   Please send email to atmtp-interest@sun.com with feedback, questions, or
36   to request future announcements about ATMTP.
37
38   ----------------------------------------------------------------------
39
40   File modification date: 2008-02-23
41
42   ----------------------------------------------------------------------
43*/
44
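/*
 * Profiler.hh
 *
 * Description: Declares the Profiler class, a Consumer that holds the
 * counters and histograms recorded for the Ruby memory system, together
 * with its stream output operator.
 *
 * $Id$
 *
 */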
53
54#ifndef PROFILER_H
55#define PROFILER_H
56
57#include "mem/ruby/common/Global.hh"
58#include "mem/protocol/GenericMachineType.hh"
59#include "mem/ruby/config/RubyConfig.hh"
60#include "mem/ruby/common/Histogram.hh"
61#include "mem/ruby/common/Consumer.hh"
62#include "mem/protocol/AccessModeType.hh"
63#include "mem/protocol/AccessType.hh"
64#include "mem/ruby/system/NodeID.hh"
65#include "mem/ruby/system/MachineID.hh"
66#include "mem/protocol/PrefetchBit.hh"
67#include "mem/ruby/common/Address.hh"
68#include "mem/ruby/common/Set.hh"
69#include "mem/protocol/CacheRequestType.hh"
70#include "mem/protocol/GenericRequestType.hh"
71
72class CacheMsg;
73class CacheProfiler;
74class AddressProfiler;
75
76template <class KEY_TYPE, class VALUE_TYPE> class Map;
77
78class Profiler : public Consumer {
79public:
80    // Constructors
81    Profiler();
82
83    // Destructor
84    ~Profiler();
85
86    // Public Methods
87    void wakeup();
88
89    void setPeriodicStatsFile(const string& filename);
90    void setPeriodicStatsInterval(integer_t period);
91
92    void printStats(ostream& out, bool short_stats=false);
93    void printShortStats(ostream& out) { printStats(out, true); }
94    void printTraceStats(ostream& out) const;
95    void clearStats();
96    void printConfig(ostream& out) const;
97    void printResourceUsage(ostream& out) const;
98
99    AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
100    AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
101
102    void addPrimaryStatSample(const CacheMsg& msg, NodeID id);
103    void addSecondaryStatSample(GenericRequestType requestType,
104                                AccessModeType type, int msgSize,
105                                PrefetchBit pfBit, NodeID id);
106    void addSecondaryStatSample(CacheRequestType requestType,
107                                AccessModeType type, int msgSize,
108                                PrefetchBit pfBit, NodeID id);
109    void addAddressTraceSample(const CacheMsg& msg, NodeID id);
110
111    void profileRequest(const string& requestStr);
112    void profileSharing(const Address& addr, AccessType type,
113                        NodeID requestor, const Set& sharers,
114                        const Set& owner);
115
116    void profileMulticastRetry(const Address& addr, int count);
117
118    void profileFilterAction(int action);
119
120    void profileConflictingRequests(const Address& addr);
121    void profileOutstandingRequest(int outstanding) {
122        m_outstanding_requests.add(outstanding);
123    }
124
125    void profileOutstandingPersistentRequest(int outstanding) {
126        m_outstanding_persistent_requests.add(outstanding);
127    }
128    void profileAverageLatencyEstimate(int latency) {
129        m_average_latency_estimate.add(latency);
130    }
131
132    void countBAUnicast() { m_num_BA_unicasts++; }
133    void countBABroadcast() { m_num_BA_broadcasts++; }
134
135    void recordPrediction(bool wasGood, bool wasPredicted);
136
137    void startTransaction(int cpu);
138    void endTransaction(int cpu);
139    void profilePFWait(Time waitTime);
140
141    void controllerBusy(MachineID machID);
142    void bankBusy();
143    void missLatency(Time t, CacheRequestType type,
144                     GenericMachineType respondingMach);
145    void swPrefetchLatency(Time t, CacheRequestType type,
146                           GenericMachineType respondingMach);
147    void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
148    void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
149    void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
150    void sequencerRequests(int num) { m_sequencer_requests.add(num); }
151    void storeBuffer(int size, int blocks) {
152        m_store_buffer_size.add(size);
153        m_store_buffer_blocks.add(blocks);
154    }
155
156    void profileGetXMaskPrediction(const Set& pred_set);
157    void profileGetSMaskPrediction(const Set& pred_set);
158    void profileTrainingMask(const Set& pred_set);
159    void profileTransition(const string& component, NodeID id, NodeID version,
160                           Address addr, const string& state,
161                           const string& event, const string& next_state,
162                           const string& note);
163    void profileMsgDelay(int virtualNetwork, int delayCycles);
164
165    void print(ostream& out) const;
166
167    int64 getTotalInstructionsExecuted() const;
168    int64 getTotalTransactionsExecuted() const;
169
170    Time getRubyStartTime(){
171      return m_ruby_start;
172    }
173
174    // added for MemoryControl:
175    void profileMemReq(int bank);
176    void profileMemBankBusy();
177    void profileMemBusBusy();
178    void profileMemTfawBusy();
179    void profileMemReadWriteBusy();
180    void profileMemDataBusBusy();
181    void profileMemRefresh();
182    void profileMemRead();
183    void profileMemWrite();
184    void profileMemWaitCycles(int cycles);
185    void profileMemInputQ(int cycles);
186    void profileMemBankQ(int cycles);
187    void profileMemArbWait(int cycles);
188    void profileMemRandBusy();
189    void profileMemNotOld();
190
191private:
192    // Private Methods
193    void addL2StatSample(GenericRequestType requestType, AccessModeType type,
194                         int msgSize, PrefetchBit pfBit, NodeID id);
195    void addL1DStatSample(const CacheMsg& msg, NodeID id);
196    void addL1IStatSample(const CacheMsg& msg, NodeID id);
197
198    GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type);
199
200    // Private copy constructor and assignment operator
201    Profiler(const Profiler& obj);
202    Profiler& operator=(const Profiler& obj);
203
204    // Data Members (m_ prefix)
205    CacheProfiler* m_L1D_cache_profiler_ptr;
206    CacheProfiler* m_L1I_cache_profiler_ptr;
207    CacheProfiler* m_L2_cache_profiler_ptr;
208    AddressProfiler* m_address_profiler_ptr;
209    AddressProfiler* m_inst_profiler_ptr;
210
211    Vector<int64> m_instructions_executed_at_start;
212    Vector<int64> m_cycles_executed_at_start;
213
214    ostream* m_periodic_output_file_ptr;
215    integer_t m_stats_period;
216
217    Time m_ruby_start;
218    time_t m_real_time_start_time;
219
220    int m_num_BA_unicasts;
221    int m_num_BA_broadcasts;
222
223    Vector<integer_t> m_perProcTotalMisses;
224    Vector<integer_t> m_perProcUserMisses;
225    Vector<integer_t> m_perProcSupervisorMisses;
226    Vector<integer_t> m_perProcStartTransaction;
227    Vector<integer_t> m_perProcEndTransaction;
228    Vector < Vector < integer_t > > m_busyControllerCount;
229    integer_t m_busyBankCount;
230    Histogram m_multicast_retry_histogram;
231
232    Histogram m_L1tbeProfile;
233    Histogram m_L2tbeProfile;
234    Histogram m_stopTableProfile;
235
236    Histogram m_filter_action_histogram;
237    Histogram m_tbeProfile;
238
239    Histogram m_sequencer_requests;
240    Histogram m_store_buffer_size;
241    Histogram m_store_buffer_blocks;
242    Histogram m_read_sharing_histogram;
243    Histogram m_write_sharing_histogram;
244    Histogram m_all_sharing_histogram;
245    int64 m_cache_to_cache;
246    int64 m_memory_to_cache;
247
248    Histogram m_prefetchWaitHistogram;
249
250    Vector<Histogram> m_missLatencyHistograms;
251    Vector<Histogram> m_machLatencyHistograms;
252    Histogram m_L2MissLatencyHistogram;
253    Histogram m_allMissLatencyHistogram;
254
255    Histogram  m_allSWPrefetchLatencyHistogram;
256    Histogram  m_SWPrefetchL2MissLatencyHistogram;
257    Vector<Histogram> m_SWPrefetchLatencyHistograms;
258    Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
259
260    Histogram m_delayedCyclesHistogram;
261    Histogram m_delayedCyclesNonPFHistogram;
262    Vector<Histogram> m_delayedCyclesVCHistograms;
263
264    int m_predictions;
265    int m_predictionOpportunities;
266    int m_goodPredictions;
267
268    Histogram m_gets_mask_prediction;
269    Histogram m_getx_mask_prediction;
270    Histogram m_explicit_training_mask;
271
272    // For profiling possibly conflicting requests
273    Map<Address, Time>* m_conflicting_map_ptr;
274    Histogram m_conflicting_histogram;
275
276    Histogram m_outstanding_requests;
277    Histogram m_outstanding_persistent_requests;
278
279    Histogram m_average_latency_estimate;
280
281    Map<Address, int>* m_watch_address_list_ptr;
282    // counts all initiated cache request including PUTs
283    int m_requests;
284    Map <string, int>* m_requestProfileMap_ptr;
285
286    // added for MemoryControl:
287    long long int m_memReq;
288    long long int m_memBankBusy;
289    long long int m_memBusBusy;
290    long long int m_memTfawBusy;
291    long long int m_memReadWriteBusy;
292    long long int m_memDataBusBusy;
293    long long int m_memRefresh;
294    long long int m_memRead;
295    long long int m_memWrite;
296    long long int m_memWaitCycles;
297    long long int m_memInputQ;
298    long long int m_memBankQ;
299    long long int m_memArbWait;
300    long long int m_memRandBusy;
301    long long int m_memNotOld;
302    Vector<long long int> m_memBankCount;
303
304};
305
306// Output operator declaration
307ostream& operator<<(ostream& out, const Profiler& obj);
308
309// ******************* Definitions *******************
310
311// Output operator definition
312extern inline
313ostream& operator<<(ostream& out, const Profiler& obj)
314{
315    obj.print(out);
316    out << flush;
317    return out;
318}
319
320#endif //PROFILER_H
321
322
323