Profiler.hh revision 6154:6bb54dcb940e
1/* 2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45/* 46 * Profiler.h 47 * 48 * Description: 49 * 50 * $Id$ 51 * 52 */ 53 54#ifndef PROFILER_H 55#define PROFILER_H 56 57#include "mem/ruby/common/Global.hh" 58#include "mem/protocol/GenericMachineType.hh" 59#include "mem/ruby/config/RubyConfig.hh" 60#include "mem/ruby/common/Histogram.hh" 61#include "mem/ruby/common/Consumer.hh" 62#include "mem/protocol/AccessModeType.hh" 63#include "mem/protocol/AccessType.hh" 64#include "mem/ruby/system/NodeID.hh" 65#include "mem/ruby/system/MachineID.hh" 66#include "mem/protocol/PrefetchBit.hh" 67#include "mem/ruby/common/Address.hh" 68#include "mem/ruby/common/Set.hh" 69#include "mem/protocol/CacheRequestType.hh" 70#include "mem/protocol/GenericRequestType.hh" 71 72class CacheMsg; 73class CacheProfiler; 74class AddressProfiler; 75 76template <class KEY_TYPE, class VALUE_TYPE> class Map; 77 78class Profiler : public Consumer { 79public: 80 // Constructors 81 Profiler(); 82 83 // Destructor 84 ~Profiler(); 85 86 // Public Methods 87 void wakeup(); 88 89 void setPeriodicStatsFile(const string& filename); 90 void setPeriodicStatsInterval(integer_t period); 91 92 void printStats(ostream& out, bool short_stats=false); 93 void printShortStats(ostream& out) { printStats(out, true); } 94 void printTraceStats(ostream& out) const; 95 void clearStats(); 96 void printConfig(ostream& out) const; 97 void printResourceUsage(ostream& out) const; 98 99 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } 100 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } 101 102 void addPrimaryStatSample(const CacheMsg& msg, NodeID id); 103 void addSecondaryStatSample(GenericRequestType requestType, 104 AccessModeType type, int msgSize, 105 PrefetchBit pfBit, NodeID id); 106 void addSecondaryStatSample(CacheRequestType requestType, 107 AccessModeType type, int msgSize, 108 PrefetchBit pfBit, NodeID id); 109 void addAddressTraceSample(const CacheMsg& msg, NodeID id); 110 111 void profileRequest(const string& requestStr); 112 void profileSharing(const Address& addr, AccessType type, 113 NodeID requestor, const Set& sharers, 114 const Set& owner); 115 116 void profileMulticastRetry(const Address& addr, int count); 117 118 void profileFilterAction(int action); 119 120 void profileConflictingRequests(const Address& addr); 121 void profileOutstandingRequest(int outstanding) { 122 m_outstanding_requests.add(outstanding); 123 } 124 125 void profileOutstandingPersistentRequest(int outstanding) { 126 m_outstanding_persistent_requests.add(outstanding); 127 } 128 void profileAverageLatencyEstimate(int latency) { 129 m_average_latency_estimate.add(latency); 130 } 131 132 void countBAUnicast() { m_num_BA_unicasts++; } 133 void countBABroadcast() { m_num_BA_broadcasts++; } 134 135 void recordPrediction(bool wasGood, bool wasPredicted); 136 137 void startTransaction(int cpu); 138 void endTransaction(int cpu); 139 void profilePFWait(Time waitTime); 140 141 void controllerBusy(MachineID machID); 142 void bankBusy(); 143 void missLatency(Time t, CacheRequestType type, 144 GenericMachineType respondingMach); 145 void swPrefetchLatency(Time t, CacheRequestType type, 146 GenericMachineType respondingMach); 147 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); } 148 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); } 149 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); } 150 void sequencerRequests(int num) { m_sequencer_requests.add(num); } 151 void storeBuffer(int size, int blocks) { 152 m_store_buffer_size.add(size); 153 m_store_buffer_blocks.add(blocks); 154 } 155 156 void profileGetXMaskPrediction(const Set& pred_set); 157 void profileGetSMaskPrediction(const Set& pred_set); 158 void profileTrainingMask(const Set& pred_set); 159 void profileTransition(const string& component, NodeID id, NodeID version, 160 Address addr, const string& state, 161 const string& event, const string& next_state, 162 const string& note); 163 void profileMsgDelay(int virtualNetwork, int delayCycles); 164 165 void print(ostream& out) const; 166 167 int64 getTotalInstructionsExecuted() const; 168 int64 getTotalTransactionsExecuted() const; 169 170 Time getRubyStartTime(){ 171 return m_ruby_start; 172 } 173 174 // added for MemoryControl: 175 void profileMemReq(int bank); 176 void profileMemBankBusy(); 177 void profileMemBusBusy(); 178 void profileMemTfawBusy(); 179 void profileMemReadWriteBusy(); 180 void profileMemDataBusBusy(); 181 void profileMemRefresh(); 182 void profileMemRead(); 183 void profileMemWrite(); 184 void profileMemWaitCycles(int cycles); 185 void profileMemInputQ(int cycles); 186 void profileMemBankQ(int cycles); 187 void profileMemArbWait(int cycles); 188 void profileMemRandBusy(); 189 void profileMemNotOld(); 190 191private: 192 // Private Methods 193 void addL2StatSample(GenericRequestType requestType, AccessModeType type, 194 int msgSize, PrefetchBit pfBit, NodeID id); 195 void addL1DStatSample(const CacheMsg& msg, NodeID id); 196 void addL1IStatSample(const CacheMsg& msg, NodeID id); 197 198 GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type); 199 200 // Private copy constructor and assignment operator 201 Profiler(const Profiler& obj); 202 Profiler& operator=(const Profiler& obj); 203 204 // Data Members (m_ prefix) 205 CacheProfiler* m_L1D_cache_profiler_ptr; 206 CacheProfiler* m_L1I_cache_profiler_ptr; 207 CacheProfiler* m_L2_cache_profiler_ptr; 208 AddressProfiler* m_address_profiler_ptr; 209 AddressProfiler* m_inst_profiler_ptr; 210 211 Vector<int64> m_instructions_executed_at_start; 212 Vector<int64> m_cycles_executed_at_start; 213 214 ostream* m_periodic_output_file_ptr; 215 integer_t m_stats_period; 216 217 Time m_ruby_start; 218 time_t m_real_time_start_time; 219 220 int m_num_BA_unicasts; 221 int m_num_BA_broadcasts; 222 223 Vector<integer_t> m_perProcTotalMisses; 224 Vector<integer_t> m_perProcUserMisses; 225 Vector<integer_t> m_perProcSupervisorMisses; 226 Vector<integer_t> m_perProcStartTransaction; 227 Vector<integer_t> m_perProcEndTransaction; 228 Vector < Vector < integer_t > > m_busyControllerCount; 229 integer_t m_busyBankCount; 230 Histogram m_multicast_retry_histogram; 231 232 Histogram m_L1tbeProfile; 233 Histogram m_L2tbeProfile; 234 Histogram m_stopTableProfile; 235 236 Histogram m_filter_action_histogram; 237 Histogram m_tbeProfile; 238 239 Histogram m_sequencer_requests; 240 Histogram m_store_buffer_size; 241 Histogram m_store_buffer_blocks; 242 Histogram m_read_sharing_histogram; 243 Histogram m_write_sharing_histogram; 244 Histogram m_all_sharing_histogram; 245 int64 m_cache_to_cache; 246 int64 m_memory_to_cache; 247 248 Histogram m_prefetchWaitHistogram; 249 250 Vector<Histogram> m_missLatencyHistograms; 251 Vector<Histogram> m_machLatencyHistograms; 252 Histogram m_L2MissLatencyHistogram; 253 Histogram m_allMissLatencyHistogram; 254 255 Histogram m_allSWPrefetchLatencyHistogram; 256 Histogram m_SWPrefetchL2MissLatencyHistogram; 257 Vector<Histogram> m_SWPrefetchLatencyHistograms; 258 Vector<Histogram> m_SWPrefetchMachLatencyHistograms; 259 260 Histogram m_delayedCyclesHistogram; 261 Histogram m_delayedCyclesNonPFHistogram; 262 Vector<Histogram> m_delayedCyclesVCHistograms; 263 264 int m_predictions; 265 int m_predictionOpportunities; 266 int m_goodPredictions; 267 268 Histogram m_gets_mask_prediction; 269 Histogram m_getx_mask_prediction; 270 Histogram m_explicit_training_mask; 271 272 // For profiling possibly conflicting requests 273 Map<Address, Time>* m_conflicting_map_ptr; 274 Histogram m_conflicting_histogram; 275 276 Histogram m_outstanding_requests; 277 Histogram m_outstanding_persistent_requests; 278 279 Histogram m_average_latency_estimate; 280 281 Map<Address, int>* m_watch_address_list_ptr; 282 // counts all initiated cache request including PUTs 283 int m_requests; 284 Map <string, int>* m_requestProfileMap_ptr; 285 286 // added for MemoryControl: 287 long long int m_memReq; 288 long long int m_memBankBusy; 289 long long int m_memBusBusy; 290 long long int m_memTfawBusy; 291 long long int m_memReadWriteBusy; 292 long long int m_memDataBusBusy; 293 long long int m_memRefresh; 294 long long int m_memRead; 295 long long int m_memWrite; 296 long long int m_memWaitCycles; 297 long long int m_memInputQ; 298 long long int m_memBankQ; 299 long long int m_memArbWait; 300 long long int m_memRandBusy; 301 long long int m_memNotOld; 302 Vector<long long int> m_memBankCount; 303 304}; 305 306// Output operator declaration 307ostream& operator<<(ostream& out, const Profiler& obj); 308 309// ******************* Definitions ******************* 310 311// Output operator definition 312extern inline 313ostream& operator<<(ostream& out, const Profiler& obj) 314{ 315 obj.print(out); 316 out << flush; 317 return out; 318} 319 320#endif //PROFILER_H 321 322 323