Profiler.hh revision 6372:f1a41ea3bbab
1/* 2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45/* 46 * Profiler.hh 47 * 48 * Description: 49 * 50 * $Id$ 51 * 52 */ 53 54#ifndef PROFILER_H 55#define PROFILER_H 56 57#include "mem/ruby/libruby.hh" 58 59#include "mem/ruby/common/Global.hh" 60#include "mem/protocol/GenericMachineType.hh" 61#include "mem/ruby/common/Histogram.hh" 62#include "mem/ruby/common/Consumer.hh" 63#include "mem/protocol/AccessModeType.hh" 64#include "mem/protocol/AccessType.hh" 65#include "mem/ruby/system/NodeID.hh" 66#include "mem/ruby/system/MachineID.hh" 67#include "mem/protocol/PrefetchBit.hh" 68#include "mem/ruby/common/Address.hh" 69#include "mem/ruby/common/Set.hh" 70#include "mem/protocol/CacheRequestType.hh" 71#include "mem/protocol/GenericRequestType.hh" 72#include "mem/ruby/system/MemoryControl.hh" 73 74class CacheMsg; 75class CacheProfiler; 76class AddressProfiler; 77 78template <class KEY_TYPE, class VALUE_TYPE> class Map; 79 80struct memory_control_profiler { 81 long long int m_memReq; 82 long long int m_memBankBusy; 83 long long int m_memBusBusy; 84 long long int m_memTfawBusy; 85 long long int m_memReadWriteBusy; 86 long long int m_memDataBusBusy; 87 long long int m_memRefresh; 88 long long int m_memRead; 89 long long int m_memWrite; 90 long long int m_memWaitCycles; 91 long long int m_memInputQ; 92 long long int m_memBankQ; 93 long long int m_memArbWait; 94 long long int m_memRandBusy; 95 long long int m_memNotOld; 96 Vector<long long int> m_memBankCount; 97 int m_banks_per_rank; 98 int m_ranks_per_dimm; 99 int m_dimms_per_channel; 100}; 101 102 103class Profiler : public Consumer { 104public: 105 // Constructors 106 Profiler(const string & name); 107 108 void init(const vector<string> & argv, vector<string> memory_control_names); 109 110 // Destructor 111 ~Profiler(); 112 113 // Public Methods 114 void wakeup(); 115 116 void setPeriodicStatsFile(const string& filename); 117 void setPeriodicStatsInterval(integer_t period); 118 119 void printStats(ostream& out, bool short_stats=false); 120 void printShortStats(ostream& out) { printStats(out, true); } 121 void printTraceStats(ostream& out) const; 122 void clearStats(); 123 void printConfig(ostream& out) const; 124 void printResourceUsage(ostream& out) const; 125 126 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; } 127 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; } 128 129 void addPrimaryStatSample(const CacheMsg& msg, NodeID id); 130 void addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 131 void addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 132 void addAddressTraceSample(const CacheMsg& msg, NodeID id); 133 134 void profileRequest(const string& requestStr); 135 void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner); 136 137 void profileMulticastRetry(const Address& addr, int count); 138 139 void profileFilterAction(int action); 140 141 void profileConflictingRequests(const Address& addr); 142 void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); } 143 void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); } 144 void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); } 145 146 void countBAUnicast() { m_num_BA_unicasts++; } 147 void countBABroadcast() { m_num_BA_broadcasts++; } 148 149 void recordPrediction(bool wasGood, bool wasPredicted); 150 151 void startTransaction(int cpu); 152 void endTransaction(int cpu); 153 void profilePFWait(Time waitTime); 154 155 void controllerBusy(MachineID machID); 156 void bankBusy(); 157 void missLatency(Time t, RubyRequestType type); 158 void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach); 159 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); } 160 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); } 161 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); } 162 void sequencerRequests(int num) { m_sequencer_requests.add(num); } 163 void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);} 164 165 void profileGetXMaskPrediction(const Set& pred_set); 166 void profileGetSMaskPrediction(const Set& pred_set); 167 void profileTrainingMask(const Set& pred_set); 168 void profileTransition(const string& component, NodeID version, Address addr, 169 const string& state, const string& event, 170 const string& next_state, const string& note); 171 void profileMsgDelay(int virtualNetwork, int delayCycles); 172 173 void print(ostream& out) const; 174 175 int64 getTotalInstructionsExecuted() const; 176 int64 getTotalTransactionsExecuted() const; 177 178 void rubyWatch(int proc); 179 bool watchAddress(Address addr); 180 181 // return Ruby's start time 182 Time getRubyStartTime(){ 183 return m_ruby_start; 184 } 185 186 // added for MemoryControl: 187 void profileMemReq(string name, int bank); 188 void profileMemBankBusy(string name); 189 void profileMemBusBusy(string name); 190 void profileMemTfawBusy(string name); 191 void profileMemReadWriteBusy(string name); 192 void profileMemDataBusBusy(string name); 193 void profileMemRefresh(string name); 194 void profileMemRead(string name); 195 void profileMemWrite(string name); 196 void profileMemWaitCycles(string name, int cycles); 197 void profileMemInputQ(string name, int cycles); 198 void profileMemBankQ(string name, int cycles); 199 void profileMemArbWait(string name, int cycles); 200 void profileMemRandBusy(string name); 201 void profileMemNotOld(string name); 202 //added by SS 203 bool getHotLines() { return m_hot_lines; } 204 bool getAllInstructions() { return m_all_instructions; } 205 206private: 207 //added by SS 208 vector<string> m_memory_control_names; 209 // Private Methods 210 void addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id); 211 void addL1DStatSample(const CacheMsg& msg, NodeID id); 212 void addL1IStatSample(const CacheMsg& msg, NodeID id); 213 214 GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type); 215 216 // Private copy constructor and assignment operator 217 Profiler(const Profiler& obj); 218 Profiler& operator=(const Profiler& obj); 219 220 // Data Members (m_ prefix) 221 CacheProfiler* m_L1D_cache_profiler_ptr; 222 CacheProfiler* m_L1I_cache_profiler_ptr; 223 CacheProfiler* m_L2_cache_profiler_ptr; 224 AddressProfiler* m_address_profiler_ptr; 225 AddressProfiler* m_inst_profiler_ptr; 226 227 Vector<int64> m_instructions_executed_at_start; 228 Vector<int64> m_cycles_executed_at_start; 229 230 ostream* m_periodic_output_file_ptr; 231 integer_t m_stats_period; 232 233 Time m_ruby_start; 234 time_t m_real_time_start_time; 235 236 int m_num_BA_unicasts; 237 int m_num_BA_broadcasts; 238 239 Vector<integer_t> m_perProcTotalMisses; 240 Vector<integer_t> m_perProcUserMisses; 241 Vector<integer_t> m_perProcSupervisorMisses; 242 Vector<integer_t> m_perProcStartTransaction; 243 Vector<integer_t> m_perProcEndTransaction; 244 Vector < Vector < integer_t > > m_busyControllerCount; 245 integer_t m_busyBankCount; 246 Histogram m_multicast_retry_histogram; 247 248 Histogram m_L1tbeProfile; 249 Histogram m_L2tbeProfile; 250 Histogram m_stopTableProfile; 251 252 Histogram m_filter_action_histogram; 253 Histogram m_tbeProfile; 254 255 Histogram m_sequencer_requests; 256 Histogram m_store_buffer_size; 257 Histogram m_store_buffer_blocks; 258 Histogram m_read_sharing_histogram; 259 Histogram m_write_sharing_histogram; 260 Histogram m_all_sharing_histogram; 261 int64 m_cache_to_cache; 262 int64 m_memory_to_cache; 263 264 Histogram m_prefetchWaitHistogram; 265 266 Vector<Histogram> m_missLatencyHistograms; 267 Vector<Histogram> m_machLatencyHistograms; 268 Histogram m_L2MissLatencyHistogram; 269 Histogram m_allMissLatencyHistogram; 270 271 Histogram m_allSWPrefetchLatencyHistogram; 272 Histogram m_SWPrefetchL2MissLatencyHistogram; 273 Vector<Histogram> m_SWPrefetchLatencyHistograms; 274 Vector<Histogram> m_SWPrefetchMachLatencyHistograms; 275 276 Histogram m_delayedCyclesHistogram; 277 Histogram m_delayedCyclesNonPFHistogram; 278 Vector<Histogram> m_delayedCyclesVCHistograms; 279 280 int m_predictions; 281 int m_predictionOpportunities; 282 int m_goodPredictions; 283 284 Histogram m_gets_mask_prediction; 285 Histogram m_getx_mask_prediction; 286 Histogram m_explicit_training_mask; 287 288 // For profiling possibly conflicting requests 289 Map<Address, Time>* m_conflicting_map_ptr; 290 Histogram m_conflicting_histogram; 291 292 Histogram m_outstanding_requests; 293 Histogram m_outstanding_persistent_requests; 294 295 Histogram m_average_latency_estimate; 296 297 Map<Address, int>* m_watch_address_list_ptr; 298 // counts all initiated cache request including PUTs 299 int m_requests; 300 Map <string, int>* m_requestProfileMap_ptr; 301 302 // added for MemoryControl: 303 //added by SS 304 map< string, memory_control_profiler* > m_memory_control_profilers; 305 306 //added by SS 307 bool m_hot_lines; 308 bool m_all_instructions; 309 string m_name; 310 311}; 312 313// Output operator declaration 314ostream& operator<<(ostream& out, const Profiler& obj); 315 316// ******************* Definitions ******************* 317 318// Output operator definition 319extern inline 320ostream& operator<<(ostream& out, const Profiler& obj) 321{ 322 obj.print(out); 323 out << flush; 324 return out; 325} 326 327#endif //PROFILER_H 328 329 330