Profiler.cc (9923:cdd51a15e9be) | Profiler.cc (10012:ec5a5bfb941d) |
---|---|
1/* | 1/* |
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood | 2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood |
3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the --- 45 unchanged lines hidden (view full) --- 56#include "mem/ruby/profiler/AddressProfiler.hh" 57#include "mem/ruby/profiler/Profiler.hh" 58#include "mem/ruby/system/Sequencer.hh" 59#include "mem/ruby/system/System.hh" 60 61using namespace std; 62using m5::stl_helpers::operator<<; 63 | 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the --- 45 unchanged lines hidden (view full) --- 56#include "mem/ruby/profiler/AddressProfiler.hh" 57#include "mem/ruby/profiler/Profiler.hh" 58#include "mem/ruby/system/Sequencer.hh" 59#include "mem/ruby/system/System.hh" 60 61using namespace std; 62using m5::stl_helpers::operator<<; 63 |
64Profiler::Profiler(const Params *p) 65 : SimObject(p) | 64Profiler::Profiler(const RubySystemParams *p) 65 : m_IncompleteTimes(MachineType_NUM) |
66{ | 66{ |
67 m_inst_profiler_ptr = NULL; 68 m_address_profiler_ptr = NULL; 69 m_real_time_start_time = time(NULL); // Not reset in clearStats() 70 | |
71 m_hot_lines = p->hot_lines; 72 m_all_instructions = p->all_instructions; 73 | 67 m_hot_lines = p->hot_lines; 68 m_all_instructions = p->all_instructions; 69 |
74 m_num_of_sequencers = p->num_of_sequencers; 75 76 m_hot_lines = false; 77 m_all_instructions = false; 78 79 m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); | 70 m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers); |
80 m_address_profiler_ptr->setHotLines(m_hot_lines); 81 m_address_profiler_ptr->setAllInstructions(m_all_instructions); 82 83 if (m_all_instructions) { | 71 m_address_profiler_ptr->setHotLines(m_hot_lines); 72 m_address_profiler_ptr->setAllInstructions(m_all_instructions); 73 74 if (m_all_instructions) { |
84 m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); | 75 m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers); |
85 m_inst_profiler_ptr->setHotLines(m_hot_lines); 86 m_inst_profiler_ptr->setAllInstructions(m_all_instructions); 87 } | 76 m_inst_profiler_ptr->setHotLines(m_hot_lines); 77 m_inst_profiler_ptr->setAllInstructions(m_all_instructions); 78 } |
88 89 p->ruby_system->registerProfiler(this); | |
90} 91 92Profiler::~Profiler() 93{ 94} 95 96void | 79} 80 81Profiler::~Profiler() 82{ 83} 84 85void |
97Profiler::print(ostream& out) const | 86Profiler::regStats(const std::string &pName) |
98{ | 87{ |
99 out << "[Profiler]"; 100} | 88 if (!m_all_instructions) { 89 m_address_profiler_ptr->regStats(pName); 90 } |
101 | 91 |
102void 103Profiler::printRequestProfile(ostream &out) const 104{ 105 out << "Request vs. RubySystem State Profile" << endl; 106 out << "--------------------------------" << endl; 107 out << endl; | 92 if (m_all_instructions) { 93 m_inst_profiler_ptr->regStats(pName); 94 } |
108 | 95 |
109 map<string, uint64_t> m_requestProfileMap; 110 uint64_t m_requests = 0; | 96 delayHistogram 97 .init(10) 98 .name(pName + ".delayHist") 99 .desc("delay histogram for all message") 100 .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
111 | 101 |
112 for (uint32_t i = 0; i < MachineType_NUM; i++) { 113 for (map<uint32_t, AbstractController*>::iterator it = 114 g_abs_controls[i].begin(); 115 it != g_abs_controls[i].end(); ++it) { | 102 uint32_t numVNets = Network::getNumberOfVirtualNetworks(); 103 for (int i = 0; i < numVNets; i++) { 104 delayVCHistogram.push_back(new Stats::Histogram()); 105 delayVCHistogram[i] 106 ->init(10) 107 .name(pName + csprintf(".delayVCHist.vnet_%i", i)) 108 .desc(csprintf("delay histogram for vnet_%i", i)) 109 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 110 } |
116 | 111 |
117 AbstractController *ctr = (*it).second; 118 map<string, uint64_t> mp = ctr->getRequestProfileMap(); | 112 m_outstandReqHist 113 .init(10) 114 .name(pName + ".outstanding_req_hist") 115 .desc("") 116 .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
119 | 117 |
120 for (map<string, uint64_t>::iterator jt = mp.begin(); 121 jt != mp.end(); ++jt) { | 118 m_latencyHist 119 .init(10) 120 .name(pName + ".latency_hist") 121 .desc("") 122 .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
122 | 123 |
123 map<string, uint64_t>::iterator kt = 124 m_requestProfileMap.find((*jt).first); 125 if (kt != m_requestProfileMap.end()) { 126 (*kt).second += (*jt).second; 127 } else { 128 m_requestProfileMap[(*jt).first] = (*jt).second; 129 } 130 } | 124 m_hitLatencyHist 125 .init(10) 126 .name(pName + ".hit_latency_hist") 127 .desc("") 128 .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
131 | 129 |
132 m_requests += ctr->getRequestCount(); 133 } | 130 m_missLatencyHist 131 .init(10) 132 .name(pName + ".miss_latency_hist") 133 .desc("") 134 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 135 136 for (int i = 0; i < RubyRequestType_NUM; i++) { 137 m_typeLatencyHist.push_back(new Stats::Histogram()); 138 m_typeLatencyHist[i] 139 ->init(10) 140 .name(pName + csprintf(".%s.latency_hist", 141 RubyRequestType(i))) 142 .desc("") 143 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 144 145 m_hitTypeLatencyHist.push_back(new Stats::Histogram()); 146 m_hitTypeLatencyHist[i] 147 ->init(10) 148 .name(pName + csprintf(".%s.hit_latency_hist", 149 RubyRequestType(i))) 150 .desc("") 151 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 152 153 m_missTypeLatencyHist.push_back(new Stats::Histogram()); 154 m_missTypeLatencyHist[i] 155 ->init(10) 156 .name(pName + csprintf(".%s.miss_latency_hist", 157 RubyRequestType(i))) 158 .desc("") 159 .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
134 } 135 | 160 } 161 |
136 map<string, uint64_t>::const_iterator i = m_requestProfileMap.begin(); 137 map<string, uint64_t>::const_iterator end = m_requestProfileMap.end(); 138 for (; i != end; ++i) { 139 const string &key = i->first; 140 uint64_t count = i->second; | 162 for (int i = 0; i < MachineType_NUM; i++) { 163 m_hitMachLatencyHist.push_back(new Stats::Histogram()); 164 m_hitMachLatencyHist[i] 165 ->init(10) 166 .name(pName + csprintf(".%s.hit_mach_latency_hist", 167 MachineType(i))) 168 .desc("") 169 .flags(Stats::nozero | Stats::pdf | Stats::oneline); |
141 | 170 |
142 double percent = (100.0 * double(count)) / double(m_requests); 143 vector<string> items; 144 tokenize(items, key, ':'); 145 vector<string>::iterator j = items.begin(); 146 vector<string>::iterator end = items.end(); 147 for (; j != end; ++i) 148 out << setw(10) << *j; 149 out << setw(11) << count; 150 out << setw(14) << percent << endl; | 171 m_missMachLatencyHist.push_back(new Stats::Histogram()); 172 m_missMachLatencyHist[i] 173 ->init(10) 174 .name(pName + csprintf(".%s.miss_mach_latency_hist", 175 MachineType(i))) 176 .desc("") 177 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 178 179 m_IssueToInitialDelayHist.push_back(new Stats::Histogram()); 180 m_IssueToInitialDelayHist[i] 181 ->init(10) 182 .name(pName + csprintf( 183 ".%s.miss_latency_hist.issue_to_initial_request", 184 MachineType(i))) 185 .desc("") 186 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 187 188 m_InitialToForwardDelayHist.push_back(new Stats::Histogram()); 189 m_InitialToForwardDelayHist[i] 190 ->init(10) 191 .name(pName + csprintf(".%s.miss_latency_hist.initial_to_forward", 192 MachineType(i))) 193 .desc("") 194 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 195 196 m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram()); 197 m_ForwardToFirstResponseDelayHist[i] 198 ->init(10) 199 .name(pName + csprintf( 200 ".%s.miss_latency_hist.forward_to_first_response", 201 MachineType(i))) 202 .desc("") 203 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 204 205 m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram()); 206 m_FirstResponseToCompletionDelayHist[i] 207 ->init(10) 208 .name(pName + csprintf( 209 ".%s.miss_latency_hist.first_response_to_completion", 210 MachineType(i))) 211 .desc("") 212 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 213 214 m_IncompleteTimes[i] 215 .name(pName + csprintf(".%s.incomplete_times", MachineType(i))) 216 .desc("") 217 .flags(Stats::nozero); |
151 } | 218 } |
152 out << endl; | 219 220 for (int i = 0; i < RubyRequestType_NUM; i++) { 221 m_hitTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>()); 222 m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>()); 223 224 for (int j = 0; j < MachineType_NUM; j++) { 225 m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram()); 226 m_hitTypeMachLatencyHist[i][j] 227 ->init(10) 228 .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist", 229 RubyRequestType(i), MachineType(j))) 230 .desc("") 231 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 232 233 m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram()); 234 m_missTypeMachLatencyHist[i][j] 235 ->init(10) 236 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist", 237 RubyRequestType(i), MachineType(j))) 238 .desc("") 239 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 240 } 241 } |
153} 154 155void | 242} 243 244void |
156Profiler::printDelayProfile(ostream &out) const | 245Profiler::collateStats() |
157{ | 246{ |
158 out << "Message Delayed Cycles" << endl; 159 out << "----------------------" << endl; | 247 if (!m_all_instructions) { 248 m_address_profiler_ptr->collateStats(); 249 } |
160 | 250 |
161 uint32_t numVNets = Network::getNumberOfVirtualNetworks(); 162 Histogram delayHistogram; 163 std::vector<Histogram> delayVCHistogram(numVNets); | 251 if (m_all_instructions) { 252 m_inst_profiler_ptr->collateStats(); 253 } |
164 | 254 |
255 uint32_t numVNets = Network::getNumberOfVirtualNetworks(); |
|
165 for (uint32_t i = 0; i < MachineType_NUM; i++) { 166 for (map<uint32_t, AbstractController*>::iterator it = 167 g_abs_controls[i].begin(); 168 it != g_abs_controls[i].end(); ++it) { 169 170 AbstractController *ctr = (*it).second; 171 delayHistogram.add(ctr->getDelayHist()); 172 173 for (uint32_t i = 0; i < numVNets; i++) { | 256 for (uint32_t i = 0; i < MachineType_NUM; i++) { 257 for (map<uint32_t, AbstractController*>::iterator it = 258 g_abs_controls[i].begin(); 259 it != g_abs_controls[i].end(); ++it) { 260 261 AbstractController *ctr = (*it).second; 262 delayHistogram.add(ctr->getDelayHist()); 263 264 for (uint32_t i = 0; i < numVNets; i++) { |
174 delayVCHistogram[i].add(ctr->getDelayVCHist(i)); | 265 delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); |
175 } 176 } 177 } 178 | 266 } 267 } 268 } 269 |
179 out << "Total_delay_cycles: " << delayHistogram << endl; 180 181 for (int i = 0; i < numVNets; i++) { 182 out << " virtual_network_" << i << "_delay_cycles: " 183 << delayVCHistogram[i] << endl; 184 } 185} 186 187void 188Profiler::printOutstandingReqProfile(ostream &out) const 189{ 190 Histogram sequencerRequests; 191 | |
192 for (uint32_t i = 0; i < MachineType_NUM; i++) { 193 for (map<uint32_t, AbstractController*>::iterator it = | 270 for (uint32_t i = 0; i < MachineType_NUM; i++) { 271 for (map<uint32_t, AbstractController*>::iterator it = |
194 g_abs_controls[i].begin(); 195 it != g_abs_controls[i].end(); ++it) { | 272 g_abs_controls[i].begin(); 273 it != g_abs_controls[i].end(); ++it) { |
196 197 AbstractController *ctr = (*it).second; 198 Sequencer *seq = ctr->getSequencer(); 199 if (seq != NULL) { | 274 275 AbstractController *ctr = (*it).second; 276 Sequencer *seq = ctr->getSequencer(); 277 if (seq != NULL) { |
200 sequencerRequests.add(seq->getOutstandReqHist()); | 278 m_outstandReqHist.add(seq->getOutstandReqHist()); |
201 } 202 } 203 } 204 | 279 } 280 } 281 } 282 |
205 out << "sequencer_requests_outstanding: " 206 << sequencerRequests << endl; 207} 208 209void 210Profiler::printMissLatencyProfile(ostream &out) const 211{ 212 // Collate the miss latencies histograms from all the sequencers 213 Histogram latency_hist; 214 std::vector<Histogram> type_latency_hist(RubyRequestType_NUM); 215 216 Histogram hit_latency_hist; 217 std::vector<Histogram> hit_type_latency_hist(RubyRequestType_NUM); 218 219 std::vector<Histogram> hit_mach_latency_hist(MachineType_NUM); 220 std::vector<std::vector<Histogram> > 221 hit_type_mach_latency_hist(RubyRequestType_NUM, 222 std::vector<Histogram>(MachineType_NUM)); 223 224 Histogram miss_latency_hist; 225 std::vector<Histogram> miss_type_latency_hist(RubyRequestType_NUM); 226 227 std::vector<Histogram> miss_mach_latency_hist(MachineType_NUM); 228 std::vector<std::vector<Histogram> > 229 miss_type_mach_latency_hist(RubyRequestType_NUM, 230 std::vector<Histogram>(MachineType_NUM)); 231 232 std::vector<Histogram> issue_to_initial_delay_hist(MachineType_NUM); 233 std::vector<Histogram> initial_to_forward_delay_hist(MachineType_NUM); 234 std::vector<Histogram> 235 forward_to_first_response_delay_hist(MachineType_NUM); 236 std::vector<Histogram> 237 first_response_to_completion_delay_hist(MachineType_NUM); 238 std::vector<uint64_t> incomplete_times(MachineType_NUM); 239 | |
240 for (uint32_t i = 0; i < MachineType_NUM; i++) { 241 for (map<uint32_t, AbstractController*>::iterator it = | 283 for (uint32_t i = 0; i < MachineType_NUM; i++) { 284 for (map<uint32_t, AbstractController*>::iterator it = |
242 g_abs_controls[i].begin(); 243 it != g_abs_controls[i].end(); ++it) { | 285 g_abs_controls[i].begin(); 286 it != g_abs_controls[i].end(); ++it) { |
244 245 AbstractController *ctr = (*it).second; 246 Sequencer *seq = ctr->getSequencer(); 247 if (seq != NULL) { 248 // add all the latencies | 287 288 AbstractController *ctr = (*it).second; 289 Sequencer *seq = ctr->getSequencer(); 290 if (seq != NULL) { 291 // add all the latencies |
249 latency_hist.add(seq->getLatencyHist()); 250 hit_latency_hist.add(seq->getHitLatencyHist()); 251 miss_latency_hist.add(seq->getMissLatencyHist()); | 292 m_latencyHist.add(seq->getLatencyHist()); 293 m_hitLatencyHist.add(seq->getHitLatencyHist()); 294 m_missLatencyHist.add(seq->getMissLatencyHist()); |
252 253 // add the per request type latencies 254 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { | 295 296 // add the per request type latencies 297 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { |
255 type_latency_hist[j] 256 .add(seq->getTypeLatencyHist(j)); 257 hit_type_latency_hist[j] 258 .add(seq->getHitTypeLatencyHist(j)); 259 miss_type_latency_hist[j] 260 .add(seq->getMissTypeLatencyHist(j)); | 298 m_typeLatencyHist[j] 299 ->add(seq->getTypeLatencyHist(j)); 300 m_hitTypeLatencyHist[j] 301 ->add(seq->getHitTypeLatencyHist(j)); 302 m_missTypeLatencyHist[j] 303 ->add(seq->getMissTypeLatencyHist(j)); |
261 } 262 263 // add the per machine type miss latencies 264 for (uint32_t j = 0; j < MachineType_NUM; ++j) { | 304 } 305 306 // add the per machine type miss latencies 307 for (uint32_t j = 0; j < MachineType_NUM; ++j) { |
265 hit_mach_latency_hist[j] 266 .add(seq->getHitMachLatencyHist(j)); 267 miss_mach_latency_hist[j] 268 .add(seq->getMissMachLatencyHist(j)); | 308 m_hitMachLatencyHist[j] 309 ->add(seq->getHitMachLatencyHist(j)); 310 m_missMachLatencyHist[j] 311 ->add(seq->getMissMachLatencyHist(j)); |
269 | 312 |
270 issue_to_initial_delay_hist[j].add( | 313 m_IssueToInitialDelayHist[j]->add( |
271 seq->getIssueToInitialDelayHist(MachineType(j))); 272 | 314 seq->getIssueToInitialDelayHist(MachineType(j))); 315 |
273 initial_to_forward_delay_hist[j].add( | 316 m_InitialToForwardDelayHist[j]->add( |
274 seq->getInitialToForwardDelayHist(MachineType(j))); | 317 seq->getInitialToForwardDelayHist(MachineType(j))); |
275 forward_to_first_response_delay_hist[j].add(seq-> | 318 m_ForwardToFirstResponseDelayHist[j]->add(seq-> |
276 getForwardRequestToFirstResponseHist(MachineType(j))); 277 | 319 getForwardRequestToFirstResponseHist(MachineType(j))); 320 |
278 first_response_to_completion_delay_hist[j].add(seq-> 279 getFirstResponseToCompletionDelayHist(MachineType(j))); 280 incomplete_times[j] += | 321 m_FirstResponseToCompletionDelayHist[j]->add(seq-> 322 getFirstResponseToCompletionDelayHist( 323 MachineType(j))); 324 m_IncompleteTimes[j] += |
281 seq->getIncompleteTimes(MachineType(j)); 282 } 283 284 // add the per (request, machine) type miss latencies 285 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 286 for (uint32_t k = 0; k < MachineType_NUM; k++) { | 325 seq->getIncompleteTimes(MachineType(j)); 326 } 327 328 // add the per (request, machine) type miss latencies 329 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 330 for (uint32_t k = 0; k < MachineType_NUM; k++) { |
287 hit_type_mach_latency_hist[j][k].add( 288 seq->getHitTypeMachLatencyHist(j,k)); 289 miss_type_mach_latency_hist[j][k].add( 290 seq->getMissTypeMachLatencyHist(j,k)); | 331 m_hitTypeMachLatencyHist[j][k]->add( 332 seq->getHitTypeMachLatencyHist(j,k)); 333 m_missTypeMachLatencyHist[j][k]->add( 334 seq->getMissTypeMachLatencyHist(j,k)); |
291 } 292 } 293 } 294 } 295 } | 335 } 336 } 337 } 338 } 339 } |
296 297 out << "latency: " << latency_hist << endl; 298 for (int i = 0; i < RubyRequestType_NUM; i++) { 299 if (type_latency_hist[i].size() > 0) { 300 out << "latency: " << RubyRequestType(i) << ": " 301 << type_latency_hist[i] << endl; 302 } 303 } 304 305 out << "hit latency: " << hit_latency_hist << endl; 306 for (int i = 0; i < RubyRequestType_NUM; i++) { 307 if (hit_type_latency_hist[i].size() > 0) { 308 out << "hit latency: " << RubyRequestType(i) << ": " 309 << hit_type_latency_hist[i] << endl; 310 } 311 } 312 313 for (int i = 0; i < MachineType_NUM; i++) { 314 if (hit_mach_latency_hist[i].size() > 0) { 315 out << "hit latency: " << MachineType(i) << ": " 316 << hit_mach_latency_hist[i] << endl; 317 } 318 } 319 320 for (int i = 0; i < RubyRequestType_NUM; i++) { 321 for (int j = 0; j < MachineType_NUM; j++) { 322 if (hit_type_mach_latency_hist[i][j].size() > 0) { 323 out << "hit latency: " << RubyRequestType(i) 324 << ": " << MachineType(j) << ": " 325 << hit_type_mach_latency_hist[i][j] << endl; 326 } 327 } 328 } 329 330 out << "miss latency: " << miss_latency_hist << endl; 331 for (int i = 0; i < RubyRequestType_NUM; i++) { 332 if (miss_type_latency_hist[i].size() > 0) { 333 out << "miss latency: " << RubyRequestType(i) << ": " 334 << miss_type_latency_hist[i] << endl; 335 } 336 } 337 338 for (int i = 0; i < MachineType_NUM; i++) { 339 if (miss_mach_latency_hist[i].size() > 0) { 340 out << "miss latency: " << MachineType(i) << ": " 341 << miss_mach_latency_hist[i] << endl; 342 343 out << "miss latency: " << MachineType(i) 344 << "::issue_to_initial_request: " 345 << issue_to_initial_delay_hist[i] << endl; 346 out << "miss latency: " << MachineType(i) 347 << "::initial_to_forward_request: " 348 << initial_to_forward_delay_hist[i] << endl; 349 out << "miss latency: " << MachineType(i) 350 << "::forward_to_first_response: " 351 << forward_to_first_response_delay_hist[i] << endl; 352 out << "miss latency: " << MachineType(i) 353 << "::first_response_to_completion: " 354 << first_response_to_completion_delay_hist[i] << endl; 355 out << "incomplete times: " << incomplete_times[i] << endl; 356 } 357 } 358 359 for (int i = 0; i < RubyRequestType_NUM; i++) { 360 for (int j = 0; j < MachineType_NUM; j++) { 361 if (miss_type_mach_latency_hist[i][j].size() > 0) { 362 out << "miss latency: " << RubyRequestType(i) 363 << ": " << MachineType(j) << ": " 364 << miss_type_mach_latency_hist[i][j] << endl; 365 } 366 } 367 } 368 369 out << endl; | |
370} 371 372void | 340} 341 342void |
373Profiler::printStats(ostream& out, bool short_stats) 374{ 375 out << endl; 376 if (short_stats) { 377 out << "SHORT "; 378 } 379 out << "Profiler Stats" << endl; 380 out << "--------------" << endl; 381 382 Cycles ruby_cycles = g_system_ptr->curCycle()-m_ruby_start; 383 384 out << "Ruby_current_time: " << g_system_ptr->curCycle() << endl; 385 out << "Ruby_start_time: " << m_ruby_start << endl; 386 out << "Ruby_cycles: " << ruby_cycles << endl; 387 out << endl; 388 389 if (!short_stats) { 390 out << "Busy Controller Counts:" << endl; 391 for (uint32_t i = 0; i < MachineType_NUM; i++) { 392 uint32_t size = MachineType_base_count((MachineType)i); 393 394 for (uint32_t j = 0; j < size; j++) { 395 MachineID machID; 396 machID.type = (MachineType)i; 397 machID.num = j; 398 399 AbstractController *ctr = 400 (*(g_abs_controls[i].find(j))).second; 401 out << machID << ":" << ctr->getFullyBusyCycles() << " "; 402 if ((j + 1) % 8 == 0) { 403 out << endl; 404 } 405 } 406 out << endl; 407 } 408 out << endl; 409 410 out << "Busy Bank Count:" << m_busyBankCount << endl; 411 out << endl; 412 413 printOutstandingReqProfile(out); 414 out << endl; 415 } 416 417 if (!short_stats) { 418 out << "All Non-Zero Cycle Demand Cache Accesses" << endl; 419 out << "----------------------------------------" << endl; 420 printMissLatencyProfile(out); 421 422 if (m_all_sharing_histogram.size() > 0) { 423 out << "all_sharing: " << m_all_sharing_histogram << endl; 424 out << "read_sharing: " << m_read_sharing_histogram << endl; 425 out << "write_sharing: " << m_write_sharing_histogram << endl; 426 427 out << "all_sharing_percent: "; 428 m_all_sharing_histogram.printPercent(out); 429 out << endl; 430 431 out << "read_sharing_percent: "; 432 m_read_sharing_histogram.printPercent(out); 433 out << endl; 434 435 out << "write_sharing_percent: "; 436 m_write_sharing_histogram.printPercent(out); 437 out << endl; 438 439 int64 total_miss = m_cache_to_cache + m_memory_to_cache; 440 out << "all_misses: " << total_miss << endl; 441 out << "cache_to_cache_misses: " << m_cache_to_cache << endl; 442 out << "memory_to_cache_misses: " << m_memory_to_cache << endl; 443 out << "cache_to_cache_percent: " 444 << 100.0 * (double(m_cache_to_cache) / double(total_miss)) 445 << endl; 446 out << "memory_to_cache_percent: " 447 << 100.0 * (double(m_memory_to_cache) / double(total_miss)) 448 << endl; 449 out << endl; 450 } 451 452 printRequestProfile(out); 453 454 if (!m_all_instructions) { 455 m_address_profiler_ptr->printStats(out); 456 } 457 458 if (m_all_instructions) { 459 m_inst_profiler_ptr->printStats(out); 460 } 461 462 out << endl; 463 printDelayProfile(out); 464 } 465} 466 467void 468Profiler::clearStats() 469{ 470 m_ruby_start = g_system_ptr->curCycle(); 471 m_real_time_start_time = time(NULL); 472 473 m_busyBankCount = 0; 474 m_read_sharing_histogram.clear(); 475 m_write_sharing_histogram.clear(); 476 m_all_sharing_histogram.clear(); 477 m_cache_to_cache = 0; 478 m_memory_to_cache = 0; 479 480 // update the start time 481 m_ruby_start = g_system_ptr->curCycle(); 482} 483 484void | |
485Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) 486{ 487 if (msg.getType() != RubyRequestType_IFETCH) { 488 // Note: The following line should be commented out if you 489 // want to use the special profiling that is part of the GS320 490 // protocol 491 492 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be 493 // profiled by the AddressProfiler 494 m_address_profiler_ptr-> 495 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), 496 msg.getType(), msg.getAccessMode(), id, false); 497 } 498} | 343Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) 344{ 345 if (msg.getType() != RubyRequestType_IFETCH) { 346 // Note: The following line should be commented out if you 347 // want to use the special profiling that is part of the GS320 348 // protocol 349 350 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be 351 // profiled by the AddressProfiler 352 m_address_profiler_ptr-> 353 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), 354 msg.getType(), msg.getAccessMode(), id, false); 355 } 356} |
499 500void 501Profiler::profileSharing(const Address& addr, AccessType type, 502 NodeID requestor, const Set& sharers, 503 const Set& owner) 504{ 505 Set set_contacted(owner); 506 if (type == AccessType_Write) { 507 set_contacted.addSet(sharers); 508 } 509 set_contacted.remove(requestor); 510 int number_contacted = set_contacted.count(); 511 512 if (type == AccessType_Write) { 513 m_write_sharing_histogram.add(number_contacted); 514 } else { 515 m_read_sharing_histogram.add(number_contacted); 516 } 517 m_all_sharing_histogram.add(number_contacted); 518 519 if (number_contacted == 0) { 520 m_memory_to_cache++; 521 } else { 522 m_cache_to_cache++; 523 } 524} 525 526void 527Profiler::bankBusy() 528{ 529 m_busyBankCount++; 530} 531 532void 533Profiler::rubyWatch(int id) 534{ 535 uint64 tr = 0; 536 Address watch_address = Address(tr); 537 538 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->curCycle(), id, 539 watch_address); 540 541 // don't care about success or failure 542 m_watch_address_set.insert(watch_address); 543} 544 545bool 546Profiler::watchAddress(Address addr) 547{ 548 return m_watch_address_set.count(addr) > 0; 549} 550 551Profiler * 552RubyProfilerParams::create() 553{ 554 return new Profiler(this); 555} | |