Profiler.cc revision 9923:cdd51a15e9be
1/* 2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45#include <sys/types.h> 46#include <unistd.h> 47 48#include <algorithm> 49#include <fstream> 50 51#include "base/stl_helpers.hh" 52#include "base/str.hh" 53#include "mem/protocol/MachineType.hh" 54#include "mem/protocol/RubyRequest.hh" 55#include "mem/ruby/network/Network.hh" 56#include "mem/ruby/profiler/AddressProfiler.hh" 57#include "mem/ruby/profiler/Profiler.hh" 58#include "mem/ruby/system/Sequencer.hh" 59#include "mem/ruby/system/System.hh" 60 61using namespace std; 62using m5::stl_helpers::operator<<; 63 64Profiler::Profiler(const Params *p) 65 : SimObject(p) 66{ 67 m_inst_profiler_ptr = NULL; 68 m_address_profiler_ptr = NULL; 69 m_real_time_start_time = time(NULL); // Not reset in clearStats() 70 71 m_hot_lines = p->hot_lines; 72 m_all_instructions = p->all_instructions; 73 74 m_num_of_sequencers = p->num_of_sequencers; 75 76 m_hot_lines = false; 77 m_all_instructions = false; 78 79 m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); 80 m_address_profiler_ptr->setHotLines(m_hot_lines); 81 m_address_profiler_ptr->setAllInstructions(m_all_instructions); 82 83 if (m_all_instructions) { 84 m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); 85 m_inst_profiler_ptr->setHotLines(m_hot_lines); 86 m_inst_profiler_ptr->setAllInstructions(m_all_instructions); 87 } 88 89 p->ruby_system->registerProfiler(this); 90} 91 92Profiler::~Profiler() 93{ 94} 95 96void 97Profiler::print(ostream& out) const 98{ 99 out << "[Profiler]"; 100} 101 102void 103Profiler::printRequestProfile(ostream &out) const 104{ 105 out << "Request vs. RubySystem State Profile" << endl; 106 out << "--------------------------------" << endl; 107 out << endl; 108 109 map<string, uint64_t> m_requestProfileMap; 110 uint64_t m_requests = 0; 111 112 for (uint32_t i = 0; i < MachineType_NUM; i++) { 113 for (map<uint32_t, AbstractController*>::iterator it = 114 g_abs_controls[i].begin(); 115 it != g_abs_controls[i].end(); ++it) { 116 117 AbstractController *ctr = (*it).second; 118 map<string, uint64_t> mp = ctr->getRequestProfileMap(); 119 120 for (map<string, uint64_t>::iterator jt = mp.begin(); 121 jt != mp.end(); ++jt) { 122 123 map<string, uint64_t>::iterator kt = 124 m_requestProfileMap.find((*jt).first); 125 if (kt != m_requestProfileMap.end()) { 126 (*kt).second += (*jt).second; 127 } else { 128 m_requestProfileMap[(*jt).first] = (*jt).second; 129 } 130 } 131 132 m_requests += ctr->getRequestCount(); 133 } 134 } 135 136 map<string, uint64_t>::const_iterator i = m_requestProfileMap.begin(); 137 map<string, uint64_t>::const_iterator end = m_requestProfileMap.end(); 138 for (; i != end; ++i) { 139 const string &key = i->first; 140 uint64_t count = i->second; 141 142 double percent = (100.0 * double(count)) / double(m_requests); 143 vector<string> items; 144 tokenize(items, key, ':'); 145 vector<string>::iterator j = items.begin(); 146 vector<string>::iterator end = items.end(); 147 for (; j != end; ++i) 148 out << setw(10) << *j; 149 out << setw(11) << count; 150 out << setw(14) << percent << endl; 151 } 152 out << endl; 153} 154 155void 156Profiler::printDelayProfile(ostream &out) const 157{ 158 out << "Message Delayed Cycles" << endl; 159 out << "----------------------" << endl; 160 161 uint32_t numVNets = Network::getNumberOfVirtualNetworks(); 162 Histogram delayHistogram; 163 std::vector<Histogram> delayVCHistogram(numVNets); 164 165 for (uint32_t i = 0; i < MachineType_NUM; i++) { 166 for (map<uint32_t, AbstractController*>::iterator it = 167 g_abs_controls[i].begin(); 168 it != g_abs_controls[i].end(); ++it) { 169 170 AbstractController *ctr = (*it).second; 171 delayHistogram.add(ctr->getDelayHist()); 172 173 for (uint32_t i = 0; i < numVNets; i++) { 174 delayVCHistogram[i].add(ctr->getDelayVCHist(i)); 175 } 176 } 177 } 178 179 out << "Total_delay_cycles: " << delayHistogram << endl; 180 181 for (int i = 0; i < numVNets; i++) { 182 out << " virtual_network_" << i << "_delay_cycles: " 183 << delayVCHistogram[i] << endl; 184 } 185} 186 187void 188Profiler::printOutstandingReqProfile(ostream &out) const 189{ 190 Histogram sequencerRequests; 191 192 for (uint32_t i = 0; i < MachineType_NUM; i++) { 193 for (map<uint32_t, AbstractController*>::iterator it = 194 g_abs_controls[i].begin(); 195 it != g_abs_controls[i].end(); ++it) { 196 197 AbstractController *ctr = (*it).second; 198 Sequencer *seq = ctr->getSequencer(); 199 if (seq != NULL) { 200 sequencerRequests.add(seq->getOutstandReqHist()); 201 } 202 } 203 } 204 205 out << "sequencer_requests_outstanding: " 206 << sequencerRequests << endl; 207} 208 209void 210Profiler::printMissLatencyProfile(ostream &out) const 211{ 212 // Collate the miss latencies histograms from all the sequencers 213 Histogram latency_hist; 214 std::vector<Histogram> type_latency_hist(RubyRequestType_NUM); 215 216 Histogram hit_latency_hist; 217 std::vector<Histogram> hit_type_latency_hist(RubyRequestType_NUM); 218 219 std::vector<Histogram> hit_mach_latency_hist(MachineType_NUM); 220 std::vector<std::vector<Histogram> > 221 hit_type_mach_latency_hist(RubyRequestType_NUM, 222 std::vector<Histogram>(MachineType_NUM)); 223 224 Histogram miss_latency_hist; 225 std::vector<Histogram> miss_type_latency_hist(RubyRequestType_NUM); 226 227 std::vector<Histogram> miss_mach_latency_hist(MachineType_NUM); 228 std::vector<std::vector<Histogram> > 229 miss_type_mach_latency_hist(RubyRequestType_NUM, 230 std::vector<Histogram>(MachineType_NUM)); 231 232 std::vector<Histogram> issue_to_initial_delay_hist(MachineType_NUM); 233 std::vector<Histogram> initial_to_forward_delay_hist(MachineType_NUM); 234 std::vector<Histogram> 235 forward_to_first_response_delay_hist(MachineType_NUM); 236 std::vector<Histogram> 237 first_response_to_completion_delay_hist(MachineType_NUM); 238 std::vector<uint64_t> incomplete_times(MachineType_NUM); 239 240 for (uint32_t i = 0; i < MachineType_NUM; i++) { 241 for (map<uint32_t, AbstractController*>::iterator it = 242 g_abs_controls[i].begin(); 243 it != g_abs_controls[i].end(); ++it) { 244 245 AbstractController *ctr = (*it).second; 246 Sequencer *seq = ctr->getSequencer(); 247 if (seq != NULL) { 248 // add all the latencies 249 latency_hist.add(seq->getLatencyHist()); 250 hit_latency_hist.add(seq->getHitLatencyHist()); 251 miss_latency_hist.add(seq->getMissLatencyHist()); 252 253 // add the per request type latencies 254 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { 255 type_latency_hist[j] 256 .add(seq->getTypeLatencyHist(j)); 257 hit_type_latency_hist[j] 258 .add(seq->getHitTypeLatencyHist(j)); 259 miss_type_latency_hist[j] 260 .add(seq->getMissTypeLatencyHist(j)); 261 } 262 263 // add the per machine type miss latencies 264 for (uint32_t j = 0; j < MachineType_NUM; ++j) { 265 hit_mach_latency_hist[j] 266 .add(seq->getHitMachLatencyHist(j)); 267 miss_mach_latency_hist[j] 268 .add(seq->getMissMachLatencyHist(j)); 269 270 issue_to_initial_delay_hist[j].add( 271 seq->getIssueToInitialDelayHist(MachineType(j))); 272 273 initial_to_forward_delay_hist[j].add( 274 seq->getInitialToForwardDelayHist(MachineType(j))); 275 forward_to_first_response_delay_hist[j].add(seq-> 276 getForwardRequestToFirstResponseHist(MachineType(j))); 277 278 first_response_to_completion_delay_hist[j].add(seq-> 279 getFirstResponseToCompletionDelayHist(MachineType(j))); 280 incomplete_times[j] += 281 seq->getIncompleteTimes(MachineType(j)); 282 } 283 284 // add the per (request, machine) type miss latencies 285 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 286 for (uint32_t k = 0; k < MachineType_NUM; k++) { 287 hit_type_mach_latency_hist[j][k].add( 288 seq->getHitTypeMachLatencyHist(j,k)); 289 miss_type_mach_latency_hist[j][k].add( 290 seq->getMissTypeMachLatencyHist(j,k)); 291 } 292 } 293 } 294 } 295 } 296 297 out << "latency: " << latency_hist << endl; 298 for (int i = 0; i < RubyRequestType_NUM; i++) { 299 if (type_latency_hist[i].size() > 0) { 300 out << "latency: " << RubyRequestType(i) << ": " 301 << type_latency_hist[i] << endl; 302 } 303 } 304 305 out << "hit latency: " << hit_latency_hist << endl; 306 for (int i = 0; i < RubyRequestType_NUM; i++) { 307 if (hit_type_latency_hist[i].size() > 0) { 308 out << "hit latency: " << RubyRequestType(i) << ": " 309 << hit_type_latency_hist[i] << endl; 310 } 311 } 312 313 for (int i = 0; i < MachineType_NUM; i++) { 314 if (hit_mach_latency_hist[i].size() > 0) { 315 out << "hit latency: " << MachineType(i) << ": " 316 << hit_mach_latency_hist[i] << endl; 317 } 318 } 319 320 for (int i = 0; i < RubyRequestType_NUM; i++) { 321 for (int j = 0; j < MachineType_NUM; j++) { 322 if (hit_type_mach_latency_hist[i][j].size() > 0) { 323 out << "hit latency: " << RubyRequestType(i) 324 << ": " << MachineType(j) << ": " 325 << hit_type_mach_latency_hist[i][j] << endl; 326 } 327 } 328 } 329 330 out << "miss latency: " << miss_latency_hist << endl; 331 for (int i = 0; i < RubyRequestType_NUM; i++) { 332 if (miss_type_latency_hist[i].size() > 0) { 333 out << "miss latency: " << RubyRequestType(i) << ": " 334 << miss_type_latency_hist[i] << endl; 335 } 336 } 337 338 for (int i = 0; i < MachineType_NUM; i++) { 339 if (miss_mach_latency_hist[i].size() > 0) { 340 out << "miss latency: " << MachineType(i) << ": " 341 << miss_mach_latency_hist[i] << endl; 342 343 out << "miss latency: " << MachineType(i) 344 << "::issue_to_initial_request: " 345 << issue_to_initial_delay_hist[i] << endl; 346 out << "miss latency: " << MachineType(i) 347 << "::initial_to_forward_request: " 348 << initial_to_forward_delay_hist[i] << endl; 349 out << "miss latency: " << MachineType(i) 350 << "::forward_to_first_response: " 351 << forward_to_first_response_delay_hist[i] << endl; 352 out << "miss latency: " << MachineType(i) 353 << "::first_response_to_completion: " 354 << first_response_to_completion_delay_hist[i] << endl; 355 out << "incomplete times: " << incomplete_times[i] << endl; 356 } 357 } 358 359 for (int i = 0; i < RubyRequestType_NUM; i++) { 360 for (int j = 0; j < MachineType_NUM; j++) { 361 if (miss_type_mach_latency_hist[i][j].size() > 0) { 362 out << "miss latency: " << RubyRequestType(i) 363 << ": " << MachineType(j) << ": " 364 << miss_type_mach_latency_hist[i][j] << endl; 365 } 366 } 367 } 368 369 out << endl; 370} 371 372void 373Profiler::printStats(ostream& out, bool short_stats) 374{ 375 out << endl; 376 if (short_stats) { 377 out << "SHORT "; 378 } 379 out << "Profiler Stats" << endl; 380 out << "--------------" << endl; 381 382 Cycles ruby_cycles = g_system_ptr->curCycle()-m_ruby_start; 383 384 out << "Ruby_current_time: " << g_system_ptr->curCycle() << endl; 385 out << "Ruby_start_time: " << m_ruby_start << endl; 386 out << "Ruby_cycles: " << ruby_cycles << endl; 387 out << endl; 388 389 if (!short_stats) { 390 out << "Busy Controller Counts:" << endl; 391 for (uint32_t i = 0; i < MachineType_NUM; i++) { 392 uint32_t size = MachineType_base_count((MachineType)i); 393 394 for (uint32_t j = 0; j < size; j++) { 395 MachineID machID; 396 machID.type = (MachineType)i; 397 machID.num = j; 398 399 AbstractController *ctr = 400 (*(g_abs_controls[i].find(j))).second; 401 out << machID << ":" << ctr->getFullyBusyCycles() << " "; 402 if ((j + 1) % 8 == 0) { 403 out << endl; 404 } 405 } 406 out << endl; 407 } 408 out << endl; 409 410 out << "Busy Bank Count:" << m_busyBankCount << endl; 411 out << endl; 412 413 printOutstandingReqProfile(out); 414 out << endl; 415 } 416 417 if (!short_stats) { 418 out << "All Non-Zero Cycle Demand Cache Accesses" << endl; 419 out << "----------------------------------------" << endl; 420 printMissLatencyProfile(out); 421 422 if (m_all_sharing_histogram.size() > 0) { 423 out << "all_sharing: " << m_all_sharing_histogram << endl; 424 out << "read_sharing: " << m_read_sharing_histogram << endl; 425 out << "write_sharing: " << m_write_sharing_histogram << endl; 426 427 out << "all_sharing_percent: "; 428 m_all_sharing_histogram.printPercent(out); 429 out << endl; 430 431 out << "read_sharing_percent: "; 432 m_read_sharing_histogram.printPercent(out); 433 out << endl; 434 435 out << "write_sharing_percent: "; 436 m_write_sharing_histogram.printPercent(out); 437 out << endl; 438 439 int64 total_miss = m_cache_to_cache + m_memory_to_cache; 440 out << "all_misses: " << total_miss << endl; 441 out << "cache_to_cache_misses: " << m_cache_to_cache << endl; 442 out << "memory_to_cache_misses: " << m_memory_to_cache << endl; 443 out << "cache_to_cache_percent: " 444 << 100.0 * (double(m_cache_to_cache) / double(total_miss)) 445 << endl; 446 out << "memory_to_cache_percent: " 447 << 100.0 * (double(m_memory_to_cache) / double(total_miss)) 448 << endl; 449 out << endl; 450 } 451 452 printRequestProfile(out); 453 454 if (!m_all_instructions) { 455 m_address_profiler_ptr->printStats(out); 456 } 457 458 if (m_all_instructions) { 459 m_inst_profiler_ptr->printStats(out); 460 } 461 462 out << endl; 463 printDelayProfile(out); 464 } 465} 466 467void 468Profiler::clearStats() 469{ 470 m_ruby_start = g_system_ptr->curCycle(); 471 m_real_time_start_time = time(NULL); 472 473 m_busyBankCount = 0; 474 m_read_sharing_histogram.clear(); 475 m_write_sharing_histogram.clear(); 476 m_all_sharing_histogram.clear(); 477 m_cache_to_cache = 0; 478 m_memory_to_cache = 0; 479 480 // update the start time 481 m_ruby_start = g_system_ptr->curCycle(); 482} 483 484void 485Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) 486{ 487 if (msg.getType() != RubyRequestType_IFETCH) { 488 // Note: The following line should be commented out if you 489 // want to use the special profiling that is part of the GS320 490 // protocol 491 492 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be 493 // profiled by the AddressProfiler 494 m_address_profiler_ptr-> 495 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), 496 msg.getType(), msg.getAccessMode(), id, false); 497 } 498} 499 500void 501Profiler::profileSharing(const Address& addr, AccessType type, 502 NodeID requestor, const Set& sharers, 503 const Set& owner) 504{ 505 Set set_contacted(owner); 506 if (type == AccessType_Write) { 507 set_contacted.addSet(sharers); 508 } 509 set_contacted.remove(requestor); 510 int number_contacted = set_contacted.count(); 511 512 if (type == AccessType_Write) { 513 m_write_sharing_histogram.add(number_contacted); 514 } else { 515 m_read_sharing_histogram.add(number_contacted); 516 } 517 m_all_sharing_histogram.add(number_contacted); 518 519 if (number_contacted == 0) { 520 m_memory_to_cache++; 521 } else { 522 m_cache_to_cache++; 523 } 524} 525 526void 527Profiler::bankBusy() 528{ 529 m_busyBankCount++; 530} 531 532void 533Profiler::rubyWatch(int id) 534{ 535 uint64 tr = 0; 536 Address watch_address = Address(tr); 537 538 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->curCycle(), id, 539 watch_address); 540 541 // don't care about success or failure 542 m_watch_address_set.insert(watch_address); 543} 544 545bool 546Profiler::watchAddress(Address addr) 547{ 548 return m_watch_address_set.count(addr) > 0; 549} 550 551Profiler * 552RubyProfilerParams::create() 553{ 554 return new Profiler(this); 555} 556