Profiler.cc revision 9773:915be89faf30
1/* 2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45// Allows use of times() library call, which determines virtual runtime 46#include <sys/resource.h> 47#include <sys/times.h> 48#include <sys/types.h> 49#include <unistd.h> 50 51#include <algorithm> 52#include <fstream> 53 54#include "base/stl_helpers.hh" 55#include "base/str.hh" 56#include "mem/protocol/MachineType.hh" 57#include "mem/protocol/RubyRequest.hh" 58#include "mem/ruby/network/Network.hh" 59#include "mem/ruby/profiler/AddressProfiler.hh" 60#include "mem/ruby/profiler/Profiler.hh" 61#include "mem/ruby/system/Sequencer.hh" 62#include "mem/ruby/system/System.hh" 63 64using namespace std; 65using m5::stl_helpers::operator<<; 66 67static double process_memory_total(); 68static double process_memory_resident(); 69 70Profiler::Profiler(const Params *p) 71 : SimObject(p) 72{ 73 m_inst_profiler_ptr = NULL; 74 m_address_profiler_ptr = NULL; 75 m_real_time_start_time = time(NULL); // Not reset in clearStats() 76 77 m_hot_lines = p->hot_lines; 78 m_all_instructions = p->all_instructions; 79 80 m_num_of_sequencers = p->num_of_sequencers; 81 82 m_hot_lines = false; 83 m_all_instructions = false; 84 85 m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers); 86 m_address_profiler_ptr->setHotLines(m_hot_lines); 87 m_address_profiler_ptr->setAllInstructions(m_all_instructions); 88 89 if (m_all_instructions) { 90 m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers); 91 m_inst_profiler_ptr->setHotLines(m_hot_lines); 92 m_inst_profiler_ptr->setAllInstructions(m_all_instructions); 93 } 94 95 p->ruby_system->registerProfiler(this); 96} 97 98Profiler::~Profiler() 99{ 100} 101 102void 103Profiler::print(ostream& out) const 104{ 105 out << "[Profiler]"; 106} 107 108void 109Profiler::printRequestProfile(ostream &out) const 110{ 111 out << "Request vs. RubySystem State Profile" << endl; 112 out << "--------------------------------" << endl; 113 out << endl; 114 115 map<string, uint64_t> m_requestProfileMap; 116 uint64_t m_requests = 0; 117 118 for (uint32_t i = 0; i < MachineType_NUM; i++) { 119 for (map<uint32_t, AbstractController*>::iterator it = 120 g_abs_controls[i].begin(); 121 it != g_abs_controls[i].end(); ++it) { 122 123 AbstractController *ctr = (*it).second; 124 map<string, uint64_t> mp = ctr->getRequestProfileMap(); 125 126 for (map<string, uint64_t>::iterator jt = mp.begin(); 127 jt != mp.end(); ++jt) { 128 129 map<string, uint64_t>::iterator kt = 130 m_requestProfileMap.find((*jt).first); 131 if (kt != m_requestProfileMap.end()) { 132 (*kt).second += (*jt).second; 133 } else { 134 m_requestProfileMap[(*jt).first] = (*jt).second; 135 } 136 } 137 138 m_requests += ctr->getRequestCount(); 139 } 140 } 141 142 map<string, uint64_t>::const_iterator i = m_requestProfileMap.begin(); 143 map<string, uint64_t>::const_iterator end = m_requestProfileMap.end(); 144 for (; i != end; ++i) { 145 const string &key = i->first; 146 uint64_t count = i->second; 147 148 double percent = (100.0 * double(count)) / double(m_requests); 149 vector<string> items; 150 tokenize(items, key, ':'); 151 vector<string>::iterator j = items.begin(); 152 vector<string>::iterator end = items.end(); 153 for (; j != end; ++i) 154 out << setw(10) << *j; 155 out << setw(11) << count; 156 out << setw(14) << percent << endl; 157 } 158 out << endl; 159} 160 161void 162Profiler::printDelayProfile(ostream &out) const 163{ 164 out << "Message Delayed Cycles" << endl; 165 out << "----------------------" << endl; 166 167 uint32_t numVNets = Network::getNumberOfVirtualNetworks(); 168 Histogram delayHistogram; 169 std::vector<Histogram> delayVCHistogram(numVNets); 170 171 for (uint32_t i = 0; i < MachineType_NUM; i++) { 172 for (map<uint32_t, AbstractController*>::iterator it = 173 g_abs_controls[i].begin(); 174 it != g_abs_controls[i].end(); ++it) { 175 176 AbstractController *ctr = (*it).second; 177 delayHistogram.add(ctr->getDelayHist()); 178 179 for (uint32_t i = 0; i < numVNets; i++) { 180 delayVCHistogram[i].add(ctr->getDelayVCHist(i)); 181 } 182 } 183 } 184 185 out << "Total_delay_cycles: " << delayHistogram << endl; 186 187 for (int i = 0; i < numVNets; i++) { 188 out << " virtual_network_" << i << "_delay_cycles: " 189 << delayVCHistogram[i] << endl; 190 } 191} 192 193void 194Profiler::printOutstandingReqProfile(ostream &out) const 195{ 196 Histogram sequencerRequests; 197 198 for (uint32_t i = 0; i < MachineType_NUM; i++) { 199 for (map<uint32_t, AbstractController*>::iterator it = 200 g_abs_controls[i].begin(); 201 it != g_abs_controls[i].end(); ++it) { 202 203 AbstractController *ctr = (*it).second; 204 Sequencer *seq = ctr->getSequencer(); 205 if (seq != NULL) { 206 sequencerRequests.add(seq->getOutstandReqHist()); 207 } 208 } 209 } 210 211 out << "sequencer_requests_outstanding: " 212 << sequencerRequests << endl; 213} 214 215void 216Profiler::printMissLatencyProfile(ostream &out) const 217{ 218 // Collate the miss latencies histograms from all the sequencers 219 Histogram latency_hist; 220 std::vector<Histogram> type_latency_hist(RubyRequestType_NUM); 221 222 Histogram hit_latency_hist; 223 std::vector<Histogram> hit_type_latency_hist(RubyRequestType_NUM); 224 225 std::vector<Histogram> hit_mach_latency_hist(MachineType_NUM); 226 std::vector<std::vector<Histogram> > 227 hit_type_mach_latency_hist(RubyRequestType_NUM, 228 std::vector<Histogram>(MachineType_NUM)); 229 230 Histogram miss_latency_hist; 231 std::vector<Histogram> miss_type_latency_hist(RubyRequestType_NUM); 232 233 std::vector<Histogram> miss_mach_latency_hist(MachineType_NUM); 234 std::vector<std::vector<Histogram> > 235 miss_type_mach_latency_hist(RubyRequestType_NUM, 236 std::vector<Histogram>(MachineType_NUM)); 237 238 std::vector<Histogram> issue_to_initial_delay_hist(MachineType_NUM); 239 std::vector<Histogram> initial_to_forward_delay_hist(MachineType_NUM); 240 std::vector<Histogram> 241 forward_to_first_response_delay_hist(MachineType_NUM); 242 std::vector<Histogram> 243 first_response_to_completion_delay_hist(MachineType_NUM); 244 std::vector<uint64_t> incomplete_times(MachineType_NUM); 245 246 for (uint32_t i = 0; i < MachineType_NUM; i++) { 247 for (map<uint32_t, AbstractController*>::iterator it = 248 g_abs_controls[i].begin(); 249 it != g_abs_controls[i].end(); ++it) { 250 251 AbstractController *ctr = (*it).second; 252 Sequencer *seq = ctr->getSequencer(); 253 if (seq != NULL) { 254 // add all the latencies 255 latency_hist.add(seq->getLatencyHist()); 256 hit_latency_hist.add(seq->getHitLatencyHist()); 257 miss_latency_hist.add(seq->getMissLatencyHist()); 258 259 // add the per request type latencies 260 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { 261 type_latency_hist[j] 262 .add(seq->getTypeLatencyHist(j)); 263 hit_type_latency_hist[j] 264 .add(seq->getHitTypeLatencyHist(j)); 265 miss_type_latency_hist[j] 266 .add(seq->getMissTypeLatencyHist(j)); 267 } 268 269 // add the per machine type miss latencies 270 for (uint32_t j = 0; j < MachineType_NUM; ++j) { 271 hit_mach_latency_hist[j] 272 .add(seq->getHitMachLatencyHist(j)); 273 miss_mach_latency_hist[j] 274 .add(seq->getMissMachLatencyHist(j)); 275 276 issue_to_initial_delay_hist[j].add( 277 seq->getIssueToInitialDelayHist(MachineType(j))); 278 279 initial_to_forward_delay_hist[j].add( 280 seq->getInitialToForwardDelayHist(MachineType(j))); 281 forward_to_first_response_delay_hist[j].add(seq-> 282 getForwardRequestToFirstResponseHist(MachineType(j))); 283 284 first_response_to_completion_delay_hist[j].add(seq-> 285 getFirstResponseToCompletionDelayHist(MachineType(j))); 286 incomplete_times[j] += 287 seq->getIncompleteTimes(MachineType(j)); 288 } 289 290 // add the per (request, machine) type miss latencies 291 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 292 for (uint32_t k = 0; k < MachineType_NUM; k++) { 293 hit_type_mach_latency_hist[j][k].add( 294 seq->getHitTypeMachLatencyHist(j,k)); 295 miss_type_mach_latency_hist[j][k].add( 296 seq->getMissTypeMachLatencyHist(j,k)); 297 } 298 } 299 } 300 } 301 } 302 303 out << "latency: " << latency_hist << endl; 304 for (int i = 0; i < RubyRequestType_NUM; i++) { 305 if (type_latency_hist[i].size() > 0) { 306 out << "latency: " << RubyRequestType(i) << ": " 307 << type_latency_hist[i] << endl; 308 } 309 } 310 311 out << "hit latency: " << hit_latency_hist << endl; 312 for (int i = 0; i < RubyRequestType_NUM; i++) { 313 if (hit_type_latency_hist[i].size() > 0) { 314 out << "hit latency: " << RubyRequestType(i) << ": " 315 << hit_type_latency_hist[i] << endl; 316 } 317 } 318 319 for (int i = 0; i < MachineType_NUM; i++) { 320 if (hit_mach_latency_hist[i].size() > 0) { 321 out << "hit latency: " << MachineType(i) << ": " 322 << hit_mach_latency_hist[i] << endl; 323 } 324 } 325 326 for (int i = 0; i < RubyRequestType_NUM; i++) { 327 for (int j = 0; j < MachineType_NUM; j++) { 328 if (hit_type_mach_latency_hist[i][j].size() > 0) { 329 out << "hit latency: " << RubyRequestType(i) 330 << ": " << MachineType(j) << ": " 331 << hit_type_mach_latency_hist[i][j] << endl; 332 } 333 } 334 } 335 336 out << "miss latency: " << miss_latency_hist << endl; 337 for (int i = 0; i < RubyRequestType_NUM; i++) { 338 if (miss_type_latency_hist[i].size() > 0) { 339 out << "miss latency: " << RubyRequestType(i) << ": " 340 << miss_type_latency_hist[i] << endl; 341 } 342 } 343 344 for (int i = 0; i < MachineType_NUM; i++) { 345 if (miss_mach_latency_hist[i].size() > 0) { 346 out << "miss latency: " << MachineType(i) << ": " 347 << miss_mach_latency_hist[i] << endl; 348 349 out << "miss latency: " << MachineType(i) 350 << "::issue_to_initial_request: " 351 << issue_to_initial_delay_hist[i] << endl; 352 out << "miss latency: " << MachineType(i) 353 << "::initial_to_forward_request: " 354 << initial_to_forward_delay_hist[i] << endl; 355 out << "miss latency: " << MachineType(i) 356 << "::forward_to_first_response: " 357 << forward_to_first_response_delay_hist[i] << endl; 358 out << "miss latency: " << MachineType(i) 359 << "::first_response_to_completion: " 360 << first_response_to_completion_delay_hist[i] << endl; 361 out << "incomplete times: " << incomplete_times[i] << endl; 362 } 363 } 364 365 for (int i = 0; i < RubyRequestType_NUM; i++) { 366 for (int j = 0; j < MachineType_NUM; j++) { 367 if (miss_type_mach_latency_hist[i][j].size() > 0) { 368 out << "miss latency: " << RubyRequestType(i) 369 << ": " << MachineType(j) << ": " 370 << miss_type_mach_latency_hist[i][j] << endl; 371 } 372 } 373 } 374 375 out << endl; 376} 377 378void 379Profiler::printStats(ostream& out, bool short_stats) 380{ 381 out << endl; 382 if (short_stats) { 383 out << "SHORT "; 384 } 385 out << "Profiler Stats" << endl; 386 out << "--------------" << endl; 387 388 time_t real_time_current = time(NULL); 389 double seconds = difftime(real_time_current, m_real_time_start_time); 390 double minutes = seconds / 60.0; 391 double hours = minutes / 60.0; 392 double days = hours / 24.0; 393 Cycles ruby_cycles = g_system_ptr->curCycle()-m_ruby_start; 394 395 if (!short_stats) { 396 out << "Elapsed_time_in_seconds: " << seconds << endl; 397 out << "Elapsed_time_in_minutes: " << minutes << endl; 398 out << "Elapsed_time_in_hours: " << hours << endl; 399 out << "Elapsed_time_in_days: " << days << endl; 400 out << endl; 401 } 402 403 // print the virtual runtimes as well 404 struct tms vtime; 405 times(&vtime); 406 seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; 407 minutes = seconds / 60.0; 408 hours = minutes / 60.0; 409 days = hours / 24.0; 410 out << "Virtual_time_in_seconds: " << seconds << endl; 411 out << "Virtual_time_in_minutes: " << minutes << endl; 412 out << "Virtual_time_in_hours: " << hours << endl; 413 out << "Virtual_time_in_days: " << days << endl; 414 out << endl; 415 416 out << "Ruby_current_time: " << g_system_ptr->curCycle() << endl; 417 out << "Ruby_start_time: " << m_ruby_start << endl; 418 out << "Ruby_cycles: " << ruby_cycles << endl; 419 out << endl; 420 421 if (!short_stats) { 422 out << "mbytes_resident: " << process_memory_resident() << endl; 423 out << "mbytes_total: " << process_memory_total() << endl; 424 if (process_memory_total() > 0) { 425 out << "resident_ratio: " 426 << process_memory_resident()/process_memory_total() << endl; 427 } 428 out << endl; 429 } 430 431 if (!short_stats) { 432 out << "Busy Controller Counts:" << endl; 433 for (uint32_t i = 0; i < MachineType_NUM; i++) { 434 uint32_t size = MachineType_base_count((MachineType)i); 435 436 for (uint32_t j = 0; j < size; j++) { 437 MachineID machID; 438 machID.type = (MachineType)i; 439 machID.num = j; 440 441 AbstractController *ctr = 442 (*(g_abs_controls[i].find(j))).second; 443 out << machID << ":" << ctr->getFullyBusyCycles() << " "; 444 if ((j + 1) % 8 == 0) { 445 out << endl; 446 } 447 } 448 out << endl; 449 } 450 out << endl; 451 452 out << "Busy Bank Count:" << m_busyBankCount << endl; 453 out << endl; 454 455 printOutstandingReqProfile(out); 456 out << endl; 457 } 458 459 if (!short_stats) { 460 out << "All Non-Zero Cycle Demand Cache Accesses" << endl; 461 out << "----------------------------------------" << endl; 462 printMissLatencyProfile(out); 463 464 if (m_all_sharing_histogram.size() > 0) { 465 out << "all_sharing: " << m_all_sharing_histogram << endl; 466 out << "read_sharing: " << m_read_sharing_histogram << endl; 467 out << "write_sharing: " << m_write_sharing_histogram << endl; 468 469 out << "all_sharing_percent: "; 470 m_all_sharing_histogram.printPercent(out); 471 out << endl; 472 473 out << "read_sharing_percent: "; 474 m_read_sharing_histogram.printPercent(out); 475 out << endl; 476 477 out << "write_sharing_percent: "; 478 m_write_sharing_histogram.printPercent(out); 479 out << endl; 480 481 int64 total_miss = m_cache_to_cache + m_memory_to_cache; 482 out << "all_misses: " << total_miss << endl; 483 out << "cache_to_cache_misses: " << m_cache_to_cache << endl; 484 out << "memory_to_cache_misses: " << m_memory_to_cache << endl; 485 out << "cache_to_cache_percent: " 486 << 100.0 * (double(m_cache_to_cache) / double(total_miss)) 487 << endl; 488 out << "memory_to_cache_percent: " 489 << 100.0 * (double(m_memory_to_cache) / double(total_miss)) 490 << endl; 491 out << endl; 492 } 493 494 printRequestProfile(out); 495 496 if (!m_all_instructions) { 497 m_address_profiler_ptr->printStats(out); 498 } 499 500 if (m_all_instructions) { 501 m_inst_profiler_ptr->printStats(out); 502 } 503 504 out << endl; 505 printDelayProfile(out); 506 printResourceUsage(out); 507 } 508} 509 510void 511Profiler::printResourceUsage(ostream& out) const 512{ 513 out << endl; 514 out << "Resource Usage" << endl; 515 out << "--------------" << endl; 516 517 int64_t pagesize = getpagesize(); // page size in bytes 518 out << "page_size: " << pagesize << endl; 519 520 rusage usage; 521 getrusage (RUSAGE_SELF, &usage); 522 523 out << "user_time: " << usage.ru_utime.tv_sec << endl; 524 out << "system_time: " << usage.ru_stime.tv_sec << endl; 525 out << "page_reclaims: " << usage.ru_minflt << endl; 526 out << "page_faults: " << usage.ru_majflt << endl; 527 out << "swaps: " << usage.ru_nswap << endl; 528 out << "block_inputs: " << usage.ru_inblock << endl; 529 out << "block_outputs: " << usage.ru_oublock << endl; 530} 531 532void 533Profiler::clearStats() 534{ 535 m_ruby_start = g_system_ptr->curCycle(); 536 m_real_time_start_time = time(NULL); 537 538 m_busyBankCount = 0; 539 m_read_sharing_histogram.clear(); 540 m_write_sharing_histogram.clear(); 541 m_all_sharing_histogram.clear(); 542 m_cache_to_cache = 0; 543 m_memory_to_cache = 0; 544 545 // update the start time 546 m_ruby_start = g_system_ptr->curCycle(); 547} 548 549void 550Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) 551{ 552 if (msg.getType() != RubyRequestType_IFETCH) { 553 // Note: The following line should be commented out if you 554 // want to use the special profiling that is part of the GS320 555 // protocol 556 557 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be 558 // profiled by the AddressProfiler 559 m_address_profiler_ptr-> 560 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), 561 msg.getType(), msg.getAccessMode(), id, false); 562 } 563} 564 565void 566Profiler::profileSharing(const Address& addr, AccessType type, 567 NodeID requestor, const Set& sharers, 568 const Set& owner) 569{ 570 Set set_contacted(owner); 571 if (type == AccessType_Write) { 572 set_contacted.addSet(sharers); 573 } 574 set_contacted.remove(requestor); 575 int number_contacted = set_contacted.count(); 576 577 if (type == AccessType_Write) { 578 m_write_sharing_histogram.add(number_contacted); 579 } else { 580 m_read_sharing_histogram.add(number_contacted); 581 } 582 m_all_sharing_histogram.add(number_contacted); 583 584 if (number_contacted == 0) { 585 m_memory_to_cache++; 586 } else { 587 m_cache_to_cache++; 588 } 589} 590 591void 592Profiler::bankBusy() 593{ 594 m_busyBankCount++; 595} 596 597// Helper function 598static double 599process_memory_total() 600{ 601 // 4kB page size, 1024*1024 bytes per MB, 602 const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); 603 ifstream proc_file; 604 proc_file.open("/proc/self/statm"); 605 int total_size_in_pages = 0; 606 int res_size_in_pages = 0; 607 proc_file >> total_size_in_pages; 608 proc_file >> res_size_in_pages; 609 return double(total_size_in_pages) * MULTIPLIER; // size in megabytes 610} 611 612static double 613process_memory_resident() 614{ 615 // 4kB page size, 1024*1024 bytes per MB, 616 const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0); 617 ifstream proc_file; 618 proc_file.open("/proc/self/statm"); 619 int total_size_in_pages = 0; 620 int res_size_in_pages = 0; 621 proc_file >> total_size_in_pages; 622 proc_file >> res_size_in_pages; 623 return double(res_size_in_pages) * MULTIPLIER; // size in megabytes 624} 625 626void 627Profiler::rubyWatch(int id) 628{ 629 uint64 tr = 0; 630 Address watch_address = Address(tr); 631 632 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->curCycle(), id, 633 watch_address); 634 635 // don't care about success or failure 636 m_watch_address_set.insert(watch_address); 637} 638 639bool 640Profiler::watchAddress(Address addr) 641{ 642 return m_watch_address_set.count(addr) > 0; 643} 644 645Profiler * 646RubyProfilerParams::create() 647{ 648 return new Profiler(this); 649} 650