Profiler.cc revision 6889:323cd43a3c46
1/* 2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45/* 46 * Profiler.cc 47 * 48 * Description: See Profiler.hh 49 * 50 * $Id$ 51 * 52 */ 53 54#include "mem/ruby/profiler/Profiler.hh" 55#include "mem/ruby/profiler/AddressProfiler.hh" 56#include "mem/ruby/system/System.hh" 57#include "mem/ruby/network/Network.hh" 58#include "mem/gems_common/PrioHeap.hh" 59#include "mem/protocol/CacheMsg.hh" 60#include "mem/protocol/Protocol.hh" 61#include "mem/gems_common/util.hh" 62#include "mem/gems_common/Map.hh" 63#include "mem/ruby/common/Debug.hh" 64#include "mem/protocol/MachineType.hh" 65 66#include "mem/ruby/system/System.hh" 67 68// Allows use of times() library call, which determines virtual runtime 69#include <sys/times.h> 70 71extern std::ostream * debug_cout_ptr; 72 73static double process_memory_total(); 74static double process_memory_resident(); 75 76Profiler::Profiler(const Params *p) 77 : SimObject(p) 78{ 79 m_requestProfileMap_ptr = new Map<string, int>; 80 81 m_inst_profiler_ptr = NULL; 82 m_address_profiler_ptr = NULL; 83 84 m_real_time_start_time = time(NULL); // Not reset in clearStats() 85 m_stats_period = 1000000; // Default 86 m_periodic_output_file_ptr = &cerr; 87 88 m_hot_lines = p->hot_lines; 89 m_all_instructions = p->all_instructions; 90 91 // 92 // Initialize the memory controller profiler structs 93 // 94 m_mc_profilers.setSize(p->mem_cntrl_count); 95 for (int mem_cntrl = 0; mem_cntrl < p->mem_cntrl_count; mem_cntrl++) { 96 m_mc_profilers[mem_cntrl] = new memory_control_profiler; 97 m_mc_profilers[mem_cntrl]->m_memReq = 0; 98 m_mc_profilers[mem_cntrl]->m_memBankBusy = 0; 99 m_mc_profilers[mem_cntrl]->m_memBusBusy = 0; 100 m_mc_profilers[mem_cntrl]->m_memReadWriteBusy = 0; 101 m_mc_profilers[mem_cntrl]->m_memDataBusBusy = 0; 102 m_mc_profilers[mem_cntrl]->m_memTfawBusy = 0; 103 m_mc_profilers[mem_cntrl]->m_memRefresh = 0; 104 m_mc_profilers[mem_cntrl]->m_memRead = 0; 105 m_mc_profilers[mem_cntrl]->m_memWrite = 0; 106 m_mc_profilers[mem_cntrl]->m_memWaitCycles = 0; 107 m_mc_profilers[mem_cntrl]->m_memInputQ = 0; 108 m_mc_profilers[mem_cntrl]->m_memBankQ = 0; 109 m_mc_profilers[mem_cntrl]->m_memArbWait = 0; 110 m_mc_profilers[mem_cntrl]->m_memRandBusy = 0; 111 m_mc_profilers[mem_cntrl]->m_memNotOld = 0; 112 113 m_mc_profilers[mem_cntrl]->m_banks_per_rank = p->banks_per_rank; 114 m_mc_profilers[mem_cntrl]->m_ranks_per_dimm = p->ranks_per_dimm; 115 m_mc_profilers[mem_cntrl]->m_dimms_per_channel = 116 p->dimms_per_channel; 117 118 int totalBanks = p->banks_per_rank * 119 p->ranks_per_dimm * 120 p->dimms_per_channel; 121 122 m_mc_profilers[mem_cntrl]->m_memBankCount.setSize(totalBanks); 123 } 124 125 m_hot_lines = false; 126 m_all_instructions = false; 127 128 m_address_profiler_ptr = new AddressProfiler; 129 m_address_profiler_ptr -> setHotLines(m_hot_lines); 130 m_address_profiler_ptr -> setAllInstructions(m_all_instructions); 131 132 if (m_all_instructions) { 133 m_inst_profiler_ptr = new AddressProfiler; 134 m_inst_profiler_ptr -> setHotLines(m_hot_lines); 135 m_inst_profiler_ptr -> setAllInstructions(m_all_instructions); 136 } 137} 138 139Profiler::~Profiler() 140{ 141 if (m_periodic_output_file_ptr != &cerr) { 142 delete m_periodic_output_file_ptr; 143 } 144 145 for (int mem_cntrl = 0; 146 mem_cntrl < m_mc_profilers.size(); 147 mem_cntrl++) { 148 delete m_mc_profilers[mem_cntrl]; 149 } 150 151 delete m_requestProfileMap_ptr; 152} 153 154void Profiler::wakeup() 155{ 156 // FIXME - avoid the repeated code 157 158 Vector<integer_t> perProcCycleCount; 159 perProcCycleCount.setSize(RubySystem::getNumberOfSequencers()); 160 161 for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { 162 perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; 163 // The +1 allows us to avoid division by zero 164 } 165 166 integer_t total_misses = m_perProcTotalMisses.sum(); 167 integer_t simics_cycles_executed = perProcCycleCount.sum(); 168 integer_t transactions_started = m_perProcStartTransaction.sum(); 169 integer_t transactions_ended = m_perProcEndTransaction.sum(); 170 171 (*m_periodic_output_file_ptr) << "ruby_cycles: " 172 << g_eventQueue_ptr->getTime()-m_ruby_start 173 << endl; 174 175 (*m_periodic_output_file_ptr) << "total_misses: " 176 << total_misses 177 << " " 178 << m_perProcTotalMisses 179 << endl; 180 181 (*m_periodic_output_file_ptr) << "simics_cycles_executed: " 182 << simics_cycles_executed 183 << " " 184 << perProcCycleCount 185 << endl; 186 187 (*m_periodic_output_file_ptr) << "transactions_started: " 188 << transactions_started 189 << " " 190 << m_perProcStartTransaction 191 << endl; 192 193 (*m_periodic_output_file_ptr) << "transactions_ended: " 194 << transactions_ended 195 << " " 196 << m_perProcEndTransaction 197 << endl; 198 199 (*m_periodic_output_file_ptr) << "mbytes_resident: " 200 << process_memory_resident() 201 << endl; 202 203 (*m_periodic_output_file_ptr) << "mbytes_total: " 204 << process_memory_total() 205 << endl; 206 207 if (process_memory_total() > 0) { 208 (*m_periodic_output_file_ptr) << "resident_ratio: " 209 << process_memory_resident()/process_memory_total() 210 << endl; 211 } 212 213 (*m_periodic_output_file_ptr) << "miss_latency: " 214 << m_allMissLatencyHistogram 215 << endl; 216 217 *m_periodic_output_file_ptr << endl; 218 219 if (m_all_instructions) { 220 m_inst_profiler_ptr->printStats(*m_periodic_output_file_ptr); 221 } 222 223 //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr); 224 g_eventQueue_ptr->scheduleEvent(this, m_stats_period); 225} 226 227void Profiler::setPeriodicStatsFile(const string& filename) 228{ 229 cout << "Recording periodic statistics to file '" << filename << "' every " 230 << m_stats_period << " Ruby cycles" << endl; 231 232 if (m_periodic_output_file_ptr != &cerr) { 233 delete m_periodic_output_file_ptr; 234 } 235 236 m_periodic_output_file_ptr = new ofstream(filename.c_str()); 237 g_eventQueue_ptr->scheduleEvent(this, 1); 238} 239 240void Profiler::setPeriodicStatsInterval(integer_t period) 241{ 242 cout << "Recording periodic statistics every " << m_stats_period 243 << " Ruby cycles" << endl; 244 245 m_stats_period = period; 246 g_eventQueue_ptr->scheduleEvent(this, 1); 247} 248 249void Profiler::printConfig(ostream& out) const 250{ 251 out << endl; 252 out << "Profiler Configuration" << endl; 253 out << "----------------------" << endl; 254 out << "periodic_stats_period: " << m_stats_period << endl; 255} 256 257void Profiler::print(ostream& out) const 258{ 259 out << "[Profiler]"; 260} 261 262void Profiler::printStats(ostream& out, bool short_stats) 263{ 264 out << endl; 265 if (short_stats) { 266 out << "SHORT "; 267 } 268 out << "Profiler Stats" << endl; 269 out << "--------------" << endl; 270 271 time_t real_time_current = time(NULL); 272 double seconds = difftime(real_time_current, m_real_time_start_time); 273 double minutes = seconds/60.0; 274 double hours = minutes/60.0; 275 double days = hours/24.0; 276 Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; 277 278 if (!short_stats) { 279 out << "Elapsed_time_in_seconds: " << seconds << endl; 280 out << "Elapsed_time_in_minutes: " << minutes << endl; 281 out << "Elapsed_time_in_hours: " << hours << endl; 282 out << "Elapsed_time_in_days: " << days << endl; 283 out << endl; 284 } 285 286 // print the virtual runtimes as well 287 struct tms vtime; 288 times(&vtime); 289 seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; 290 minutes = seconds / 60.0; 291 hours = minutes / 60.0; 292 days = hours / 24.0; 293 out << "Virtual_time_in_seconds: " << seconds << endl; 294 out << "Virtual_time_in_minutes: " << minutes << endl; 295 out << "Virtual_time_in_hours: " << hours << endl; 296 out << "Virtual_time_in_days: " << days << endl; 297 out << endl; 298 299 out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; 300 out << "Ruby_start_time: " << m_ruby_start << endl; 301 out << "Ruby_cycles: " << ruby_cycles << endl; 302 out << endl; 303 304 if (!short_stats) { 305 out << "mbytes_resident: " << process_memory_resident() << endl; 306 out << "mbytes_total: " << process_memory_total() << endl; 307 if (process_memory_total() > 0) { 308 out << "resident_ratio: " 309 << process_memory_resident()/process_memory_total() << endl; 310 } 311 out << endl; 312 313 } 314 315 Vector<integer_t> perProcCycleCount; 316 Vector<double> perProcCyclesPerTrans; 317 Vector<double> perProcMissesPerTrans; 318 319 320 perProcCycleCount.setSize(RubySystem::getNumberOfSequencers()); 321 perProcCyclesPerTrans.setSize(RubySystem::getNumberOfSequencers()); 322 perProcMissesPerTrans.setSize(RubySystem::getNumberOfSequencers()); 323 324 for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { 325 perProcCycleCount[i] = g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; 326 // The +1 allows us to avoid division by zero 327 328 int trans = m_perProcEndTransaction[i]; 329 if (trans == 0) { 330 perProcCyclesPerTrans[i] = 0; 331 perProcMissesPerTrans[i] = 0; 332 } else { 333 perProcCyclesPerTrans[i] = ruby_cycles / double(trans); 334 perProcMissesPerTrans[i] = m_perProcTotalMisses[i] / double(trans); 335 } 336 } 337 338 integer_t total_misses = m_perProcTotalMisses.sum(); 339 integer_t user_misses = m_perProcUserMisses.sum(); 340 integer_t supervisor_misses = m_perProcSupervisorMisses.sum(); 341 integer_t simics_cycles_executed = perProcCycleCount.sum(); 342 integer_t transactions_started = m_perProcStartTransaction.sum(); 343 integer_t transactions_ended = m_perProcEndTransaction.sum(); 344 345 double cycles_per_transaction = (transactions_ended != 0) ? (RubySystem::getNumberOfSequencers() * double(ruby_cycles)) / double(transactions_ended) : 0; 346 double misses_per_transaction = (transactions_ended != 0) ? double(total_misses) / double(transactions_ended) : 0; 347 348 out << "Total_misses: " << total_misses << endl; 349 out << "total_misses: " << total_misses << " " << m_perProcTotalMisses << endl; 350 out << "user_misses: " << user_misses << " " << m_perProcUserMisses << endl; 351 out << "supervisor_misses: " << supervisor_misses << " " << m_perProcSupervisorMisses << endl; 352 out << endl; 353 out << "ruby_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; 354 out << endl; 355 out << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl; 356 out << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl; 357 out << "cycles_per_transaction: " << cycles_per_transaction << " " << perProcCyclesPerTrans << endl; 358 out << "misses_per_transaction: " << misses_per_transaction << " " << perProcMissesPerTrans << endl; 359 360 out << endl; 361 362 out << endl; 363 364 for (int mem_cntrl = 0; 365 mem_cntrl < m_mc_profilers.size(); 366 mem_cntrl++) { 367 uint64 m_memReq = m_mc_profilers[mem_cntrl]->m_memReq; 368 uint64 m_memRefresh = m_mc_profilers[mem_cntrl]->m_memRefresh; 369 uint64 m_memInputQ = m_mc_profilers[mem_cntrl]->m_memInputQ; 370 uint64 m_memBankQ = m_mc_profilers[mem_cntrl]->m_memBankQ; 371 uint64 m_memWaitCycles = m_mc_profilers[mem_cntrl]->m_memWaitCycles; 372 uint64 m_memRead = m_mc_profilers[mem_cntrl]->m_memRead; 373 uint64 m_memWrite = m_mc_profilers[mem_cntrl]->m_memWrite; 374 uint64 m_memBankBusy = m_mc_profilers[mem_cntrl]->m_memBankBusy; 375 uint64 m_memRandBusy = m_mc_profilers[mem_cntrl]->m_memRandBusy; 376 uint64 m_memNotOld = m_mc_profilers[mem_cntrl]->m_memNotOld; 377 uint64 m_memArbWait = m_mc_profilers[mem_cntrl]->m_memArbWait; 378 uint64 m_memBusBusy = m_mc_profilers[mem_cntrl]->m_memBusBusy; 379 uint64 m_memTfawBusy = m_mc_profilers[mem_cntrl]->m_memTfawBusy; 380 uint64 m_memReadWriteBusy = m_mc_profilers[mem_cntrl]->m_memReadWriteBusy; 381 uint64 m_memDataBusBusy = m_mc_profilers[mem_cntrl]->m_memDataBusBusy; 382 Vector<uint64> m_memBankCount = m_mc_profilers[mem_cntrl]->m_memBankCount; 383 384 if (m_memReq || m_memRefresh) { // if there's a memory controller at all 385 uint64 total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; 386 double stallsPerReq = total_stalls * 1.0 / m_memReq; 387 out << "Memory control " << mem_cntrl << ":" << endl; 388 out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes 389 out << " memory_reads: " << m_memRead << endl; 390 out << " memory_writes: " << m_memWrite << endl; 391 out << " memory_refreshes: " << m_memRefresh << endl; 392 out << " memory_total_request_delays: " << total_stalls << endl; 393 out << " memory_delays_per_request: " << stallsPerReq << endl; 394 out << " memory_delays_in_input_queue: " << m_memInputQ << endl; 395 out << " memory_delays_behind_head_of_bank_queue: " << m_memBankQ << endl; 396 out << " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles << endl; 397 // Note: The following "memory stalls" entries are a breakdown of the 398 // cycles which already showed up in m_memWaitCycles. The order is 399 // significant; it is the priority of attributing the cycles. 400 // For example, bank_busy is before arbitration because if the bank was 401 // busy, we didn't even check arbitration. 402 // Note: "not old enough" means that since we grouped waiting heads-of-queues 403 // into batches to avoid starvation, a request in a newer batch 404 // didn't try to arbitrate yet because there are older requests waiting. 405 out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; 406 out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; 407 out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; 408 out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; 409 out << " memory_stalls_for_bus: " << m_memBusBusy << endl; 410 out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; 411 out << " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy << endl; 412 out << " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy << endl; 413 out << " accesses_per_bank: "; 414 for (int bank=0; bank < m_memBankCount.size(); bank++) { 415 out << m_memBankCount[bank] << " "; 416 //if ((bank % 8) == 7) out << " " << endl; 417 } 418 out << endl; 419 out << endl; 420 } 421 } 422 if (!short_stats) { 423 out << "Busy Controller Counts:" << endl; 424 for(int i=0; i < MachineType_NUM; i++) { 425 for(int j=0; j < MachineType_base_count((MachineType)i); j++) { 426 MachineID machID; 427 machID.type = (MachineType)i; 428 machID.num = j; 429 out << machID << ":" << m_busyControllerCount[i][j] << " "; 430 if ((j+1)%8 == 0) { 431 out << endl; 432 } 433 } 434 out << endl; 435 } 436 out << endl; 437 438 out << "Busy Bank Count:" << m_busyBankCount << endl; 439 out << endl; 440 441 out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl; 442 out << endl; 443 } 444 445 if (!short_stats) { 446 out << "All Non-Zero Cycle Demand Cache Accesses" << endl; 447 out << "----------------------------------------" << endl; 448 out << "miss_latency: " << m_allMissLatencyHistogram << endl; 449 for(int i=0; i<m_missLatencyHistograms.size(); i++) { 450 if (m_missLatencyHistograms[i].size() > 0) { 451 out << "miss_latency_" << RubyRequestType(i) << ": " << m_missLatencyHistograms[i] << endl; 452 } 453 } 454 for(int i=0; i<m_machLatencyHistograms.size(); i++) { 455 if (m_machLatencyHistograms[i].size() > 0) { 456 out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl; 457 } 458 } 459 460 out << endl; 461 462 out << "All Non-Zero Cycle SW Prefetch Requests" << endl; 463 out << "------------------------------------" << endl; 464 out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; 465 for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) { 466 if (m_SWPrefetchLatencyHistograms[i].size() > 0) { 467 out << "prefetch_latency_" << CacheRequestType(i) << ": " << m_SWPrefetchLatencyHistograms[i] << endl; 468 } 469 } 470 for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) { 471 if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) { 472 out << "prefetch_latency_" << GenericMachineType(i) << ": " << m_SWPrefetchMachLatencyHistograms[i] << endl; 473 } 474 } 475 out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl; 476 477 if (m_all_sharing_histogram.size() > 0) { 478 out << "all_sharing: " << m_all_sharing_histogram << endl; 479 out << "read_sharing: " << m_read_sharing_histogram << endl; 480 out << "write_sharing: " << m_write_sharing_histogram << endl; 481 482 out << "all_sharing_percent: "; m_all_sharing_histogram.printPercent(out); out << endl; 483 out << "read_sharing_percent: "; m_read_sharing_histogram.printPercent(out); out << endl; 484 out << "write_sharing_percent: "; m_write_sharing_histogram.printPercent(out); out << endl; 485 486 int64 total_miss = m_cache_to_cache + m_memory_to_cache; 487 out << "all_misses: " << total_miss << endl; 488 out << "cache_to_cache_misses: " << m_cache_to_cache << endl; 489 out << "memory_to_cache_misses: " << m_memory_to_cache << endl; 490 out << "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache) / double(total_miss)) << endl; 491 out << "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache) / double(total_miss)) << endl; 492 out << endl; 493 } 494 495 if (m_outstanding_requests.size() > 0) { 496 out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl; 497 out << endl; 498 } 499 } 500 501 if (!short_stats) { 502 out << "Request vs. RubySystem State Profile" << endl; 503 out << "--------------------------------" << endl; 504 out << endl; 505 506 Vector<string> requestProfileKeys = m_requestProfileMap_ptr->keys(); 507 requestProfileKeys.sortVector(); 508 509 for(int i=0; i<requestProfileKeys.size(); i++) { 510 int temp_int = m_requestProfileMap_ptr->lookup(requestProfileKeys[i]); 511 double percent = (100.0*double(temp_int))/double(m_requests); 512 while (requestProfileKeys[i] != "") { 513 out << setw(10) << string_split(requestProfileKeys[i], ':'); 514 } 515 out << setw(11) << temp_int; 516 out << setw(14) << percent << endl; 517 } 518 out << endl; 519 520 out << "filter_action: " << m_filter_action_histogram << endl; 521 522 if (!m_all_instructions) { 523 m_address_profiler_ptr->printStats(out); 524 } 525 526 if (m_all_instructions) { 527 m_inst_profiler_ptr->printStats(out); 528 } 529 530 out << endl; 531 out << "Message Delayed Cycles" << endl; 532 out << "----------------------" << endl; 533 out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; 534 out << "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram << endl; 535 for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { 536 out << " virtual_network_" << i << "_delay_cycles: " << m_delayedCyclesVCHistograms[i] << endl; 537 } 538 539 printResourceUsage(out); 540 } 541 542} 543 544void Profiler::printResourceUsage(ostream& out) const 545{ 546 out << endl; 547 out << "Resource Usage" << endl; 548 out << "--------------" << endl; 549 550 integer_t pagesize = getpagesize(); // page size in bytes 551 out << "page_size: " << pagesize << endl; 552 553 rusage usage; 554 getrusage (RUSAGE_SELF, &usage); 555 556 out << "user_time: " << usage.ru_utime.tv_sec << endl; 557 out << "system_time: " << usage.ru_stime.tv_sec << endl; 558 out << "page_reclaims: " << usage.ru_minflt << endl; 559 out << "page_faults: " << usage.ru_majflt << endl; 560 out << "swaps: " << usage.ru_nswap << endl; 561 out << "block_inputs: " << usage.ru_inblock << endl; 562 out << "block_outputs: " << usage.ru_oublock << endl; 563} 564 565void Profiler::clearStats() 566{ 567 m_ruby_start = g_eventQueue_ptr->getTime(); 568 569 m_cycles_executed_at_start.setSize(RubySystem::getNumberOfSequencers()); 570 for (int i=0; i < RubySystem::getNumberOfSequencers(); i++) { 571 if (g_system_ptr == NULL) { 572 m_cycles_executed_at_start[i] = 0; 573 } else { 574 m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i); 575 } 576 } 577 578 m_perProcTotalMisses.setSize(RubySystem::getNumberOfSequencers()); 579 m_perProcUserMisses.setSize(RubySystem::getNumberOfSequencers()); 580 m_perProcSupervisorMisses.setSize(RubySystem::getNumberOfSequencers()); 581 m_perProcStartTransaction.setSize(RubySystem::getNumberOfSequencers()); 582 m_perProcEndTransaction.setSize(RubySystem::getNumberOfSequencers()); 583 584 for(int i=0; i < RubySystem::getNumberOfSequencers(); i++) { 585 m_perProcTotalMisses[i] = 0; 586 m_perProcUserMisses[i] = 0; 587 m_perProcSupervisorMisses[i] = 0; 588 m_perProcStartTransaction[i] = 0; 589 m_perProcEndTransaction[i] = 0; 590 } 591 592 m_busyControllerCount.setSize(MachineType_NUM); // all machines 593 for(int i=0; i < MachineType_NUM; i++) { 594 m_busyControllerCount[i].setSize(MachineType_base_count((MachineType)i)); 595 for(int j=0; j < MachineType_base_count((MachineType)i); j++) { 596 m_busyControllerCount[i][j] = 0; 597 } 598 } 599 m_busyBankCount = 0; 600 601 m_delayedCyclesHistogram.clear(); 602 m_delayedCyclesNonPFHistogram.clear(); 603 m_delayedCyclesVCHistograms.setSize(RubySystem::getNetwork()->getNumberOfVirtualNetworks()); 604 for (int i = 0; i < RubySystem::getNetwork()->getNumberOfVirtualNetworks(); i++) { 605 m_delayedCyclesVCHistograms[i].clear(); 606 } 607 608 m_missLatencyHistograms.setSize(RubyRequestType_NUM); 609 for(int i=0; i<m_missLatencyHistograms.size(); i++) { 610 m_missLatencyHistograms[i].clear(200); 611 } 612 m_machLatencyHistograms.setSize(GenericMachineType_NUM+1); 613 for(int i=0; i<m_machLatencyHistograms.size(); i++) { 614 m_machLatencyHistograms[i].clear(200); 615 } 616 m_allMissLatencyHistogram.clear(200); 617 618 m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM); 619 for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) { 620 m_SWPrefetchLatencyHistograms[i].clear(200); 621 } 622 m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1); 623 for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) { 624 m_SWPrefetchMachLatencyHistograms[i].clear(200); 625 } 626 m_allSWPrefetchLatencyHistogram.clear(200); 627 628 m_sequencer_requests.clear(); 629 m_read_sharing_histogram.clear(); 630 m_write_sharing_histogram.clear(); 631 m_all_sharing_histogram.clear(); 632 m_cache_to_cache = 0; 633 m_memory_to_cache = 0; 634 635 // clear HashMaps 636 m_requestProfileMap_ptr->clear(); 637 638 // count requests profiled 639 m_requests = 0; 640 641 m_outstanding_requests.clear(); 642 m_outstanding_persistent_requests.clear(); 643 644//added by SS 645 vector<string>::iterator it; 646 647 for (int mem_cntrl = 0; 648 mem_cntrl < m_mc_profilers.size(); 649 mem_cntrl++) { 650 m_mc_profilers[mem_cntrl]->m_memReq = 0; 651 m_mc_profilers[mem_cntrl]->m_memBankBusy = 0; 652 m_mc_profilers[mem_cntrl]->m_memBusBusy = 0; 653 m_mc_profilers[mem_cntrl]->m_memTfawBusy = 0; 654 m_mc_profilers[mem_cntrl]->m_memReadWriteBusy = 0; 655 m_mc_profilers[mem_cntrl]->m_memDataBusBusy = 0; 656 m_mc_profilers[mem_cntrl]->m_memRefresh = 0; 657 m_mc_profilers[mem_cntrl]->m_memRead = 0; 658 m_mc_profilers[mem_cntrl]->m_memWrite = 0; 659 m_mc_profilers[mem_cntrl]->m_memWaitCycles = 0; 660 m_mc_profilers[mem_cntrl]->m_memInputQ = 0; 661 m_mc_profilers[mem_cntrl]->m_memBankQ = 0; 662 m_mc_profilers[mem_cntrl]->m_memArbWait = 0; 663 m_mc_profilers[mem_cntrl]->m_memRandBusy = 0; 664 m_mc_profilers[mem_cntrl]->m_memNotOld = 0; 665 666 for (int bank=0; 667 bank < m_mc_profilers[mem_cntrl]->m_memBankCount.size(); 668 bank++) { 669 m_mc_profilers[mem_cntrl]->m_memBankCount[bank] = 0; 670 } 671 } 672 // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared 673 //g_eventQueue_ptr->triggerAllEvents(); 674 675 // update the start time 676 m_ruby_start = g_eventQueue_ptr->getTime(); 677} 678 679void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) 680{ 681 if (msg.getType() != CacheRequestType_IFETCH) { 682 683 // Note: The following line should be commented out if you want to 684 // use the special profiling that is part of the GS320 protocol 685 686 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be profiled by the AddressProfiler 687 m_address_profiler_ptr->addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), msg.getType(), msg.getAccessMode(), id, false); 688 } 689} 690 691void Profiler::profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner) 692{ 693 Set set_contacted(owner); 694 if (type == AccessType_Write) { 695 set_contacted.addSet(sharers); 696 } 697 set_contacted.remove(requestor); 698 int number_contacted = set_contacted.count(); 699 700 if (type == AccessType_Write) { 701 m_write_sharing_histogram.add(number_contacted); 702 } else { 703 m_read_sharing_histogram.add(number_contacted); 704 } 705 m_all_sharing_histogram.add(number_contacted); 706 707 if (number_contacted == 0) { 708 m_memory_to_cache++; 709 } else { 710 m_cache_to_cache++; 711 } 712 713} 714 715void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { 716 assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); 717 m_delayedCyclesHistogram.add(delayCycles); 718 m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); 719 if (virtualNetwork != 0) { 720 m_delayedCyclesNonPFHistogram.add(delayCycles); 721 } 722} 723 724// profiles original cache requests including PUTs 725void Profiler::profileRequest(const string& requestStr) 726{ 727 m_requests++; 728 729 if (m_requestProfileMap_ptr->exist(requestStr)) { 730 (m_requestProfileMap_ptr->lookup(requestStr))++; 731 } else { 732 m_requestProfileMap_ptr->add(requestStr, 1); 733 } 734} 735 736void Profiler::startTransaction(int cpu) 737{ 738 m_perProcStartTransaction[cpu]++; 739} 740 741void Profiler::endTransaction(int cpu) 742{ 743 m_perProcEndTransaction[cpu]++; 744} 745 746void Profiler::controllerBusy(MachineID machID) 747{ 748 m_busyControllerCount[(int)machID.type][(int)machID.num]++; 749} 750 751void Profiler::profilePFWait(Time waitTime) 752{ 753 m_prefetchWaitHistogram.add(waitTime); 754} 755 756void Profiler::bankBusy() 757{ 758 m_busyBankCount++; 759} 760 761// non-zero cycle demand request 762void Profiler::missLatency(Time t, RubyRequestType type) 763{ 764 m_allMissLatencyHistogram.add(t); 765 m_missLatencyHistograms[type].add(t); 766} 767 768// non-zero cycle prefetch request 769void Profiler::swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach) 770{ 771 m_allSWPrefetchLatencyHistogram.add(t); 772 m_SWPrefetchLatencyHistograms[type].add(t); 773 m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); 774 if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { 775 m_SWPrefetchL2MissLatencyHistogram.add(t); 776 } 777} 778 779void Profiler::profileTransition(const string& component, NodeID version, Address addr, 780 const string& state, const string& event, 781 const string& next_state, const string& note) 782{ 783 const int EVENT_SPACES = 20; 784 const int ID_SPACES = 3; 785 const int TIME_SPACES = 7; 786 const int COMP_SPACES = 10; 787 const int STATE_SPACES = 6; 788 789 if ((g_debug_ptr->getDebugTime() > 0) && 790 (g_eventQueue_ptr->getTime() >= g_debug_ptr->getDebugTime())) { 791 (* debug_cout_ptr).flags(ios::right); 792 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 793 (* debug_cout_ptr) << setw(ID_SPACES) << version << " "; 794 (* debug_cout_ptr) << setw(COMP_SPACES) << component; 795 (* debug_cout_ptr) << setw(EVENT_SPACES) << event << " "; 796 797 (* debug_cout_ptr).flags(ios::right); 798 (* debug_cout_ptr) << setw(STATE_SPACES) << state; 799 (* debug_cout_ptr) << ">"; 800 (* debug_cout_ptr).flags(ios::left); 801 (* debug_cout_ptr) << setw(STATE_SPACES) << next_state; 802 803 (* debug_cout_ptr) << " " << addr << " " << note; 804 805 (* debug_cout_ptr) << endl; 806 } 807} 808 809// Helper function 810static double process_memory_total() 811{ 812 const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, 813 ifstream proc_file; 814 proc_file.open("/proc/self/statm"); 815 int total_size_in_pages = 0; 816 int res_size_in_pages = 0; 817 proc_file >> total_size_in_pages; 818 proc_file >> res_size_in_pages; 819 return double(total_size_in_pages)*MULTIPLIER; // size in megabytes 820} 821 822static double process_memory_resident() 823{ 824 const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, 825 ifstream proc_file; 826 proc_file.open("/proc/self/statm"); 827 int total_size_in_pages = 0; 828 int res_size_in_pages = 0; 829 proc_file >> total_size_in_pages; 830 proc_file >> res_size_in_pages; 831 return double(res_size_in_pages)*MULTIPLIER; // size in megabytes 832} 833 834void Profiler::rubyWatch(int id){ 835 //int rn_g1 = 0;//SIMICS_get_register_number(id, "g1"); 836 uint64 tr = 0;//SIMICS_read_register(id, rn_g1); 837 Address watch_address = Address(tr); 838 const int ID_SPACES = 3; 839 const int TIME_SPACES = 7; 840 841 (* debug_cout_ptr).flags(ios::right); 842 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 843 (* debug_cout_ptr) << setw(ID_SPACES) << id << " " 844 << "RUBY WATCH " 845 << watch_address 846 << endl; 847 848 if(!m_watch_address_list_ptr->exist(watch_address)){ 849 m_watch_address_list_ptr->add(watch_address, 1); 850 } 851} 852 853bool Profiler::watchAddress(Address addr){ 854 if (m_watch_address_list_ptr->exist(addr)) 855 return true; 856 else 857 return false; 858} 859 860int64 Profiler::getTotalTransactionsExecuted() const { 861 return m_perProcEndTransaction.sum(); 862} 863 864// For MemoryControl: 865void Profiler::profileMemReq(int mem_cntrl, int bank) { 866 m_mc_profilers[mem_cntrl]->m_memReq++; 867 m_mc_profilers[mem_cntrl]->m_memBankCount[bank]++; 868} 869 870void Profiler::profileMemBankBusy(int mem_cntrl) { 871 m_mc_profilers[mem_cntrl]->m_memBankBusy++; 872} 873 874void Profiler::profileMemBusBusy(int mem_cntrl) { 875 m_mc_profilers[mem_cntrl]->m_memBusBusy++; 876} 877 878void Profiler::profileMemReadWriteBusy(int mem_cntrl) { 879 m_mc_profilers[mem_cntrl]->m_memReadWriteBusy++; 880} 881 882void Profiler::profileMemDataBusBusy(int mem_cntrl) { 883 m_mc_profilers[mem_cntrl]->m_memDataBusBusy++; 884} 885 886void Profiler::profileMemTfawBusy(int mem_cntrl) { 887 m_mc_profilers[mem_cntrl]->m_memTfawBusy++; 888} 889 890void Profiler::profileMemRefresh(int mem_cntrl) { 891 m_mc_profilers[mem_cntrl]->m_memRefresh++; 892} 893 894void Profiler::profileMemRead(int mem_cntrl) { 895 m_mc_profilers[mem_cntrl]->m_memRead++; 896} 897 898void Profiler::profileMemWrite(int mem_cntrl) { 899 m_mc_profilers[mem_cntrl]->m_memWrite++; 900} 901 902void Profiler::profileMemWaitCycles(int mem_cntrl, int cycles) { 903 m_mc_profilers[mem_cntrl]->m_memWaitCycles += cycles; 904} 905 906void Profiler::profileMemInputQ(int mem_cntrl, int cycles) { 907 m_mc_profilers[mem_cntrl]->m_memInputQ += cycles; 908} 909 910void Profiler::profileMemBankQ(int mem_cntrl, int cycles) { 911 m_mc_profilers[mem_cntrl]->m_memBankQ += cycles; 912} 913 914void Profiler::profileMemArbWait(int mem_cntrl, int cycles) { 915 m_mc_profilers[mem_cntrl]->m_memArbWait += cycles; 916} 917 918void Profiler::profileMemRandBusy(int mem_cntrl) { 919 m_mc_profilers[mem_cntrl]->m_memRandBusy++; 920} 921 922void Profiler::profileMemNotOld(int mem_cntrl) { 923 m_mc_profilers[mem_cntrl]->m_memNotOld++; 924} 925 926 927Profiler * 928RubyProfilerParams::create() 929{ 930 return new Profiler(this); 931} 932