// Profiler.cc revision 6148:71a683318799

/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).
34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45/* 46 * Profiler.C 47 * 48 * Description: See Profiler.h 49 * 50 * $Id$ 51 * 52 */ 53 54#include "Profiler.hh" 55#include "CacheProfiler.hh" 56#include "AddressProfiler.hh" 57#include "System.hh" 58#include "Network.hh" 59#include "PrioHeap.hh" 60#include "CacheMsg.hh" 61#include "Driver.hh" 62#include "Protocol.hh" 63#include "util.hh" 64#include "Map.hh" 65#include "Debug.hh" 66#include "MachineType.hh" 67// #include "TransactionInterfaceManager.hh" 68#include "interface.hh" 69//#include "XactVisualizer.hh" //gem5:Arka for decomissioning log_tm 70//#include "XactProfiler.hh" //gem5:Arka for decomissioning log_tm 71 72// extern "C" { 73// #include "Rock.hh" 74// } 75 76// Allows use of times() library call, which determines virtual runtime 77#include <sys/times.h> 78 79extern std::ostream * debug_cout_ptr; 80extern std::ostream * xact_cout_ptr; 81 82static double process_memory_total(); 83static double process_memory_resident(); 84 85Profiler::Profiler() 86 : m_conflicting_histogram(-1) 87{ 88 m_requestProfileMap_ptr = new Map<string, int>; 89 m_L1D_cache_profiler_ptr = new CacheProfiler("L1D_cache"); 90 m_L1I_cache_profiler_ptr = new CacheProfiler("L1I_cache"); 91 92 m_L2_cache_profiler_ptr = new CacheProfiler("L2_cache"); 93 94 m_address_profiler_ptr = new AddressProfiler; 95 m_inst_profiler_ptr = NULL; 96 if (PROFILE_ALL_INSTRUCTIONS) { 97 m_inst_profiler_ptr = new AddressProfiler; 98 } 99 100 //m_xact_profiler_ptr = new XactProfiler; //gem5:Arka for decomissioning og log_tm 101 102 m_conflicting_map_ptr = new Map<Address, Time>; 103 104 m_real_time_start_time = time(NULL); // Not reset in clearStats() 105 m_stats_period = 1000000; // 
Default 106 m_periodic_output_file_ptr = &cerr; 107 m_xact_visualizer_ptr = &cout; 108 109 //---- begin XACT_MEM code 110 m_xactExceptionMap_ptr = new Map<int, int>; 111 m_procsInXactMap_ptr = new Map<int, int>; 112 m_abortIDMap_ptr = new Map<int, int>; 113 m_commitIDMap_ptr = new Map<int, int>; 114 m_xactRetryIDMap_ptr = new Map<int, int>; 115 m_xactCyclesIDMap_ptr = new Map<int, int>; 116 m_xactReadSetIDMap_ptr = new Map<int, int>; 117 m_xactWriteSetIDMap_ptr = new Map<int, int>; 118 m_xactLoadMissIDMap_ptr = new Map<int, int>; 119 m_xactStoreMissIDMap_ptr = new Map<int, int>; 120 m_xactInstrCountIDMap_ptr = new Map<int, integer_t>; 121 m_abortPCMap_ptr = new Map<Address, int>; 122 m_abortAddressMap_ptr = new Map<Address, int>; 123 m_nackXIDMap_ptr = new Map<int, int>; 124 m_nackXIDPairMap_ptr = new Map<int, Map<int, int> * >; 125 m_nackPCMap_ptr = new Map<Address, int>; 126 m_watch_address_list_ptr = new Map<Address, int>; 127 m_readSetMatch_ptr = new Map<Address, int>; 128 m_readSetNoMatch_ptr = new Map<Address, int>; 129 m_writeSetMatch_ptr = new Map<Address, int>; 130 m_writeSetNoMatch_ptr = new Map<Address, int>; 131 m_xactReadFilterBitsSetOnCommit = new Map<int, Histogram>; 132 m_xactReadFilterBitsSetOnAbort = new Map<int, Histogram>; 133 m_xactWriteFilterBitsSetOnCommit = new Map<int, Histogram>; 134 m_xactWriteFilterBitsSetOnAbort = new Map<int, Histogram>; 135 //---- end XACT_MEM code 136 137 // for MemoryControl: 138 m_memReq = 0; 139 m_memBankBusy = 0; 140 m_memBusBusy = 0; 141 m_memReadWriteBusy = 0; 142 m_memDataBusBusy = 0; 143 m_memTfawBusy = 0; 144 m_memRefresh = 0; 145 m_memRead = 0; 146 m_memWrite = 0; 147 m_memWaitCycles = 0; 148 m_memInputQ = 0; 149 m_memBankQ = 0; 150 m_memArbWait = 0; 151 m_memRandBusy = 0; 152 m_memNotOld = 0; 153 154 155 int totalBanks = RubyConfig::banksPerRank() 156 * RubyConfig::ranksPerDimm() 157 * RubyConfig::dimmsPerChannel(); 158 m_memBankCount.setSize(totalBanks); 159 160 clearStats(); 161} 162 
163Profiler::~Profiler() 164{ 165 if (m_periodic_output_file_ptr != &cerr) { 166 delete m_periodic_output_file_ptr; 167 } 168 delete m_address_profiler_ptr; 169 delete m_L1D_cache_profiler_ptr; 170 delete m_L1I_cache_profiler_ptr; 171 delete m_L2_cache_profiler_ptr; 172 //delete m_xact_profiler_ptr; //gem5:Arka for decomissioning of log_tm 173 delete m_requestProfileMap_ptr; 174 delete m_conflicting_map_ptr; 175} 176 177void Profiler::wakeup() 178{ 179 // FIXME - avoid the repeated code 180 181 Vector<integer_t> perProcInstructionCount; 182 perProcInstructionCount.setSize(RubyConfig::numberOfProcessors()); 183 184 Vector<integer_t> perProcCycleCount; 185 perProcCycleCount.setSize(RubyConfig::numberOfProcessors()); 186 187 for(int i=0; i < RubyConfig::numberOfProcessors(); i++) { 188 perProcInstructionCount[i] = g_system_ptr->getDriver()->getInstructionCount(i) - m_instructions_executed_at_start[i] + 1; 189 perProcCycleCount[i] = g_system_ptr->getDriver()->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; 190 // The +1 allows us to avoid division by zero 191 } 192 193 integer_t total_misses = m_perProcTotalMisses.sum(); 194 integer_t instruction_executed = perProcInstructionCount.sum(); 195 integer_t simics_cycles_executed = perProcCycleCount.sum(); 196 integer_t transactions_started = m_perProcStartTransaction.sum(); 197 integer_t transactions_ended = m_perProcEndTransaction.sum(); 198 199 (*m_periodic_output_file_ptr) << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl; 200 (*m_periodic_output_file_ptr) << "total_misses: " << total_misses << " " << m_perProcTotalMisses << endl; 201 (*m_periodic_output_file_ptr) << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl; 202 (*m_periodic_output_file_ptr) << "simics_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; 203 (*m_periodic_output_file_ptr) << "transactions_started: " << transactions_started << " " << 
m_perProcStartTransaction << endl; 204 (*m_periodic_output_file_ptr) << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl; 205 (*m_periodic_output_file_ptr) << "L1TBE_usage: " << m_L1tbeProfile << endl; 206 (*m_periodic_output_file_ptr) << "L2TBE_usage: " << m_L2tbeProfile << endl; 207 (*m_periodic_output_file_ptr) << "mbytes_resident: " << process_memory_resident() << endl; 208 (*m_periodic_output_file_ptr) << "mbytes_total: " << process_memory_total() << endl; 209 if (process_memory_total() > 0) { 210 (*m_periodic_output_file_ptr) << "resident_ratio: " << process_memory_resident()/process_memory_total() << endl; 211 } 212 (*m_periodic_output_file_ptr) << "miss_latency: " << m_allMissLatencyHistogram << endl; 213 214 *m_periodic_output_file_ptr << endl; 215 216 if (PROFILE_ALL_INSTRUCTIONS) { 217 m_inst_profiler_ptr->printStats(*m_periodic_output_file_ptr); 218 } 219 220 //g_system_ptr->getNetwork()->printStats(*m_periodic_output_file_ptr); 221 g_eventQueue_ptr->scheduleEvent(this, m_stats_period); 222} 223 224void Profiler::setPeriodicStatsFile(const string& filename) 225{ 226 cout << "Recording periodic statistics to file '" << filename << "' every " 227 << m_stats_period << " Ruby cycles" << endl; 228 229 if (m_periodic_output_file_ptr != &cerr) { 230 delete m_periodic_output_file_ptr; 231 } 232 233 m_periodic_output_file_ptr = new ofstream(filename.c_str()); 234 g_eventQueue_ptr->scheduleEvent(this, 1); 235} 236 237void Profiler::setPeriodicStatsInterval(integer_t period) 238{ 239 cout << "Recording periodic statistics every " << m_stats_period << " Ruby cycles" << endl; 240 m_stats_period = period; 241 g_eventQueue_ptr->scheduleEvent(this, 1); 242} 243 244void Profiler::printConfig(ostream& out) const 245{ 246 out << endl; 247 out << "Profiler Configuration" << endl; 248 out << "----------------------" << endl; 249 out << "periodic_stats_period: " << m_stats_period << endl; 250} 251 252void Profiler::print(ostream& out) 
const 253{ 254 out << "[Profiler]"; 255} 256 257void Profiler::printStats(ostream& out, bool short_stats) 258{ 259 out << endl; 260 if (short_stats) { 261 out << "SHORT "; 262 } 263 out << "Profiler Stats" << endl; 264 out << "--------------" << endl; 265 266 time_t real_time_current = time(NULL); 267 double seconds = difftime(real_time_current, m_real_time_start_time); 268 double minutes = seconds/60.0; 269 double hours = minutes/60.0; 270 double days = hours/24.0; 271 Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start; 272 273 if (!short_stats) { 274 out << "Elapsed_time_in_seconds: " << seconds << endl; 275 out << "Elapsed_time_in_minutes: " << minutes << endl; 276 out << "Elapsed_time_in_hours: " << hours << endl; 277 out << "Elapsed_time_in_days: " << days << endl; 278 out << endl; 279 } 280 281 // print the virtual runtimes as well 282 struct tms vtime; 283 times(&vtime); 284 seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0; 285 minutes = seconds / 60.0; 286 hours = minutes / 60.0; 287 days = hours / 24.0; 288 out << "Virtual_time_in_seconds: " << seconds << endl; 289 out << "Virtual_time_in_minutes: " << minutes << endl; 290 out << "Virtual_time_in_hours: " << hours << endl; 291 out << "Virtual_time_in_days: " << hours << endl; 292 out << endl; 293 294 out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl; 295 out << "Ruby_start_time: " << m_ruby_start << endl; 296 out << "Ruby_cycles: " << ruby_cycles << endl; 297 out << endl; 298 299 if (!short_stats) { 300 out << "mbytes_resident: " << process_memory_resident() << endl; 301 out << "mbytes_total: " << process_memory_total() << endl; 302 if (process_memory_total() > 0) { 303 out << "resident_ratio: " << process_memory_resident()/process_memory_total() << endl; 304 } 305 out << endl; 306 307 if(m_num_BA_broadcasts + m_num_BA_unicasts != 0){ 308 out << endl; 309 out << "Broadcast_percent: " << (float)m_num_BA_broadcasts/(m_num_BA_broadcasts+m_num_BA_unicasts) << endl; 310 } 311 } 
312 313 Vector<integer_t> perProcInstructionCount; 314 Vector<integer_t> perProcCycleCount; 315 Vector<double> perProcCPI; 316 Vector<double> perProcMissesPerInsn; 317 Vector<double> perProcInsnPerTrans; 318 Vector<double> perProcCyclesPerTrans; 319 Vector<double> perProcMissesPerTrans; 320 321 perProcInstructionCount.setSize(RubyConfig::numberOfProcessors()); 322 perProcCycleCount.setSize(RubyConfig::numberOfProcessors()); 323 perProcCPI.setSize(RubyConfig::numberOfProcessors()); 324 perProcMissesPerInsn.setSize(RubyConfig::numberOfProcessors()); 325 326 perProcInsnPerTrans.setSize(RubyConfig::numberOfProcessors()); 327 perProcCyclesPerTrans.setSize(RubyConfig::numberOfProcessors()); 328 perProcMissesPerTrans.setSize(RubyConfig::numberOfProcessors()); 329 330 for(int i=0; i < RubyConfig::numberOfProcessors(); i++) { 331 perProcInstructionCount[i] = g_system_ptr->getDriver()->getInstructionCount(i) - m_instructions_executed_at_start[i] + 1; 332 perProcCycleCount[i] = g_system_ptr->getDriver()->getCycleCount(i) - m_cycles_executed_at_start[i] + 1; 333 // The +1 allows us to avoid division by zero 334 perProcCPI[i] = double(ruby_cycles)/perProcInstructionCount[i]; 335 perProcMissesPerInsn[i] = 1000.0 * (double(m_perProcTotalMisses[i]) / double(perProcInstructionCount[i])); 336 337 int trans = m_perProcEndTransaction[i]; 338 if (trans == 0) { 339 perProcInsnPerTrans[i] = 0; 340 perProcCyclesPerTrans[i] = 0; 341 perProcMissesPerTrans[i] = 0; 342 } else { 343 perProcInsnPerTrans[i] = perProcInstructionCount[i] / double(trans); 344 perProcCyclesPerTrans[i] = ruby_cycles / double(trans); 345 perProcMissesPerTrans[i] = m_perProcTotalMisses[i] / double(trans); 346 } 347 } 348 349 integer_t total_misses = m_perProcTotalMisses.sum(); 350 integer_t user_misses = m_perProcUserMisses.sum(); 351 integer_t supervisor_misses = m_perProcSupervisorMisses.sum(); 352 integer_t instruction_executed = perProcInstructionCount.sum(); 353 integer_t simics_cycles_executed = 
perProcCycleCount.sum(); 354 integer_t transactions_started = m_perProcStartTransaction.sum(); 355 integer_t transactions_ended = m_perProcEndTransaction.sum(); 356 357 double instructions_per_transaction = (transactions_ended != 0) ? double(instruction_executed) / double(transactions_ended) : 0; 358 double cycles_per_transaction = (transactions_ended != 0) ? (RubyConfig::numberOfProcessors() * double(ruby_cycles)) / double(transactions_ended) : 0; 359 double misses_per_transaction = (transactions_ended != 0) ? double(total_misses) / double(transactions_ended) : 0; 360 361 out << "Total_misses: " << total_misses << endl; 362 out << "total_misses: " << total_misses << " " << m_perProcTotalMisses << endl; 363 out << "user_misses: " << user_misses << " " << m_perProcUserMisses << endl; 364 out << "supervisor_misses: " << supervisor_misses << " " << m_perProcSupervisorMisses << endl; 365 out << endl; 366 out << "instruction_executed: " << instruction_executed << " " << perProcInstructionCount << endl; 367 out << "simics_cycles_executed: " << simics_cycles_executed << " " << perProcCycleCount << endl; 368 out << "cycles_per_instruction: " << (RubyConfig::numberOfProcessors()*double(ruby_cycles))/double(instruction_executed) << " " << perProcCPI << endl; 369 out << "misses_per_thousand_instructions: " << 1000.0 * (double(total_misses) / double(instruction_executed)) << " " << perProcMissesPerInsn << endl; 370 out << endl; 371 out << "transactions_started: " << transactions_started << " " << m_perProcStartTransaction << endl; 372 out << "transactions_ended: " << transactions_ended << " " << m_perProcEndTransaction << endl; 373 out << "instructions_per_transaction: " << instructions_per_transaction << " " << perProcInsnPerTrans << endl; 374 out << "cycles_per_transaction: " << cycles_per_transaction << " " << perProcCyclesPerTrans << endl; 375 out << "misses_per_transaction: " << misses_per_transaction << " " << perProcMissesPerTrans << endl; 376 377 out << endl; 378 379 
m_L1D_cache_profiler_ptr->printStats(out); 380 m_L1I_cache_profiler_ptr->printStats(out); 381 m_L2_cache_profiler_ptr->printStats(out); 382 383 out << endl; 384 385 if (m_memReq || m_memRefresh) { // if there's a memory controller at all 386 long long int total_stalls = m_memInputQ + m_memBankQ + m_memWaitCycles; 387 double stallsPerReq = total_stalls * 1.0 / m_memReq; 388 out << "Memory control:" << endl; 389 out << " memory_total_requests: " << m_memReq << endl; // does not include refreshes 390 out << " memory_reads: " << m_memRead << endl; 391 out << " memory_writes: " << m_memWrite << endl; 392 out << " memory_refreshes: " << m_memRefresh << endl; 393 out << " memory_total_request_delays: " << total_stalls << endl; 394 out << " memory_delays_per_request: " << stallsPerReq << endl; 395 out << " memory_delays_in_input_queue: " << m_memInputQ << endl; 396 out << " memory_delays_behind_head_of_bank_queue: " << m_memBankQ << endl; 397 out << " memory_delays_stalled_at_head_of_bank_queue: " << m_memWaitCycles << endl; 398 // Note: The following "memory stalls" entries are a breakdown of the 399 // cycles which already showed up in m_memWaitCycles. The order is 400 // significant; it is the priority of attributing the cycles. 401 // For example, bank_busy is before arbitration because if the bank was 402 // busy, we didn't even check arbitration. 403 // Note: "not old enough" means that since we grouped waiting heads-of-queues 404 // into batches to avoid starvation, a request in a newer batch 405 // didn't try to arbitrate yet because there are older requests waiting. 
406 out << " memory_stalls_for_bank_busy: " << m_memBankBusy << endl; 407 out << " memory_stalls_for_random_busy: " << m_memRandBusy << endl; 408 out << " memory_stalls_for_anti_starvation: " << m_memNotOld << endl; 409 out << " memory_stalls_for_arbitration: " << m_memArbWait << endl; 410 out << " memory_stalls_for_bus: " << m_memBusBusy << endl; 411 out << " memory_stalls_for_tfaw: " << m_memTfawBusy << endl; 412 out << " memory_stalls_for_read_write_turnaround: " << m_memReadWriteBusy << endl; 413 out << " memory_stalls_for_read_read_turnaround: " << m_memDataBusBusy << endl; 414 out << " accesses_per_bank: "; 415 for (int bank=0; bank < m_memBankCount.size(); bank++) { 416 out << m_memBankCount[bank] << " "; 417 //if ((bank % 8) == 7) out << " " << endl; 418 } 419 out << endl; 420 out << endl; 421 } 422 423 if (!short_stats) { 424 out << "Busy Controller Counts:" << endl; 425 for(int i=0; i < MachineType_NUM; i++) { 426 for(int j=0; j < MachineType_base_count((MachineType)i); j++) { 427 MachineID machID; 428 machID.type = (MachineType)i; 429 machID.num = j; 430 out << machID << ":" << m_busyControllerCount[i][j] << " "; 431 if ((j+1)%8 == 0) { 432 out << endl; 433 } 434 } 435 out << endl; 436 } 437 out << endl; 438 439 out << "Busy Bank Count:" << m_busyBankCount << endl; 440 out << endl; 441 442 out << "L1TBE_usage: " << m_L1tbeProfile << endl; 443 out << "L2TBE_usage: " << m_L2tbeProfile << endl; 444 out << "StopTable_usage: " << m_stopTableProfile << endl; 445 out << "sequencer_requests_outstanding: " << m_sequencer_requests << endl; 446 out << "store_buffer_size: " << m_store_buffer_size << endl; 447 out << "unique_blocks_in_store_buffer: " << m_store_buffer_blocks << endl; 448 out << endl; 449 } 450 451 if (!short_stats) { 452 out << "All Non-Zero Cycle Demand Cache Accesses" << endl; 453 out << "----------------------------------------" << endl; 454 out << "miss_latency: " << m_allMissLatencyHistogram << endl; 455 for(int i=0; 
i<m_missLatencyHistograms.size(); i++) { 456 if (m_missLatencyHistograms[i].size() > 0) { 457 out << "miss_latency_" << CacheRequestType(i) << ": " << m_missLatencyHistograms[i] << endl; 458 } 459 } 460 for(int i=0; i<m_machLatencyHistograms.size(); i++) { 461 if (m_machLatencyHistograms[i].size() > 0) { 462 out << "miss_latency_" << GenericMachineType(i) << ": " << m_machLatencyHistograms[i] << endl; 463 } 464 } 465 out << "miss_latency_L2Miss: " << m_L2MissLatencyHistogram << endl; 466 467 out << endl; 468 469 out << "All Non-Zero Cycle SW Prefetch Requests" << endl; 470 out << "------------------------------------" << endl; 471 out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl; 472 for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) { 473 if (m_SWPrefetchLatencyHistograms[i].size() > 0) { 474 out << "prefetch_latency_" << CacheRequestType(i) << ": " << m_SWPrefetchLatencyHistograms[i] << endl; 475 } 476 } 477 for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) { 478 if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) { 479 out << "prefetch_latency_" << GenericMachineType(i) << ": " << m_SWPrefetchMachLatencyHistograms[i] << endl; 480 } 481 } 482 out << "prefetch_latency_L2Miss:" << m_SWPrefetchL2MissLatencyHistogram << endl; 483 484 out << "multicast_retries: " << m_multicast_retry_histogram << endl; 485 out << "gets_mask_prediction_count: " << m_gets_mask_prediction << endl; 486 out << "getx_mask_prediction_count: " << m_getx_mask_prediction << endl; 487 out << "explicit_training_mask: " << m_explicit_training_mask << endl; 488 out << endl; 489 490 if (m_all_sharing_histogram.size() > 0) { 491 out << "all_sharing: " << m_all_sharing_histogram << endl; 492 out << "read_sharing: " << m_read_sharing_histogram << endl; 493 out << "write_sharing: " << m_write_sharing_histogram << endl; 494 495 out << "all_sharing_percent: "; m_all_sharing_histogram.printPercent(out); out << endl; 496 out << "read_sharing_percent: "; 
m_read_sharing_histogram.printPercent(out); out << endl; 497 out << "write_sharing_percent: "; m_write_sharing_histogram.printPercent(out); out << endl; 498 499 int64 total_miss = m_cache_to_cache + m_memory_to_cache; 500 out << "all_misses: " << total_miss << endl; 501 out << "cache_to_cache_misses: " << m_cache_to_cache << endl; 502 out << "memory_to_cache_misses: " << m_memory_to_cache << endl; 503 out << "cache_to_cache_percent: " << 100.0 * (double(m_cache_to_cache) / double(total_miss)) << endl; 504 out << "memory_to_cache_percent: " << 100.0 * (double(m_memory_to_cache) / double(total_miss)) << endl; 505 out << endl; 506 } 507 508 if (m_conflicting_histogram.size() > 0) { 509 out << "conflicting_histogram: " << m_conflicting_histogram << endl; 510 out << "conflicting_histogram_percent: "; m_conflicting_histogram.printPercent(out); out << endl; 511 out << endl; 512 } 513 514 if (m_outstanding_requests.size() > 0) { 515 out << "outstanding_requests: "; m_outstanding_requests.printPercent(out); out << endl; 516 if (m_outstanding_persistent_requests.size() > 0) { 517 out << "outstanding_persistent_requests: "; m_outstanding_persistent_requests.printPercent(out); out << endl; 518 } 519 out << endl; 520 } 521 } 522 523 if (XACT_MEMORY){ 524 // Transactional Memory stats 525 out << "Transactional Memory Stats:" << endl; 526 out << "------- xact --------" << endl; 527 out << "xact_size_dist: " << m_xactSizes << endl; 528 out << "xact_instr_count: " << m_xactInstrCount << endl; 529 out << "xact_time_dist: " << m_xactCycles << endl; 530 out << "xact_log_size_dist: " << m_xactLogs << endl; 531 out << "xact_read_set_size_dist: " << m_xactReads << endl; 532 out << "xact_write_set_size_dist: " << m_xactWrites << endl; 533 out << "xact_overflow_read_lines_dist: " << m_xactOverflowReads << endl; 534 out << "xact_overflow_write_lines_dist: " << m_xactOverflowWrites << endl; 535 out << "xact_overflow_read_set_size_dist: " << m_xactOverflowTotalReads << endl; 536 out << 
"xact_overflow_write_set_size_dist: " << m_xactOverflowTotalWrites << endl; 537 out << "xact_miss_load_dist: " << m_xactLoadMisses << endl; 538 out << "xact_miss_store_dist: " << m_xactStoreMisses << endl; 539 out << "xact_nacked: " << m_xactNacked << endl; 540 out << "xact_retries: " << m_xactRetries << endl; 541 out << "xact_abort_delays: " << m_abortDelays << endl; 542 out << "xact_aborts: " << m_transactionAborts << endl; 543 if (ATMTP_ENABLED) { 544 out << "xact_log_overflows: " << m_transactionLogOverflows << endl; 545 out << "xact_cache_overflows: " << m_transactionCacheOverflows << endl; 546 out << "xact_unsup_inst_aborts: " << m_transactionUnsupInsts << endl; 547 out << "xact_save_rest_aborts: " << m_transactionSaveRestAborts << endl; 548 } 549 out << "xact_writebacks: " << m_transWBs << endl; 550 out << "xact_extra_wbs: " << m_extraWBs << endl; 551 out << "xact_handler_startup_delay: " << m_abortStarupDelay << endl; 552 out << "xact_handler_per_block_delay: " << m_abortPerBlockDelay << endl; 553 out << "xact_inferred_aborts: " << m_inferredAborts << endl; 554 //out << "xact_histogram: " << m_procsInXact << endl; 555 556 if (!short_stats) { 557 Vector<int> nackedXIDKeys = m_nackXIDMap_ptr->keys(); 558 nackedXIDKeys.sortVector(); 559 out << endl; 560 int total_nacks = 0; 561 out << "------- xact Nacks by XID --------" << endl; 562 for(int i=0; i<nackedXIDKeys.size(); i++) { 563 int key = nackedXIDKeys[i]; 564 int count = m_nackXIDMap_ptr->lookup(key); 565 total_nacks += count; 566 out << "xact " << key << " " 567 << setw(6) << dec << count 568 << endl; 569 } 570 out << "Total Nacks: " << total_nacks << endl; 571 out << "---------------" << endl; 572 out << endl; 573 574 // Print XID Nack Pairs 575 Vector<int> nackedXIDPairKeys = m_nackXIDPairMap_ptr->keys(); 576 nackedXIDPairKeys.sortVector(); 577 out << endl; 578 total_nacks = 0; 579 out << "------- xact Nacks by XID Pairs --------" << endl; 580 for(int i=0; i<nackedXIDPairKeys.size(); i++) { 581 int key = 
nackedXIDPairKeys[i]; 582 Map<int, int> * my_map = m_nackXIDPairMap_ptr->lookup(key); 583 Vector<int> my_keys = my_map->keys(); 584 my_keys.sortVector(); 585 for(int j=0; j<my_keys.size(); j++){ 586 int nid = my_keys[j]; 587 int count = my_map->lookup(nid); 588 total_nacks += count; 589 out << "xact " << key << " nacked by xact " << nid << " " 590 << setw(6) << dec << count 591 << endl; 592 } 593 } 594 out << "Total Nacks: " << total_nacks << endl; 595 out << "---------------" << endl; 596 out << endl; 597 598 599 Vector<Address> nackedPCKeys = m_nackPCMap_ptr->keys(); 600 nackedPCKeys.sortVector(); 601 out << endl; 602 out << "------- xact Nacks by PC --------" << endl; 603 for(int i=0; i<nackedPCKeys.size(); i++) { 604 Address key = nackedPCKeys[i]; 605 int count = m_nackPCMap_ptr->lookup(key); 606 out << "xact_Nack " << key << " " 607 << setw(4) << dec << count 608 << endl; 609 } 610 out << "---------------" << endl; 611 out << endl; 612 613 614 Vector<int> xactExceptionKeys = m_xactExceptionMap_ptr->keys(); 615 xactExceptionKeys.sortVector(); 616 out << "------- xact exceptions --------" << endl; 617 for(int i=0; i<xactExceptionKeys.size(); i++) { 618 int key = xactExceptionKeys[i]; 619 int count = m_xactExceptionMap_ptr->lookup(key); 620 out << "xact_exception(" 621 << hex << key << "):" 622 << setw(4) << dec << count 623 << endl; 624 } 625 out << endl; 626 out << "---------------" << endl; 627 out << endl; 628 629 Vector<int> abortIDKeys = m_abortIDMap_ptr->keys(); 630 abortIDKeys.sortVector(); 631 out << "------- xact abort by XID --------" << endl; 632 for(int i=0; i<abortIDKeys.size(); i++) { 633 int count = m_abortIDMap_ptr->lookup(abortIDKeys[i]); 634 out << "xact_aborts(" 635 << dec << abortIDKeys[i] << "):" 636 << setw(7) << count 637 << endl; 638 } 639 out << endl; 640 out << "---------------" << endl; 641 out << endl; 642 643 Vector<Address> abortedPCKeys = m_abortPCMap_ptr->keys(); 644 abortedPCKeys.sortVector(); 645 out << endl; 646 out << "------- 
xact Aborts by PC --------" << endl; 647 for(int i=0; i<abortedPCKeys.size(); i++) { 648 Address key = abortedPCKeys[i]; 649 int count = m_abortPCMap_ptr->lookup(key); 650 out << "xact_abort_pc " << key 651 << setw(4) << dec << count 652 << endl; 653 } 654 out << "---------------" << endl; 655 out << endl; 656 657 Vector<Address> abortedAddrKeys = m_abortAddressMap_ptr->keys(); 658 abortedAddrKeys.sortVector(); 659 out << endl; 660 out << "------- xact Aborts by Address --------" << endl; 661 for(int i=0; i<abortedAddrKeys.size(); i++) { 662 Address key = abortedAddrKeys[i]; 663 int count = m_abortAddressMap_ptr->lookup(key); 664 out << "xact_abort_address " << key 665 << setw(4) << dec << count 666 << endl; 667 } 668 out << "---------------" << endl; 669 out << endl; 670 } // !short_stats 671 672 Vector<int> commitIDKeys = m_commitIDMap_ptr->keys(); 673 commitIDKeys.sortVector(); 674 out << "------- xact Commit Stats by XID --------" << endl; 675 for(int i=0; i<commitIDKeys.size(); i++) { 676 int count = m_commitIDMap_ptr->lookup(commitIDKeys[i]); 677 double retry_count = (double)m_xactRetryIDMap_ptr->lookup(commitIDKeys[i]) / count; 678 double cycles_count = (double)m_xactCyclesIDMap_ptr->lookup(commitIDKeys[i]) / count; 679 double readset_count = (double)m_xactReadSetIDMap_ptr->lookup(commitIDKeys[i]) / count; 680 double writeset_count = (double)m_xactWriteSetIDMap_ptr->lookup(commitIDKeys[i]) / count; 681 double loadmiss_count = (double)m_xactLoadMissIDMap_ptr->lookup(commitIDKeys[i]) / count; 682 double storemiss_count = (double)m_xactStoreMissIDMap_ptr->lookup(commitIDKeys[i]) / count; 683 double instr_count = (double)m_xactInstrCountIDMap_ptr->lookup(commitIDKeys[i]) / count; 684 out << "xact_stats id: " 685 << dec << commitIDKeys[i] 686 << " count: " << setw(7) << count 687 << " Cycles: " << setw(7) << cycles_count 688 << " Instr: " << setw(7) << instr_count 689 << " ReadSet: " << setw(7) << readset_count 690 << " WriteSet: " << setw(7) << writeset_count 
691 << " LoadMiss: " << setw(7) << loadmiss_count 692 << " StoreMiss: " << setw(7) << storemiss_count 693 << " Retry Count: " << setw(7) << retry_count 694 << endl; 695 } 696 out << endl; 697 out << "---------------" << endl; 698 out << endl; 699 700 if (!short_stats) { 701 Vector<int> procsInXactKeys = m_procsInXactMap_ptr->keys(); 702 procsInXactKeys.sortVector(); 703 out << "------- xact histogram --------" << endl; 704 for(int i=0; i<procsInXactKeys.size(); i++) { 705 int count = m_procsInXactMap_ptr->lookup(procsInXactKeys[i]); 706 int key = procsInXactKeys[i]; 707 out << "xact_histogram(" 708 << dec << key << "):" 709 << setw(8) << count 710 << endl; 711 } 712 out << endl; 713 out << "---------------" << endl; 714 out << endl; 715 716 // Read/Write set Bloom filter stats 717 //int false_reads = 0; 718 long long int false_reads = m_readSetNoMatch; 719 Vector<Address> fp_read_keys = m_readSetNoMatch_ptr->keys(); 720 out << "------- xact read set false positives -------" << endl; 721 for(int i=0; i < fp_read_keys.size(); ++i){ 722 int count = m_readSetNoMatch_ptr->lookup(fp_read_keys[i]); 723 //out << "read_false_positive( " << fp_read_keys[i] << " ): " 724 // << setw(8) << dec << count << endl; 725 false_reads += count; 726 } 727 out << "Total read set false positives : " << setw(8) << false_reads << endl; 728 out << "-----------------------" << endl; 729 out << endl; 730 731 //int matching_reads = 0; 732 long long int matching_reads = m_readSetMatch; 733 long long int empty_checks = m_readSetEmptyChecks; 734 Vector<Address> read_keys = m_readSetMatch_ptr->keys(); 735 out << "------- xact read set matches -------" << endl; 736 for(int i=0; i < read_keys.size(); ++i){ 737 int count = m_readSetMatch_ptr->lookup(read_keys[i]); 738 //out << "read_match( " << read_keys[i] << " ): " 739 // << setw(8) << dec << count << endl; 740 matching_reads += count; 741 } 742 out << "Total read set matches : " << setw(8) << matching_reads << endl; 743 out << "Total read set empty 
checks : " << setw(8) << empty_checks << endl; 744 double false_positive_pct = 0.0; 745 if((false_reads + matching_reads)> 0){ 746 false_positive_pct = (1.0*false_reads)/(false_reads+matching_reads)*100.0; 747 } 748 out << "Read set false positives rate : " << false_positive_pct << "%" << endl; 749 out << "-----------------------" << endl; 750 out << endl; 751 752 // for write set 753 //int false_writes = 0; 754 long long int false_writes = m_writeSetNoMatch; 755 Vector<Address> fp_write_keys = m_writeSetNoMatch_ptr->keys(); 756 out << "------- xact write set false positives -------" << endl; 757 for(int i=0; i < fp_write_keys.size(); ++i){ 758 int count = m_writeSetNoMatch_ptr->lookup(fp_write_keys[i]); 759 //out << "write_false_positive( " << fp_write_keys[i] << " ): " 760 // << setw(8) << dec << count << endl; 761 false_writes += count; 762 } 763 out << "Total write set false positives : " << setw(8) << false_writes << endl; 764 out << "-----------------------" << endl; 765 out << endl; 766 767 //int matching_writes = 0; 768 long long int matching_writes = m_writeSetMatch; 769 empty_checks = m_writeSetEmptyChecks; 770 Vector<Address> write_keys = m_writeSetMatch_ptr->keys(); 771 out << "------- xact write set matches -------" << endl; 772 for(int i=0; i < write_keys.size(); ++i){ 773 int count = m_writeSetMatch_ptr->lookup(write_keys[i]); 774 //out << "write_match( " << write_keys[i] << " ): " 775 // << setw(8) << dec << count << endl; 776 matching_writes += count; 777 } 778 out << "Total write set matches : " << setw(8) << matching_writes << endl; 779 out << "Total write set empty checks : " << setw(8) << empty_checks << endl; 780 false_positive_pct = 0.0; 781 if((matching_writes+false_writes) > 0){ 782 false_positive_pct = (1.0*false_writes)/(false_writes+matching_writes)*100.0; 783 } 784 out << "Write set false positives rate : " << false_positive_pct << "%" << endl; 785 out << "-----------------------" << endl; 786 out << endl; 787 788 out << "----- Xact 
Signature Stats ------" << endl; 789 Vector<int> xids = m_xactReadFilterBitsSetOnCommit->keys(); 790 for(int i=0; i < xids.size(); ++i){ 791 int xid = xids[i]; 792 out << "xid " << xid << " Read set bits set on commit: " << (m_xactReadFilterBitsSetOnCommit->lookup(xid)) << endl; 793 } 794 xids = m_xactWriteFilterBitsSetOnCommit->keys(); 795 for(int i=0; i < xids.size(); ++i){ 796 int xid = xids[i]; 797 out << "xid " << xid << " Write set bits set on commit: " << (m_xactWriteFilterBitsSetOnCommit->lookup(xid)) << endl; 798 } 799 xids = m_xactReadFilterBitsSetOnAbort->keys(); 800 for(int i=0; i < xids.size(); ++i){ 801 int xid = xids[i]; 802 out << "xid " << xid << " Read set bits set on abort: " << (m_xactReadFilterBitsSetOnAbort->lookup(xid)) << endl; 803 } 804 xids = m_xactWriteFilterBitsSetOnAbort->keys(); 805 for(int i=0; i < xids.size(); ++i){ 806 int xid = xids[i]; 807 out << "xid " << xid << " Write set bits set on abort: " << (m_xactWriteFilterBitsSetOnAbort->lookup(xid)) << endl; 808 } 809 out << endl; 810 811 cout << "------- WATCHPOINTS --------" << endl; 812 cout << "False Triggers : " << m_watchpointsFalsePositiveTrigger << endl; 813 cout << "True Triggers : " << m_watchpointsTrueTrigger << endl; 814 cout << "Total Triggers : " << m_watchpointsTrueTrigger + m_watchpointsFalsePositiveTrigger << endl; 815 cout << "---------------" << endl; 816 cout << endl; 817 } // !short_stats 818 //m_xact_profiler_ptr->printStats(out, short_stats); // gem5:Arka for decomissioning of log_tm 819 } // XACT_MEMORY 820 821 if (!short_stats) { 822 out << "Request vs. 
RubySystem State Profile" << endl; 823 out << "--------------------------------" << endl; 824 out << endl; 825 826 Vector<string> requestProfileKeys = m_requestProfileMap_ptr->keys(); 827 requestProfileKeys.sortVector(); 828 829 for(int i=0; i<requestProfileKeys.size(); i++) { 830 int temp_int = m_requestProfileMap_ptr->lookup(requestProfileKeys[i]); 831 double percent = (100.0*double(temp_int))/double(m_requests); 832 while (requestProfileKeys[i] != "") { 833 out << setw(10) << string_split(requestProfileKeys[i], ':'); 834 } 835 out << setw(11) << temp_int; 836 out << setw(14) << percent << endl; 837 } 838 out << endl; 839 840 out << "filter_action: " << m_filter_action_histogram << endl; 841 842 if (!PROFILE_ALL_INSTRUCTIONS) { 843 m_address_profiler_ptr->printStats(out); 844 } 845 846 if (PROFILE_ALL_INSTRUCTIONS) { 847 m_inst_profiler_ptr->printStats(out); 848 } 849 850 out << endl; 851 out << "Message Delayed Cycles" << endl; 852 out << "----------------------" << endl; 853 out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl; 854 out << "Total_nonPF_delay_cycles: " << m_delayedCyclesNonPFHistogram << endl; 855 for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) { 856 out << " virtual_network_" << i << "_delay_cycles: " << m_delayedCyclesVCHistograms[i] << endl; 857 } 858 859 printResourceUsage(out); 860 } 861 862} 863 864void Profiler::printResourceUsage(ostream& out) const 865{ 866 out << endl; 867 out << "Resource Usage" << endl; 868 out << "--------------" << endl; 869 870 integer_t pagesize = getpagesize(); // page size in bytes 871 out << "page_size: " << pagesize << endl; 872 873 rusage usage; 874 getrusage (RUSAGE_SELF, &usage); 875 876 out << "user_time: " << usage.ru_utime.tv_sec << endl; 877 out << "system_time: " << usage.ru_stime.tv_sec << endl; 878 out << "page_reclaims: " << usage.ru_minflt << endl; 879 out << "page_faults: " << usage.ru_majflt << endl; 880 out << "swaps: " << usage.ru_nswap << endl; 881 out << 
"block_inputs: " << usage.ru_inblock << endl; 882 out << "block_outputs: " << usage.ru_oublock << endl; 883} 884 885void Profiler::clearStats() 886{ 887 m_num_BA_unicasts = 0; 888 m_num_BA_broadcasts = 0; 889 890 m_ruby_start = g_eventQueue_ptr->getTime(); 891 892 m_instructions_executed_at_start.setSize(RubyConfig::numberOfProcessors()); 893 m_cycles_executed_at_start.setSize(RubyConfig::numberOfProcessors()); 894 for (int i=0; i < RubyConfig::numberOfProcessors(); i++) { 895 if (g_system_ptr == NULL) { 896 m_instructions_executed_at_start[i] = 0; 897 m_cycles_executed_at_start[i] = 0; 898 } else { 899 m_instructions_executed_at_start[i] = g_system_ptr->getDriver()->getInstructionCount(i); 900 m_cycles_executed_at_start[i] = g_system_ptr->getDriver()->getCycleCount(i); 901 } 902 } 903 904 m_perProcTotalMisses.setSize(RubyConfig::numberOfProcessors()); 905 m_perProcUserMisses.setSize(RubyConfig::numberOfProcessors()); 906 m_perProcSupervisorMisses.setSize(RubyConfig::numberOfProcessors()); 907 m_perProcStartTransaction.setSize(RubyConfig::numberOfProcessors()); 908 m_perProcEndTransaction.setSize(RubyConfig::numberOfProcessors()); 909 910 for(int i=0; i < RubyConfig::numberOfProcessors(); i++) { 911 m_perProcTotalMisses[i] = 0; 912 m_perProcUserMisses[i] = 0; 913 m_perProcSupervisorMisses[i] = 0; 914 m_perProcStartTransaction[i] = 0; 915 m_perProcEndTransaction[i] = 0; 916 } 917 918 m_busyControllerCount.setSize(MachineType_NUM); // all machines 919 for(int i=0; i < MachineType_NUM; i++) { 920 m_busyControllerCount[i].setSize(MachineType_base_count((MachineType)i)); 921 for(int j=0; j < MachineType_base_count((MachineType)i); j++) { 922 m_busyControllerCount[i][j] = 0; 923 } 924 } 925 m_busyBankCount = 0; 926 927 m_delayedCyclesHistogram.clear(); 928 m_delayedCyclesNonPFHistogram.clear(); 929 m_delayedCyclesVCHistograms.setSize(NUMBER_OF_VIRTUAL_NETWORKS); 930 for (int i = 0; i < NUMBER_OF_VIRTUAL_NETWORKS; i++) { 931 m_delayedCyclesVCHistograms[i].clear(); 932 } 
933 934 m_gets_mask_prediction.clear(); 935 m_getx_mask_prediction.clear(); 936 m_explicit_training_mask.clear(); 937 938 m_missLatencyHistograms.setSize(CacheRequestType_NUM); 939 for(int i=0; i<m_missLatencyHistograms.size(); i++) { 940 m_missLatencyHistograms[i].clear(200); 941 } 942 m_machLatencyHistograms.setSize(GenericMachineType_NUM+1); 943 for(int i=0; i<m_machLatencyHistograms.size(); i++) { 944 m_machLatencyHistograms[i].clear(200); 945 } 946 m_allMissLatencyHistogram.clear(200); 947 m_L2MissLatencyHistogram.clear(200); 948 949 m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM); 950 for(int i=0; i<m_SWPrefetchLatencyHistograms.size(); i++) { 951 m_SWPrefetchLatencyHistograms[i].clear(200); 952 } 953 m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1); 954 for(int i=0; i<m_SWPrefetchMachLatencyHistograms.size(); i++) { 955 m_SWPrefetchMachLatencyHistograms[i].clear(200); 956 } 957 m_allSWPrefetchLatencyHistogram.clear(200); 958 m_SWPrefetchL2MissLatencyHistogram.clear(200); 959 960 m_multicast_retry_histogram.clear(); 961 962 m_L1tbeProfile.clear(); 963 m_L2tbeProfile.clear(); 964 m_stopTableProfile.clear(); 965 m_filter_action_histogram.clear(); 966 967 m_sequencer_requests.clear(); 968 m_store_buffer_size.clear(); 969 m_store_buffer_blocks.clear(); 970 m_read_sharing_histogram.clear(); 971 m_write_sharing_histogram.clear(); 972 m_all_sharing_histogram.clear(); 973 m_cache_to_cache = 0; 974 m_memory_to_cache = 0; 975 976 m_predictions = 0; 977 m_predictionOpportunities = 0; 978 m_goodPredictions = 0; 979 980 // clear HashMaps 981 m_requestProfileMap_ptr->clear(); 982 983 // count requests profiled 984 m_requests = 0; 985 986 // Conflicting requests 987 m_conflicting_map_ptr->clear(); 988 m_conflicting_histogram.clear(); 989 990 m_outstanding_requests.clear(); 991 m_outstanding_persistent_requests.clear(); 992 993 m_L1D_cache_profiler_ptr->clearStats(); 994 m_L1I_cache_profiler_ptr->clearStats(); 995 
m_L2_cache_profiler_ptr->clearStats(); 996 //m_xact_profiler_ptr->clearStats(); //gem5:Arka for decomissiong of log_tm 997 998 //---- begin XACT_MEM code 999 ASSERT(m_xactExceptionMap_ptr != NULL); 1000 ASSERT(m_procsInXactMap_ptr != NULL); 1001 ASSERT(m_abortIDMap_ptr != NULL); 1002 ASSERT(m_abortPCMap_ptr != NULL); 1003 ASSERT( m_nackXIDMap_ptr != NULL); 1004 ASSERT(m_nackPCMap_ptr != NULL); 1005 1006 m_abortStarupDelay = -1; 1007 m_abortPerBlockDelay = -1; 1008 m_transWBs = 0; 1009 m_extraWBs = 0; 1010 m_transactionAborts = 0; 1011 m_transactionLogOverflows = 0; 1012 m_transactionCacheOverflows = 0; 1013 m_transactionUnsupInsts = 0; 1014 m_transactionSaveRestAborts = 0; 1015 m_inferredAborts = 0; 1016 m_xactNacked = 0; 1017 1018 m_xactLogs.clear(); 1019 m_xactCycles.clear(); 1020 m_xactReads.clear(); 1021 m_xactWrites.clear(); 1022 m_xactSizes.clear(); 1023 m_abortDelays.clear(); 1024 m_xactRetries.clear(); 1025 m_xactOverflowReads.clear(); 1026 m_xactOverflowWrites.clear(); 1027 m_xactLoadMisses.clear(); 1028 m_xactStoreMisses.clear(); 1029 m_xactOverflowTotalReads.clear(); 1030 m_xactOverflowTotalWrites.clear(); 1031 1032 m_xactExceptionMap_ptr->clear(); 1033 m_procsInXactMap_ptr->clear(); 1034 m_abortIDMap_ptr->clear(); 1035 m_commitIDMap_ptr->clear(); 1036 m_xactRetryIDMap_ptr->clear(); 1037 m_xactCyclesIDMap_ptr->clear(); 1038 m_xactReadSetIDMap_ptr->clear(); 1039 m_xactWriteSetIDMap_ptr->clear(); 1040 m_xactLoadMissIDMap_ptr->clear(); 1041 m_xactStoreMissIDMap_ptr->clear(); 1042 m_xactInstrCountIDMap_ptr->clear(); 1043 m_abortPCMap_ptr->clear(); 1044 m_abortAddressMap_ptr->clear(); 1045 m_nackXIDMap_ptr->clear(); 1046 m_nackXIDPairMap_ptr->clear(); 1047 m_nackPCMap_ptr->clear(); 1048 1049 m_xactReadFilterBitsSetOnCommit->clear(); 1050 m_xactReadFilterBitsSetOnAbort->clear(); 1051 m_xactWriteFilterBitsSetOnCommit->clear(); 1052 m_xactWriteFilterBitsSetOnAbort->clear(); 1053 1054 m_readSetEmptyChecks = 0; 1055 m_readSetMatch = 0; 1056 m_readSetNoMatch = 0; 
1057 m_writeSetEmptyChecks = 0; 1058 m_writeSetMatch = 0; 1059 m_writeSetNoMatch = 0; 1060 1061 m_xact_visualizer_last = 0; 1062 m_watchpointsFalsePositiveTrigger = 0; 1063 m_watchpointsTrueTrigger = 0; 1064 //---- end XACT_MEM code 1065 1066 // for MemoryControl: 1067 m_memReq = 0; 1068 m_memBankBusy = 0; 1069 m_memBusBusy = 0; 1070 m_memTfawBusy = 0; 1071 m_memReadWriteBusy = 0; 1072 m_memDataBusBusy = 0; 1073 m_memRefresh = 0; 1074 m_memRead = 0; 1075 m_memWrite = 0; 1076 m_memWaitCycles = 0; 1077 m_memInputQ = 0; 1078 m_memBankQ = 0; 1079 m_memArbWait = 0; 1080 m_memRandBusy = 0; 1081 m_memNotOld = 0; 1082 1083 for (int bank=0; bank < m_memBankCount.size(); bank++) { 1084 m_memBankCount[bank] = 0; 1085 } 1086 1087 // Flush the prefetches through the system - used so that there are no outstanding requests after stats are cleared 1088 //g_eventQueue_ptr->triggerAllEvents(); 1089 1090 // update the start time 1091 m_ruby_start = g_eventQueue_ptr->getTime(); 1092} 1093 1094void Profiler::addPrimaryStatSample(const CacheMsg& msg, NodeID id) 1095{ 1096 if (Protocol::m_TwoLevelCache) { 1097 if (msg.getType() == CacheRequestType_IFETCH) { 1098 addL1IStatSample(msg, id); 1099 } else { 1100 addL1DStatSample(msg, id); 1101 } 1102 // profile the address after an L1 miss (outside of the processor for CMP) 1103 if (Protocol::m_CMP) { 1104 addAddressTraceSample(msg, id); 1105 } 1106 } else { 1107 addL2StatSample(CacheRequestType_to_GenericRequestType(msg.getType()), 1108 msg.getAccessMode(), msg.getSize(), msg.getPrefetch(), id); 1109 addAddressTraceSample(msg, id); 1110 } 1111} 1112 1113void Profiler::profileConflictingRequests(const Address& addr) 1114{ 1115 assert(addr == line_address(addr)); 1116 Time last_time = m_ruby_start; 1117 if (m_conflicting_map_ptr->exist(addr)) { 1118 Time last_time = m_conflicting_map_ptr->lookup(addr); 1119 } 1120 Time current_time = g_eventQueue_ptr->getTime(); 1121 assert (current_time - last_time > 0); 1122 
m_conflicting_histogram.add(current_time - last_time); 1123 m_conflicting_map_ptr->add(addr, current_time); 1124} 1125 1126void Profiler::addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id) 1127{ 1128 addSecondaryStatSample(CacheRequestType_to_GenericRequestType(requestType), type, msgSize, pfBit, id); 1129} 1130 1131void Profiler::addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id) 1132{ 1133 addL2StatSample(requestType, type, msgSize, pfBit, id); 1134} 1135 1136void Profiler::addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id) 1137{ 1138 m_perProcTotalMisses[id]++; 1139 if (type == AccessModeType_SupervisorMode) { 1140 m_perProcSupervisorMisses[id]++; 1141 } else { 1142 m_perProcUserMisses[id]++; 1143 } 1144 m_L2_cache_profiler_ptr->addStatSample(requestType, type, msgSize, pfBit); 1145} 1146 1147void Profiler::addL1DStatSample(const CacheMsg& msg, NodeID id) 1148{ 1149 m_L1D_cache_profiler_ptr->addStatSample(CacheRequestType_to_GenericRequestType(msg.getType()), 1150 msg.getAccessMode(), msg.getSize(), msg.getPrefetch()); 1151} 1152 1153void Profiler::addL1IStatSample(const CacheMsg& msg, NodeID id) 1154{ 1155 m_L1I_cache_profiler_ptr->addStatSample(CacheRequestType_to_GenericRequestType(msg.getType()), 1156 msg.getAccessMode(), msg.getSize(), msg.getPrefetch()); 1157} 1158 1159void Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id) 1160{ 1161 if (msg.getType() != CacheRequestType_IFETCH) { 1162 1163 // Note: The following line should be commented out if you want to 1164 // use the special profiling that is part of the GS320 protocol 1165 1166 // NOTE: Unless PROFILE_HOT_LINES or PROFILE_ALL_INSTRUCTIONS are enabled, nothing will be profiled by the AddressProfiler 1167 m_address_profiler_ptr->addTraceSample(msg.getAddress(), msg.getProgramCounter(), 
msg.getType(), msg.getAccessMode(), id, false); 1168 } 1169} 1170 1171void Profiler::profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner) 1172{ 1173 Set set_contacted(owner); 1174 if (type == AccessType_Write) { 1175 set_contacted.addSet(sharers); 1176 } 1177 set_contacted.remove(requestor); 1178 int number_contacted = set_contacted.count(); 1179 1180 if (type == AccessType_Write) { 1181 m_write_sharing_histogram.add(number_contacted); 1182 } else { 1183 m_read_sharing_histogram.add(number_contacted); 1184 } 1185 m_all_sharing_histogram.add(number_contacted); 1186 1187 if (number_contacted == 0) { 1188 m_memory_to_cache++; 1189 } else { 1190 m_cache_to_cache++; 1191 } 1192 1193} 1194 1195void Profiler::profileMsgDelay(int virtualNetwork, int delayCycles) { 1196 assert(virtualNetwork < m_delayedCyclesVCHistograms.size()); 1197 m_delayedCyclesHistogram.add(delayCycles); 1198 m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles); 1199 if (virtualNetwork != 0) { 1200 m_delayedCyclesNonPFHistogram.add(delayCycles); 1201 } 1202} 1203 1204// profiles original cache requests including PUTs 1205void Profiler::profileRequest(const string& requestStr) 1206{ 1207 m_requests++; 1208 1209 if (m_requestProfileMap_ptr->exist(requestStr)) { 1210 (m_requestProfileMap_ptr->lookup(requestStr))++; 1211 } else { 1212 m_requestProfileMap_ptr->add(requestStr, 1); 1213 } 1214} 1215 1216void Profiler::recordPrediction(bool wasGood, bool wasPredicted) 1217{ 1218 m_predictionOpportunities++; 1219 if(wasPredicted){ 1220 m_predictions++; 1221 if(wasGood){ 1222 m_goodPredictions++; 1223 } 1224 } 1225} 1226 1227void Profiler::profileFilterAction(int action) 1228{ 1229 m_filter_action_histogram.add(action); 1230} 1231 1232void Profiler::profileMulticastRetry(const Address& addr, int count) 1233{ 1234 m_multicast_retry_histogram.add(count); 1235} 1236 1237void Profiler::startTransaction(int cpu) 1238{ 1239 
m_perProcStartTransaction[cpu]++; 1240} 1241 1242void Profiler::endTransaction(int cpu) 1243{ 1244 m_perProcEndTransaction[cpu]++; 1245} 1246 1247void Profiler::controllerBusy(MachineID machID) 1248{ 1249 m_busyControllerCount[(int)machID.type][(int)machID.num]++; 1250} 1251 1252void Profiler::profilePFWait(Time waitTime) 1253{ 1254 m_prefetchWaitHistogram.add(waitTime); 1255} 1256 1257void Profiler::bankBusy() 1258{ 1259 m_busyBankCount++; 1260} 1261 1262// non-zero cycle demand request 1263void Profiler::missLatency(Time t, CacheRequestType type, GenericMachineType respondingMach) 1264{ 1265 m_allMissLatencyHistogram.add(t); 1266 m_missLatencyHistograms[type].add(t); 1267 m_machLatencyHistograms[respondingMach].add(t); 1268 if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { 1269 m_L2MissLatencyHistogram.add(t); 1270 } 1271} 1272 1273// non-zero cycle prefetch request 1274void Profiler::swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach) 1275{ 1276 m_allSWPrefetchLatencyHistogram.add(t); 1277 m_SWPrefetchLatencyHistograms[type].add(t); 1278 m_SWPrefetchMachLatencyHistograms[respondingMach].add(t); 1279 if(respondingMach == GenericMachineType_Directory || respondingMach == GenericMachineType_NUM) { 1280 m_SWPrefetchL2MissLatencyHistogram.add(t); 1281 } 1282} 1283 1284void Profiler::profileTransition(const string& component, NodeID id, NodeID version, Address addr, 1285 const string& state, const string& event, 1286 const string& next_state, const string& note) 1287{ 1288 const int EVENT_SPACES = 20; 1289 const int ID_SPACES = 3; 1290 const int TIME_SPACES = 7; 1291 const int COMP_SPACES = 10; 1292 const int STATE_SPACES = 6; 1293 1294 if ((g_debug_ptr->getDebugTime() > 0) && 1295 (g_eventQueue_ptr->getTime() >= g_debug_ptr->getDebugTime())) { 1296 (* debug_cout_ptr).flags(ios::right); 1297 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1298 (* 
debug_cout_ptr) << setw(ID_SPACES) << id << " "; 1299 (* debug_cout_ptr) << setw(ID_SPACES) << version << " "; 1300 (* debug_cout_ptr) << setw(COMP_SPACES) << component; 1301 (* debug_cout_ptr) << setw(EVENT_SPACES) << event << " "; 1302 for (int i=0; i < RubyConfig::numberOfProcessors(); i++) { 1303 1304 if (i == id) { 1305 (* debug_cout_ptr).flags(ios::right); 1306 (* debug_cout_ptr) << setw(STATE_SPACES) << state; 1307 (* debug_cout_ptr) << ">"; 1308 (* debug_cout_ptr).flags(ios::left); 1309 (* debug_cout_ptr) << setw(STATE_SPACES) << next_state; 1310 } else { 1311 // cout << setw(STATE_SPACES) << " " << " " << setw(STATE_SPACES) << " "; 1312 } 1313 } 1314 (* debug_cout_ptr) << " " << addr << " " << note; 1315 1316 (* debug_cout_ptr) << endl; 1317 } 1318} 1319 1320// Helper function 1321static double process_memory_total() 1322{ 1323 const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, 1324 ifstream proc_file; 1325 proc_file.open("/proc/self/statm"); 1326 int total_size_in_pages = 0; 1327 int res_size_in_pages = 0; 1328 proc_file >> total_size_in_pages; 1329 proc_file >> res_size_in_pages; 1330 return double(total_size_in_pages)*MULTIPLIER; // size in megabytes 1331} 1332 1333static double process_memory_resident() 1334{ 1335 const double MULTIPLIER = 4096.0/(1024.0*1024.0); // 4kB page size, 1024*1024 bytes per MB, 1336 ifstream proc_file; 1337 proc_file.open("/proc/self/statm"); 1338 int total_size_in_pages = 0; 1339 int res_size_in_pages = 0; 1340 proc_file >> total_size_in_pages; 1341 proc_file >> res_size_in_pages; 1342 return double(res_size_in_pages)*MULTIPLIER; // size in megabytes 1343} 1344 1345void Profiler::profileGetXMaskPrediction(const Set& pred_set) 1346{ 1347 m_getx_mask_prediction.add(pred_set.count()); 1348} 1349 1350void Profiler::profileGetSMaskPrediction(const Set& pred_set) 1351{ 1352 m_gets_mask_prediction.add(pred_set.count()); 1353} 1354 1355void Profiler::profileTrainingMask(const Set& pred_set) 
1356{ 1357 m_explicit_training_mask.add(pred_set.count()); 1358} 1359 1360int64 Profiler::getTotalInstructionsExecuted() const 1361{ 1362 int64 sum = 1; // Starting at 1 allows us to avoid division by zero 1363 for(int i=0; i < RubyConfig::numberOfProcessors(); i++) { 1364 sum += (g_system_ptr->getDriver()->getInstructionCount(i) - m_instructions_executed_at_start[i]); 1365 } 1366 return sum; 1367} 1368 1369int64 Profiler::getTotalTransactionsExecuted() const 1370{ 1371 int64 sum = m_perProcEndTransaction.sum(); 1372 if (sum > 0) { 1373 return sum; 1374 } else { 1375 return 1; // Avoid division by zero errors 1376 } 1377} 1378 1379 1380// The following case statement converts CacheRequestTypes to GenericRequestTypes 1381// allowing all profiling to be done with a single enum type instead of slow strings 1382GenericRequestType Profiler::CacheRequestType_to_GenericRequestType(const CacheRequestType& type) { 1383 switch (type) { 1384 case CacheRequestType_LD: 1385 return GenericRequestType_LD; 1386 break; 1387 case CacheRequestType_ST: 1388 return GenericRequestType_ST; 1389 break; 1390 case CacheRequestType_ATOMIC: 1391 return GenericRequestType_ATOMIC; 1392 break; 1393 case CacheRequestType_IFETCH: 1394 return GenericRequestType_IFETCH; 1395 break; 1396 case CacheRequestType_LD_XACT: 1397 return GenericRequestType_LD_XACT; 1398 break; 1399 case CacheRequestType_LDX_XACT: 1400 return GenericRequestType_LDX_XACT; 1401 break; 1402 case CacheRequestType_ST_XACT: 1403 return GenericRequestType_ST_XACT; 1404 break; 1405 case CacheRequestType_NULL: 1406 return GenericRequestType_NULL; 1407 break; 1408 default: 1409 ERROR_MSG("Unexpected cache request type"); 1410 } 1411} 1412 1413//---- begin Transactional Memory CODE 1414void Profiler::profileTransaction(int size, int logSize, int readS, int writeS, int overflow_readS, int overflow_writeS, int retries, int useful_cycles, bool nacked, int loadMisses, int storeMisses, int instrCount, int xid){ 1415 m_xactLogs.add(logSize); 
1416 m_xactSizes.add(size); 1417 m_xactReads.add(readS); 1418 m_xactWrites.add(writeS); 1419 m_xactRetries.add(retries); 1420 m_xactCycles.add(useful_cycles); 1421 m_xactLoadMisses.add(loadMisses); 1422 m_xactStoreMisses.add(storeMisses); 1423 m_xactInstrCount.add(instrCount); 1424 1425 // was this transaction nacked? 1426 if(nacked){ 1427 m_xactNacked++; 1428 } 1429 1430 // for overflowed transactions 1431 if(overflow_readS > 0 || overflow_writeS > 0){ 1432 m_xactOverflowReads.add(overflow_readS); 1433 m_xactOverflowWrites.add(overflow_writeS); 1434 m_xactOverflowTotalReads.add(readS); 1435 m_xactOverflowTotalWrites.add(writeS); 1436 } 1437 1438 // Record commits by xid 1439 if(!m_commitIDMap_ptr->exist(xid)){ 1440 m_commitIDMap_ptr->add(xid, 1); 1441 m_xactRetryIDMap_ptr->add(xid, retries); 1442 m_xactCyclesIDMap_ptr->add(xid, useful_cycles); 1443 m_xactReadSetIDMap_ptr->add(xid, readS); 1444 m_xactWriteSetIDMap_ptr->add(xid, writeS); 1445 m_xactLoadMissIDMap_ptr->add(xid, loadMisses); 1446 m_xactStoreMissIDMap_ptr->add(xid, storeMisses); 1447 m_xactInstrCountIDMap_ptr->add(xid, instrCount); 1448 } else { 1449 (m_commitIDMap_ptr->lookup(xid))++; 1450 (m_xactRetryIDMap_ptr->lookup(xid)) += retries; 1451 (m_xactCyclesIDMap_ptr->lookup(xid)) += useful_cycles; 1452 (m_xactReadSetIDMap_ptr->lookup(xid)) += readS; 1453 (m_xactWriteSetIDMap_ptr->lookup(xid)) += writeS; 1454 (m_xactLoadMissIDMap_ptr->lookup(xid)) += loadMisses; 1455 (m_xactStoreMissIDMap_ptr->lookup(xid)) += storeMisses; 1456 (m_xactInstrCountIDMap_ptr->lookup(xid)) += instrCount; 1457 } 1458} 1459 1460void Profiler::profileBeginTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen){ 1461 //- if(PROFILE_XACT){ 1462 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 2)){ 1463 const char* openStr = isOpen ? 
" OPEN" : " CLOSED"; 1464 const int ID_SPACES = 3; 1465 const int TIME_SPACES = 7; 1466 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1467 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1468 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1469 // The actual processor number 1470 int proc_no = id*RubyConfig::numberofSMTThreads() + thread; 1471 (* debug_cout_ptr).flags(ios::right); 1472 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1473 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1474 << " XACT BEGIN " << xid 1475 << " PC 0x" << hex << pc.getAddress() 1476 << dec 1477 << " *PC 0x" << hex << myInst << dec 1478 << " '" << myInstStr << "'" 1479 << openStr 1480 << endl; 1481 } 1482} 1483 1484void Profiler::profileCommitTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen){ 1485 //- if(PROFILE_XACT){ 1486 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 2)){ 1487 const char* openStr = isOpen ? 
" OPEN" : " CLOSED"; 1488 const int ID_SPACES = 3; 1489 const int TIME_SPACES = 7; 1490 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1491 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1492 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1493 // The actual processor number 1494 int proc_no = id*RubyConfig::numberofSMTThreads() + thread; 1495 (* debug_cout_ptr).flags(ios::right); 1496 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1497 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1498 << " XACT COMMIT " << xid 1499 << " PC 0x" << hex << pc.getAddress() 1500 << dec 1501 << " *PC 0x" << hex << myInst << dec 1502 << " '" << myInstStr << "'" 1503 << openStr 1504 << endl; 1505 } 1506 1507} 1508 1509// for profiling overflows 1510void Profiler::profileLoadOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow){ 1511 //- if(PROFILE_XACT){ 1512 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 1513 const int ID_SPACES = 3; 1514 const int TIME_SPACES = 7; 1515 string overflow_str = " XACT LOAD L1 OVERFLOW "; 1516 if(!l1_overflow){ 1517 overflow_str = " XACT LOAD L2 OVERFLOW "; 1518 } 1519 // The actual processor number 1520 int proc_no = id*RubyConfig::numberofSMTThreads() + thread; 1521 (* debug_cout_ptr).flags(ios::right); 1522 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1523 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1524 << overflow_str << xid 1525 << " ADDR " << addr 1526 << endl; 1527 } 1528} 1529 1530// for profiling overflows 1531void Profiler::profileStoreOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow){ 1532 //- if(PROFILE_XACT){ 1533 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 1534 const int ID_SPACES = 3; 1535 const int TIME_SPACES = 7; 1536 string overflow_str = " 
XACT STORE L1 OVERFLOW "; 1537 if(!l1_overflow){ 1538 overflow_str = " XACT STORE L2 OVERFLOW "; 1539 } 1540 // The actual processor number 1541 int proc_no = id*RubyConfig::numberofSMTThreads() + thread; 1542 (* debug_cout_ptr).flags(ios::right); 1543 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1544 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1545 << overflow_str << xid 1546 << " ADDR " << addr 1547 << endl; 1548 } 1549} 1550 1551void Profiler::profileLoadTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc){ 1552 //- if(PROFILE_XACT){ 1553 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 3)){ 1554 const int ID_SPACES = 3; 1555 const int TIME_SPACES = 7; 1556 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1557 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1558 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1559 // The actual processor number 1560 int proc_no = id*RubyConfig::numberofSMTThreads() + thread; 1561 (* debug_cout_ptr).flags(ios::right); 1562 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1563 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1564 << " XACT LOAD " << xid 1565 << " " << addr 1566 << " VA " << logicalAddress 1567 << " PC " << pc 1568 << " *PC 0x" << hex << myInst << dec 1569 << " '" << myInstStr << "'" 1570 //<< " VAL 0x" << hex << SIMICS_read_physical_memory(proc_no, SIMICS_translate_data_address(proc_no, logicalAddress), 4) << dec 1571 << " VAL 0x" << hex << g_system_ptr->getDriver()->readPhysicalMemory(proc_no, addr.getAddress(), 4) << dec 1572 << endl; 1573 } 1574} 1575 1576void Profiler::profileLoad(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc){ 1577 if(PROFILE_NONXACT){ 1578 const int ID_SPACES = 3; 1579 const int 
TIME_SPACES = 7; 1580 // The actual processor number 1581 int proc_no = id*RubyConfig::numberofSMTThreads() + thread; 1582 (* debug_cout_ptr).flags(ios::right); 1583 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1584 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1585 << " LOAD " << xid 1586 << " " << addr 1587 << " VA " << logicalAddress 1588 << " PC " << pc 1589 //<< " VAL 0x" << hex << SIMICS_read_physical_memory(proc_no, SIMICS_translate_data_address(proc_no, logicalAddress), 4) << dec 1590 << " VAL 0x" << hex << g_system_ptr->getDriver()->readPhysicalMemory(proc_no, addr.getAddress(), 4) << dec 1591 << endl; 1592 } 1593} 1594 1595void Profiler::profileStoreTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc){ 1596 //- if(PROFILE_XACT){ 1597 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 3)){ 1598 const int ID_SPACES = 3; 1599 const int TIME_SPACES = 7; 1600 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1601 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1602 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1603 // The actual processor number 1604 int proc_no = id*RubyConfig::numberofSMTThreads() + thread; 1605 (* debug_cout_ptr).flags(ios::right); 1606 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1607 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1608 << " XACT STORE " << xid 1609 << " " << addr 1610 << " VA " << logicalAddress 1611 << " PC " << pc 1612 << " *PC 0x" << hex << myInst << dec 1613 << " '" << myInstStr << "'" 1614 << endl; 1615 } 1616} 1617 1618void Profiler::profileStore(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc){ 1619 if(PROFILE_NONXACT){ 1620 const int ID_SPACES = 3; 1621 const int TIME_SPACES = 7; 1622 // The actual 
processor number 1623 int proc_no = id*RubyConfig::numberofSMTThreads() + thread; 1624 (* debug_cout_ptr).flags(ios::right); 1625 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1626 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1627 << " STORE " << xid 1628 << " " << addr 1629 << " VA " << logicalAddress 1630 << " PC " << pc 1631 << endl; 1632 } 1633} 1634 1635void Profiler::profileNack(NodeID id, int tid, int xid, int thread, int nacking_thread, NodeID nackedBy, Address addr, Address logicalAddress, Address pc, uint64 seq_ts, uint64 nack_ts, bool possibleCycle){ 1636 int nid = 0; // g_system_ptr->getChip(nackedBy/RubyConfig::numberOfProcsPerChip())->getTransactionInterfaceManager(nackedBy%RubyConfig::numberOfProcsPerChip())->getXID(nacking_thread); 1637 assert(0); 1638 //- if(PROFILE_XACT){ 1639 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 1640 const int ID_SPACES = 3; 1641 const int TIME_SPACES = 7; 1642 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1643 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1644 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1645 // The actual processor number 1646 int proc_no = id*g_NUM_SMT_THREADS + thread; 1647 int nack_proc_no = nackedBy*g_NUM_SMT_THREADS + nacking_thread; 1648 Address nack_pc = SIMICS_get_program_counter(nack_proc_no); 1649 (* debug_cout_ptr).flags(ios::right); 1650 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1651 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1652 << " XACT NACK " << xid 1653 << " by " << nack_proc_no 1654 << " [ " << nackedBy 1655 << ", " << nacking_thread 1656 << " ]" 1657 << " NID: " << nid 1658 << " " << addr 1659 << " VA " << logicalAddress 1660 << " PC " << pc 1661 << " *PC 0x" << hex << myInst << dec 1662 << " '" << myInstStr << "'" 1663 << " NackerPC " 
<< nack_pc 1664 << " my_ts " << seq_ts 1665 << " nack_ts " << nack_ts 1666 << " possible_cycle " << possibleCycle 1667 << endl; 1668 } 1669 1670 // Record nacks by xid 1671 if(!m_nackXIDMap_ptr->exist(xid)){ 1672 m_nackXIDMap_ptr->add(xid, 1); 1673 } else { 1674 (m_nackXIDMap_ptr->lookup(xid))++; 1675 } 1676 1677 // Record nack ID pairs by xid 1678 if(!m_nackXIDPairMap_ptr->exist(xid)){ 1679 Map<int, int> * new_map = new Map<int, int>; 1680 new_map->add(nid, 1); 1681 m_nackXIDPairMap_ptr->add(xid, new_map); 1682 } 1683 else{ 1684 // retrieve existing map 1685 Map<int, int> * my_map = m_nackXIDPairMap_ptr->lookup(xid); 1686 if(!my_map->exist(nid)){ 1687 my_map->add(nid, 1); 1688 } 1689 else{ 1690 (my_map->lookup(nid))++; 1691 } 1692 } 1693 1694 // Record nacks by pc 1695 if(!m_nackPCMap_ptr->exist(pc)){ 1696 m_nackPCMap_ptr->add(pc, 1); 1697 } else { 1698 (m_nackPCMap_ptr->lookup(pc))++; 1699 } 1700} 1701 1702void Profiler::profileExposedConflict(NodeID id, int xid, int thread, Address addr, Address pc){ 1703 //if(PROFILE_XACT){ 1704 const int ID_SPACES = 3; 1705 const int TIME_SPACES = 7; 1706 // The actual processor number 1707 int proc_no = id*g_NUM_SMT_THREADS + thread; 1708 (* debug_cout_ptr).flags(ios::right); 1709 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1710 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " " 1711 << " EXPOSED ACTION CONFLICT " << xid 1712 << " ADDR " << addr 1713 << " PC " << pc 1714 << endl; 1715 //} 1716} 1717 1718void Profiler::profileInferredAbort(){ 1719 m_inferredAborts++; 1720} 1721 1722void Profiler::profileAbortDelayConstants(int startupDelay, int perBlock){ 1723 m_abortStarupDelay = startupDelay; 1724 m_abortPerBlockDelay = perBlock; 1725} 1726 1727void Profiler::profileAbortTransaction(NodeID id, int tid, int xid, int thread, int delay, int abortingThread, int abortingProc, Address addr, Address pc){ 1728 const int ID_SPACES = 3; 1729 const int 
TIME_SPACES = 7; 1730 int abortingXID = -1; 1731 // The actual processor number 1732 int proc_no = id*g_NUM_SMT_THREADS + thread; 1733 // we are passed in physical proc number. Compute logical abort proc_no 1734 int logical_abort_proc_no = abortingProc/g_NUM_SMT_THREADS; 1735 if(abortingProc >= 0){ 1736 AbstractChip * c = g_system_ptr->getChip(logical_abort_proc_no/RubyConfig::numberOfProcsPerChip()); 1737 abortingXID = 0; // c->getTransactionInterfaceManager(logical_abort_proc_no%RubyConfig::numberOfProcsPerChip())->getXID(abortingThread); 1738 assert(0); 1739 } 1740 //- if(PROFILE_XACT){ 1741 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 1742 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1743 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1744 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1745 (* debug_cout_ptr).flags(ios::right); 1746 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1747 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << "]" << " TID " << tid 1748 << " XACT ABORT " << xid 1749 << " caused by " << abortingProc 1750 << " [ " << logical_abort_proc_no 1751 << ", " << abortingThread 1752 << " ]" 1753 << " xid: " << abortingXID << " " 1754 << " address: " << addr 1755 << " delay: " << delay 1756 << " PC " << pc 1757 << " *PC 0x" << hex << myInst << dec 1758 << " '" << myInstStr << "'" 1759 << endl; 1760 } 1761 m_transactionAborts++; 1762 1763 // Record aborts by xid 1764 if(!m_abortIDMap_ptr->exist(xid)){ 1765 m_abortIDMap_ptr->add(xid, 1); 1766 } else { 1767 (m_abortIDMap_ptr->lookup(xid))++; 1768 } 1769 m_abortDelays.add(delay); 1770 1771 // Record aborts by pc 1772 if(!m_abortPCMap_ptr->exist(pc)){ 1773 m_abortPCMap_ptr->add(pc, 1); 1774 } else { 1775 (m_abortPCMap_ptr->lookup(pc))++; 1776 } 1777 1778 // Record aborts by address 1779 if(!m_abortAddressMap_ptr->exist(addr)){ 1780 m_abortAddressMap_ptr->add(addr, 1); 1781 } else { 
1782 (m_abortAddressMap_ptr->lookup(addr))++; 1783 } 1784} 1785 1786void Profiler::profileTransWB(){ 1787 m_transWBs++; 1788} 1789 1790void Profiler::profileExtraWB(){ 1791 m_extraWBs++; 1792} 1793 1794void Profiler::profileXactChange(int procs, int cycles){ 1795 if(!m_procsInXactMap_ptr->exist(procs)){ 1796 m_procsInXactMap_ptr->add(procs, cycles); 1797 } else { 1798 (m_procsInXactMap_ptr->lookup(procs)) += cycles; 1799 } 1800} 1801 1802void Profiler::profileReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread){ 1803 // do NOT count instances when signature is empty! 1804 if(!bf_filter_result && !perfect_filter_result){ 1805 m_readSetEmptyChecks++; 1806 return; 1807 } 1808 1809 if(bf_filter_result != perfect_filter_result){ 1810 m_readSetNoMatch++; 1811 /* 1812 // we have a false positive 1813 if(!m_readSetNoMatch_ptr->exist(addr)){ 1814 m_readSetNoMatch_ptr->add(addr, 1); 1815 } 1816 else{ 1817 (m_readSetNoMatch_ptr->lookup(addr))++; 1818 } 1819 */ 1820 } 1821 else{ 1822 m_readSetMatch++; 1823 /* 1824 // Bloom filter agrees with perfect filter 1825 if(!m_readSetMatch_ptr->exist(addr)){ 1826 m_readSetMatch_ptr->add(addr, 1); 1827 } 1828 else{ 1829 (m_readSetMatch_ptr->lookup(addr))++; 1830 } 1831 */ 1832 } 1833} 1834 1835 1836void Profiler::profileRemoteReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread){ 1837 if(bf_filter_result != perfect_filter_result){ 1838 // we have a false positive 1839 if(!m_remoteReadSetNoMatch_ptr->exist(addr)){ 1840 m_remoteReadSetNoMatch_ptr->add(addr, 1); 1841 } 1842 else{ 1843 (m_remoteReadSetNoMatch_ptr->lookup(addr))++; 1844 } 1845 } 1846 else{ 1847 // Bloom filter agrees with perfect filter 1848 if(!m_remoteReadSetMatch_ptr->exist(addr)){ 1849 m_remoteReadSetMatch_ptr->add(addr, 1); 1850 } 1851 else{ 1852 (m_remoteReadSetMatch_ptr->lookup(addr))++; 1853 } 1854 } 1855} 1856 1857void Profiler::profileWriteSet(Address addr, bool bf_filter_result, 
bool perfect_filter_result, NodeID id, int thread){ 1858 // do NOT count instances when signature is empty! 1859 if(!bf_filter_result && !perfect_filter_result){ 1860 m_writeSetEmptyChecks++; 1861 return; 1862 } 1863 1864 if(bf_filter_result != perfect_filter_result){ 1865 m_writeSetNoMatch++; 1866 /* 1867 // we have a false positive 1868 if(!m_writeSetNoMatch_ptr->exist(addr)){ 1869 m_writeSetNoMatch_ptr->add(addr, 1); 1870 } 1871 else{ 1872 (m_writeSetNoMatch_ptr->lookup(addr))++; 1873 } 1874 */ 1875 } 1876 else{ 1877 m_writeSetMatch++; 1878 /* 1879 // Bloom filter agrees with perfect filter 1880 if(!m_writeSetMatch_ptr->exist(addr)){ 1881 m_writeSetMatch_ptr->add(addr, 1); 1882 } 1883 else{ 1884 (m_writeSetMatch_ptr->lookup(addr))++; 1885 } 1886 */ 1887 } 1888} 1889 1890 1891void Profiler::profileRemoteWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread){ 1892 if(bf_filter_result != perfect_filter_result){ 1893 // we have a false positive 1894 if(!m_remoteWriteSetNoMatch_ptr->exist(addr)){ 1895 m_remoteWriteSetNoMatch_ptr->add(addr, 1); 1896 } 1897 else{ 1898 (m_remoteWriteSetNoMatch_ptr->lookup(addr))++; 1899 } 1900 } 1901 else{ 1902 // Bloom filter agrees with perfect filter 1903 if(!m_remoteWriteSetMatch_ptr->exist(addr)){ 1904 m_remoteWriteSetMatch_ptr->add(addr, 1); 1905 } 1906 else{ 1907 (m_remoteWriteSetMatch_ptr->lookup(addr))++; 1908 } 1909 } 1910} 1911 1912void Profiler::profileTransactionLogOverflow(NodeID id, Address addr, Address pc){ 1913 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 1914 const int ID_SPACES = 3; 1915 const int TIME_SPACES = 7; 1916 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1917 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1918 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1919 (* debug_cout_ptr).flags(ios::right); 1920 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1921 (* debug_cout_ptr) << 
setw(ID_SPACES) << id << " " 1922 << " XACT LOG OVERFLOW" 1923 << " ADDR " << addr 1924 << " PC " << pc 1925 << " *PC 0x" << hex << myInst << dec 1926 << " '" << myInstStr << "'" 1927 << endl; 1928 1929 } 1930 m_transactionLogOverflows++; 1931} 1932 1933void Profiler::profileTransactionCacheOverflow(NodeID id, Address addr, Address pc){ 1934 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 1935 const int ID_SPACES = 3; 1936 const int TIME_SPACES = 7; 1937 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1938 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1939 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1940 (* debug_cout_ptr).flags(ios::right); 1941 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1942 (* debug_cout_ptr) << setw(ID_SPACES) << id << " " 1943 << " XACT CACHE OVERFLOW " 1944 << " ADDR " << addr 1945 << " PC " << pc 1946 << " *PC 0x" << hex << myInst << dec 1947 << " '" << myInstStr << "'" 1948 << endl; 1949 1950 } 1951 m_transactionCacheOverflows++; 1952} 1953 1954void Profiler::profileGetCPS(NodeID id, uint32 cps, Address pc){ 1955 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 1956 const int ID_SPACES = 3; 1957 const int TIME_SPACES = 7; 1958 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 1959 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 1960 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 1961 1962 (* debug_cout_ptr).flags(ios::right); 1963 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 1964 (* debug_cout_ptr) << setw(ID_SPACES) << id << " " 1965 << " XACT GET CPS" 1966 << " PC " << pc 1967 << " *PC 0x" << hex << myInst << dec 1968 << " '" << myInstStr << "'" 1969 << " CPS 0x" << hex << cps << dec 1970 << endl; 1971 } 1972} 1973//---- end Transactional Memory CODE 1974 1975 1976void Profiler::profileExceptionStart(bool xact, NodeID id, int thread, int val, int trap_level, 
uinteger_t pc, uinteger_t npc){ 1977 if(xact){ 1978 if(!m_xactExceptionMap_ptr->exist(val)){ 1979 m_xactExceptionMap_ptr->add(val, 1); 1980 } else { 1981 (m_xactExceptionMap_ptr->lookup(val))++; 1982 } 1983 } 1984 1985 if (!xact && !PROFILE_NONXACT) return; 1986 1987 if(PROFILE_EXCEPTIONS){ 1988 const int ID_SPACES = 3; 1989 const int TIME_SPACES = 7; 1990 // The actual processor number 1991 int proc_no = id*g_NUM_SMT_THREADS + thread; 1992 1993 // get the excepting instruction 1994 const char * instruction; 1995 physical_address_t addr = SIMICS_translate_address( proc_no, Address(pc)); 1996 if(val != 0x64 && addr != 0x0){ 1997 // ignore instruction TLB miss 1998 instruction = SIMICS_disassemble_physical( proc_no, addr ); 1999 } 2000 2001 (* debug_cout_ptr).flags(ios::right); 2002 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 2003 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << " ]" << " "; 2004 if (xact) 2005 (* debug_cout_ptr) << " XACT Exception("; 2006 else 2007 (* debug_cout_ptr) << " Exception("; 2008 2009 (* debug_cout_ptr) << hex << val << dec << ")_START--Trap Level " << trap_level 2010 << "--(PC=0x" << hex << pc << ", " << npc << ")" 2011 << dec; 2012 2013 if(val != 0x64 && addr != 0x0){ 2014 (* debug_cout_ptr) << " instruction = " << instruction; 2015 } 2016 else{ 2017 (* debug_cout_ptr) << " instruction = INSTRUCTION TLB MISS"; 2018 } 2019 (* debug_cout_ptr) << dec << endl; 2020 } 2021} 2022 2023void Profiler::profileExceptionDone(bool xact, NodeID id, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc, uinteger_t tpc, uinteger_t tnpc){ 2024 if (!xact && !PROFILE_NONXACT) return; 2025 2026 if (PROFILE_EXCEPTIONS){ 2027 const int ID_SPACES = 3; 2028 const int TIME_SPACES = 7; 2029 // The actual processor number 2030 int proc_no = id*g_NUM_SMT_THREADS + thread; 2031 2032 // get the excepting instruction 2033 const char * instruction; 2034 instruction = 
SIMICS_disassemble_physical( proc_no, SIMICS_translate_address( proc_no, Address(pc) ) ); 2035 2036 2037 (* debug_cout_ptr).flags(ios::right); 2038 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 2039 (* debug_cout_ptr) << setw(ID_SPACES) << proc_no << " [" << id << "," << thread << " ]" << " "; 2040 if (xact) 2041 (* debug_cout_ptr) << " XACT Exception("; 2042 else 2043 (* debug_cout_ptr) << " Exception("; 2044 2045 (* debug_cout_ptr) << hex << val << dec << ")_DONE--Trap Level " << trap_level 2046 << "--(PC=0x" << hex << pc << ", " << npc << dec << ")" 2047 << "--(TPC=0x" << hex << tpc << ", " << tnpc << dec << ")" 2048 << endl; 2049 } 2050} 2051 2052void Profiler::rubyWatch(int id){ 2053 int rn_g1 = SIMICS_get_register_number(id, "g1"); 2054 uint64 tr = SIMICS_read_register(id, rn_g1); 2055 Address watch_address = Address(tr); 2056 const int ID_SPACES = 3; 2057 const int TIME_SPACES = 7; 2058 2059 (* debug_cout_ptr).flags(ios::right); 2060 (* debug_cout_ptr) << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 2061 (* debug_cout_ptr) << setw(ID_SPACES) << id << " " 2062 << "RUBY WATCH " 2063 << watch_address 2064 << endl; 2065 2066 if(!m_watch_address_list_ptr->exist(watch_address)){ 2067 m_watch_address_list_ptr->add(watch_address, 1); 2068 } 2069} 2070 2071bool Profiler::watchAddress(Address addr){ 2072 if (m_watch_address_list_ptr->exist(addr)) 2073 return true; 2074 else 2075 return false; 2076} 2077 2078void Profiler::profileReadFilterBitsSet(int xid, int bits, bool isCommit) { 2079 if (isCommit) { 2080 if(!m_xactReadFilterBitsSetOnCommit->exist(xid)){ 2081 Histogram hist; 2082 hist.add(bits); 2083 m_xactReadFilterBitsSetOnCommit->add(xid, hist); 2084 } 2085 else{ 2086 (m_xactReadFilterBitsSetOnCommit->lookup(xid)).add(bits); 2087 } 2088 } else { 2089 if(!m_xactReadFilterBitsSetOnAbort->exist(xid)){ 2090 Histogram hist; 2091 hist.add(bits); 2092 m_xactReadFilterBitsSetOnAbort->add(xid, hist); 2093 } 2094 else{ 2095 
(m_xactReadFilterBitsSetOnAbort->lookup(xid)).add(bits); 2096 } 2097 } 2098} 2099 2100void Profiler::profileWriteFilterBitsSet(int xid, int bits, bool isCommit) { 2101 if (isCommit) { 2102 if(!m_xactWriteFilterBitsSetOnCommit->exist(xid)){ 2103 Histogram hist; 2104 hist.add(bits); 2105 m_xactWriteFilterBitsSetOnCommit->add(xid, hist); 2106 } 2107 else{ 2108 (m_xactWriteFilterBitsSetOnCommit->lookup(xid)).add(bits); 2109 } 2110 } else { 2111 if(!m_xactWriteFilterBitsSetOnAbort->exist(xid)){ 2112 Histogram hist; 2113 hist.add(bits); 2114 m_xactWriteFilterBitsSetOnAbort->add(xid, hist); 2115 } 2116 else{ 2117 (m_xactWriteFilterBitsSetOnAbort->lookup(xid)).add(bits); 2118 } 2119 } 2120} 2121/* 2122 //gem5:Arka for decomissioning log_tm 2123 2124void Profiler::setXactVisualizerFile(char * filename){ 2125 if ( (filename == NULL) || 2126 (!strcmp(filename, "none")) ) { 2127 m_xact_visualizer_ptr = &cout; 2128 return; 2129 } 2130 2131 if (m_xact_visualizer.is_open() ) { 2132 m_xact_visualizer.close (); 2133 } 2134 m_xact_visualizer.open (filename, std::ios::out); 2135 if (! 
m_xact_visualizer.is_open() ) { 2136 cerr << "setXactVisualizer: can't open file " << filename << endl; 2137 } 2138 else { 2139 m_xact_visualizer_ptr = &m_xact_visualizer; 2140 } 2141 cout << "setXactVisualizer file " << filename << endl; 2142} 2143 2144void Profiler::printTransactionState(bool can_skip){ 2145 if (!XACT_VISUALIZER) return; 2146 int num_processors = RubyConfig::numberOfProcessors() * RubyConfig::numberofSMTThreads(); 2147 2148 if (!g_system_ptr->getXactVisualizer()->existXactActivity() && can_skip) 2149 return; 2150 2151 if (can_skip && ((g_eventQueue_ptr->getTime()/10000) <= m_xact_visualizer_last)) 2152 return; 2153 2154 Vector<char> xactStateVector = g_system_ptr->getXactVisualizer()->getTransactionStateVector(); 2155 for (int i = 0 ; i < num_processors; i++){ 2156 (* m_xact_visualizer_ptr) << xactStateVector[i] << " "; 2157 } 2158 (* m_xact_visualizer_ptr) << " " << g_eventQueue_ptr->getTime() << endl; 2159 m_xact_visualizer_last = g_eventQueue_ptr->getTime() / 10000; 2160} 2161*/ 2162void Profiler::watchpointsFalsePositiveTrigger() 2163{ 2164 m_watchpointsFalsePositiveTrigger++; 2165} 2166 2167void Profiler::watchpointsTrueTrigger() 2168{ 2169 m_watchpointsTrueTrigger++; 2170} 2171 2172// For MemoryControl: 2173void Profiler::profileMemReq(int bank) { 2174 m_memReq++; 2175 m_memBankCount[bank]++; 2176} 2177void Profiler::profileMemBankBusy() { m_memBankBusy++; } 2178void Profiler::profileMemBusBusy() { m_memBusBusy++; } 2179void Profiler::profileMemReadWriteBusy() { m_memReadWriteBusy++; } 2180void Profiler::profileMemDataBusBusy() { m_memDataBusBusy++; } 2181void Profiler::profileMemTfawBusy() { m_memTfawBusy++; } 2182void Profiler::profileMemRefresh() { m_memRefresh++; } 2183void Profiler::profileMemRead() { m_memRead++; } 2184void Profiler::profileMemWrite() { m_memWrite++; } 2185void Profiler::profileMemWaitCycles(int cycles) { m_memWaitCycles += cycles; } 2186void Profiler::profileMemInputQ(int cycles) { m_memInputQ += cycles; } 2187void 
Profiler::profileMemBankQ(int cycles) { m_memBankQ += cycles; } 2188void Profiler::profileMemArbWait(int cycles) { m_memArbWait += cycles; } 2189void Profiler::profileMemRandBusy() { m_memRandBusy++; } 2190void Profiler::profileMemNotOld() { m_memNotOld++; } 2191 2192 2193//----------- ATMTP -------------------// 2194 2195void Profiler::profileTransactionTCC(NodeID id, Address pc){ 2196 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 2197 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 2198 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 2199 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 2200 2201 const int ID_SPACES = 3; 2202 const int TIME_SPACES = 7; 2203 cout.flags(ios::right); 2204 cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 2205 cout << setw(ID_SPACES) << id << " " 2206 << " XACT Aborting! Executed TCC " 2207 << " PC: " << pc 2208 << " *PC: 0x" << hex << myInst << dec 2209 << " '" << myInstStr << "'" 2210 << endl; 2211 } 2212 m_transactionUnsupInsts++; 2213} 2214 2215void Profiler::profileTransactionUnsupInst(NodeID id, Address pc){ 2216 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 2217 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 2218 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 2219 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 2220 2221 const int ID_SPACES = 3; 2222 const int TIME_SPACES = 7; 2223 cout.flags(ios::right); 2224 cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 2225 cout << setw(ID_SPACES) << id << " " 2226 << " XACT Aborting! 
Executed Unsupported Instruction " 2227 << " PC: " << pc 2228 << " *PC: 0x" << hex << myInst << dec 2229 << " '" << myInstStr << "'" 2230 << endl; 2231 } 2232 m_transactionUnsupInsts++; 2233} 2234 2235void Profiler::profileTransactionSaveInst(NodeID id, Address pc){ 2236 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 2237 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 2238 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 2239 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 2240 2241 const int ID_SPACES = 3; 2242 const int TIME_SPACES = 7; 2243 cout.flags(ios::right); 2244 cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 2245 cout << setw(ID_SPACES) << id << " " 2246 << " XACT Aborting! Executed Save Instruction " 2247 << " PC: " << pc 2248 << " *PC: 0x" << hex << myInst << dec 2249 << " '" << myInstStr << "'" 2250 << endl; 2251 } 2252 m_transactionSaveRestAborts++; 2253} 2254 2255void Profiler::profileTransactionRestoreInst(NodeID id, Address pc){ 2256 if(PROFILE_XACT || (ATMTP_DEBUG_LEVEL >= 1)){ 2257 physical_address_t myPhysPC = SIMICS_translate_address(id, pc); 2258 integer_t myInst = SIMICS_read_physical_memory(id, myPhysPC, 4); 2259 const char *myInstStr = SIMICS_disassemble_physical(id, myPhysPC); 2260 2261 const int ID_SPACES = 3; 2262 const int TIME_SPACES = 7; 2263 cout.flags(ios::right); 2264 cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 2265 cout << setw(ID_SPACES) << id << " " 2266 << " XACT Aborting! 
Executed Restore Instruction " 2267 << " PC: " << pc 2268 << " *PC: 0x" << hex << myInst << dec 2269 << " '" << myInstStr << "'" 2270 << endl; 2271 } 2272 m_transactionSaveRestAborts++; 2273} 2274 2275void Profiler::profileTimerInterrupt(NodeID id, 2276 uinteger_t tick, uinteger_t tick_cmpr, 2277 uinteger_t stick, uinteger_t stick_cmpr, 2278 int trap_level, 2279 uinteger_t pc, uinteger_t npc, 2280 uinteger_t pstate, int pil){ 2281 if (PROFILE_EXCEPTIONS) { 2282 const int ID_SPACES = 3; 2283 const int TIME_SPACES = 7; 2284 cout.flags(ios::right); 2285 cout << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " "; 2286 cout << setw(ID_SPACES) << id << " "; 2287 cout << hex << "Timer--(Tick=0x" << tick << ", TckCmp=0x" << tick_cmpr 2288 << ", STick=0x" << stick << ", STickCmp=0x" << stick_cmpr 2289 << ")--(PC=" << pc << ", " << npc 2290 << dec << ")--(TL=" << trap_level << ", pil=" << pil 2291 << hex << ", pstate=0x" << pstate 2292 << dec << ")" << endl; 2293 } 2294} 2295