// Profiler.cc revision 11798:e034a4566653
1/* 2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 
34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45#include "mem/ruby/profiler/Profiler.hh" 46 47#include <sys/types.h> 48#include <unistd.h> 49 50#include <algorithm> 51#include <fstream> 52 53#include "base/stl_helpers.hh" 54#include "base/str.hh" 55#include "mem/protocol/MachineType.hh" 56#include "mem/protocol/RubyRequest.hh" 57#include "mem/ruby/network/Network.hh" 58#include "mem/ruby/profiler/AddressProfiler.hh" 59 60/** 61 * the profiler uses GPUCoalescer code even 62 * though the GPUCoalescer is not built for 63 * all ISAs, which can lead to run/link time 64 * errors. here we guard the coalescer code 65 * with ifdefs as there is no easy way to 66 * refactor this code without removing 67 * GPUCoalescer stats from the profiler. 68 * 69 * eventually we should use probe points 70 * here, but until then these ifdefs will 71 * serve. 
72 */ 73#ifdef BUILD_GPU 74#include "mem/ruby/system/GPUCoalescer.hh" 75#endif 76 77#include "mem/ruby/system/Sequencer.hh" 78 79using namespace std; 80using m5::stl_helpers::operator<<; 81 82Profiler::Profiler(const RubySystemParams *p, RubySystem *rs) 83 : m_ruby_system(rs), m_hot_lines(p->hot_lines), 84 m_all_instructions(p->all_instructions), 85 m_num_vnets(p->number_of_virtual_networks) 86{ 87 m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); 88 m_address_profiler_ptr->setHotLines(m_hot_lines); 89 m_address_profiler_ptr->setAllInstructions(m_all_instructions); 90 91 if (m_all_instructions) { 92 m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); 93 m_inst_profiler_ptr->setHotLines(m_hot_lines); 94 m_inst_profiler_ptr->setAllInstructions(m_all_instructions); 95 } 96} 97 98Profiler::~Profiler() 99{ 100} 101 102void 103Profiler::regStats(const std::string &pName) 104{ 105 if (!m_all_instructions) { 106 m_address_profiler_ptr->regStats(pName); 107 } 108 109 if (m_all_instructions) { 110 m_inst_profiler_ptr->regStats(pName); 111 } 112 113 delayHistogram 114 .init(10) 115 .name(pName + ".delayHist") 116 .desc("delay histogram for all message") 117 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 118 119 for (int i = 0; i < m_num_vnets; i++) { 120 delayVCHistogram.push_back(new Stats::Histogram()); 121 delayVCHistogram[i] 122 ->init(10) 123 .name(pName + csprintf(".delayVCHist.vnet_%i", i)) 124 .desc(csprintf("delay histogram for vnet_%i", i)) 125 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 126 } 127 128 m_outstandReqHistSeqr 129 .init(10) 130 .name(pName + ".outstanding_req_hist_seqr") 131 .desc("") 132 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 133 134 m_outstandReqHistCoalsr 135 .init(10) 136 .name(pName + ".outstanding_req_hist_coalsr") 137 .desc("") 138 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 139 140 m_latencyHistSeqr 141 .init(10) 142 .name(pName + ".latency_hist_seqr") 143 .desc("") 
144 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 145 146 m_latencyHistCoalsr 147 .init(10) 148 .name(pName + ".latency_hist_coalsr") 149 .desc("") 150 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 151 152 m_hitLatencyHistSeqr 153 .init(10) 154 .name(pName + ".hit_latency_hist_seqr") 155 .desc("") 156 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 157 158 m_missLatencyHistSeqr 159 .init(10) 160 .name(pName + ".miss_latency_hist_seqr") 161 .desc("") 162 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 163 164 m_missLatencyHistCoalsr 165 .init(10) 166 .name(pName + ".miss_latency_hist_coalsr") 167 .desc("") 168 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 169 170 for (int i = 0; i < RubyRequestType_NUM; i++) { 171 m_typeLatencyHistSeqr.push_back(new Stats::Histogram()); 172 m_typeLatencyHistSeqr[i] 173 ->init(10) 174 .name(pName + csprintf(".%s.latency_hist_seqr", 175 RubyRequestType(i))) 176 .desc("") 177 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 178 179 m_typeLatencyHistCoalsr.push_back(new Stats::Histogram()); 180 m_typeLatencyHistCoalsr[i] 181 ->init(10) 182 .name(pName + csprintf(".%s.latency_hist_coalsr", 183 RubyRequestType(i))) 184 .desc("") 185 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 186 187 m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram()); 188 m_hitTypeLatencyHistSeqr[i] 189 ->init(10) 190 .name(pName + csprintf(".%s.hit_latency_hist_seqr", 191 RubyRequestType(i))) 192 .desc("") 193 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 194 195 m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram()); 196 m_missTypeLatencyHistSeqr[i] 197 ->init(10) 198 .name(pName + csprintf(".%s.miss_latency_hist_seqr", 199 RubyRequestType(i))) 200 .desc("") 201 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 202 203 m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram()); 204 m_missTypeLatencyHistCoalsr[i] 205 ->init(10) 206 .name(pName + csprintf(".%s.miss_latency_hist_coalsr", 207 
RubyRequestType(i))) 208 .desc("") 209 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 210 } 211 212 for (int i = 0; i < MachineType_NUM; i++) { 213 m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram()); 214 m_hitMachLatencyHistSeqr[i] 215 ->init(10) 216 .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr", 217 MachineType(i))) 218 .desc("") 219 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 220 221 m_missMachLatencyHistSeqr.push_back(new Stats::Histogram()); 222 m_missMachLatencyHistSeqr[i] 223 ->init(10) 224 .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr", 225 MachineType(i))) 226 .desc("") 227 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 228 229 m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram()); 230 m_missMachLatencyHistCoalsr[i] 231 ->init(10) 232 .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr", 233 MachineType(i))) 234 .desc("") 235 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 236 237 m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram()); 238 m_IssueToInitialDelayHistSeqr[i] 239 ->init(10) 240 .name(pName + csprintf( 241 ".%s.miss_latency_hist_seqr.issue_to_initial_request", 242 MachineType(i))) 243 .desc("") 244 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 245 246 m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram()); 247 m_IssueToInitialDelayHistCoalsr[i] 248 ->init(10) 249 .name(pName + csprintf( 250 ".%s.miss_latency_hist_coalsr.issue_to_initial_request", 251 MachineType(i))) 252 .desc("") 253 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 254 255 m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram()); 256 m_InitialToForwardDelayHistSeqr[i] 257 ->init(10) 258 .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward", 259 MachineType(i))) 260 .desc("") 261 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 262 263 m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram()); 264 m_InitialToForwardDelayHistCoalsr[i] 265 
->init(10) 266 .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward", 267 MachineType(i))) 268 .desc("") 269 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 270 271 m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram()); 272 m_ForwardToFirstResponseDelayHistSeqr[i] 273 ->init(10) 274 .name(pName + csprintf( 275 ".%s.miss_latency_hist_seqr.forward_to_first_response", 276 MachineType(i))) 277 .desc("") 278 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 279 280 m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram()); 281 m_ForwardToFirstResponseDelayHistCoalsr[i] 282 ->init(10) 283 .name(pName + csprintf( 284 ".%s.miss_latency_hist_coalsr.forward_to_first_response", 285 MachineType(i))) 286 .desc("") 287 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 288 289 m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram()); 290 m_FirstResponseToCompletionDelayHistSeqr[i] 291 ->init(10) 292 .name(pName + csprintf( 293 ".%s.miss_latency_hist_seqr.first_response_to_completion", 294 MachineType(i))) 295 .desc("") 296 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 297 298 m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram()); 299 m_FirstResponseToCompletionDelayHistCoalsr[i] 300 ->init(10) 301 .name(pName + csprintf( 302 ".%s.miss_latency_hist_coalsr.first_response_to_completion", 303 MachineType(i))) 304 .desc("") 305 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 306 307 m_IncompleteTimesSeqr[i] 308 .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i))) 309 .desc("") 310 .flags(Stats::nozero); 311 } 312 313 for (int i = 0; i < RubyRequestType_NUM; i++) { 314 m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); 315 m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); 316 m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>()); 317 318 for (int j = 0; j < MachineType_NUM; j++) { 
319 m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); 320 m_hitTypeMachLatencyHistSeqr[i][j] 321 ->init(10) 322 .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr", 323 RubyRequestType(i), MachineType(j))) 324 .desc("") 325 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 326 327 m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); 328 m_missTypeMachLatencyHistSeqr[i][j] 329 ->init(10) 330 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr", 331 RubyRequestType(i), MachineType(j))) 332 .desc("") 333 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 334 335 m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram()); 336 m_missTypeMachLatencyHistCoalsr[i][j] 337 ->init(10) 338 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr", 339 RubyRequestType(i), MachineType(j))) 340 .desc("") 341 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 342 } 343 } 344} 345 346void 347Profiler::collateStats() 348{ 349 if (!m_all_instructions) { 350 m_address_profiler_ptr->collateStats(); 351 } 352 353 if (m_all_instructions) { 354 m_inst_profiler_ptr->collateStats(); 355 } 356 357 for (uint32_t i = 0; i < MachineType_NUM; i++) { 358 for (map<uint32_t, AbstractController*>::iterator it = 359 m_ruby_system->m_abstract_controls[i].begin(); 360 it != m_ruby_system->m_abstract_controls[i].end(); ++it) { 361 362 AbstractController *ctr = (*it).second; 363 delayHistogram.add(ctr->getDelayHist()); 364 365 for (uint32_t i = 0; i < m_num_vnets; i++) { 366 delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); 367 } 368 } 369 } 370 371 for (uint32_t i = 0; i < MachineType_NUM; i++) { 372 for (map<uint32_t, AbstractController*>::iterator it = 373 m_ruby_system->m_abstract_controls[i].begin(); 374 it != m_ruby_system->m_abstract_controls[i].end(); ++it) { 375 376 AbstractController *ctr = (*it).second; 377 Sequencer *seq = ctr->getCPUSequencer(); 378 if (seq != NULL) { 379 
m_outstandReqHistSeqr.add(seq->getOutstandReqHist()); 380 } 381#ifdef BUILD_GPU 382 GPUCoalescer *coal = ctr->getGPUCoalescer(); 383 if (coal != NULL) { 384 m_outstandReqHistCoalsr.add(coal->getOutstandReqHist()); 385 } 386#endif 387 } 388 } 389 390 for (uint32_t i = 0; i < MachineType_NUM; i++) { 391 for (map<uint32_t, AbstractController*>::iterator it = 392 m_ruby_system->m_abstract_controls[i].begin(); 393 it != m_ruby_system->m_abstract_controls[i].end(); ++it) { 394 395 AbstractController *ctr = (*it).second; 396 Sequencer *seq = ctr->getCPUSequencer(); 397 if (seq != NULL) { 398 // add all the latencies 399 m_latencyHistSeqr.add(seq->getLatencyHist()); 400 m_hitLatencyHistSeqr.add(seq->getHitLatencyHist()); 401 m_missLatencyHistSeqr.add(seq->getMissLatencyHist()); 402 403 // add the per request type latencies 404 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { 405 m_typeLatencyHistSeqr[j] 406 ->add(seq->getTypeLatencyHist(j)); 407 m_hitTypeLatencyHistSeqr[j] 408 ->add(seq->getHitTypeLatencyHist(j)); 409 m_missTypeLatencyHistSeqr[j] 410 ->add(seq->getMissTypeLatencyHist(j)); 411 } 412 413 // add the per machine type miss latencies 414 for (uint32_t j = 0; j < MachineType_NUM; ++j) { 415 m_hitMachLatencyHistSeqr[j] 416 ->add(seq->getHitMachLatencyHist(j)); 417 m_missMachLatencyHistSeqr[j] 418 ->add(seq->getMissMachLatencyHist(j)); 419 420 m_IssueToInitialDelayHistSeqr[j]->add( 421 seq->getIssueToInitialDelayHist(MachineType(j))); 422 423 m_InitialToForwardDelayHistSeqr[j]->add( 424 seq->getInitialToForwardDelayHist(MachineType(j))); 425 m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq-> 426 getForwardRequestToFirstResponseHist(MachineType(j))); 427 428 m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq-> 429 getFirstResponseToCompletionDelayHist( 430 MachineType(j))); 431 m_IncompleteTimesSeqr[j] += 432 seq->getIncompleteTimes(MachineType(j)); 433 } 434 435 // add the per (request, machine) type miss latencies 436 for (uint32_t j = 0; j < 
RubyRequestType_NUM; j++) { 437 for (uint32_t k = 0; k < MachineType_NUM; k++) { 438 m_hitTypeMachLatencyHistSeqr[j][k]->add( 439 seq->getHitTypeMachLatencyHist(j,k)); 440 m_missTypeMachLatencyHistSeqr[j][k]->add( 441 seq->getMissTypeMachLatencyHist(j,k)); 442 } 443 } 444 } 445#ifdef BUILD_GPU 446 GPUCoalescer *coal = ctr->getGPUCoalescer(); 447 if (coal != NULL) { 448 // add all the latencies 449 m_latencyHistCoalsr.add(coal->getLatencyHist()); 450 m_missLatencyHistCoalsr.add(coal->getMissLatencyHist()); 451 452 // add the per request type latencies 453 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { 454 m_typeLatencyHistCoalsr[j] 455 ->add(coal->getTypeLatencyHist(j)); 456 m_missTypeLatencyHistCoalsr[j] 457 ->add(coal->getMissTypeLatencyHist(j)); 458 } 459 460 // add the per machine type miss latencies 461 for (uint32_t j = 0; j < MachineType_NUM; ++j) { 462 m_missMachLatencyHistCoalsr[j] 463 ->add(coal->getMissMachLatencyHist(j)); 464 465 m_IssueToInitialDelayHistCoalsr[j]->add( 466 coal->getIssueToInitialDelayHist(MachineType(j))); 467 468 m_InitialToForwardDelayHistCoalsr[j]->add( 469 coal->getInitialToForwardDelayHist(MachineType(j))); 470 m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal-> 471 getForwardRequestToFirstResponseHist(MachineType(j))); 472 473 m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal-> 474 getFirstResponseToCompletionDelayHist( 475 MachineType(j))); 476 } 477 478 // add the per (request, machine) type miss latencies 479 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 480 for (uint32_t k = 0; k < MachineType_NUM; k++) { 481 m_missTypeMachLatencyHistCoalsr[j][k]->add( 482 coal->getMissTypeMachLatencyHist(j,k)); 483 } 484 } 485 } 486#endif 487 } 488 } 489} 490 491void 492Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) 493{ 494 if (msg.getType() != RubyRequestType_IFETCH) { 495 // Note: The following line should be commented out if you 496 // want to use the special profiling that is part of the GS320 
497 // protocol 498 499 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be 500 // profiled by the AddressProfiler 501 m_address_profiler_ptr-> 502 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), 503 msg.getType(), msg.getAccessMode(), id, false); 504 } 505} 506