// Profiler.cc revision 11309:9be8a40026df
1/* 2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 
34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45#include "mem/ruby/profiler/Profiler.hh" 46 47#include <sys/types.h> 48#include <unistd.h> 49 50#include <algorithm> 51#include <fstream> 52 53#include "base/stl_helpers.hh" 54#include "base/str.hh" 55#include "mem/protocol/MachineType.hh" 56#include "mem/protocol/RubyRequest.hh" 57#include "mem/ruby/network/Network.hh" 58#include "mem/ruby/profiler/AddressProfiler.hh" 59#include "mem/ruby/system/GPUCoalescer.hh" 60#include "mem/ruby/system/Sequencer.hh" 61 62using namespace std; 63using m5::stl_helpers::operator<<; 64 65Profiler::Profiler(const RubySystemParams *p, RubySystem *rs) 66 : m_ruby_system(rs), m_hot_lines(p->hot_lines), 67 m_all_instructions(p->all_instructions), 68 m_num_vnets(p->number_of_virtual_networks) 69{ 70 m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); 71 m_address_profiler_ptr->setHotLines(m_hot_lines); 72 m_address_profiler_ptr->setAllInstructions(m_all_instructions); 73 74 if (m_all_instructions) { 75 m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); 76 m_inst_profiler_ptr->setHotLines(m_hot_lines); 77 m_inst_profiler_ptr->setAllInstructions(m_all_instructions); 78 } 79} 80 81Profiler::~Profiler() 82{ 83} 84 85void 86Profiler::regStats(const std::string &pName) 87{ 88 if (!m_all_instructions) { 89 m_address_profiler_ptr->regStats(pName); 90 } 91 92 if (m_all_instructions) { 93 m_inst_profiler_ptr->regStats(pName); 94 } 95 96 delayHistogram 97 .init(10) 98 .name(pName + ".delayHist") 99 .desc("delay histogram for all message") 100 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 101 102 for (int i = 0; i < m_num_vnets; i++) { 103 delayVCHistogram.push_back(new 
Stats::Histogram()); 104 delayVCHistogram[i] 105 ->init(10) 106 .name(pName + csprintf(".delayVCHist.vnet_%i", i)) 107 .desc(csprintf("delay histogram for vnet_%i", i)) 108 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 109 } 110 111 m_outstandReqHistSeqr 112 .init(10) 113 .name(pName + ".outstanding_req_hist_seqr") 114 .desc("") 115 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 116 117 m_outstandReqHistCoalsr 118 .init(10) 119 .name(pName + ".outstanding_req_hist_coalsr") 120 .desc("") 121 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 122 123 m_latencyHistSeqr 124 .init(10) 125 .name(pName + ".latency_hist_seqr") 126 .desc("") 127 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 128 129 m_latencyHistCoalsr 130 .init(10) 131 .name(pName + ".latency_hist_coalsr") 132 .desc("") 133 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 134 135 m_hitLatencyHistSeqr 136 .init(10) 137 .name(pName + ".hit_latency_hist_seqr") 138 .desc("") 139 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 140 141 m_missLatencyHistSeqr 142 .init(10) 143 .name(pName + ".miss_latency_hist_seqr") 144 .desc("") 145 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 146 147 m_missLatencyHistCoalsr 148 .init(10) 149 .name(pName + ".miss_latency_hist_coalsr") 150 .desc("") 151 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 152 153 for (int i = 0; i < RubyRequestType_NUM; i++) { 154 m_typeLatencyHistSeqr.push_back(new Stats::Histogram()); 155 m_typeLatencyHistSeqr[i] 156 ->init(10) 157 .name(pName + csprintf(".%s.latency_hist_seqr", 158 RubyRequestType(i))) 159 .desc("") 160 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 161 162 m_typeLatencyHistCoalsr.push_back(new Stats::Histogram()); 163 m_typeLatencyHistCoalsr[i] 164 ->init(10) 165 .name(pName + csprintf(".%s.latency_hist_coalsr", 166 RubyRequestType(i))) 167 .desc("") 168 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 169 170 m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram()); 171 
m_hitTypeLatencyHistSeqr[i] 172 ->init(10) 173 .name(pName + csprintf(".%s.hit_latency_hist_seqr", 174 RubyRequestType(i))) 175 .desc("") 176 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 177 178 m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram()); 179 m_missTypeLatencyHistSeqr[i] 180 ->init(10) 181 .name(pName + csprintf(".%s.miss_latency_hist_seqr", 182 RubyRequestType(i))) 183 .desc("") 184 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 185 186 m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram()); 187 m_missTypeLatencyHistCoalsr[i] 188 ->init(10) 189 .name(pName + csprintf(".%s.miss_latency_hist_coalsr", 190 RubyRequestType(i))) 191 .desc("") 192 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 193 } 194 195 for (int i = 0; i < MachineType_NUM; i++) { 196 m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram()); 197 m_hitMachLatencyHistSeqr[i] 198 ->init(10) 199 .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr", 200 MachineType(i))) 201 .desc("") 202 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 203 204 m_missMachLatencyHistSeqr.push_back(new Stats::Histogram()); 205 m_missMachLatencyHistSeqr[i] 206 ->init(10) 207 .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr", 208 MachineType(i))) 209 .desc("") 210 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 211 212 m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram()); 213 m_missMachLatencyHistCoalsr[i] 214 ->init(10) 215 .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr", 216 MachineType(i))) 217 .desc("") 218 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 219 220 m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram()); 221 m_IssueToInitialDelayHistSeqr[i] 222 ->init(10) 223 .name(pName + csprintf( 224 ".%s.miss_latency_hist_seqr.issue_to_initial_request", 225 MachineType(i))) 226 .desc("") 227 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 228 229 m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram()); 230 
m_IssueToInitialDelayHistCoalsr[i] 231 ->init(10) 232 .name(pName + csprintf( 233 ".%s.miss_latency_hist_coalsr.issue_to_initial_request", 234 MachineType(i))) 235 .desc("") 236 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 237 238 m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram()); 239 m_InitialToForwardDelayHistSeqr[i] 240 ->init(10) 241 .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward", 242 MachineType(i))) 243 .desc("") 244 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 245 246 m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram()); 247 m_InitialToForwardDelayHistCoalsr[i] 248 ->init(10) 249 .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward", 250 MachineType(i))) 251 .desc("") 252 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 253 254 m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram()); 255 m_ForwardToFirstResponseDelayHistSeqr[i] 256 ->init(10) 257 .name(pName + csprintf( 258 ".%s.miss_latency_hist_seqr.forward_to_first_response", 259 MachineType(i))) 260 .desc("") 261 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 262 263 m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram()); 264 m_ForwardToFirstResponseDelayHistCoalsr[i] 265 ->init(10) 266 .name(pName + csprintf( 267 ".%s.miss_latency_hist_coalsr.forward_to_first_response", 268 MachineType(i))) 269 .desc("") 270 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 271 272 m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram()); 273 m_FirstResponseToCompletionDelayHistSeqr[i] 274 ->init(10) 275 .name(pName + csprintf( 276 ".%s.miss_latency_hist_seqr.first_response_to_completion", 277 MachineType(i))) 278 .desc("") 279 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 280 281 m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram()); 282 m_FirstResponseToCompletionDelayHistCoalsr[i] 283 ->init(10) 284 .name(pName + csprintf( 285 
".%s.miss_latency_hist_coalsr.first_response_to_completion", 286 MachineType(i))) 287 .desc("") 288 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 289 290 m_IncompleteTimesSeqr[i] 291 .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i))) 292 .desc("") 293 .flags(Stats::nozero); 294 } 295 296 for (int i = 0; i < RubyRequestType_NUM; i++) { 297 m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); 298 m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); 299 m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>()); 300 301 for (int j = 0; j < MachineType_NUM; j++) { 302 m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); 303 m_hitTypeMachLatencyHistSeqr[i][j] 304 ->init(10) 305 .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr", 306 RubyRequestType(i), MachineType(j))) 307 .desc("") 308 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 309 310 m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); 311 m_missTypeMachLatencyHistSeqr[i][j] 312 ->init(10) 313 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr", 314 RubyRequestType(i), MachineType(j))) 315 .desc("") 316 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 317 318 m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram()); 319 m_missTypeMachLatencyHistCoalsr[i][j] 320 ->init(10) 321 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr", 322 RubyRequestType(i), MachineType(j))) 323 .desc("") 324 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 325 } 326 } 327} 328 329void 330Profiler::collateStats() 331{ 332 if (!m_all_instructions) { 333 m_address_profiler_ptr->collateStats(); 334 } 335 336 if (m_all_instructions) { 337 m_inst_profiler_ptr->collateStats(); 338 } 339 340 for (uint32_t i = 0; i < MachineType_NUM; i++) { 341 for (map<uint32_t, AbstractController*>::iterator it = 342 m_ruby_system->m_abstract_controls[i].begin(); 343 it != 
m_ruby_system->m_abstract_controls[i].end(); ++it) { 344 345 AbstractController *ctr = (*it).second; 346 delayHistogram.add(ctr->getDelayHist()); 347 348 for (uint32_t i = 0; i < m_num_vnets; i++) { 349 delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); 350 } 351 } 352 } 353 354 for (uint32_t i = 0; i < MachineType_NUM; i++) { 355 for (map<uint32_t, AbstractController*>::iterator it = 356 m_ruby_system->m_abstract_controls[i].begin(); 357 it != m_ruby_system->m_abstract_controls[i].end(); ++it) { 358 359 AbstractController *ctr = (*it).second; 360 Sequencer *seq = ctr->getCPUSequencer(); 361 if (seq != NULL) { 362 m_outstandReqHistSeqr.add(seq->getOutstandReqHist()); 363 } 364 GPUCoalescer *coal = ctr->getGPUCoalescer(); 365 if (coal != NULL) { 366 m_outstandReqHistCoalsr.add(coal->getOutstandReqHist()); 367 } 368 } 369 } 370 371 for (uint32_t i = 0; i < MachineType_NUM; i++) { 372 for (map<uint32_t, AbstractController*>::iterator it = 373 m_ruby_system->m_abstract_controls[i].begin(); 374 it != m_ruby_system->m_abstract_controls[i].end(); ++it) { 375 376 AbstractController *ctr = (*it).second; 377 Sequencer *seq = ctr->getCPUSequencer(); 378 if (seq != NULL) { 379 // add all the latencies 380 m_latencyHistSeqr.add(seq->getLatencyHist()); 381 m_hitLatencyHistSeqr.add(seq->getHitLatencyHist()); 382 m_missLatencyHistSeqr.add(seq->getMissLatencyHist()); 383 384 // add the per request type latencies 385 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { 386 m_typeLatencyHistSeqr[j] 387 ->add(seq->getTypeLatencyHist(j)); 388 m_hitTypeLatencyHistSeqr[j] 389 ->add(seq->getHitTypeLatencyHist(j)); 390 m_missTypeLatencyHistSeqr[j] 391 ->add(seq->getMissTypeLatencyHist(j)); 392 } 393 394 // add the per machine type miss latencies 395 for (uint32_t j = 0; j < MachineType_NUM; ++j) { 396 m_hitMachLatencyHistSeqr[j] 397 ->add(seq->getHitMachLatencyHist(j)); 398 m_missMachLatencyHistSeqr[j] 399 ->add(seq->getMissMachLatencyHist(j)); 400 401 m_IssueToInitialDelayHistSeqr[j]->add( 
402 seq->getIssueToInitialDelayHist(MachineType(j))); 403 404 m_InitialToForwardDelayHistSeqr[j]->add( 405 seq->getInitialToForwardDelayHist(MachineType(j))); 406 m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq-> 407 getForwardRequestToFirstResponseHist(MachineType(j))); 408 409 m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq-> 410 getFirstResponseToCompletionDelayHist( 411 MachineType(j))); 412 m_IncompleteTimesSeqr[j] += 413 seq->getIncompleteTimes(MachineType(j)); 414 } 415 416 // add the per (request, machine) type miss latencies 417 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 418 for (uint32_t k = 0; k < MachineType_NUM; k++) { 419 m_hitTypeMachLatencyHistSeqr[j][k]->add( 420 seq->getHitTypeMachLatencyHist(j,k)); 421 m_missTypeMachLatencyHistSeqr[j][k]->add( 422 seq->getMissTypeMachLatencyHist(j,k)); 423 } 424 } 425 } 426 427 GPUCoalescer *coal = ctr->getGPUCoalescer(); 428 if (coal != NULL) { 429 // add all the latencies 430 m_latencyHistCoalsr.add(coal->getLatencyHist()); 431 m_missLatencyHistCoalsr.add(coal->getMissLatencyHist()); 432 433 // add the per request type latencies 434 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { 435 m_typeLatencyHistCoalsr[j] 436 ->add(coal->getTypeLatencyHist(j)); 437 m_missTypeLatencyHistCoalsr[j] 438 ->add(coal->getMissTypeLatencyHist(j)); 439 } 440 441 // add the per machine type miss latencies 442 for (uint32_t j = 0; j < MachineType_NUM; ++j) { 443 m_missMachLatencyHistCoalsr[j] 444 ->add(coal->getMissMachLatencyHist(j)); 445 446 m_IssueToInitialDelayHistCoalsr[j]->add( 447 coal->getIssueToInitialDelayHist(MachineType(j))); 448 449 m_InitialToForwardDelayHistCoalsr[j]->add( 450 coal->getInitialToForwardDelayHist(MachineType(j))); 451 m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal-> 452 getForwardRequestToFirstResponseHist(MachineType(j))); 453 454 m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal-> 455 getFirstResponseToCompletionDelayHist( 456 MachineType(j))); 457 } 458 459 // add the 
per (request, machine) type miss latencies 460 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 461 for (uint32_t k = 0; k < MachineType_NUM; k++) { 462 m_missTypeMachLatencyHistCoalsr[j][k]->add( 463 coal->getMissTypeMachLatencyHist(j,k)); 464 } 465 } 466 } 467 } 468 } 469} 470 471void 472Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) 473{ 474 if (msg.getType() != RubyRequestType_IFETCH) { 475 // Note: The following line should be commented out if you 476 // want to use the special profiling that is part of the GS320 477 // protocol 478 479 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be 480 // profiled by the AddressProfiler 481 m_address_profiler_ptr-> 482 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), 483 msg.getType(), msg.getAccessMode(), id, false); 484 } 485} 486