1/* 2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 This file has been modified by Kevin Moore and Dan Nussbaum of the 31 Scalable Systems Research Group at Sun Microsystems Laboratories 32 (http://research.sun.com/scalable/) to support the Adaptive 33 Transactional Memory Test Platform (ATMTP). 34 35 Please send email to atmtp-interest@sun.com with feedback, questions, or 36 to request future announcements about ATMTP. 37 38 ---------------------------------------------------------------------- 39 40 File modification date: 2008-02-23 41 42 ---------------------------------------------------------------------- 43*/ 44 45#include "mem/ruby/profiler/Profiler.hh" 46 47#include <sys/types.h> 48#include <unistd.h> 49 50#include <algorithm> 51#include <fstream> 52 53#include "base/stl_helpers.hh" 54#include "base/str.hh" 55#include "mem/ruby/network/Network.hh" 56#include "mem/ruby/profiler/AddressProfiler.hh" 57#include "mem/ruby/protocol/MachineType.hh" 58#include "mem/ruby/protocol/RubyRequest.hh" 59 60/** 61 * the profiler uses GPUCoalescer code even 62 * though the GPUCoalescer is not built for 63 * all ISAs, which can lead to run/link time 64 * errors. here we guard the coalescer code 65 * with ifdefs as there is no easy way to 66 * refactor this code without removing 67 * GPUCoalescer stats from the profiler. 68 * 69 * eventually we should use probe points 70 * here, but until then these ifdefs will 71 * serve. 72 */ 73#ifdef BUILD_GPU 74#include "mem/ruby/system/GPUCoalescer.hh" 75 76#endif 77 78#include "mem/ruby/system/Sequencer.hh" 79 80using namespace std; 81using m5::stl_helpers::operator<<; 82 83Profiler::Profiler(const RubySystemParams *p, RubySystem *rs) 84 : m_ruby_system(rs), m_hot_lines(p->hot_lines), 85 m_all_instructions(p->all_instructions), 86 m_num_vnets(p->number_of_virtual_networks) 87{ 88 m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); 89 m_address_profiler_ptr->setHotLines(m_hot_lines); 90 m_address_profiler_ptr->setAllInstructions(m_all_instructions); 91 92 if (m_all_instructions) { 93 m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); 94 m_inst_profiler_ptr->setHotLines(m_hot_lines); 95 m_inst_profiler_ptr->setAllInstructions(m_all_instructions); 96 } 97} 98 99Profiler::~Profiler() 100{ 101} 102 103void 104Profiler::regStats(const std::string &pName) 105{ 106 if (!m_all_instructions) { 107 m_address_profiler_ptr->regStats(pName); 108 } 109 110 if (m_all_instructions) { 111 m_inst_profiler_ptr->regStats(pName); 112 } 113 114 delayHistogram 115 .init(10) 116 .name(pName + ".delayHist") 117 .desc("delay histogram for all message") 118 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 119 120 for (int i = 0; i < m_num_vnets; i++) { 121 delayVCHistogram.push_back(new Stats::Histogram()); 122 delayVCHistogram[i] 123 ->init(10) 124 .name(pName + csprintf(".delayVCHist.vnet_%i", i)) 125 .desc(csprintf("delay histogram for vnet_%i", i)) 126 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 127 } 128 129 m_outstandReqHistSeqr 130 .init(10) 131 .name(pName + ".outstanding_req_hist_seqr") 132 .desc("") 133 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 134 135 m_outstandReqHistCoalsr 136 .init(10) 137 .name(pName + ".outstanding_req_hist_coalsr") 138 .desc("") 139 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 140 141 m_latencyHistSeqr 142 .init(10) 143 .name(pName + ".latency_hist_seqr") 144 .desc("") 145 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 146 147 m_latencyHistCoalsr 148 .init(10) 149 .name(pName + ".latency_hist_coalsr") 150 .desc("") 151 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 152 153 m_hitLatencyHistSeqr 154 .init(10) 155 .name(pName + ".hit_latency_hist_seqr") 156 .desc("") 157 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 158 159 m_missLatencyHistSeqr 160 .init(10) 161 .name(pName + ".miss_latency_hist_seqr") 162 .desc("") 163 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 164 165 m_missLatencyHistCoalsr 166 .init(10) 167 .name(pName + ".miss_latency_hist_coalsr") 168 .desc("") 169 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 170 171 for (int i = 0; i < RubyRequestType_NUM; i++) { 172 m_typeLatencyHistSeqr.push_back(new Stats::Histogram()); 173 m_typeLatencyHistSeqr[i] 174 ->init(10) 175 .name(pName + csprintf(".%s.latency_hist_seqr", 176 RubyRequestType(i))) 177 .desc("") 178 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 179 180 m_typeLatencyHistCoalsr.push_back(new Stats::Histogram()); 181 m_typeLatencyHistCoalsr[i] 182 ->init(10) 183 .name(pName + csprintf(".%s.latency_hist_coalsr", 184 RubyRequestType(i))) 185 .desc("") 186 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 187 188 m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram()); 189 m_hitTypeLatencyHistSeqr[i] 190 ->init(10) 191 .name(pName + csprintf(".%s.hit_latency_hist_seqr", 192 RubyRequestType(i))) 193 .desc("") 194 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 195 196 m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram()); 197 m_missTypeLatencyHistSeqr[i] 198 ->init(10) 199 .name(pName + csprintf(".%s.miss_latency_hist_seqr", 200 RubyRequestType(i))) 201 .desc("") 202 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 203 204 m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram()); 205 m_missTypeLatencyHistCoalsr[i] 206 ->init(10) 207 .name(pName + csprintf(".%s.miss_latency_hist_coalsr", 208 RubyRequestType(i))) 209 .desc("") 210 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 211 } 212 213 for (int i = 0; i < MachineType_NUM; i++) { 214 m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram()); 215 m_hitMachLatencyHistSeqr[i] 216 ->init(10) 217 .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr", 218 MachineType(i))) 219 .desc("") 220 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 221 222 m_missMachLatencyHistSeqr.push_back(new Stats::Histogram()); 223 m_missMachLatencyHistSeqr[i] 224 ->init(10) 225 .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr", 226 MachineType(i))) 227 .desc("") 228 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 229 230 m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram()); 231 m_missMachLatencyHistCoalsr[i] 232 ->init(10) 233 .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr", 234 MachineType(i))) 235 .desc("") 236 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 237 238 m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram()); 239 m_IssueToInitialDelayHistSeqr[i] 240 ->init(10) 241 .name(pName + csprintf( 242 ".%s.miss_latency_hist_seqr.issue_to_initial_request", 243 MachineType(i))) 244 .desc("") 245 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 246 247 m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram()); 248 m_IssueToInitialDelayHistCoalsr[i] 249 ->init(10) 250 .name(pName + csprintf( 251 ".%s.miss_latency_hist_coalsr.issue_to_initial_request", 252 MachineType(i))) 253 .desc("") 254 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 255 256 m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram()); 257 m_InitialToForwardDelayHistSeqr[i] 258 ->init(10) 259 .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward", 260 MachineType(i))) 261 .desc("") 262 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 263 264 m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram()); 265 m_InitialToForwardDelayHistCoalsr[i] 266 ->init(10) 267 .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward", 268 MachineType(i))) 269 .desc("") 270 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 271 272 m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram()); 273 m_ForwardToFirstResponseDelayHistSeqr[i] 274 ->init(10) 275 .name(pName + csprintf( 276 ".%s.miss_latency_hist_seqr.forward_to_first_response", 277 MachineType(i))) 278 .desc("") 279 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 280 281 m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram()); 282 m_ForwardToFirstResponseDelayHistCoalsr[i] 283 ->init(10) 284 .name(pName + csprintf( 285 ".%s.miss_latency_hist_coalsr.forward_to_first_response", 286 MachineType(i))) 287 .desc("") 288 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 289 290 m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram()); 291 m_FirstResponseToCompletionDelayHistSeqr[i] 292 ->init(10) 293 .name(pName + csprintf( 294 ".%s.miss_latency_hist_seqr.first_response_to_completion", 295 MachineType(i))) 296 .desc("") 297 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 298 299 m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram()); 300 m_FirstResponseToCompletionDelayHistCoalsr[i] 301 ->init(10) 302 .name(pName + csprintf( 303 ".%s.miss_latency_hist_coalsr.first_response_to_completion", 304 MachineType(i))) 305 .desc("") 306 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 307 308 m_IncompleteTimesSeqr[i] 309 .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i))) 310 .desc("") 311 .flags(Stats::nozero); 312 } 313 314 for (int i = 0; i < RubyRequestType_NUM; i++) { 315 m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); 316 m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>()); 317 m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>()); 318 319 for (int j = 0; j < MachineType_NUM; j++) { 320 m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); 321 m_hitTypeMachLatencyHistSeqr[i][j] 322 ->init(10) 323 .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr", 324 RubyRequestType(i), MachineType(j))) 325 .desc("") 326 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 327 328 m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram()); 329 m_missTypeMachLatencyHistSeqr[i][j] 330 ->init(10) 331 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr", 332 RubyRequestType(i), MachineType(j))) 333 .desc("") 334 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 335 336 m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram()); 337 m_missTypeMachLatencyHistCoalsr[i][j] 338 ->init(10) 339 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr", 340 RubyRequestType(i), MachineType(j))) 341 .desc("") 342 .flags(Stats::nozero | Stats::pdf | Stats::oneline); 343 } 344 } 345} 346 347void 348Profiler::collateStats() 349{ 350 if (!m_all_instructions) { 351 m_address_profiler_ptr->collateStats(); 352 } 353 354 if (m_all_instructions) { 355 m_inst_profiler_ptr->collateStats(); 356 } 357 358 for (uint32_t i = 0; i < MachineType_NUM; i++) { 359 for (map<uint32_t, AbstractController*>::iterator it = 360 m_ruby_system->m_abstract_controls[i].begin(); 361 it != m_ruby_system->m_abstract_controls[i].end(); ++it) { 362 363 AbstractController *ctr = (*it).second; 364 delayHistogram.add(ctr->getDelayHist()); 365 366 for (uint32_t i = 0; i < m_num_vnets; i++) { 367 delayVCHistogram[i]->add(ctr->getDelayVCHist(i)); 368 } 369 } 370 } 371 372 for (uint32_t i = 0; i < MachineType_NUM; i++) { 373 for (map<uint32_t, AbstractController*>::iterator it = 374 m_ruby_system->m_abstract_controls[i].begin(); 375 it != m_ruby_system->m_abstract_controls[i].end(); ++it) { 376 377 AbstractController *ctr = (*it).second; 378 Sequencer *seq = ctr->getCPUSequencer(); 379 if (seq != NULL) { 380 m_outstandReqHistSeqr.add(seq->getOutstandReqHist()); 381 } 382#ifdef BUILD_GPU 383 GPUCoalescer *coal = ctr->getGPUCoalescer(); 384 if (coal != NULL) { 385 m_outstandReqHistCoalsr.add(coal->getOutstandReqHist()); 386 } 387#endif 388 } 389 } 390 391 for (uint32_t i = 0; i < MachineType_NUM; i++) { 392 for (map<uint32_t, AbstractController*>::iterator it = 393 m_ruby_system->m_abstract_controls[i].begin(); 394 it != m_ruby_system->m_abstract_controls[i].end(); ++it) { 395 396 AbstractController *ctr = (*it).second; 397 Sequencer *seq = ctr->getCPUSequencer(); 398 if (seq != NULL) { 399 // add all the latencies 400 m_latencyHistSeqr.add(seq->getLatencyHist()); 401 m_hitLatencyHistSeqr.add(seq->getHitLatencyHist()); 402 m_missLatencyHistSeqr.add(seq->getMissLatencyHist()); 403 404 // add the per request type latencies 405 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { 406 m_typeLatencyHistSeqr[j] 407 ->add(seq->getTypeLatencyHist(j)); 408 m_hitTypeLatencyHistSeqr[j] 409 ->add(seq->getHitTypeLatencyHist(j)); 410 m_missTypeLatencyHistSeqr[j] 411 ->add(seq->getMissTypeLatencyHist(j)); 412 } 413 414 // add the per machine type miss latencies 415 for (uint32_t j = 0; j < MachineType_NUM; ++j) { 416 m_hitMachLatencyHistSeqr[j] 417 ->add(seq->getHitMachLatencyHist(j)); 418 m_missMachLatencyHistSeqr[j] 419 ->add(seq->getMissMachLatencyHist(j)); 420 421 m_IssueToInitialDelayHistSeqr[j]->add( 422 seq->getIssueToInitialDelayHist(MachineType(j))); 423 424 m_InitialToForwardDelayHistSeqr[j]->add( 425 seq->getInitialToForwardDelayHist(MachineType(j))); 426 m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq-> 427 getForwardRequestToFirstResponseHist(MachineType(j))); 428 429 m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq-> 430 getFirstResponseToCompletionDelayHist( 431 MachineType(j))); 432 m_IncompleteTimesSeqr[j] += 433 seq->getIncompleteTimes(MachineType(j)); 434 } 435 436 // add the per (request, machine) type miss latencies 437 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 438 for (uint32_t k = 0; k < MachineType_NUM; k++) { 439 m_hitTypeMachLatencyHistSeqr[j][k]->add( 440 seq->getHitTypeMachLatencyHist(j,k)); 441 m_missTypeMachLatencyHistSeqr[j][k]->add( 442 seq->getMissTypeMachLatencyHist(j,k)); 443 } 444 } 445 } 446#ifdef BUILD_GPU 447 GPUCoalescer *coal = ctr->getGPUCoalescer(); 448 if (coal != NULL) { 449 // add all the latencies 450 m_latencyHistCoalsr.add(coal->getLatencyHist()); 451 m_missLatencyHistCoalsr.add(coal->getMissLatencyHist()); 452 453 // add the per request type latencies 454 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) { 455 m_typeLatencyHistCoalsr[j] 456 ->add(coal->getTypeLatencyHist(j)); 457 m_missTypeLatencyHistCoalsr[j] 458 ->add(coal->getMissTypeLatencyHist(j)); 459 } 460 461 // add the per machine type miss latencies 462 for (uint32_t j = 0; j < MachineType_NUM; ++j) { 463 m_missMachLatencyHistCoalsr[j] 464 ->add(coal->getMissMachLatencyHist(j)); 465 466 m_IssueToInitialDelayHistCoalsr[j]->add( 467 coal->getIssueToInitialDelayHist(MachineType(j))); 468 469 m_InitialToForwardDelayHistCoalsr[j]->add( 470 coal->getInitialToForwardDelayHist(MachineType(j))); 471 m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal-> 472 getForwardRequestToFirstResponseHist(MachineType(j))); 473 474 m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal-> 475 getFirstResponseToCompletionDelayHist( 476 MachineType(j))); 477 } 478 479 // add the per (request, machine) type miss latencies 480 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) { 481 for (uint32_t k = 0; k < MachineType_NUM; k++) { 482 m_missTypeMachLatencyHistCoalsr[j][k]->add( 483 coal->getMissTypeMachLatencyHist(j,k)); 484 } 485 } 486 } 487#endif 488 } 489 } 490} 491 492void 493Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id) 494{ 495 if (msg.getType() != RubyRequestType_IFETCH) { 496 // Note: The following line should be commented out if you 497 // want to use the special profiling that is part of the GS320 498 // protocol 499 500 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be 501 // profiled by the AddressProfiler 502 m_address_profiler_ptr-> 503 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(), 504 msg.getType(), msg.getAccessMode(), id, false); 505 } 506} 507