Profiler.cc revision 11798:e034a4566653
1/*
2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30   This file has been modified by Kevin Moore and Dan Nussbaum of the
31   Scalable Systems Research Group at Sun Microsystems Laboratories
32   (http://research.sun.com/scalable/) to support the Adaptive
33   Transactional Memory Test Platform (ATMTP).
34
35   Please send email to atmtp-interest@sun.com with feedback, questions, or
36   to request future announcements about ATMTP.
37
38   ----------------------------------------------------------------------
39
40   File modification date: 2008-02-23
41
42   ----------------------------------------------------------------------
43*/
44
45#include "mem/ruby/profiler/Profiler.hh"
46
47#include <sys/types.h>
48#include <unistd.h>
49
50#include <algorithm>
51#include <fstream>
52
53#include "base/stl_helpers.hh"
54#include "base/str.hh"
55#include "mem/protocol/MachineType.hh"
56#include "mem/protocol/RubyRequest.hh"
57#include "mem/ruby/network/Network.hh"
58#include "mem/ruby/profiler/AddressProfiler.hh"
59
60/**
61 * the profiler uses GPUCoalescer code even
62 * though the GPUCoalescer is not built for
63 * all ISAs, which can lead to run/link time
64 * errors. here we guard the coalescer code
65 * with ifdefs as there is no easy way to
66 * refactor this code without removing
67 * GPUCoalescer stats from the profiler.
68 *
69 * eventually we should use probe points
70 * here, but until then these ifdefs will
71 * serve.
72 */
73#ifdef BUILD_GPU
74#include "mem/ruby/system/GPUCoalescer.hh"
75#endif
76
77#include "mem/ruby/system/Sequencer.hh"
78
79using namespace std;
80using m5::stl_helpers::operator<<;
81
82Profiler::Profiler(const RubySystemParams *p, RubySystem *rs)
83    : m_ruby_system(rs), m_hot_lines(p->hot_lines),
84      m_all_instructions(p->all_instructions),
85      m_num_vnets(p->number_of_virtual_networks)
86{
87    m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
88    m_address_profiler_ptr->setHotLines(m_hot_lines);
89    m_address_profiler_ptr->setAllInstructions(m_all_instructions);
90
91    if (m_all_instructions) {
92        m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
93        m_inst_profiler_ptr->setHotLines(m_hot_lines);
94        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
95    }
96}
97
98Profiler::~Profiler()
99{
100}
101
102void
103Profiler::regStats(const std::string &pName)
104{
105    if (!m_all_instructions) {
106        m_address_profiler_ptr->regStats(pName);
107    }
108
109    if (m_all_instructions) {
110        m_inst_profiler_ptr->regStats(pName);
111    }
112
113    delayHistogram
114        .init(10)
115        .name(pName + ".delayHist")
116        .desc("delay histogram for all message")
117        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
118
119    for (int i = 0; i < m_num_vnets; i++) {
120        delayVCHistogram.push_back(new Stats::Histogram());
121        delayVCHistogram[i]
122            ->init(10)
123            .name(pName + csprintf(".delayVCHist.vnet_%i", i))
124            .desc(csprintf("delay histogram for vnet_%i", i))
125            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
126    }
127
128    m_outstandReqHistSeqr
129        .init(10)
130        .name(pName + ".outstanding_req_hist_seqr")
131        .desc("")
132        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
133
134    m_outstandReqHistCoalsr
135        .init(10)
136        .name(pName + ".outstanding_req_hist_coalsr")
137        .desc("")
138        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
139
140    m_latencyHistSeqr
141        .init(10)
142        .name(pName + ".latency_hist_seqr")
143        .desc("")
144        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
145
146    m_latencyHistCoalsr
147        .init(10)
148        .name(pName + ".latency_hist_coalsr")
149        .desc("")
150        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
151
152    m_hitLatencyHistSeqr
153        .init(10)
154        .name(pName + ".hit_latency_hist_seqr")
155        .desc("")
156        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
157
158    m_missLatencyHistSeqr
159        .init(10)
160        .name(pName + ".miss_latency_hist_seqr")
161        .desc("")
162        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
163
164    m_missLatencyHistCoalsr
165        .init(10)
166        .name(pName + ".miss_latency_hist_coalsr")
167        .desc("")
168        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
169
170    for (int i = 0; i < RubyRequestType_NUM; i++) {
171        m_typeLatencyHistSeqr.push_back(new Stats::Histogram());
172        m_typeLatencyHistSeqr[i]
173            ->init(10)
174            .name(pName + csprintf(".%s.latency_hist_seqr",
175                                    RubyRequestType(i)))
176            .desc("")
177            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
178
179        m_typeLatencyHistCoalsr.push_back(new Stats::Histogram());
180        m_typeLatencyHistCoalsr[i]
181            ->init(10)
182            .name(pName + csprintf(".%s.latency_hist_coalsr",
183                                    RubyRequestType(i)))
184            .desc("")
185            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
186
187        m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram());
188        m_hitTypeLatencyHistSeqr[i]
189            ->init(10)
190            .name(pName + csprintf(".%s.hit_latency_hist_seqr",
191                                    RubyRequestType(i)))
192            .desc("")
193            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
194
195        m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram());
196        m_missTypeLatencyHistSeqr[i]
197            ->init(10)
198            .name(pName + csprintf(".%s.miss_latency_hist_seqr",
199                                    RubyRequestType(i)))
200            .desc("")
201            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
202
203        m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram());
204        m_missTypeLatencyHistCoalsr[i]
205            ->init(10)
206            .name(pName + csprintf(".%s.miss_latency_hist_coalsr",
207                                    RubyRequestType(i)))
208            .desc("")
209            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
210    }
211
212    for (int i = 0; i < MachineType_NUM; i++) {
213        m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram());
214        m_hitMachLatencyHistSeqr[i]
215            ->init(10)
216            .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr",
217                                    MachineType(i)))
218            .desc("")
219            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
220
221        m_missMachLatencyHistSeqr.push_back(new Stats::Histogram());
222        m_missMachLatencyHistSeqr[i]
223            ->init(10)
224            .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr",
225                                    MachineType(i)))
226            .desc("")
227            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
228
229        m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram());
230        m_missMachLatencyHistCoalsr[i]
231            ->init(10)
232            .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr",
233                                    MachineType(i)))
234            .desc("")
235            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
236
237        m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram());
238        m_IssueToInitialDelayHistSeqr[i]
239            ->init(10)
240            .name(pName + csprintf(
241                ".%s.miss_latency_hist_seqr.issue_to_initial_request",
242                MachineType(i)))
243            .desc("")
244            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
245
246        m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram());
247        m_IssueToInitialDelayHistCoalsr[i]
248            ->init(10)
249            .name(pName + csprintf(
250                ".%s.miss_latency_hist_coalsr.issue_to_initial_request",
251                MachineType(i)))
252            .desc("")
253            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
254
255        m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram());
256        m_InitialToForwardDelayHistSeqr[i]
257            ->init(10)
258            .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward",
259                                   MachineType(i)))
260            .desc("")
261            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
262
263        m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram());
264        m_InitialToForwardDelayHistCoalsr[i]
265            ->init(10)
266            .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward",
267                                   MachineType(i)))
268            .desc("")
269            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
270
271        m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram());
272        m_ForwardToFirstResponseDelayHistSeqr[i]
273            ->init(10)
274            .name(pName + csprintf(
275                ".%s.miss_latency_hist_seqr.forward_to_first_response",
276                MachineType(i)))
277            .desc("")
278            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
279
280        m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram());
281        m_ForwardToFirstResponseDelayHistCoalsr[i]
282            ->init(10)
283            .name(pName + csprintf(
284                ".%s.miss_latency_hist_coalsr.forward_to_first_response",
285                MachineType(i)))
286            .desc("")
287            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
288
289        m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram());
290        m_FirstResponseToCompletionDelayHistSeqr[i]
291            ->init(10)
292            .name(pName + csprintf(
293                ".%s.miss_latency_hist_seqr.first_response_to_completion",
294                MachineType(i)))
295            .desc("")
296            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
297
298        m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram());
299        m_FirstResponseToCompletionDelayHistCoalsr[i]
300            ->init(10)
301            .name(pName + csprintf(
302                ".%s.miss_latency_hist_coalsr.first_response_to_completion",
303                MachineType(i)))
304            .desc("")
305            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
306
307        m_IncompleteTimesSeqr[i]
308            .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i)))
309            .desc("")
310            .flags(Stats::nozero);
311    }
312
313    for (int i = 0; i < RubyRequestType_NUM; i++) {
314        m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
315        m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
316        m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>());
317
318        for (int j = 0; j < MachineType_NUM; j++) {
319            m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
320            m_hitTypeMachLatencyHistSeqr[i][j]
321                ->init(10)
322                .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr",
323                                       RubyRequestType(i), MachineType(j)))
324                .desc("")
325                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
326
327            m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
328            m_missTypeMachLatencyHistSeqr[i][j]
329                ->init(10)
330                .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr",
331                                       RubyRequestType(i), MachineType(j)))
332                .desc("")
333                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
334
335            m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram());
336            m_missTypeMachLatencyHistCoalsr[i][j]
337                ->init(10)
338                .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr",
339                                       RubyRequestType(i), MachineType(j)))
340                .desc("")
341                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
342        }
343    }
344}
345
346void
347Profiler::collateStats()
348{
349    if (!m_all_instructions) {
350        m_address_profiler_ptr->collateStats();
351    }
352
353    if (m_all_instructions) {
354        m_inst_profiler_ptr->collateStats();
355    }
356
357    for (uint32_t i = 0; i < MachineType_NUM; i++) {
358        for (map<uint32_t, AbstractController*>::iterator it =
359                  m_ruby_system->m_abstract_controls[i].begin();
360             it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
361
362            AbstractController *ctr = (*it).second;
363            delayHistogram.add(ctr->getDelayHist());
364
365            for (uint32_t i = 0; i < m_num_vnets; i++) {
366                delayVCHistogram[i]->add(ctr->getDelayVCHist(i));
367            }
368        }
369    }
370
371    for (uint32_t i = 0; i < MachineType_NUM; i++) {
372        for (map<uint32_t, AbstractController*>::iterator it =
373                m_ruby_system->m_abstract_controls[i].begin();
374                it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
375
376            AbstractController *ctr = (*it).second;
377            Sequencer *seq = ctr->getCPUSequencer();
378            if (seq != NULL) {
379                m_outstandReqHistSeqr.add(seq->getOutstandReqHist());
380            }
381#ifdef BUILD_GPU
382            GPUCoalescer *coal = ctr->getGPUCoalescer();
383            if (coal != NULL) {
384                m_outstandReqHistCoalsr.add(coal->getOutstandReqHist());
385            }
386#endif
387        }
388    }
389
390    for (uint32_t i = 0; i < MachineType_NUM; i++) {
391        for (map<uint32_t, AbstractController*>::iterator it =
392                m_ruby_system->m_abstract_controls[i].begin();
393                it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
394
395            AbstractController *ctr = (*it).second;
396            Sequencer *seq = ctr->getCPUSequencer();
397            if (seq != NULL) {
398                // add all the latencies
399                m_latencyHistSeqr.add(seq->getLatencyHist());
400                m_hitLatencyHistSeqr.add(seq->getHitLatencyHist());
401                m_missLatencyHistSeqr.add(seq->getMissLatencyHist());
402
403                // add the per request type latencies
404                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
405                    m_typeLatencyHistSeqr[j]
406                        ->add(seq->getTypeLatencyHist(j));
407                    m_hitTypeLatencyHistSeqr[j]
408                        ->add(seq->getHitTypeLatencyHist(j));
409                    m_missTypeLatencyHistSeqr[j]
410                        ->add(seq->getMissTypeLatencyHist(j));
411                }
412
413                // add the per machine type miss latencies
414                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
415                    m_hitMachLatencyHistSeqr[j]
416                        ->add(seq->getHitMachLatencyHist(j));
417                    m_missMachLatencyHistSeqr[j]
418                        ->add(seq->getMissMachLatencyHist(j));
419
420                    m_IssueToInitialDelayHistSeqr[j]->add(
421                        seq->getIssueToInitialDelayHist(MachineType(j)));
422
423                    m_InitialToForwardDelayHistSeqr[j]->add(
424                        seq->getInitialToForwardDelayHist(MachineType(j)));
425                    m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq->
426                        getForwardRequestToFirstResponseHist(MachineType(j)));
427
428                    m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq->
429                        getFirstResponseToCompletionDelayHist(
430                            MachineType(j)));
431                    m_IncompleteTimesSeqr[j] +=
432                        seq->getIncompleteTimes(MachineType(j));
433                }
434
435                // add the per (request, machine) type miss latencies
436                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
437                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
438                        m_hitTypeMachLatencyHistSeqr[j][k]->add(
439                                seq->getHitTypeMachLatencyHist(j,k));
440                        m_missTypeMachLatencyHistSeqr[j][k]->add(
441                                seq->getMissTypeMachLatencyHist(j,k));
442                    }
443                }
444            }
445#ifdef BUILD_GPU
446            GPUCoalescer *coal = ctr->getGPUCoalescer();
447            if (coal != NULL) {
448                // add all the latencies
449                m_latencyHistCoalsr.add(coal->getLatencyHist());
450                m_missLatencyHistCoalsr.add(coal->getMissLatencyHist());
451
452                // add the per request type latencies
453                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
454                    m_typeLatencyHistCoalsr[j]
455                        ->add(coal->getTypeLatencyHist(j));
456                    m_missTypeLatencyHistCoalsr[j]
457                        ->add(coal->getMissTypeLatencyHist(j));
458                }
459
460                // add the per machine type miss latencies
461                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
462                    m_missMachLatencyHistCoalsr[j]
463                        ->add(coal->getMissMachLatencyHist(j));
464
465                    m_IssueToInitialDelayHistCoalsr[j]->add(
466                        coal->getIssueToInitialDelayHist(MachineType(j)));
467
468                    m_InitialToForwardDelayHistCoalsr[j]->add(
469                        coal->getInitialToForwardDelayHist(MachineType(j)));
470                    m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal->
471                        getForwardRequestToFirstResponseHist(MachineType(j)));
472
473                    m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal->
474                        getFirstResponseToCompletionDelayHist(
475                            MachineType(j)));
476                }
477
478                // add the per (request, machine) type miss latencies
479                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
480                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
481                        m_missTypeMachLatencyHistCoalsr[j][k]->add(
482                                coal->getMissTypeMachLatencyHist(j,k));
483                    }
484                }
485            }
486#endif
487        }
488    }
489}
490
491void
492Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
493{
494    if (msg.getType() != RubyRequestType_IFETCH) {
495        // Note: The following line should be commented out if you
496        // want to use the special profiling that is part of the GS320
497        // protocol
498
499        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
500        // profiled by the AddressProfiler
501        m_address_profiler_ptr->
502            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
503                           msg.getType(), msg.getAccessMode(), id, false);
504    }
505}
506