1/*
2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30   This file has been modified by Kevin Moore and Dan Nussbaum of the
31   Scalable Systems Research Group at Sun Microsystems Laboratories
32   (http://research.sun.com/scalable/) to support the Adaptive
33   Transactional Memory Test Platform (ATMTP).
34
35   Please send email to atmtp-interest@sun.com with feedback, questions, or
36   to request future announcements about ATMTP.
37
38   ----------------------------------------------------------------------
39
40   File modification date: 2008-02-23
41
42   ----------------------------------------------------------------------
43*/
44
45#include "mem/ruby/profiler/Profiler.hh"
46
47#include <sys/types.h>
48#include <unistd.h>
49
50#include <algorithm>
51#include <fstream>
52
53#include "base/stl_helpers.hh"
54#include "base/str.hh"
55#include "mem/ruby/network/Network.hh"
56#include "mem/ruby/profiler/AddressProfiler.hh"
57#include "mem/ruby/protocol/MachineType.hh"
58#include "mem/ruby/protocol/RubyRequest.hh"
59
/**
 * The profiler uses GPUCoalescer code even though the GPUCoalescer is
 * not built for all ISAs, which can lead to run-time or link-time
 * errors. The coalescer code is therefore guarded with ifdefs, as
 * there is no easy way to refactor this code without removing the
 * GPUCoalescer stats from the profiler.
 *
 * Eventually probe points should be used here, but until then these
 * ifdefs will serve.
 */
73#ifdef BUILD_GPU
74#include "mem/ruby/system/GPUCoalescer.hh"
75
76#endif
77
78#include "mem/ruby/system/Sequencer.hh"
79
80using namespace std;
81using m5::stl_helpers::operator<<;
82
83Profiler::Profiler(const RubySystemParams *p, RubySystem *rs)
84    : m_ruby_system(rs), m_hot_lines(p->hot_lines),
85      m_all_instructions(p->all_instructions),
86      m_num_vnets(p->number_of_virtual_networks)
87{
88    m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
89    m_address_profiler_ptr->setHotLines(m_hot_lines);
90    m_address_profiler_ptr->setAllInstructions(m_all_instructions);
91
92    if (m_all_instructions) {
93        m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
94        m_inst_profiler_ptr->setHotLines(m_hot_lines);
95        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
96    }
97}
98
99Profiler::~Profiler()
100{
101}
102
103void
104Profiler::regStats(const std::string &pName)
105{
106    if (!m_all_instructions) {
107        m_address_profiler_ptr->regStats(pName);
108    }
109
110    if (m_all_instructions) {
111        m_inst_profiler_ptr->regStats(pName);
112    }
113
114    delayHistogram
115        .init(10)
116        .name(pName + ".delayHist")
117        .desc("delay histogram for all message")
118        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
119
120    for (int i = 0; i < m_num_vnets; i++) {
121        delayVCHistogram.push_back(new Stats::Histogram());
122        delayVCHistogram[i]
123            ->init(10)
124            .name(pName + csprintf(".delayVCHist.vnet_%i", i))
125            .desc(csprintf("delay histogram for vnet_%i", i))
126            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
127    }
128
129    m_outstandReqHistSeqr
130        .init(10)
131        .name(pName + ".outstanding_req_hist_seqr")
132        .desc("")
133        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
134
135    m_outstandReqHistCoalsr
136        .init(10)
137        .name(pName + ".outstanding_req_hist_coalsr")
138        .desc("")
139        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
140
141    m_latencyHistSeqr
142        .init(10)
143        .name(pName + ".latency_hist_seqr")
144        .desc("")
145        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
146
147    m_latencyHistCoalsr
148        .init(10)
149        .name(pName + ".latency_hist_coalsr")
150        .desc("")
151        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
152
153    m_hitLatencyHistSeqr
154        .init(10)
155        .name(pName + ".hit_latency_hist_seqr")
156        .desc("")
157        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
158
159    m_missLatencyHistSeqr
160        .init(10)
161        .name(pName + ".miss_latency_hist_seqr")
162        .desc("")
163        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
164
165    m_missLatencyHistCoalsr
166        .init(10)
167        .name(pName + ".miss_latency_hist_coalsr")
168        .desc("")
169        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
170
171    for (int i = 0; i < RubyRequestType_NUM; i++) {
172        m_typeLatencyHistSeqr.push_back(new Stats::Histogram());
173        m_typeLatencyHistSeqr[i]
174            ->init(10)
175            .name(pName + csprintf(".%s.latency_hist_seqr",
176                                    RubyRequestType(i)))
177            .desc("")
178            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
179
180        m_typeLatencyHistCoalsr.push_back(new Stats::Histogram());
181        m_typeLatencyHistCoalsr[i]
182            ->init(10)
183            .name(pName + csprintf(".%s.latency_hist_coalsr",
184                                    RubyRequestType(i)))
185            .desc("")
186            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
187
188        m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram());
189        m_hitTypeLatencyHistSeqr[i]
190            ->init(10)
191            .name(pName + csprintf(".%s.hit_latency_hist_seqr",
192                                    RubyRequestType(i)))
193            .desc("")
194            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
195
196        m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram());
197        m_missTypeLatencyHistSeqr[i]
198            ->init(10)
199            .name(pName + csprintf(".%s.miss_latency_hist_seqr",
200                                    RubyRequestType(i)))
201            .desc("")
202            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
203
204        m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram());
205        m_missTypeLatencyHistCoalsr[i]
206            ->init(10)
207            .name(pName + csprintf(".%s.miss_latency_hist_coalsr",
208                                    RubyRequestType(i)))
209            .desc("")
210            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
211    }
212
213    for (int i = 0; i < MachineType_NUM; i++) {
214        m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram());
215        m_hitMachLatencyHistSeqr[i]
216            ->init(10)
217            .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr",
218                                    MachineType(i)))
219            .desc("")
220            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
221
222        m_missMachLatencyHistSeqr.push_back(new Stats::Histogram());
223        m_missMachLatencyHistSeqr[i]
224            ->init(10)
225            .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr",
226                                    MachineType(i)))
227            .desc("")
228            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
229
230        m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram());
231        m_missMachLatencyHistCoalsr[i]
232            ->init(10)
233            .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr",
234                                    MachineType(i)))
235            .desc("")
236            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
237
238        m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram());
239        m_IssueToInitialDelayHistSeqr[i]
240            ->init(10)
241            .name(pName + csprintf(
242                ".%s.miss_latency_hist_seqr.issue_to_initial_request",
243                MachineType(i)))
244            .desc("")
245            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
246
247        m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram());
248        m_IssueToInitialDelayHistCoalsr[i]
249            ->init(10)
250            .name(pName + csprintf(
251                ".%s.miss_latency_hist_coalsr.issue_to_initial_request",
252                MachineType(i)))
253            .desc("")
254            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
255
256        m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram());
257        m_InitialToForwardDelayHistSeqr[i]
258            ->init(10)
259            .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward",
260                                   MachineType(i)))
261            .desc("")
262            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
263
264        m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram());
265        m_InitialToForwardDelayHistCoalsr[i]
266            ->init(10)
267            .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward",
268                                   MachineType(i)))
269            .desc("")
270            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
271
272        m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram());
273        m_ForwardToFirstResponseDelayHistSeqr[i]
274            ->init(10)
275            .name(pName + csprintf(
276                ".%s.miss_latency_hist_seqr.forward_to_first_response",
277                MachineType(i)))
278            .desc("")
279            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
280
281        m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram());
282        m_ForwardToFirstResponseDelayHistCoalsr[i]
283            ->init(10)
284            .name(pName + csprintf(
285                ".%s.miss_latency_hist_coalsr.forward_to_first_response",
286                MachineType(i)))
287            .desc("")
288            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
289
290        m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram());
291        m_FirstResponseToCompletionDelayHistSeqr[i]
292            ->init(10)
293            .name(pName + csprintf(
294                ".%s.miss_latency_hist_seqr.first_response_to_completion",
295                MachineType(i)))
296            .desc("")
297            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
298
299        m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram());
300        m_FirstResponseToCompletionDelayHistCoalsr[i]
301            ->init(10)
302            .name(pName + csprintf(
303                ".%s.miss_latency_hist_coalsr.first_response_to_completion",
304                MachineType(i)))
305            .desc("")
306            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
307
308        m_IncompleteTimesSeqr[i]
309            .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i)))
310            .desc("")
311            .flags(Stats::nozero);
312    }
313
314    for (int i = 0; i < RubyRequestType_NUM; i++) {
315        m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
316        m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
317        m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>());
318
319        for (int j = 0; j < MachineType_NUM; j++) {
320            m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
321            m_hitTypeMachLatencyHistSeqr[i][j]
322                ->init(10)
323                .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr",
324                                       RubyRequestType(i), MachineType(j)))
325                .desc("")
326                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
327
328            m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
329            m_missTypeMachLatencyHistSeqr[i][j]
330                ->init(10)
331                .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr",
332                                       RubyRequestType(i), MachineType(j)))
333                .desc("")
334                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
335
336            m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram());
337            m_missTypeMachLatencyHistCoalsr[i][j]
338                ->init(10)
339                .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr",
340                                       RubyRequestType(i), MachineType(j)))
341                .desc("")
342                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
343        }
344    }
345}
346
347void
348Profiler::collateStats()
349{
350    if (!m_all_instructions) {
351        m_address_profiler_ptr->collateStats();
352    }
353
354    if (m_all_instructions) {
355        m_inst_profiler_ptr->collateStats();
356    }
357
358    for (uint32_t i = 0; i < MachineType_NUM; i++) {
359        for (map<uint32_t, AbstractController*>::iterator it =
360                  m_ruby_system->m_abstract_controls[i].begin();
361             it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
362
363            AbstractController *ctr = (*it).second;
364            delayHistogram.add(ctr->getDelayHist());
365
366            for (uint32_t i = 0; i < m_num_vnets; i++) {
367                delayVCHistogram[i]->add(ctr->getDelayVCHist(i));
368            }
369        }
370    }
371
372    for (uint32_t i = 0; i < MachineType_NUM; i++) {
373        for (map<uint32_t, AbstractController*>::iterator it =
374                m_ruby_system->m_abstract_controls[i].begin();
375                it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
376
377            AbstractController *ctr = (*it).second;
378            Sequencer *seq = ctr->getCPUSequencer();
379            if (seq != NULL) {
380                m_outstandReqHistSeqr.add(seq->getOutstandReqHist());
381            }
382#ifdef BUILD_GPU
383            GPUCoalescer *coal = ctr->getGPUCoalescer();
384            if (coal != NULL) {
385                m_outstandReqHistCoalsr.add(coal->getOutstandReqHist());
386            }
387#endif
388        }
389    }
390
391    for (uint32_t i = 0; i < MachineType_NUM; i++) {
392        for (map<uint32_t, AbstractController*>::iterator it =
393                m_ruby_system->m_abstract_controls[i].begin();
394                it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
395
396            AbstractController *ctr = (*it).second;
397            Sequencer *seq = ctr->getCPUSequencer();
398            if (seq != NULL) {
399                // add all the latencies
400                m_latencyHistSeqr.add(seq->getLatencyHist());
401                m_hitLatencyHistSeqr.add(seq->getHitLatencyHist());
402                m_missLatencyHistSeqr.add(seq->getMissLatencyHist());
403
404                // add the per request type latencies
405                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
406                    m_typeLatencyHistSeqr[j]
407                        ->add(seq->getTypeLatencyHist(j));
408                    m_hitTypeLatencyHistSeqr[j]
409                        ->add(seq->getHitTypeLatencyHist(j));
410                    m_missTypeLatencyHistSeqr[j]
411                        ->add(seq->getMissTypeLatencyHist(j));
412                }
413
414                // add the per machine type miss latencies
415                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
416                    m_hitMachLatencyHistSeqr[j]
417                        ->add(seq->getHitMachLatencyHist(j));
418                    m_missMachLatencyHistSeqr[j]
419                        ->add(seq->getMissMachLatencyHist(j));
420
421                    m_IssueToInitialDelayHistSeqr[j]->add(
422                        seq->getIssueToInitialDelayHist(MachineType(j)));
423
424                    m_InitialToForwardDelayHistSeqr[j]->add(
425                        seq->getInitialToForwardDelayHist(MachineType(j)));
426                    m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq->
427                        getForwardRequestToFirstResponseHist(MachineType(j)));
428
429                    m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq->
430                        getFirstResponseToCompletionDelayHist(
431                            MachineType(j)));
432                    m_IncompleteTimesSeqr[j] +=
433                        seq->getIncompleteTimes(MachineType(j));
434                }
435
436                // add the per (request, machine) type miss latencies
437                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
438                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
439                        m_hitTypeMachLatencyHistSeqr[j][k]->add(
440                                seq->getHitTypeMachLatencyHist(j,k));
441                        m_missTypeMachLatencyHistSeqr[j][k]->add(
442                                seq->getMissTypeMachLatencyHist(j,k));
443                    }
444                }
445            }
446#ifdef BUILD_GPU
447            GPUCoalescer *coal = ctr->getGPUCoalescer();
448            if (coal != NULL) {
449                // add all the latencies
450                m_latencyHistCoalsr.add(coal->getLatencyHist());
451                m_missLatencyHistCoalsr.add(coal->getMissLatencyHist());
452
453                // add the per request type latencies
454                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
455                    m_typeLatencyHistCoalsr[j]
456                        ->add(coal->getTypeLatencyHist(j));
457                    m_missTypeLatencyHistCoalsr[j]
458                        ->add(coal->getMissTypeLatencyHist(j));
459                }
460
461                // add the per machine type miss latencies
462                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
463                    m_missMachLatencyHistCoalsr[j]
464                        ->add(coal->getMissMachLatencyHist(j));
465
466                    m_IssueToInitialDelayHistCoalsr[j]->add(
467                        coal->getIssueToInitialDelayHist(MachineType(j)));
468
469                    m_InitialToForwardDelayHistCoalsr[j]->add(
470                        coal->getInitialToForwardDelayHist(MachineType(j)));
471                    m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal->
472                        getForwardRequestToFirstResponseHist(MachineType(j)));
473
474                    m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal->
475                        getFirstResponseToCompletionDelayHist(
476                            MachineType(j)));
477                }
478
479                // add the per (request, machine) type miss latencies
480                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
481                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
482                        m_missTypeMachLatencyHistCoalsr[j][k]->add(
483                                coal->getMissTypeMachLatencyHist(j,k));
484                    }
485                }
486            }
487#endif
488        }
489    }
490}
491
492void
493Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
494{
495    if (msg.getType() != RubyRequestType_IFETCH) {
496        // Note: The following line should be commented out if you
497        // want to use the special profiling that is part of the GS320
498        // protocol
499
500        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
501        // profiled by the AddressProfiler
502        m_address_profiler_ptr->
503            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
504                           msg.getType(), msg.getAccessMode(), id, false);
505    }
506}
507