Profiler.cc revision 11309:9be8a40026df
1/*
2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30   This file has been modified by Kevin Moore and Dan Nussbaum of the
31   Scalable Systems Research Group at Sun Microsystems Laboratories
32   (http://research.sun.com/scalable/) to support the Adaptive
33   Transactional Memory Test Platform (ATMTP).
34
35   Please send email to atmtp-interest@sun.com with feedback, questions, or
36   to request future announcements about ATMTP.
37
38   ----------------------------------------------------------------------
39
40   File modification date: 2008-02-23
41
42   ----------------------------------------------------------------------
43*/
44
45#include "mem/ruby/profiler/Profiler.hh"
46
47#include <sys/types.h>
48#include <unistd.h>
49
50#include <algorithm>
51#include <fstream>
52
53#include "base/stl_helpers.hh"
54#include "base/str.hh"
55#include "mem/protocol/MachineType.hh"
56#include "mem/protocol/RubyRequest.hh"
57#include "mem/ruby/network/Network.hh"
58#include "mem/ruby/profiler/AddressProfiler.hh"
59#include "mem/ruby/system/GPUCoalescer.hh"
60#include "mem/ruby/system/Sequencer.hh"
61
62using namespace std;
63using m5::stl_helpers::operator<<;
64
65Profiler::Profiler(const RubySystemParams *p, RubySystem *rs)
66    : m_ruby_system(rs), m_hot_lines(p->hot_lines),
67      m_all_instructions(p->all_instructions),
68      m_num_vnets(p->number_of_virtual_networks)
69{
70    m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
71    m_address_profiler_ptr->setHotLines(m_hot_lines);
72    m_address_profiler_ptr->setAllInstructions(m_all_instructions);
73
74    if (m_all_instructions) {
75        m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this);
76        m_inst_profiler_ptr->setHotLines(m_hot_lines);
77        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
78    }
79}
80
// Empty destructor: nothing is released here.  The AddressProfiler objects
// allocated with new in the constructor and the Stats::Histogram objects
// allocated with new in regStats() are not deleted by this destructor.
// NOTE(review): presumably the Profiler lives as long as the simulation,
// which would make this harmless -- confirm ownership and the lifetime of
// registered stats before adding any deletes.
Profiler::~Profiler()
{
}
84
85void
86Profiler::regStats(const std::string &pName)
87{
88    if (!m_all_instructions) {
89        m_address_profiler_ptr->regStats(pName);
90    }
91
92    if (m_all_instructions) {
93        m_inst_profiler_ptr->regStats(pName);
94    }
95
96    delayHistogram
97        .init(10)
98        .name(pName + ".delayHist")
99        .desc("delay histogram for all message")
100        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
101
102    for (int i = 0; i < m_num_vnets; i++) {
103        delayVCHistogram.push_back(new Stats::Histogram());
104        delayVCHistogram[i]
105            ->init(10)
106            .name(pName + csprintf(".delayVCHist.vnet_%i", i))
107            .desc(csprintf("delay histogram for vnet_%i", i))
108            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
109    }
110
111    m_outstandReqHistSeqr
112        .init(10)
113        .name(pName + ".outstanding_req_hist_seqr")
114        .desc("")
115        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
116
117    m_outstandReqHistCoalsr
118        .init(10)
119        .name(pName + ".outstanding_req_hist_coalsr")
120        .desc("")
121        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
122
123    m_latencyHistSeqr
124        .init(10)
125        .name(pName + ".latency_hist_seqr")
126        .desc("")
127        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
128
129    m_latencyHistCoalsr
130        .init(10)
131        .name(pName + ".latency_hist_coalsr")
132        .desc("")
133        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
134
135    m_hitLatencyHistSeqr
136        .init(10)
137        .name(pName + ".hit_latency_hist_seqr")
138        .desc("")
139        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
140
141    m_missLatencyHistSeqr
142        .init(10)
143        .name(pName + ".miss_latency_hist_seqr")
144        .desc("")
145        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
146
147    m_missLatencyHistCoalsr
148        .init(10)
149        .name(pName + ".miss_latency_hist_coalsr")
150        .desc("")
151        .flags(Stats::nozero | Stats::pdf | Stats::oneline);
152
153    for (int i = 0; i < RubyRequestType_NUM; i++) {
154        m_typeLatencyHistSeqr.push_back(new Stats::Histogram());
155        m_typeLatencyHistSeqr[i]
156            ->init(10)
157            .name(pName + csprintf(".%s.latency_hist_seqr",
158                                    RubyRequestType(i)))
159            .desc("")
160            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
161
162        m_typeLatencyHistCoalsr.push_back(new Stats::Histogram());
163        m_typeLatencyHistCoalsr[i]
164            ->init(10)
165            .name(pName + csprintf(".%s.latency_hist_coalsr",
166                                    RubyRequestType(i)))
167            .desc("")
168            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
169
170        m_hitTypeLatencyHistSeqr.push_back(new Stats::Histogram());
171        m_hitTypeLatencyHistSeqr[i]
172            ->init(10)
173            .name(pName + csprintf(".%s.hit_latency_hist_seqr",
174                                    RubyRequestType(i)))
175            .desc("")
176            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
177
178        m_missTypeLatencyHistSeqr.push_back(new Stats::Histogram());
179        m_missTypeLatencyHistSeqr[i]
180            ->init(10)
181            .name(pName + csprintf(".%s.miss_latency_hist_seqr",
182                                    RubyRequestType(i)))
183            .desc("")
184            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
185
186        m_missTypeLatencyHistCoalsr.push_back(new Stats::Histogram());
187        m_missTypeLatencyHistCoalsr[i]
188            ->init(10)
189            .name(pName + csprintf(".%s.miss_latency_hist_coalsr",
190                                    RubyRequestType(i)))
191            .desc("")
192            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
193    }
194
195    for (int i = 0; i < MachineType_NUM; i++) {
196        m_hitMachLatencyHistSeqr.push_back(new Stats::Histogram());
197        m_hitMachLatencyHistSeqr[i]
198            ->init(10)
199            .name(pName + csprintf(".%s.hit_mach_latency_hist_seqr",
200                                    MachineType(i)))
201            .desc("")
202            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
203
204        m_missMachLatencyHistSeqr.push_back(new Stats::Histogram());
205        m_missMachLatencyHistSeqr[i]
206            ->init(10)
207            .name(pName + csprintf(".%s.miss_mach_latency_hist_seqr",
208                                    MachineType(i)))
209            .desc("")
210            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
211
212        m_missMachLatencyHistCoalsr.push_back(new Stats::Histogram());
213        m_missMachLatencyHistCoalsr[i]
214            ->init(10)
215            .name(pName + csprintf(".%s.miss_mach_latency_hist_coalsr",
216                                    MachineType(i)))
217            .desc("")
218            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
219
220        m_IssueToInitialDelayHistSeqr.push_back(new Stats::Histogram());
221        m_IssueToInitialDelayHistSeqr[i]
222            ->init(10)
223            .name(pName + csprintf(
224                ".%s.miss_latency_hist_seqr.issue_to_initial_request",
225                MachineType(i)))
226            .desc("")
227            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
228
229        m_IssueToInitialDelayHistCoalsr.push_back(new Stats::Histogram());
230        m_IssueToInitialDelayHistCoalsr[i]
231            ->init(10)
232            .name(pName + csprintf(
233                ".%s.miss_latency_hist_coalsr.issue_to_initial_request",
234                MachineType(i)))
235            .desc("")
236            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
237
238        m_InitialToForwardDelayHistSeqr.push_back(new Stats::Histogram());
239        m_InitialToForwardDelayHistSeqr[i]
240            ->init(10)
241            .name(pName + csprintf(".%s.miss_latency_hist_seqr.initial_to_forward",
242                                   MachineType(i)))
243            .desc("")
244            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
245
246        m_InitialToForwardDelayHistCoalsr.push_back(new Stats::Histogram());
247        m_InitialToForwardDelayHistCoalsr[i]
248            ->init(10)
249            .name(pName + csprintf(".%s.miss_latency_hist_coalsr.initial_to_forward",
250                                   MachineType(i)))
251            .desc("")
252            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
253
254        m_ForwardToFirstResponseDelayHistSeqr.push_back(new Stats::Histogram());
255        m_ForwardToFirstResponseDelayHistSeqr[i]
256            ->init(10)
257            .name(pName + csprintf(
258                ".%s.miss_latency_hist_seqr.forward_to_first_response",
259                MachineType(i)))
260            .desc("")
261            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
262
263        m_ForwardToFirstResponseDelayHistCoalsr.push_back(new Stats::Histogram());
264        m_ForwardToFirstResponseDelayHistCoalsr[i]
265            ->init(10)
266            .name(pName + csprintf(
267                ".%s.miss_latency_hist_coalsr.forward_to_first_response",
268                MachineType(i)))
269            .desc("")
270            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
271
272        m_FirstResponseToCompletionDelayHistSeqr.push_back(new Stats::Histogram());
273        m_FirstResponseToCompletionDelayHistSeqr[i]
274            ->init(10)
275            .name(pName + csprintf(
276                ".%s.miss_latency_hist_seqr.first_response_to_completion",
277                MachineType(i)))
278            .desc("")
279            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
280
281        m_FirstResponseToCompletionDelayHistCoalsr.push_back(new Stats::Histogram());
282        m_FirstResponseToCompletionDelayHistCoalsr[i]
283            ->init(10)
284            .name(pName + csprintf(
285                ".%s.miss_latency_hist_coalsr.first_response_to_completion",
286                MachineType(i)))
287            .desc("")
288            .flags(Stats::nozero | Stats::pdf | Stats::oneline);
289
290        m_IncompleteTimesSeqr[i]
291            .name(pName + csprintf(".%s.incomplete_times_seqr", MachineType(i)))
292            .desc("")
293            .flags(Stats::nozero);
294    }
295
296    for (int i = 0; i < RubyRequestType_NUM; i++) {
297        m_hitTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
298        m_missTypeMachLatencyHistSeqr.push_back(std::vector<Stats::Histogram *>());
299        m_missTypeMachLatencyHistCoalsr.push_back(std::vector<Stats::Histogram *>());
300
301        for (int j = 0; j < MachineType_NUM; j++) {
302            m_hitTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
303            m_hitTypeMachLatencyHistSeqr[i][j]
304                ->init(10)
305                .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist_seqr",
306                                       RubyRequestType(i), MachineType(j)))
307                .desc("")
308                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
309
310            m_missTypeMachLatencyHistSeqr[i].push_back(new Stats::Histogram());
311            m_missTypeMachLatencyHistSeqr[i][j]
312                ->init(10)
313                .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_seqr",
314                                       RubyRequestType(i), MachineType(j)))
315                .desc("")
316                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
317
318            m_missTypeMachLatencyHistCoalsr[i].push_back(new Stats::Histogram());
319            m_missTypeMachLatencyHistCoalsr[i][j]
320                ->init(10)
321                .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist_coalsr",
322                                       RubyRequestType(i), MachineType(j)))
323                .desc("")
324                .flags(Stats::nozero | Stats::pdf | Stats::oneline);
325        }
326    }
327}
328
329void
330Profiler::collateStats()
331{
332    if (!m_all_instructions) {
333        m_address_profiler_ptr->collateStats();
334    }
335
336    if (m_all_instructions) {
337        m_inst_profiler_ptr->collateStats();
338    }
339
340    for (uint32_t i = 0; i < MachineType_NUM; i++) {
341        for (map<uint32_t, AbstractController*>::iterator it =
342                  m_ruby_system->m_abstract_controls[i].begin();
343             it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
344
345            AbstractController *ctr = (*it).second;
346            delayHistogram.add(ctr->getDelayHist());
347
348            for (uint32_t i = 0; i < m_num_vnets; i++) {
349                delayVCHistogram[i]->add(ctr->getDelayVCHist(i));
350            }
351        }
352    }
353
354    for (uint32_t i = 0; i < MachineType_NUM; i++) {
355        for (map<uint32_t, AbstractController*>::iterator it =
356                m_ruby_system->m_abstract_controls[i].begin();
357                it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
358
359            AbstractController *ctr = (*it).second;
360            Sequencer *seq = ctr->getCPUSequencer();
361            if (seq != NULL) {
362                m_outstandReqHistSeqr.add(seq->getOutstandReqHist());
363            }
364            GPUCoalescer *coal = ctr->getGPUCoalescer();
365            if (coal != NULL) {
366                m_outstandReqHistCoalsr.add(coal->getOutstandReqHist());
367            }
368        }
369    }
370
371    for (uint32_t i = 0; i < MachineType_NUM; i++) {
372        for (map<uint32_t, AbstractController*>::iterator it =
373                m_ruby_system->m_abstract_controls[i].begin();
374                it != m_ruby_system->m_abstract_controls[i].end(); ++it) {
375
376            AbstractController *ctr = (*it).second;
377            Sequencer *seq = ctr->getCPUSequencer();
378            if (seq != NULL) {
379                // add all the latencies
380                m_latencyHistSeqr.add(seq->getLatencyHist());
381                m_hitLatencyHistSeqr.add(seq->getHitLatencyHist());
382                m_missLatencyHistSeqr.add(seq->getMissLatencyHist());
383
384                // add the per request type latencies
385                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
386                    m_typeLatencyHistSeqr[j]
387                        ->add(seq->getTypeLatencyHist(j));
388                    m_hitTypeLatencyHistSeqr[j]
389                        ->add(seq->getHitTypeLatencyHist(j));
390                    m_missTypeLatencyHistSeqr[j]
391                        ->add(seq->getMissTypeLatencyHist(j));
392                }
393
394                // add the per machine type miss latencies
395                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
396                    m_hitMachLatencyHistSeqr[j]
397                        ->add(seq->getHitMachLatencyHist(j));
398                    m_missMachLatencyHistSeqr[j]
399                        ->add(seq->getMissMachLatencyHist(j));
400
401                    m_IssueToInitialDelayHistSeqr[j]->add(
402                        seq->getIssueToInitialDelayHist(MachineType(j)));
403
404                    m_InitialToForwardDelayHistSeqr[j]->add(
405                        seq->getInitialToForwardDelayHist(MachineType(j)));
406                    m_ForwardToFirstResponseDelayHistSeqr[j]->add(seq->
407                        getForwardRequestToFirstResponseHist(MachineType(j)));
408
409                    m_FirstResponseToCompletionDelayHistSeqr[j]->add(seq->
410                        getFirstResponseToCompletionDelayHist(
411                            MachineType(j)));
412                    m_IncompleteTimesSeqr[j] +=
413                        seq->getIncompleteTimes(MachineType(j));
414                }
415
416                // add the per (request, machine) type miss latencies
417                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
418                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
419                        m_hitTypeMachLatencyHistSeqr[j][k]->add(
420                                seq->getHitTypeMachLatencyHist(j,k));
421                        m_missTypeMachLatencyHistSeqr[j][k]->add(
422                                seq->getMissTypeMachLatencyHist(j,k));
423                    }
424                }
425            }
426
427            GPUCoalescer *coal = ctr->getGPUCoalescer();
428            if (coal != NULL) {
429                // add all the latencies
430                m_latencyHistCoalsr.add(coal->getLatencyHist());
431                m_missLatencyHistCoalsr.add(coal->getMissLatencyHist());
432
433                // add the per request type latencies
434                for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
435                    m_typeLatencyHistCoalsr[j]
436                        ->add(coal->getTypeLatencyHist(j));
437                    m_missTypeLatencyHistCoalsr[j]
438                        ->add(coal->getMissTypeLatencyHist(j));
439                }
440
441                // add the per machine type miss latencies
442                for (uint32_t j = 0; j < MachineType_NUM; ++j) {
443                    m_missMachLatencyHistCoalsr[j]
444                        ->add(coal->getMissMachLatencyHist(j));
445
446                    m_IssueToInitialDelayHistCoalsr[j]->add(
447                        coal->getIssueToInitialDelayHist(MachineType(j)));
448
449                    m_InitialToForwardDelayHistCoalsr[j]->add(
450                        coal->getInitialToForwardDelayHist(MachineType(j)));
451                    m_ForwardToFirstResponseDelayHistCoalsr[j]->add(coal->
452                        getForwardRequestToFirstResponseHist(MachineType(j)));
453
454                    m_FirstResponseToCompletionDelayHistCoalsr[j]->add(coal->
455                        getFirstResponseToCompletionDelayHist(
456                            MachineType(j)));
457                }
458
459                // add the per (request, machine) type miss latencies
460                for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
461                    for (uint32_t k = 0; k < MachineType_NUM; k++) {
462                        m_missTypeMachLatencyHistCoalsr[j][k]->add(
463                                coal->getMissTypeMachLatencyHist(j,k));
464                    }
465                }
466            }
467        }
468    }
469}
470
471void
472Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
473{
474    if (msg.getType() != RubyRequestType_IFETCH) {
475        // Note: The following line should be commented out if you
476        // want to use the special profiling that is part of the GS320
477        // protocol
478
479        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
480        // profiled by the AddressProfiler
481        m_address_profiler_ptr->
482            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
483                           msg.getType(), msg.getAccessMode(), id, false);
484    }
485}
486