Profiler.cc revision 9598
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30   This file has been modified by Kevin Moore and Dan Nussbaum of the
31   Scalable Systems Research Group at Sun Microsystems Laboratories
32   (http://research.sun.com/scalable/) to support the Adaptive
33   Transactional Memory Test Platform (ATMTP).
34
35   Please send email to atmtp-interest@sun.com with feedback, questions, or
36   to request future announcements about ATMTP.
37
38   ----------------------------------------------------------------------
39
40   File modification date: 2008-02-23
41
42   ----------------------------------------------------------------------
43*/
44
45// Allows use of times() library call, which determines virtual runtime
46#include <sys/resource.h>
47#include <sys/times.h>
48#include <sys/types.h>
49#include <unistd.h>
50
51#include <algorithm>
52#include <fstream>
53
54#include "base/stl_helpers.hh"
55#include "base/str.hh"
56#include "mem/protocol/MachineType.hh"
57#include "mem/protocol/RubyRequest.hh"
58#include "mem/ruby/network/Network.hh"
59#include "mem/ruby/profiler/AddressProfiler.hh"
60#include "mem/ruby/profiler/Profiler.hh"
61#include "mem/ruby/system/Sequencer.hh"
62#include "mem/ruby/system/System.hh"
63
64using namespace std;
65using m5::stl_helpers::operator<<;
66
67static double process_memory_total();
68static double process_memory_resident();
69
70Profiler::Profiler(const Params *p)
71    : SimObject(p), m_event(this)
72{
73    m_inst_profiler_ptr = NULL;
74    m_address_profiler_ptr = NULL;
75
76    m_real_time_start_time = time(NULL); // Not reset in clearStats()
77    m_stats_period = 1000000; // Default
78    m_periodic_output_file_ptr = &cerr;
79
80    m_hot_lines = p->hot_lines;
81    m_all_instructions = p->all_instructions;
82
83    m_num_of_sequencers = p->num_of_sequencers;
84
85    m_hot_lines = false;
86    m_all_instructions = false;
87
88    m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
89    m_address_profiler_ptr->setHotLines(m_hot_lines);
90    m_address_profiler_ptr->setAllInstructions(m_all_instructions);
91
92    if (m_all_instructions) {
93        m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
94        m_inst_profiler_ptr->setHotLines(m_hot_lines);
95        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
96    }
97
98    p->ruby_system->registerProfiler(this);
99}
100
101Profiler::~Profiler()
102{
103    if (m_periodic_output_file_ptr != &cerr) {
104        delete m_periodic_output_file_ptr;
105    }
106}
107
108void
109Profiler::wakeup()
110{
111    // FIXME - avoid the repeated code
112
113    vector<int64_t> perProcCycleCount(m_num_of_sequencers);
114
115    for (int i = 0; i < m_num_of_sequencers; i++) {
116        perProcCycleCount[i] =
117            g_system_ptr->curCycle() - m_cycles_executed_at_start[i] + 1;
118        // The +1 allows us to avoid division by zero
119    }
120
121    ostream &out = *m_periodic_output_file_ptr;
122
123    out << "ruby_cycles: " << g_system_ptr->curCycle()-m_ruby_start << endl
124        << "mbytes_resident: " << process_memory_resident() << endl
125        << "mbytes_total: " << process_memory_total() << endl;
126
127    if (process_memory_total() > 0) {
128        out << "resident_ratio: "
129            << process_memory_resident() / process_memory_total() << endl;
130    }
131
132    out << "miss_latency: " << m_allMissLatencyHistogram << endl;
133
134    out << endl;
135
136    if (m_all_instructions) {
137        m_inst_profiler_ptr->printStats(out);
138    }
139
140    //g_system_ptr->getNetwork()->printStats(out);
141    schedule(m_event, g_system_ptr->clockEdge(Cycles(m_stats_period)));
142}
143
144void
145Profiler::setPeriodicStatsFile(const string& filename)
146{
147    cout << "Recording periodic statistics to file '" << filename << "' every "
148         << m_stats_period << " Ruby cycles" << endl;
149
150    if (m_periodic_output_file_ptr != &cerr) {
151        delete m_periodic_output_file_ptr;
152    }
153
154    m_periodic_output_file_ptr = new ofstream(filename.c_str());
155    schedule(m_event, g_system_ptr->clockEdge(Cycles(1)));
156}
157
158void
159Profiler::setPeriodicStatsInterval(int64_t period)
160{
161    cout << "Recording periodic statistics every " << m_stats_period
162         << " Ruby cycles" << endl;
163
164    m_stats_period = period;
165    schedule(m_event, g_system_ptr->clockEdge(Cycles(1)));
166}
167
168void
169Profiler::print(ostream& out) const
170{
171    out << "[Profiler]";
172}
173
174void
175Profiler::printRequestProfile(ostream &out) const
176{
177    out << "Request vs. RubySystem State Profile" << endl;
178    out << "--------------------------------" << endl;
179    out << endl;
180
181    map<string, uint64_t> m_requestProfileMap;
182    uint64_t m_requests = 0;
183
184    for (uint32_t i = 0; i < MachineType_NUM; i++) {
185        for (map<uint32_t, AbstractController*>::iterator it =
186                  g_abs_controls[i].begin();
187             it != g_abs_controls[i].end(); ++it) {
188
189            AbstractController *ctr = (*it).second;
190            map<string, uint64_t> mp = ctr->getRequestProfileMap();
191
192            for (map<string, uint64_t>::iterator jt = mp.begin();
193                 jt != mp.end(); ++jt) {
194
195                map<string, uint64_t>::iterator kt =
196                    m_requestProfileMap.find((*jt).first);
197                if (kt != m_requestProfileMap.end()) {
198                    (*kt).second += (*jt).second;
199                } else {
200                    m_requestProfileMap[(*jt).first] = (*jt).second;
201                }
202            }
203
204            m_requests += ctr->getRequestCount();
205        }
206    }
207
208    map<string, uint64_t>::const_iterator i = m_requestProfileMap.begin();
209    map<string, uint64_t>::const_iterator end = m_requestProfileMap.end();
210    for (; i != end; ++i) {
211        const string &key = i->first;
212        uint64_t count = i->second;
213
214        double percent = (100.0 * double(count)) / double(m_requests);
215        vector<string> items;
216        tokenize(items, key, ':');
217        vector<string>::iterator j = items.begin();
218        vector<string>::iterator end = items.end();
219        for (; j != end; ++i)
220            out << setw(10) << *j;
221        out << setw(11) << count;
222        out << setw(14) << percent << endl;
223    }
224    out << endl;
225}
226
227void
228Profiler::printDelayProfile(ostream &out) const
229{
230    out << "Message Delayed Cycles" << endl;
231    out << "----------------------" << endl;
232
233    uint32_t numVNets = Network::getNumberOfVirtualNetworks();
234    Histogram delayHistogram;
235    std::vector<Histogram> delayVCHistogram(numVNets);
236
237    for (uint32_t i = 0; i < MachineType_NUM; i++) {
238        for (map<uint32_t, AbstractController*>::iterator it =
239                  g_abs_controls[i].begin();
240             it != g_abs_controls[i].end(); ++it) {
241
242            AbstractController *ctr = (*it).second;
243            delayHistogram.add(ctr->getDelayHist());
244
245            for (uint32_t i = 0; i < numVNets; i++) {
246                delayVCHistogram[i].add(ctr->getDelayVCHist(i));
247            }
248        }
249    }
250
251    out << "Total_delay_cycles: " <<   delayHistogram << endl;
252
253    for (int i = 0; i < numVNets; i++) {
254        out << "  virtual_network_" << i << "_delay_cycles: "
255            << delayVCHistogram[i] << endl;
256    }
257}
258
259void
260Profiler::printOutstandingReqProfile(ostream &out) const
261{
262    Histogram sequencerRequests;
263
264    for (uint32_t i = 0; i < MachineType_NUM; i++) {
265        for (map<uint32_t, AbstractController*>::iterator it =
266                  g_abs_controls[i].begin();
267             it != g_abs_controls[i].end(); ++it) {
268
269            AbstractController *ctr = (*it).second;
270            Sequencer *seq = ctr->getSequencer();
271            if (seq != NULL) {
272                sequencerRequests.add(seq->getOutstandReqHist());
273            }
274        }
275    }
276
277    out << "sequencer_requests_outstanding: "
278        << sequencerRequests << endl;
279}
280
281void
282Profiler::printStats(ostream& out, bool short_stats)
283{
284    out << endl;
285    if (short_stats) {
286        out << "SHORT ";
287    }
288    out << "Profiler Stats" << endl;
289    out << "--------------" << endl;
290
291    time_t real_time_current = time(NULL);
292    double seconds = difftime(real_time_current, m_real_time_start_time);
293    double minutes = seconds / 60.0;
294    double hours = minutes / 60.0;
295    double days = hours / 24.0;
296    Cycles ruby_cycles = g_system_ptr->curCycle()-m_ruby_start;
297
298    if (!short_stats) {
299        out << "Elapsed_time_in_seconds: " << seconds << endl;
300        out << "Elapsed_time_in_minutes: " << minutes << endl;
301        out << "Elapsed_time_in_hours: " << hours << endl;
302        out << "Elapsed_time_in_days: " << days << endl;
303        out << endl;
304    }
305
306    // print the virtual runtimes as well
307    struct tms vtime;
308    times(&vtime);
309    seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0;
310    minutes = seconds / 60.0;
311    hours = minutes / 60.0;
312    days = hours / 24.0;
313    out << "Virtual_time_in_seconds: " << seconds << endl;
314    out << "Virtual_time_in_minutes: " << minutes << endl;
315    out << "Virtual_time_in_hours:   " << hours << endl;
316    out << "Virtual_time_in_days:    " << days << endl;
317    out << endl;
318
319    out << "Ruby_current_time: " << g_system_ptr->curCycle() << endl;
320    out << "Ruby_start_time: " << m_ruby_start << endl;
321    out << "Ruby_cycles: " << ruby_cycles << endl;
322    out << endl;
323
324    if (!short_stats) {
325        out << "mbytes_resident: " << process_memory_resident() << endl;
326        out << "mbytes_total: " << process_memory_total() << endl;
327        if (process_memory_total() > 0) {
328            out << "resident_ratio: "
329                << process_memory_resident()/process_memory_total() << endl;
330        }
331        out << endl;
332    }
333
334    vector<int64_t> perProcCycleCount(m_num_of_sequencers);
335
336    for (int i = 0; i < m_num_of_sequencers; i++) {
337        perProcCycleCount[i] =
338            g_system_ptr->curCycle() - m_cycles_executed_at_start[i] + 1;
339        // The +1 allows us to avoid division by zero
340    }
341
342    out << "ruby_cycles_executed: " << perProcCycleCount << endl;
343
344    out << endl;
345
346    if (!short_stats) {
347        out << "Busy Controller Counts:" << endl;
348        for (uint32_t i = 0; i < MachineType_NUM; i++) {
349            uint32_t size = MachineType_base_count((MachineType)i);
350
351            for (uint32_t j = 0; j < size; j++) {
352                MachineID machID;
353                machID.type = (MachineType)i;
354                machID.num = j;
355
356                AbstractController *ctr =
357                    (*(g_abs_controls[i].find(j))).second;
358                out << machID << ":" << ctr->getFullyBusyCycles() << "  ";
359                if ((j + 1) % 8 == 0) {
360                    out << endl;
361                }
362            }
363            out << endl;
364        }
365        out << endl;
366
367        out << "Busy Bank Count:" << m_busyBankCount << endl;
368        out << endl;
369
370        printOutstandingReqProfile(out);
371        out << endl;
372    }
373
374    if (!short_stats) {
375        out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
376        out << "----------------------------------------" << endl;
377        out << "miss_latency: " << m_allMissLatencyHistogram << endl;
378        for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
379            if (m_missLatencyHistograms[i].size() > 0) {
380                out << "miss_latency_" << RubyRequestType(i) << ": "
381                    << m_missLatencyHistograms[i] << endl;
382            }
383        }
384        for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
385            if (m_machLatencyHistograms[i].size() > 0) {
386                out << "miss_latency_" << GenericMachineType(i) << ": "
387                    << m_machLatencyHistograms[i] << endl;
388            }
389        }
390
391        out << "miss_latency_wCC_issue_to_initial_request: "
392            << m_wCCIssueToInitialRequestHistogram << endl;
393        out << "miss_latency_wCC_initial_forward_request: "
394            << m_wCCInitialRequestToForwardRequestHistogram << endl;
395        out << "miss_latency_wCC_forward_to_first_response: "
396            << m_wCCForwardRequestToFirstResponseHistogram << endl;
397        out << "miss_latency_wCC_first_response_to_completion: "
398            << m_wCCFirstResponseToCompleteHistogram << endl;
399        out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
400        out << "miss_latency_dir_issue_to_initial_request: "
401            << m_dirIssueToInitialRequestHistogram << endl;
402        out << "miss_latency_dir_initial_forward_request: "
403            << m_dirInitialRequestToForwardRequestHistogram << endl;
404        out << "miss_latency_dir_forward_to_first_response: "
405            << m_dirForwardRequestToFirstResponseHistogram << endl;
406        out << "miss_latency_dir_first_response_to_completion: "
407            << m_dirFirstResponseToCompleteHistogram << endl;
408        out << "imcomplete_dir_Times: " << m_dirIncompleteTimes << endl;
409
410        for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
411            for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
412                if (m_missMachLatencyHistograms[i][j].size() > 0) {
413                    out << "miss_latency_" << RubyRequestType(i)
414                        << "_" << GenericMachineType(j) << ": "
415                        << m_missMachLatencyHistograms[i][j] << endl;
416                }
417            }
418        }
419
420        out << endl;
421
422        out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
423        out << "------------------------------------" << endl;
424        out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
425        for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
426            if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
427                out << "prefetch_latency_" << RubyRequestType(i) << ": "
428                    << m_SWPrefetchLatencyHistograms[i] << endl;
429            }
430        }
431        for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
432            if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
433                out << "prefetch_latency_" << GenericMachineType(i) << ": "
434                    << m_SWPrefetchMachLatencyHistograms[i] << endl;
435            }
436        }
437        out << "prefetch_latency_L2Miss:"
438            << m_SWPrefetchL2MissLatencyHistogram << endl;
439
440        if (m_all_sharing_histogram.size() > 0) {
441            out << "all_sharing: " << m_all_sharing_histogram << endl;
442            out << "read_sharing: " << m_read_sharing_histogram << endl;
443            out << "write_sharing: " << m_write_sharing_histogram << endl;
444
445            out << "all_sharing_percent: ";
446            m_all_sharing_histogram.printPercent(out);
447            out << endl;
448
449            out << "read_sharing_percent: ";
450            m_read_sharing_histogram.printPercent(out);
451            out << endl;
452
453            out << "write_sharing_percent: ";
454            m_write_sharing_histogram.printPercent(out);
455            out << endl;
456
457            int64 total_miss = m_cache_to_cache +  m_memory_to_cache;
458            out << "all_misses: " << total_miss << endl;
459            out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
460            out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
461            out << "cache_to_cache_percent: "
462                << 100.0 * (double(m_cache_to_cache) / double(total_miss))
463                << endl;
464            out << "memory_to_cache_percent: "
465                << 100.0 * (double(m_memory_to_cache) / double(total_miss))
466                << endl;
467            out << endl;
468        }
469
470        if (m_outstanding_requests.size() > 0) {
471            out << "outstanding_requests: ";
472            m_outstanding_requests.printPercent(out);
473            out << endl;
474            out << endl;
475        }
476    }
477
478    if (!short_stats) {
479        printRequestProfile(out);
480
481        out << "filter_action: " << m_filter_action_histogram << endl;
482
483        if (!m_all_instructions) {
484            m_address_profiler_ptr->printStats(out);
485        }
486
487        if (m_all_instructions) {
488            m_inst_profiler_ptr->printStats(out);
489        }
490
491        out << endl;
492        printDelayProfile(out);
493        printResourceUsage(out);
494    }
495}
496
497void
498Profiler::printResourceUsage(ostream& out) const
499{
500    out << endl;
501    out << "Resource Usage" << endl;
502    out << "--------------" << endl;
503
504    int64_t pagesize = getpagesize(); // page size in bytes
505    out << "page_size: " << pagesize << endl;
506
507    rusage usage;
508    getrusage (RUSAGE_SELF, &usage);
509
510    out << "user_time: " << usage.ru_utime.tv_sec << endl;
511    out << "system_time: " << usage.ru_stime.tv_sec << endl;
512    out << "page_reclaims: " << usage.ru_minflt << endl;
513    out << "page_faults: " << usage.ru_majflt << endl;
514    out << "swaps: " << usage.ru_nswap << endl;
515    out << "block_inputs: " << usage.ru_inblock << endl;
516    out << "block_outputs: " << usage.ru_oublock << endl;
517}
518
519void
520Profiler::clearStats()
521{
522    m_ruby_start = g_system_ptr->curCycle();
523    m_real_time_start_time = time(NULL);
524
525    m_cycles_executed_at_start.resize(m_num_of_sequencers);
526    for (int i = 0; i < m_num_of_sequencers; i++) {
527        if (g_system_ptr == NULL) {
528            m_cycles_executed_at_start[i] = 0;
529        } else {
530            m_cycles_executed_at_start[i] = g_system_ptr->curCycle();
531        }
532    }
533
534    m_busyBankCount = 0;
535
536    m_missLatencyHistograms.resize(RubyRequestType_NUM);
537    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
538        m_missLatencyHistograms[i].clear(200);
539    }
540    m_machLatencyHistograms.resize(GenericMachineType_NUM+1);
541    for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
542        m_machLatencyHistograms[i].clear(200);
543    }
544    m_missMachLatencyHistograms.resize(RubyRequestType_NUM);
545    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
546        m_missMachLatencyHistograms[i].resize(GenericMachineType_NUM+1);
547        for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
548            m_missMachLatencyHistograms[i][j].clear(200);
549        }
550    }
551    m_allMissLatencyHistogram.clear(200);
552    m_wCCIssueToInitialRequestHistogram.clear(200);
553    m_wCCInitialRequestToForwardRequestHistogram.clear(200);
554    m_wCCForwardRequestToFirstResponseHistogram.clear(200);
555    m_wCCFirstResponseToCompleteHistogram.clear(200);
556    m_wCCIncompleteTimes = 0;
557    m_dirIssueToInitialRequestHistogram.clear(200);
558    m_dirInitialRequestToForwardRequestHistogram.clear(200);
559    m_dirForwardRequestToFirstResponseHistogram.clear(200);
560    m_dirFirstResponseToCompleteHistogram.clear(200);
561    m_dirIncompleteTimes = 0;
562
563    m_SWPrefetchLatencyHistograms.resize(RubyRequestType_NUM);
564    for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
565        m_SWPrefetchLatencyHistograms[i].clear(200);
566    }
567    m_SWPrefetchMachLatencyHistograms.resize(GenericMachineType_NUM+1);
568    for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
569        m_SWPrefetchMachLatencyHistograms[i].clear(200);
570    }
571    m_allSWPrefetchLatencyHistogram.clear(200);
572
573    m_read_sharing_histogram.clear();
574    m_write_sharing_histogram.clear();
575    m_all_sharing_histogram.clear();
576    m_cache_to_cache = 0;
577    m_memory_to_cache = 0;
578
579    m_outstanding_requests.clear();
580    m_outstanding_persistent_requests.clear();
581
582    // Flush the prefetches through the system - used so that there
583    // are no outstanding requests after stats are cleared
584    //g_eventQueue_ptr->triggerAllEvents();
585
586    // update the start time
587    m_ruby_start = g_system_ptr->curCycle();
588}
589
590void
591Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
592{
593    if (msg.getType() != RubyRequestType_IFETCH) {
594        // Note: The following line should be commented out if you
595        // want to use the special profiling that is part of the GS320
596        // protocol
597
598        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
599        // profiled by the AddressProfiler
600        m_address_profiler_ptr->
601            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
602                           msg.getType(), msg.getAccessMode(), id, false);
603    }
604}
605
606void
607Profiler::profileSharing(const Address& addr, AccessType type,
608                         NodeID requestor, const Set& sharers,
609                         const Set& owner)
610{
611    Set set_contacted(owner);
612    if (type == AccessType_Write) {
613        set_contacted.addSet(sharers);
614    }
615    set_contacted.remove(requestor);
616    int number_contacted = set_contacted.count();
617
618    if (type == AccessType_Write) {
619        m_write_sharing_histogram.add(number_contacted);
620    } else {
621        m_read_sharing_histogram.add(number_contacted);
622    }
623    m_all_sharing_histogram.add(number_contacted);
624
625    if (number_contacted == 0) {
626        m_memory_to_cache++;
627    } else {
628        m_cache_to_cache++;
629    }
630}
631
632void
633Profiler::profilePFWait(Cycles waitTime)
634{
635    m_prefetchWaitHistogram.add(waitTime);
636}
637
638void
639Profiler::bankBusy()
640{
641    m_busyBankCount++;
642}
643
644// non-zero cycle demand request
645void
646Profiler::missLatency(Cycles cycles,
647                      RubyRequestType type,
648                      const GenericMachineType respondingMach)
649{
650    m_allMissLatencyHistogram.add(cycles);
651    m_missLatencyHistograms[type].add(cycles);
652    m_machLatencyHistograms[respondingMach].add(cycles);
653    m_missMachLatencyHistograms[type][respondingMach].add(cycles);
654}
655
656void
657Profiler::missLatencyWcc(Cycles issuedTime,
658                         Cycles initialRequestTime,
659                         Cycles forwardRequestTime,
660                         Cycles firstResponseTime,
661                         Cycles completionTime)
662{
663    if ((issuedTime <= initialRequestTime) &&
664        (initialRequestTime <= forwardRequestTime) &&
665        (forwardRequestTime <= firstResponseTime) &&
666        (firstResponseTime <= completionTime)) {
667        m_wCCIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
668
669        m_wCCInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
670                                                         initialRequestTime);
671
672        m_wCCForwardRequestToFirstResponseHistogram.add(firstResponseTime -
673                                                        forwardRequestTime);
674
675        m_wCCFirstResponseToCompleteHistogram.add(completionTime -
676                                                  firstResponseTime);
677    } else {
678        m_wCCIncompleteTimes++;
679    }
680}
681
682void
683Profiler::missLatencyDir(Cycles issuedTime,
684                         Cycles initialRequestTime,
685                         Cycles forwardRequestTime,
686                         Cycles firstResponseTime,
687                         Cycles completionTime)
688{
689    if ((issuedTime <= initialRequestTime) &&
690        (initialRequestTime <= forwardRequestTime) &&
691        (forwardRequestTime <= firstResponseTime) &&
692        (firstResponseTime <= completionTime)) {
693        m_dirIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
694
695        m_dirInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
696                                                         initialRequestTime);
697
698        m_dirForwardRequestToFirstResponseHistogram.add(firstResponseTime -
699                                                        forwardRequestTime);
700
701        m_dirFirstResponseToCompleteHistogram.add(completionTime -
702                                                  firstResponseTime);
703    } else {
704        m_dirIncompleteTimes++;
705    }
706}
707
708// non-zero cycle prefetch request
709void
710Profiler::swPrefetchLatency(Cycles cycles, RubyRequestType type,
711                            const GenericMachineType respondingMach)
712{
713    m_allSWPrefetchLatencyHistogram.add(cycles);
714    m_SWPrefetchLatencyHistograms[type].add(cycles);
715    m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
716
717    if (respondingMach == GenericMachineType_Directory ||
718        respondingMach == GenericMachineType_NUM) {
719        m_SWPrefetchL2MissLatencyHistogram.add(cycles);
720    }
721}
722
723// Helper function
// Return the total size of this process in megabytes, computed from the
// first field of /proc/self/statm (a page count). Returns 0 when the file
// cannot be opened or parsed.
static double
process_memory_total()
{
    // Convert pages to MB using the real page size rather than assuming
    // 4kB; fall back to 4kB only if the query itself fails.
    long page_bytes = sysconf(_SC_PAGESIZE);
    if (page_bytes <= 0) {
        page_bytes = 4096;
    }
    const double mbytes_per_page = double(page_bytes) / (1024.0 * 1024.0);

    long total_size_in_pages = 0; // stays 0 if the read below fails
    std::ifstream proc_file("/proc/self/statm");
    proc_file >> total_size_in_pages;

    return double(total_size_in_pages) * mbytes_per_page; // size in megabytes
}
737
// Return the resident size of this process in megabytes, computed from the
// second field of /proc/self/statm (a page count). Returns 0 when the file
// cannot be opened or parsed.
static double
process_memory_resident()
{
    // Convert pages to MB using the real page size rather than assuming
    // 4kB; fall back to 4kB only if the query itself fails.
    long page_bytes = sysconf(_SC_PAGESIZE);
    if (page_bytes <= 0) {
        page_bytes = 4096;
    }
    const double mbytes_per_page = double(page_bytes) / (1024.0 * 1024.0);

    long total_size_in_pages = 0; // read only to step past the first field
    long res_size_in_pages = 0;   // stays 0 if the reads below fail
    std::ifstream proc_file("/proc/self/statm");
    proc_file >> total_size_in_pages >> res_size_in_pages;

    return double(res_size_in_pages) * mbytes_per_page; // size in megabytes
}
751
752void
753Profiler::rubyWatch(int id)
754{
755    uint64 tr = 0;
756    Address watch_address = Address(tr);
757
758    DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->curCycle(), id,
759        watch_address);
760
761    // don't care about success or failure
762    m_watch_address_set.insert(watch_address);
763}
764
765bool
766Profiler::watchAddress(Address addr)
767{
768    return m_watch_address_set.count(addr) > 0;
769}
770
771Profiler *
772RubyProfilerParams::create()
773{
774    return new Profiler(this);
775}
776