Profiler.cc revision 7832:de7601e6e19d
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30   This file has been modified by Kevin Moore and Dan Nussbaum of the
31   Scalable Systems Research Group at Sun Microsystems Laboratories
32   (http://research.sun.com/scalable/) to support the Adaptive
33   Transactional Memory Test Platform (ATMTP).
34
35   Please send email to atmtp-interest@sun.com with feedback, questions, or
36   to request future announcements about ATMTP.
37
38   ----------------------------------------------------------------------
39
40   File modification date: 2008-02-23
41
42   ----------------------------------------------------------------------
43*/
44
45// Allows use of times() library call, which determines virtual runtime
46#include <sys/resource.h>
47#include <sys/times.h>
48
49#include <algorithm>
50#include <fstream>
51
52#include "base/stl_helpers.hh"
53#include "base/str.hh"
54#include "mem/protocol/CacheMsg.hh"
55#include "mem/protocol/MachineType.hh"
56#include "mem/protocol/Protocol.hh"
57#include "mem/ruby/network/Network.hh"
58#include "mem/ruby/profiler/AddressProfiler.hh"
59#include "mem/ruby/profiler/Profiler.hh"
60#include "mem/ruby/system/System.hh"
61#include "mem/ruby/system/System.hh"
62
63using namespace std;
64using m5::stl_helpers::operator<<;
65
66static double process_memory_total();
67static double process_memory_resident();
68
69Profiler::Profiler(const Params *p)
70    : SimObject(p)
71{
72    m_inst_profiler_ptr = NULL;
73    m_address_profiler_ptr = NULL;
74
75    m_real_time_start_time = time(NULL); // Not reset in clearStats()
76    m_stats_period = 1000000; // Default
77    m_periodic_output_file_ptr = &cerr;
78
79    m_hot_lines = p->hot_lines;
80    m_all_instructions = p->all_instructions;
81
82    m_num_of_sequencers = p->num_of_sequencers;
83
84    m_hot_lines = false;
85    m_all_instructions = false;
86
87    m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
88    m_address_profiler_ptr->setHotLines(m_hot_lines);
89    m_address_profiler_ptr->setAllInstructions(m_all_instructions);
90
91    if (m_all_instructions) {
92        m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
93        m_inst_profiler_ptr->setHotLines(m_hot_lines);
94        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
95    }
96}
97
98Profiler::~Profiler()
99{
100    if (m_periodic_output_file_ptr != &cerr) {
101        delete m_periodic_output_file_ptr;
102    }
103}
104
105void
106Profiler::wakeup()
107{
108    // FIXME - avoid the repeated code
109
110    vector<integer_t> perProcCycleCount(m_num_of_sequencers);
111
112    for (int i = 0; i < m_num_of_sequencers; i++) {
113        perProcCycleCount[i] =
114            g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
115        // The +1 allows us to avoid division by zero
116    }
117
118    ostream &out = *m_periodic_output_file_ptr;
119
120    out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl
121        << "mbytes_resident: " << process_memory_resident() << endl
122        << "mbytes_total: " << process_memory_total() << endl;
123
124    if (process_memory_total() > 0) {
125        out << "resident_ratio: "
126            << process_memory_resident() / process_memory_total() << endl;
127    }
128
129    out << "miss_latency: " << m_allMissLatencyHistogram << endl;
130
131    out << endl;
132
133    if (m_all_instructions) {
134        m_inst_profiler_ptr->printStats(out);
135    }
136
137    //g_system_ptr->getNetwork()->printStats(out);
138    g_eventQueue_ptr->scheduleEvent(this, m_stats_period);
139}
140
141void
142Profiler::setPeriodicStatsFile(const string& filename)
143{
144    cout << "Recording periodic statistics to file '" << filename << "' every "
145         << m_stats_period << " Ruby cycles" << endl;
146
147    if (m_periodic_output_file_ptr != &cerr) {
148        delete m_periodic_output_file_ptr;
149    }
150
151    m_periodic_output_file_ptr = new ofstream(filename.c_str());
152    g_eventQueue_ptr->scheduleEvent(this, 1);
153}
154
155void
156Profiler::setPeriodicStatsInterval(integer_t period)
157{
158    cout << "Recording periodic statistics every " << m_stats_period
159         << " Ruby cycles" << endl;
160
161    m_stats_period = period;
162    g_eventQueue_ptr->scheduleEvent(this, 1);
163}
164
165void
166Profiler::printConfig(ostream& out) const
167{
168    out << endl;
169    out << "Profiler Configuration" << endl;
170    out << "----------------------" << endl;
171    out << "periodic_stats_period: " << m_stats_period << endl;
172}
173
174void
175Profiler::print(ostream& out) const
176{
177    out << "[Profiler]";
178}
179
180void
181Profiler::printStats(ostream& out, bool short_stats)
182{
183    out << endl;
184    if (short_stats) {
185        out << "SHORT ";
186    }
187    out << "Profiler Stats" << endl;
188    out << "--------------" << endl;
189
190    time_t real_time_current = time(NULL);
191    double seconds = difftime(real_time_current, m_real_time_start_time);
192    double minutes = seconds / 60.0;
193    double hours = minutes / 60.0;
194    double days = hours / 24.0;
195    Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start;
196
197    if (!short_stats) {
198        out << "Elapsed_time_in_seconds: " << seconds << endl;
199        out << "Elapsed_time_in_minutes: " << minutes << endl;
200        out << "Elapsed_time_in_hours: " << hours << endl;
201        out << "Elapsed_time_in_days: " << days << endl;
202        out << endl;
203    }
204
205    // print the virtual runtimes as well
206    struct tms vtime;
207    times(&vtime);
208    seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0;
209    minutes = seconds / 60.0;
210    hours = minutes / 60.0;
211    days = hours / 24.0;
212    out << "Virtual_time_in_seconds: " << seconds << endl;
213    out << "Virtual_time_in_minutes: " << minutes << endl;
214    out << "Virtual_time_in_hours:   " << hours << endl;
215    out << "Virtual_time_in_days:    " << days << endl;
216    out << endl;
217
218    out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl;
219    out << "Ruby_start_time: " << m_ruby_start << endl;
220    out << "Ruby_cycles: " << ruby_cycles << endl;
221    out << endl;
222
223    if (!short_stats) {
224        out << "mbytes_resident: " << process_memory_resident() << endl;
225        out << "mbytes_total: " << process_memory_total() << endl;
226        if (process_memory_total() > 0) {
227            out << "resident_ratio: "
228                << process_memory_resident()/process_memory_total() << endl;
229        }
230        out << endl;
231    }
232
233    vector<integer_t> perProcCycleCount(m_num_of_sequencers);
234
235    for (int i = 0; i < m_num_of_sequencers; i++) {
236        perProcCycleCount[i] =
237            g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
238        // The +1 allows us to avoid division by zero
239    }
240
241    out << "ruby_cycles_executed: " << perProcCycleCount << endl;
242
243    out << endl;
244
245    if (!short_stats) {
246        out << "Busy Controller Counts:" << endl;
247        for (int i = 0; i < MachineType_NUM; i++) {
248            int size = MachineType_base_count((MachineType)i);
249            for (int j = 0; j < size; j++) {
250                MachineID machID;
251                machID.type = (MachineType)i;
252                machID.num = j;
253                out << machID << ":" << m_busyControllerCount[i][j] << "  ";
254                if ((j + 1) % 8 == 0) {
255                    out << endl;
256                }
257            }
258            out << endl;
259        }
260        out << endl;
261
262        out << "Busy Bank Count:" << m_busyBankCount << endl;
263        out << endl;
264
265        out << "sequencer_requests_outstanding: "
266            << m_sequencer_requests << endl;
267        out << endl;
268    }
269
270    if (!short_stats) {
271        out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
272        out << "----------------------------------------" << endl;
273        out << "miss_latency: " << m_allMissLatencyHistogram << endl;
274        for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
275            if (m_missLatencyHistograms[i].size() > 0) {
276                out << "miss_latency_" << RubyRequestType(i) << ": "
277                    << m_missLatencyHistograms[i] << endl;
278            }
279        }
280        for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
281            if (m_machLatencyHistograms[i].size() > 0) {
282                out << "miss_latency_" << GenericMachineType(i) << ": "
283                    << m_machLatencyHistograms[i] << endl;
284            }
285        }
286
287        out << "miss_latency_wCC_issue_to_initial_request: "
288            << m_wCCIssueToInitialRequestHistogram << endl;
289        out << "miss_latency_wCC_initial_forward_request: "
290            << m_wCCInitialRequestToForwardRequestHistogram << endl;
291        out << "miss_latency_wCC_forward_to_first_response: "
292            << m_wCCForwardRequestToFirstResponseHistogram << endl;
293        out << "miss_latency_wCC_first_response_to_completion: "
294            << m_wCCFirstResponseToCompleteHistogram << endl;
295        out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
296        out << "miss_latency_dir_issue_to_initial_request: "
297            << m_dirIssueToInitialRequestHistogram << endl;
298        out << "miss_latency_dir_initial_forward_request: "
299            << m_dirInitialRequestToForwardRequestHistogram << endl;
300        out << "miss_latency_dir_forward_to_first_response: "
301            << m_dirForwardRequestToFirstResponseHistogram << endl;
302        out << "miss_latency_dir_first_response_to_completion: "
303            << m_dirFirstResponseToCompleteHistogram << endl;
304        out << "imcomplete_dir_Times: " << m_dirIncompleteTimes << endl;
305
306        for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
307            for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
308                if (m_missMachLatencyHistograms[i][j].size() > 0) {
309                    out << "miss_latency_" << RubyRequestType(i)
310                        << "_" << GenericMachineType(j) << ": "
311                        << m_missMachLatencyHistograms[i][j] << endl;
312                }
313            }
314        }
315
316        out << endl;
317
318        out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
319        out << "------------------------------------" << endl;
320        out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
321        for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
322            if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
323                out << "prefetch_latency_" << CacheRequestType(i) << ": "
324                    << m_SWPrefetchLatencyHistograms[i] << endl;
325            }
326        }
327        for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
328            if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
329                out << "prefetch_latency_" << GenericMachineType(i) << ": "
330                    << m_SWPrefetchMachLatencyHistograms[i] << endl;
331            }
332        }
333        out << "prefetch_latency_L2Miss:"
334            << m_SWPrefetchL2MissLatencyHistogram << endl;
335
336        if (m_all_sharing_histogram.size() > 0) {
337            out << "all_sharing: " << m_all_sharing_histogram << endl;
338            out << "read_sharing: " << m_read_sharing_histogram << endl;
339            out << "write_sharing: " << m_write_sharing_histogram << endl;
340
341            out << "all_sharing_percent: ";
342            m_all_sharing_histogram.printPercent(out);
343            out << endl;
344
345            out << "read_sharing_percent: ";
346            m_read_sharing_histogram.printPercent(out);
347            out << endl;
348
349            out << "write_sharing_percent: ";
350            m_write_sharing_histogram.printPercent(out);
351            out << endl;
352
353            int64 total_miss = m_cache_to_cache +  m_memory_to_cache;
354            out << "all_misses: " << total_miss << endl;
355            out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
356            out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
357            out << "cache_to_cache_percent: "
358                << 100.0 * (double(m_cache_to_cache) / double(total_miss))
359                << endl;
360            out << "memory_to_cache_percent: "
361                << 100.0 * (double(m_memory_to_cache) / double(total_miss))
362                << endl;
363            out << endl;
364        }
365
366        if (m_outstanding_requests.size() > 0) {
367            out << "outstanding_requests: ";
368            m_outstanding_requests.printPercent(out);
369            out << endl;
370            out << endl;
371        }
372    }
373
374    if (!short_stats) {
375        out << "Request vs. RubySystem State Profile" << endl;
376        out << "--------------------------------" << endl;
377        out << endl;
378
379        map<string, int>::const_iterator i = m_requestProfileMap.begin();
380        map<string, int>::const_iterator end = m_requestProfileMap.end();
381        for (; i != end; ++i) {
382            const string &key = i->first;
383            int count = i->second;
384
385            double percent = (100.0 * double(count)) / double(m_requests);
386            vector<string> items;
387            tokenize(items, key, ':');
388            vector<string>::iterator j = items.begin();
389            vector<string>::iterator end = items.end();
390            for (; j != end; ++i)
391                out << setw(10) << *j;
392            out << setw(11) << count;
393            out << setw(14) << percent << endl;
394        }
395        out << endl;
396
397        out << "filter_action: " << m_filter_action_histogram << endl;
398
399        if (!m_all_instructions) {
400            m_address_profiler_ptr->printStats(out);
401        }
402
403        if (m_all_instructions) {
404            m_inst_profiler_ptr->printStats(out);
405        }
406
407        out << endl;
408        out << "Message Delayed Cycles" << endl;
409        out << "----------------------" << endl;
410        out << "Total_delay_cycles: " <<   m_delayedCyclesHistogram << endl;
411        out << "Total_nonPF_delay_cycles: "
412            << m_delayedCyclesNonPFHistogram << endl;
413        for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
414            out << "  virtual_network_" << i << "_delay_cycles: "
415                << m_delayedCyclesVCHistograms[i] << endl;
416        }
417
418        printResourceUsage(out);
419    }
420}
421
422void
423Profiler::printResourceUsage(ostream& out) const
424{
425    out << endl;
426    out << "Resource Usage" << endl;
427    out << "--------------" << endl;
428
429    integer_t pagesize = getpagesize(); // page size in bytes
430    out << "page_size: " << pagesize << endl;
431
432    rusage usage;
433    getrusage (RUSAGE_SELF, &usage);
434
435    out << "user_time: " << usage.ru_utime.tv_sec << endl;
436    out << "system_time: " << usage.ru_stime.tv_sec << endl;
437    out << "page_reclaims: " << usage.ru_minflt << endl;
438    out << "page_faults: " << usage.ru_majflt << endl;
439    out << "swaps: " << usage.ru_nswap << endl;
440    out << "block_inputs: " << usage.ru_inblock << endl;
441    out << "block_outputs: " << usage.ru_oublock << endl;
442}
443
444void
445Profiler::clearStats()
446{
447    m_ruby_start = g_eventQueue_ptr->getTime();
448
449    m_cycles_executed_at_start.resize(m_num_of_sequencers);
450    for (int i = 0; i < m_num_of_sequencers; i++) {
451        if (g_system_ptr == NULL) {
452            m_cycles_executed_at_start[i] = 0;
453        } else {
454            m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i);
455        }
456    }
457
458    m_busyControllerCount.resize(MachineType_NUM); // all machines
459    for (int i = 0; i < MachineType_NUM; i++) {
460        int size = MachineType_base_count((MachineType)i);
461        m_busyControllerCount[i].resize(size);
462        for (int j = 0; j < size; j++) {
463            m_busyControllerCount[i][j] = 0;
464        }
465    }
466    m_busyBankCount = 0;
467
468    m_delayedCyclesHistogram.clear();
469    m_delayedCyclesNonPFHistogram.clear();
470    int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks();
471    m_delayedCyclesVCHistograms.resize(size);
472    for (int i = 0; i < size; i++) {
473        m_delayedCyclesVCHistograms[i].clear();
474    }
475
476    m_missLatencyHistograms.resize(RubyRequestType_NUM);
477    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
478        m_missLatencyHistograms[i].clear(200);
479    }
480    m_machLatencyHistograms.resize(GenericMachineType_NUM+1);
481    for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
482        m_machLatencyHistograms[i].clear(200);
483    }
484    m_missMachLatencyHistograms.resize(RubyRequestType_NUM);
485    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
486        m_missMachLatencyHistograms[i].resize(GenericMachineType_NUM+1);
487        for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
488            m_missMachLatencyHistograms[i][j].clear(200);
489        }
490    }
491    m_allMissLatencyHistogram.clear(200);
492    m_wCCIssueToInitialRequestHistogram.clear(200);
493    m_wCCInitialRequestToForwardRequestHistogram.clear(200);
494    m_wCCForwardRequestToFirstResponseHistogram.clear(200);
495    m_wCCFirstResponseToCompleteHistogram.clear(200);
496    m_wCCIncompleteTimes = 0;
497    m_dirIssueToInitialRequestHistogram.clear(200);
498    m_dirInitialRequestToForwardRequestHistogram.clear(200);
499    m_dirForwardRequestToFirstResponseHistogram.clear(200);
500    m_dirFirstResponseToCompleteHistogram.clear(200);
501    m_dirIncompleteTimes = 0;
502
503    m_SWPrefetchLatencyHistograms.resize(CacheRequestType_NUM);
504    for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
505        m_SWPrefetchLatencyHistograms[i].clear(200);
506    }
507    m_SWPrefetchMachLatencyHistograms.resize(GenericMachineType_NUM+1);
508    for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
509        m_SWPrefetchMachLatencyHistograms[i].clear(200);
510    }
511    m_allSWPrefetchLatencyHistogram.clear(200);
512
513    m_sequencer_requests.clear();
514    m_read_sharing_histogram.clear();
515    m_write_sharing_histogram.clear();
516    m_all_sharing_histogram.clear();
517    m_cache_to_cache = 0;
518    m_memory_to_cache = 0;
519
520    // clear HashMaps
521    m_requestProfileMap.clear();
522
523    // count requests profiled
524    m_requests = 0;
525
526    m_outstanding_requests.clear();
527    m_outstanding_persistent_requests.clear();
528
529    // Flush the prefetches through the system - used so that there
530    // are no outstanding requests after stats are cleared
531    //g_eventQueue_ptr->triggerAllEvents();
532
533    // update the start time
534    m_ruby_start = g_eventQueue_ptr->getTime();
535}
536
537void
538Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id)
539{
540    if (msg.getType() != CacheRequestType_IFETCH) {
541        // Note: The following line should be commented out if you
542        // want to use the special profiling that is part of the GS320
543        // protocol
544
545        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
546        // profiled by the AddressProfiler
547        m_address_profiler_ptr->
548            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
549                           msg.getType(), msg.getAccessMode(), id, false);
550    }
551}
552
553void
554Profiler::profileSharing(const Address& addr, AccessType type,
555                         NodeID requestor, const Set& sharers,
556                         const Set& owner)
557{
558    Set set_contacted(owner);
559    if (type == AccessType_Write) {
560        set_contacted.addSet(sharers);
561    }
562    set_contacted.remove(requestor);
563    int number_contacted = set_contacted.count();
564
565    if (type == AccessType_Write) {
566        m_write_sharing_histogram.add(number_contacted);
567    } else {
568        m_read_sharing_histogram.add(number_contacted);
569    }
570    m_all_sharing_histogram.add(number_contacted);
571
572    if (number_contacted == 0) {
573        m_memory_to_cache++;
574    } else {
575        m_cache_to_cache++;
576    }
577}
578
579void
580Profiler::profileMsgDelay(int virtualNetwork, int delayCycles)
581{
582    assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
583    m_delayedCyclesHistogram.add(delayCycles);
584    m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
585    if (virtualNetwork != 0) {
586        m_delayedCyclesNonPFHistogram.add(delayCycles);
587    }
588}
589
590// profiles original cache requests including PUTs
591void
592Profiler::profileRequest(const string& requestStr)
593{
594    m_requests++;
595
596    // if it doesn't exist, conveniently, it will be created with the
597    // default value which is 0
598    m_requestProfileMap[requestStr]++;
599}
600
601void
602Profiler::controllerBusy(MachineID machID)
603{
604    m_busyControllerCount[(int)machID.type][(int)machID.num]++;
605}
606
607void
608Profiler::profilePFWait(Time waitTime)
609{
610    m_prefetchWaitHistogram.add(waitTime);
611}
612
613void
614Profiler::bankBusy()
615{
616    m_busyBankCount++;
617}
618
619// non-zero cycle demand request
620void
621Profiler::missLatency(Time cycles,
622                      RubyRequestType type,
623                      const GenericMachineType respondingMach)
624{
625    m_allMissLatencyHistogram.add(cycles);
626    m_missLatencyHistograms[type].add(cycles);
627    m_machLatencyHistograms[respondingMach].add(cycles);
628    m_missMachLatencyHistograms[type][respondingMach].add(cycles);
629}
630
631void
632Profiler::missLatencyWcc(Time issuedTime,
633                         Time initialRequestTime,
634                         Time forwardRequestTime,
635                         Time firstResponseTime,
636                         Time completionTime)
637{
638    if ((issuedTime <= initialRequestTime) &&
639        (initialRequestTime <= forwardRequestTime) &&
640        (forwardRequestTime <= firstResponseTime) &&
641        (firstResponseTime <= completionTime)) {
642        m_wCCIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
643
644        m_wCCInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
645                                                         initialRequestTime);
646
647        m_wCCForwardRequestToFirstResponseHistogram.add(firstResponseTime -
648                                                        forwardRequestTime);
649
650        m_wCCFirstResponseToCompleteHistogram.add(completionTime -
651                                                  firstResponseTime);
652    } else {
653        m_wCCIncompleteTimes++;
654    }
655}
656
657void
658Profiler::missLatencyDir(Time issuedTime,
659                         Time initialRequestTime,
660                         Time forwardRequestTime,
661                         Time firstResponseTime,
662                         Time completionTime)
663{
664    if ((issuedTime <= initialRequestTime) &&
665        (initialRequestTime <= forwardRequestTime) &&
666        (forwardRequestTime <= firstResponseTime) &&
667        (firstResponseTime <= completionTime)) {
668        m_dirIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
669
670        m_dirInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
671                                                         initialRequestTime);
672
673        m_dirForwardRequestToFirstResponseHistogram.add(firstResponseTime -
674                                                        forwardRequestTime);
675
676        m_dirFirstResponseToCompleteHistogram.add(completionTime -
677                                                  firstResponseTime);
678    } else {
679        m_dirIncompleteTimes++;
680    }
681}
682
683// non-zero cycle prefetch request
684void
685Profiler::swPrefetchLatency(Time cycles,
686                            CacheRequestType type,
687                            const GenericMachineType respondingMach)
688{
689    m_allSWPrefetchLatencyHistogram.add(cycles);
690    m_SWPrefetchLatencyHistograms[type].add(cycles);
691    m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
692    if (respondingMach == GenericMachineType_Directory ||
693        respondingMach == GenericMachineType_NUM) {
694        m_SWPrefetchL2MissLatencyHistogram.add(cycles);
695    }
696}
697
698// Helper function
699static double
700process_memory_total()
701{
702    // 4kB page size, 1024*1024 bytes per MB,
703    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
704    ifstream proc_file;
705    proc_file.open("/proc/self/statm");
706    int total_size_in_pages = 0;
707    int res_size_in_pages = 0;
708    proc_file >> total_size_in_pages;
709    proc_file >> res_size_in_pages;
710    return double(total_size_in_pages) * MULTIPLIER; // size in megabytes
711}
712
713static double
714process_memory_resident()
715{
716    // 4kB page size, 1024*1024 bytes per MB,
717    const double MULTIPLIER = 4096.0 / (1024.0 * 1024.0);
718    ifstream proc_file;
719    proc_file.open("/proc/self/statm");
720    int total_size_in_pages = 0;
721    int res_size_in_pages = 0;
722    proc_file >> total_size_in_pages;
723    proc_file >> res_size_in_pages;
724    return double(res_size_in_pages) * MULTIPLIER; // size in megabytes
725}
726
727void
728Profiler::rubyWatch(int id)
729{
730    uint64 tr = 0;
731    Address watch_address = Address(tr);
732
733    DPRINTFN("%7s %3s RUBY WATCH %d\n", g_eventQueue_ptr->getTime(), id,
734        watch_address);
735
736    // don't care about success or failure
737    m_watch_address_set.insert(watch_address);
738}
739
740bool
741Profiler::watchAddress(Address addr)
742{
743    return m_watch_address_set.count(addr) > 0;
744}
745
746Profiler *
747RubyProfilerParams::create()
748{
749    return new Profiler(this);
750}
751