Profiler.cc revision 8229:78bf55f23338
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30   This file has been modified by Kevin Moore and Dan Nussbaum of the
31   Scalable Systems Research Group at Sun Microsystems Laboratories
32   (http://research.sun.com/scalable/) to support the Adaptive
33   Transactional Memory Test Platform (ATMTP).
34
35   Please send email to atmtp-interest@sun.com with feedback, questions, or
36   to request future announcements about ATMTP.
37
38   ----------------------------------------------------------------------
39
40   File modification date: 2008-02-23
41
42   ----------------------------------------------------------------------
43*/
44
45// Allows use of times() library call, which determines virtual runtime
46#include <sys/resource.h>
47#include <sys/times.h>
48
49#include <algorithm>
50#include <fstream>
51
52#include "base/stl_helpers.hh"
53#include "base/str.hh"
54#include "mem/protocol/MachineType.hh"
55#include "mem/protocol/Protocol.hh"
56#include "mem/protocol/RubyRequest.hh"
57#include "mem/ruby/network/Network.hh"
58#include "mem/ruby/profiler/AddressProfiler.hh"
59#include "mem/ruby/profiler/Profiler.hh"
60#include "mem/ruby/system/System.hh"
61
62using namespace std;
63using m5::stl_helpers::operator<<;
64
65static double process_memory_total();
66static double process_memory_resident();
67
68Profiler::Profiler(const Params *p)
69    : SimObject(p)
70{
71    m_inst_profiler_ptr = NULL;
72    m_address_profiler_ptr = NULL;
73
74    m_real_time_start_time = time(NULL); // Not reset in clearStats()
75    m_stats_period = 1000000; // Default
76    m_periodic_output_file_ptr = &cerr;
77
78    m_hot_lines = p->hot_lines;
79    m_all_instructions = p->all_instructions;
80
81    m_num_of_sequencers = p->num_of_sequencers;
82
83    m_hot_lines = false;
84    m_all_instructions = false;
85
86    m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
87    m_address_profiler_ptr->setHotLines(m_hot_lines);
88    m_address_profiler_ptr->setAllInstructions(m_all_instructions);
89
90    if (m_all_instructions) {
91        m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
92        m_inst_profiler_ptr->setHotLines(m_hot_lines);
93        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
94    }
95}
96
97Profiler::~Profiler()
98{
99    if (m_periodic_output_file_ptr != &cerr) {
100        delete m_periodic_output_file_ptr;
101    }
102}
103
104void
105Profiler::wakeup()
106{
107    // FIXME - avoid the repeated code
108
109    vector<integer_t> perProcCycleCount(m_num_of_sequencers);
110
111    for (int i = 0; i < m_num_of_sequencers; i++) {
112        perProcCycleCount[i] =
113            g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
114        // The +1 allows us to avoid division by zero
115    }
116
117    ostream &out = *m_periodic_output_file_ptr;
118
119    out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl
120        << "mbytes_resident: " << process_memory_resident() << endl
121        << "mbytes_total: " << process_memory_total() << endl;
122
123    if (process_memory_total() > 0) {
124        out << "resident_ratio: "
125            << process_memory_resident() / process_memory_total() << endl;
126    }
127
128    out << "miss_latency: " << m_allMissLatencyHistogram << endl;
129
130    out << endl;
131
132    if (m_all_instructions) {
133        m_inst_profiler_ptr->printStats(out);
134    }
135
136    //g_system_ptr->getNetwork()->printStats(out);
137    g_eventQueue_ptr->scheduleEvent(this, m_stats_period);
138}
139
140void
141Profiler::setPeriodicStatsFile(const string& filename)
142{
143    cout << "Recording periodic statistics to file '" << filename << "' every "
144         << m_stats_period << " Ruby cycles" << endl;
145
146    if (m_periodic_output_file_ptr != &cerr) {
147        delete m_periodic_output_file_ptr;
148    }
149
150    m_periodic_output_file_ptr = new ofstream(filename.c_str());
151    g_eventQueue_ptr->scheduleEvent(this, 1);
152}
153
154void
155Profiler::setPeriodicStatsInterval(integer_t period)
156{
157    cout << "Recording periodic statistics every " << m_stats_period
158         << " Ruby cycles" << endl;
159
160    m_stats_period = period;
161    g_eventQueue_ptr->scheduleEvent(this, 1);
162}
163
164void
165Profiler::printConfig(ostream& out) const
166{
167    out << endl;
168    out << "Profiler Configuration" << endl;
169    out << "----------------------" << endl;
170    out << "periodic_stats_period: " << m_stats_period << endl;
171}
172
173void
174Profiler::print(ostream& out) const
175{
176    out << "[Profiler]";
177}
178
179void
180Profiler::printStats(ostream& out, bool short_stats)
181{
182    out << endl;
183    if (short_stats) {
184        out << "SHORT ";
185    }
186    out << "Profiler Stats" << endl;
187    out << "--------------" << endl;
188
189    time_t real_time_current = time(NULL);
190    double seconds = difftime(real_time_current, m_real_time_start_time);
191    double minutes = seconds / 60.0;
192    double hours = minutes / 60.0;
193    double days = hours / 24.0;
194    Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start;
195
196    if (!short_stats) {
197        out << "Elapsed_time_in_seconds: " << seconds << endl;
198        out << "Elapsed_time_in_minutes: " << minutes << endl;
199        out << "Elapsed_time_in_hours: " << hours << endl;
200        out << "Elapsed_time_in_days: " << days << endl;
201        out << endl;
202    }
203
204    // print the virtual runtimes as well
205    struct tms vtime;
206    times(&vtime);
207    seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0;
208    minutes = seconds / 60.0;
209    hours = minutes / 60.0;
210    days = hours / 24.0;
211    out << "Virtual_time_in_seconds: " << seconds << endl;
212    out << "Virtual_time_in_minutes: " << minutes << endl;
213    out << "Virtual_time_in_hours:   " << hours << endl;
214    out << "Virtual_time_in_days:    " << days << endl;
215    out << endl;
216
217    out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl;
218    out << "Ruby_start_time: " << m_ruby_start << endl;
219    out << "Ruby_cycles: " << ruby_cycles << endl;
220    out << endl;
221
222    if (!short_stats) {
223        out << "mbytes_resident: " << process_memory_resident() << endl;
224        out << "mbytes_total: " << process_memory_total() << endl;
225        if (process_memory_total() > 0) {
226            out << "resident_ratio: "
227                << process_memory_resident()/process_memory_total() << endl;
228        }
229        out << endl;
230    }
231
232    vector<integer_t> perProcCycleCount(m_num_of_sequencers);
233
234    for (int i = 0; i < m_num_of_sequencers; i++) {
235        perProcCycleCount[i] =
236            g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
237        // The +1 allows us to avoid division by zero
238    }
239
240    out << "ruby_cycles_executed: " << perProcCycleCount << endl;
241
242    out << endl;
243
244    if (!short_stats) {
245        out << "Busy Controller Counts:" << endl;
246        for (int i = 0; i < MachineType_NUM; i++) {
247            int size = MachineType_base_count((MachineType)i);
248            for (int j = 0; j < size; j++) {
249                MachineID machID;
250                machID.type = (MachineType)i;
251                machID.num = j;
252                out << machID << ":" << m_busyControllerCount[i][j] << "  ";
253                if ((j + 1) % 8 == 0) {
254                    out << endl;
255                }
256            }
257            out << endl;
258        }
259        out << endl;
260
261        out << "Busy Bank Count:" << m_busyBankCount << endl;
262        out << endl;
263
264        out << "sequencer_requests_outstanding: "
265            << m_sequencer_requests << endl;
266        out << endl;
267    }
268
269    if (!short_stats) {
270        out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
271        out << "----------------------------------------" << endl;
272        out << "miss_latency: " << m_allMissLatencyHistogram << endl;
273        for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
274            if (m_missLatencyHistograms[i].size() > 0) {
275                out << "miss_latency_" << RubyRequestType(i) << ": "
276                    << m_missLatencyHistograms[i] << endl;
277            }
278        }
279        for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
280            if (m_machLatencyHistograms[i].size() > 0) {
281                out << "miss_latency_" << GenericMachineType(i) << ": "
282                    << m_machLatencyHistograms[i] << endl;
283            }
284        }
285
286        out << "miss_latency_wCC_issue_to_initial_request: "
287            << m_wCCIssueToInitialRequestHistogram << endl;
288        out << "miss_latency_wCC_initial_forward_request: "
289            << m_wCCInitialRequestToForwardRequestHistogram << endl;
290        out << "miss_latency_wCC_forward_to_first_response: "
291            << m_wCCForwardRequestToFirstResponseHistogram << endl;
292        out << "miss_latency_wCC_first_response_to_completion: "
293            << m_wCCFirstResponseToCompleteHistogram << endl;
294        out << "imcomplete_wCC_Times: " << m_wCCIncompleteTimes << endl;
295        out << "miss_latency_dir_issue_to_initial_request: "
296            << m_dirIssueToInitialRequestHistogram << endl;
297        out << "miss_latency_dir_initial_forward_request: "
298            << m_dirInitialRequestToForwardRequestHistogram << endl;
299        out << "miss_latency_dir_forward_to_first_response: "
300            << m_dirForwardRequestToFirstResponseHistogram << endl;
301        out << "miss_latency_dir_first_response_to_completion: "
302            << m_dirFirstResponseToCompleteHistogram << endl;
303        out << "imcomplete_dir_Times: " << m_dirIncompleteTimes << endl;
304
305        for (int i = 0; i < m_missMachLatencyHistograms.size(); i++) {
306            for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
307                if (m_missMachLatencyHistograms[i][j].size() > 0) {
308                    out << "miss_latency_" << RubyRequestType(i)
309                        << "_" << GenericMachineType(j) << ": "
310                        << m_missMachLatencyHistograms[i][j] << endl;
311                }
312            }
313        }
314
315        out << endl;
316
317        out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
318        out << "------------------------------------" << endl;
319        out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
320        for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
321            if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
322                out << "prefetch_latency_" << RubyRequestType(i) << ": "
323                    << m_SWPrefetchLatencyHistograms[i] << endl;
324            }
325        }
326        for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
327            if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
328                out << "prefetch_latency_" << GenericMachineType(i) << ": "
329                    << m_SWPrefetchMachLatencyHistograms[i] << endl;
330            }
331        }
332        out << "prefetch_latency_L2Miss:"
333            << m_SWPrefetchL2MissLatencyHistogram << endl;
334
335        if (m_all_sharing_histogram.size() > 0) {
336            out << "all_sharing: " << m_all_sharing_histogram << endl;
337            out << "read_sharing: " << m_read_sharing_histogram << endl;
338            out << "write_sharing: " << m_write_sharing_histogram << endl;
339
340            out << "all_sharing_percent: ";
341            m_all_sharing_histogram.printPercent(out);
342            out << endl;
343
344            out << "read_sharing_percent: ";
345            m_read_sharing_histogram.printPercent(out);
346            out << endl;
347
348            out << "write_sharing_percent: ";
349            m_write_sharing_histogram.printPercent(out);
350            out << endl;
351
352            int64 total_miss = m_cache_to_cache +  m_memory_to_cache;
353            out << "all_misses: " << total_miss << endl;
354            out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
355            out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
356            out << "cache_to_cache_percent: "
357                << 100.0 * (double(m_cache_to_cache) / double(total_miss))
358                << endl;
359            out << "memory_to_cache_percent: "
360                << 100.0 * (double(m_memory_to_cache) / double(total_miss))
361                << endl;
362            out << endl;
363        }
364
365        if (m_outstanding_requests.size() > 0) {
366            out << "outstanding_requests: ";
367            m_outstanding_requests.printPercent(out);
368            out << endl;
369            out << endl;
370        }
371    }
372
373    if (!short_stats) {
374        out << "Request vs. RubySystem State Profile" << endl;
375        out << "--------------------------------" << endl;
376        out << endl;
377
378        map<string, int>::const_iterator i = m_requestProfileMap.begin();
379        map<string, int>::const_iterator end = m_requestProfileMap.end();
380        for (; i != end; ++i) {
381            const string &key = i->first;
382            int count = i->second;
383
384            double percent = (100.0 * double(count)) / double(m_requests);
385            vector<string> items;
386            tokenize(items, key, ':');
387            vector<string>::iterator j = items.begin();
388            vector<string>::iterator end = items.end();
389            for (; j != end; ++i)
390                out << setw(10) << *j;
391            out << setw(11) << count;
392            out << setw(14) << percent << endl;
393        }
394        out << endl;
395
396        out << "filter_action: " << m_filter_action_histogram << endl;
397
398        if (!m_all_instructions) {
399            m_address_profiler_ptr->printStats(out);
400        }
401
402        if (m_all_instructions) {
403            m_inst_profiler_ptr->printStats(out);
404        }
405
406        out << endl;
407        out << "Message Delayed Cycles" << endl;
408        out << "----------------------" << endl;
409        out << "Total_delay_cycles: " <<   m_delayedCyclesHistogram << endl;
410        out << "Total_nonPF_delay_cycles: "
411            << m_delayedCyclesNonPFHistogram << endl;
412        for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
413            out << "  virtual_network_" << i << "_delay_cycles: "
414                << m_delayedCyclesVCHistograms[i] << endl;
415        }
416
417        printResourceUsage(out);
418    }
419}
420
421void
422Profiler::printResourceUsage(ostream& out) const
423{
424    out << endl;
425    out << "Resource Usage" << endl;
426    out << "--------------" << endl;
427
428    integer_t pagesize = getpagesize(); // page size in bytes
429    out << "page_size: " << pagesize << endl;
430
431    rusage usage;
432    getrusage (RUSAGE_SELF, &usage);
433
434    out << "user_time: " << usage.ru_utime.tv_sec << endl;
435    out << "system_time: " << usage.ru_stime.tv_sec << endl;
436    out << "page_reclaims: " << usage.ru_minflt << endl;
437    out << "page_faults: " << usage.ru_majflt << endl;
438    out << "swaps: " << usage.ru_nswap << endl;
439    out << "block_inputs: " << usage.ru_inblock << endl;
440    out << "block_outputs: " << usage.ru_oublock << endl;
441}
442
443void
444Profiler::clearStats()
445{
446    m_ruby_start = g_eventQueue_ptr->getTime();
447
448    m_cycles_executed_at_start.resize(m_num_of_sequencers);
449    for (int i = 0; i < m_num_of_sequencers; i++) {
450        if (g_system_ptr == NULL) {
451            m_cycles_executed_at_start[i] = 0;
452        } else {
453            m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i);
454        }
455    }
456
457    m_busyControllerCount.resize(MachineType_NUM); // all machines
458    for (int i = 0; i < MachineType_NUM; i++) {
459        int size = MachineType_base_count((MachineType)i);
460        m_busyControllerCount[i].resize(size);
461        for (int j = 0; j < size; j++) {
462            m_busyControllerCount[i][j] = 0;
463        }
464    }
465    m_busyBankCount = 0;
466
467    m_delayedCyclesHistogram.clear();
468    m_delayedCyclesNonPFHistogram.clear();
469    int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks();
470    m_delayedCyclesVCHistograms.resize(size);
471    for (int i = 0; i < size; i++) {
472        m_delayedCyclesVCHistograms[i].clear();
473    }
474
475    m_missLatencyHistograms.resize(RubyRequestType_NUM);
476    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
477        m_missLatencyHistograms[i].clear(200);
478    }
479    m_machLatencyHistograms.resize(GenericMachineType_NUM+1);
480    for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
481        m_machLatencyHistograms[i].clear(200);
482    }
483    m_missMachLatencyHistograms.resize(RubyRequestType_NUM);
484    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
485        m_missMachLatencyHistograms[i].resize(GenericMachineType_NUM+1);
486        for (int j = 0; j < m_missMachLatencyHistograms[i].size(); j++) {
487            m_missMachLatencyHistograms[i][j].clear(200);
488        }
489    }
490    m_allMissLatencyHistogram.clear(200);
491    m_wCCIssueToInitialRequestHistogram.clear(200);
492    m_wCCInitialRequestToForwardRequestHistogram.clear(200);
493    m_wCCForwardRequestToFirstResponseHistogram.clear(200);
494    m_wCCFirstResponseToCompleteHistogram.clear(200);
495    m_wCCIncompleteTimes = 0;
496    m_dirIssueToInitialRequestHistogram.clear(200);
497    m_dirInitialRequestToForwardRequestHistogram.clear(200);
498    m_dirForwardRequestToFirstResponseHistogram.clear(200);
499    m_dirFirstResponseToCompleteHistogram.clear(200);
500    m_dirIncompleteTimes = 0;
501
502    m_SWPrefetchLatencyHistograms.resize(RubyRequestType_NUM);
503    for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
504        m_SWPrefetchLatencyHistograms[i].clear(200);
505    }
506    m_SWPrefetchMachLatencyHistograms.resize(GenericMachineType_NUM+1);
507    for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
508        m_SWPrefetchMachLatencyHistograms[i].clear(200);
509    }
510    m_allSWPrefetchLatencyHistogram.clear(200);
511
512    m_sequencer_requests.clear();
513    m_read_sharing_histogram.clear();
514    m_write_sharing_histogram.clear();
515    m_all_sharing_histogram.clear();
516    m_cache_to_cache = 0;
517    m_memory_to_cache = 0;
518
519    // clear HashMaps
520    m_requestProfileMap.clear();
521
522    // count requests profiled
523    m_requests = 0;
524
525    m_outstanding_requests.clear();
526    m_outstanding_persistent_requests.clear();
527
528    // Flush the prefetches through the system - used so that there
529    // are no outstanding requests after stats are cleared
530    //g_eventQueue_ptr->triggerAllEvents();
531
532    // update the start time
533    m_ruby_start = g_eventQueue_ptr->getTime();
534}
535
536void
537Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
538{
539    if (msg.getType() != RubyRequestType_IFETCH) {
540        // Note: The following line should be commented out if you
541        // want to use the special profiling that is part of the GS320
542        // protocol
543
544        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
545        // profiled by the AddressProfiler
546        m_address_profiler_ptr->
547            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
548                           msg.getType(), msg.getAccessMode(), id, false);
549    }
550}
551
552void
553Profiler::profileSharing(const Address& addr, AccessType type,
554                         NodeID requestor, const Set& sharers,
555                         const Set& owner)
556{
557    Set set_contacted(owner);
558    if (type == AccessType_Write) {
559        set_contacted.addSet(sharers);
560    }
561    set_contacted.remove(requestor);
562    int number_contacted = set_contacted.count();
563
564    if (type == AccessType_Write) {
565        m_write_sharing_histogram.add(number_contacted);
566    } else {
567        m_read_sharing_histogram.add(number_contacted);
568    }
569    m_all_sharing_histogram.add(number_contacted);
570
571    if (number_contacted == 0) {
572        m_memory_to_cache++;
573    } else {
574        m_cache_to_cache++;
575    }
576}
577
578void
579Profiler::profileMsgDelay(int virtualNetwork, int delayCycles)
580{
581    assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
582    m_delayedCyclesHistogram.add(delayCycles);
583    m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
584    if (virtualNetwork != 0) {
585        m_delayedCyclesNonPFHistogram.add(delayCycles);
586    }
587}
588
589// profiles original cache requests including PUTs
590void
591Profiler::profileRequest(const string& requestStr)
592{
593    m_requests++;
594
595    // if it doesn't exist, conveniently, it will be created with the
596    // default value which is 0
597    m_requestProfileMap[requestStr]++;
598}
599
600void
601Profiler::controllerBusy(MachineID machID)
602{
603    m_busyControllerCount[(int)machID.type][(int)machID.num]++;
604}
605
606void
607Profiler::profilePFWait(Time waitTime)
608{
609    m_prefetchWaitHistogram.add(waitTime);
610}
611
612void
613Profiler::bankBusy()
614{
615    m_busyBankCount++;
616}
617
618// non-zero cycle demand request
619void
620Profiler::missLatency(Time cycles,
621                      RubyRequestType type,
622                      const GenericMachineType respondingMach)
623{
624    m_allMissLatencyHistogram.add(cycles);
625    m_missLatencyHistograms[type].add(cycles);
626    m_machLatencyHistograms[respondingMach].add(cycles);
627    m_missMachLatencyHistograms[type][respondingMach].add(cycles);
628}
629
630void
631Profiler::missLatencyWcc(Time issuedTime,
632                         Time initialRequestTime,
633                         Time forwardRequestTime,
634                         Time firstResponseTime,
635                         Time completionTime)
636{
637    if ((issuedTime <= initialRequestTime) &&
638        (initialRequestTime <= forwardRequestTime) &&
639        (forwardRequestTime <= firstResponseTime) &&
640        (firstResponseTime <= completionTime)) {
641        m_wCCIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
642
643        m_wCCInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
644                                                         initialRequestTime);
645
646        m_wCCForwardRequestToFirstResponseHistogram.add(firstResponseTime -
647                                                        forwardRequestTime);
648
649        m_wCCFirstResponseToCompleteHistogram.add(completionTime -
650                                                  firstResponseTime);
651    } else {
652        m_wCCIncompleteTimes++;
653    }
654}
655
656void
657Profiler::missLatencyDir(Time issuedTime,
658                         Time initialRequestTime,
659                         Time forwardRequestTime,
660                         Time firstResponseTime,
661                         Time completionTime)
662{
663    if ((issuedTime <= initialRequestTime) &&
664        (initialRequestTime <= forwardRequestTime) &&
665        (forwardRequestTime <= firstResponseTime) &&
666        (firstResponseTime <= completionTime)) {
667        m_dirIssueToInitialRequestHistogram.add(initialRequestTime - issuedTime);
668
669        m_dirInitialRequestToForwardRequestHistogram.add(forwardRequestTime -
670                                                         initialRequestTime);
671
672        m_dirForwardRequestToFirstResponseHistogram.add(firstResponseTime -
673                                                        forwardRequestTime);
674
675        m_dirFirstResponseToCompleteHistogram.add(completionTime -
676                                                  firstResponseTime);
677    } else {
678        m_dirIncompleteTimes++;
679    }
680}
681
682// non-zero cycle prefetch request
683void
684Profiler::swPrefetchLatency(Time cycles,
685                            RubyRequestType type,
686                            const GenericMachineType respondingMach)
687{
688    m_allSWPrefetchLatencyHistogram.add(cycles);
689    m_SWPrefetchLatencyHistograms[type].add(cycles);
690    m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
691    if (respondingMach == GenericMachineType_Directory ||
692        respondingMach == GenericMachineType_NUM) {
693        m_SWPrefetchL2MissLatencyHistogram.add(cycles);
694    }
695}
696
// Helper function
//
// Returns the total program size of this process in megabytes, taken
// from the first field of /proc/self/statm (a page count).  Returns 0
// when the file cannot be opened or parsed.
static double
process_memory_total()
{
    const double BYTES_PER_MB = 1024.0 * 1024.0;

    std::ifstream proc_file("/proc/self/statm");
    long total_size_in_pages = 0;
    proc_file >> total_size_in_pages;
    if (!proc_file) {
        return 0.0;
    }

    // Ask the system for the page size rather than assuming 4kB, which
    // under-reports on platforms configured with larger pages.
    const double bytes_per_page = double(getpagesize());
    return double(total_size_in_pages) * bytes_per_page / BYTES_PER_MB;
}
711
// Returns the resident set size of this process in megabytes, taken
// from the second field of /proc/self/statm (a page count).  Returns 0
// when the file cannot be opened or parsed.
static double
process_memory_resident()
{
    const double BYTES_PER_MB = 1024.0 * 1024.0;

    std::ifstream proc_file("/proc/self/statm");
    long total_size_in_pages = 0;
    long res_size_in_pages = 0;
    // the resident size is the second field, so the first one has to be
    // consumed before it can be read
    proc_file >> total_size_in_pages >> res_size_in_pages;
    if (!proc_file) {
        return 0.0;
    }

    // Ask the system for the page size rather than assuming 4kB, which
    // under-reports on platforms configured with larger pages.
    const double bytes_per_page = double(getpagesize());
    return double(res_size_in_pages) * bytes_per_page / BYTES_PER_MB;
}
725
726void
727Profiler::rubyWatch(int id)
728{
729    uint64 tr = 0;
730    Address watch_address = Address(tr);
731
732    DPRINTFN("%7s %3s RUBY WATCH %d\n", g_eventQueue_ptr->getTime(), id,
733        watch_address);
734
735    // don't care about success or failure
736    m_watch_address_set.insert(watch_address);
737}
738
739bool
740Profiler::watchAddress(Address addr)
741{
742    return m_watch_address_set.count(addr) > 0;
743}
744
745Profiler *
746RubyProfilerParams::create()
747{
748    return new Profiler(this);
749}
750